changeset 13020:4cd1b06ce224 mvt

multiple value return
author roland
date Thu, 08 Jun 2017 18:25:59 +0200
parents e31f5c06438d
children 06653fdee38e
files src/cpu/ppc/vm/globals_ppc.hpp src/cpu/sparc/vm/globals_sparc.hpp src/cpu/x86/vm/globals_x86.hpp src/cpu/x86/vm/interp_masm_x86.cpp src/cpu/x86/vm/interp_masm_x86.hpp src/cpu/x86/vm/interpreterRT_x86_32.cpp src/cpu/x86/vm/macroAssembler_x86.cpp src/cpu/x86/vm/sharedRuntime_x86_32.cpp src/cpu/x86/vm/sharedRuntime_x86_64.cpp src/cpu/x86/vm/stubGenerator_x86_64.cpp src/cpu/x86/vm/templateInterpreterGenerator_x86.cpp src/cpu/x86/vm/templateTable_x86.cpp src/cpu/zero/vm/globals_zero.hpp src/share/vm/c1/c1_IR.hpp src/share/vm/code/compiledMethod.cpp src/share/vm/code/debugInfoRec.cpp src/share/vm/code/debugInfoRec.hpp src/share/vm/code/nmethod.cpp src/share/vm/code/pcDesc.hpp src/share/vm/code/scopeDesc.cpp src/share/vm/code/scopeDesc.hpp src/share/vm/jvmci/jvmciCodeInstaller.cpp src/share/vm/oops/method.cpp src/share/vm/oops/method.hpp src/share/vm/oops/valueKlass.cpp src/share/vm/oops/valueKlass.hpp src/share/vm/opto/callGenerator.cpp src/share/vm/opto/callGenerator.hpp src/share/vm/opto/callnode.cpp src/share/vm/opto/callnode.hpp src/share/vm/opto/compile.cpp src/share/vm/opto/compile.hpp src/share/vm/opto/divnode.cpp src/share/vm/opto/divnode.hpp src/share/vm/opto/doCall.cpp src/share/vm/opto/escape.cpp src/share/vm/opto/generateOptoStub.cpp src/share/vm/opto/graphKit.cpp src/share/vm/opto/lcm.cpp src/share/vm/opto/machnode.cpp src/share/vm/opto/machnode.hpp src/share/vm/opto/macro.cpp src/share/vm/opto/matcher.cpp src/share/vm/opto/matcher.hpp src/share/vm/opto/memnode.cpp src/share/vm/opto/memnode.hpp src/share/vm/opto/multnode.cpp src/share/vm/opto/multnode.hpp src/share/vm/opto/output.cpp src/share/vm/opto/parse1.cpp src/share/vm/opto/runtime.cpp src/share/vm/opto/runtime.hpp src/share/vm/opto/type.cpp src/share/vm/opto/type.hpp src/share/vm/opto/valuetypenode.cpp src/share/vm/opto/valuetypenode.hpp src/share/vm/prims/jvmtiCodeBlobEvents.cpp src/share/vm/runtime/arguments.cpp src/share/vm/runtime/deoptimization.cpp src/share/vm/runtime/deoptimization.hpp src/share/vm/runtime/globals.hpp src/share/vm/runtime/safepoint.cpp src/share/vm/runtime/sharedRuntime.cpp src/share/vm/runtime/sharedRuntime.hpp src/share/vm/runtime/signature.cpp src/share/vm/runtime/signature.hpp src/share/vm/runtime/stubRoutines.cpp src/share/vm/runtime/stubRoutines.hpp src/share/vm/shark/sharkCacheDecache.cpp test/compiler/valhalla/valuetypes/ValueTypeTestBench.java
diffstat 70 files changed, 2185 insertions(+), 507 deletions(-)
--- a/src/cpu/ppc/vm/globals_ppc.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/cpu/ppc/vm/globals_ppc.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -74,6 +74,7 @@
 define_pd_global(bool, PreserveFramePointer,  false);
 
 define_pd_global(bool, ValueTypePassFieldsAsArgs, false);
+define_pd_global(bool, ValueTypeReturnedAsFields, false);
 
 // GC Ergo Flags
 define_pd_global(size_t, CMSYoungGenPerWorker, 16*M);  // Default max size of CMS young gen, per GC worker thread.
--- a/src/cpu/sparc/vm/globals_sparc.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/cpu/sparc/vm/globals_sparc.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -79,6 +79,7 @@
 define_pd_global(bool, PreserveFramePointer, false);
 
 define_pd_global(bool, ValueTypePassFieldsAsArgs, false);
+define_pd_global(bool, ValueTypeReturnedAsFields, false);
 
 // GC Ergo Flags
 define_pd_global(size_t, CMSYoungGenPerWorker, 16*M);  // default max size of CMS young gen, per GC worker thread
--- a/src/cpu/x86/vm/globals_x86.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/cpu/x86/vm/globals_x86.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -102,6 +102,7 @@
 define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
 
 define_pd_global(bool, ValueTypePassFieldsAsArgs, LP64_ONLY(true) NOT_LP64(false));
+define_pd_global(bool, ValueTypeReturnedAsFields, LP64_ONLY(true) NOT_LP64(false));
 
 #define ARCH_FLAGS(develop, \
                    product, \
--- a/src/cpu/x86/vm/interp_masm_x86.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/cpu/x86/vm/interp_masm_x86.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -934,7 +934,8 @@
         Register ret_addr,
         bool throw_monitor_exception,
         bool install_monitor_exception,
-        bool notify_jvmdi) {
+        bool notify_jvmdi,
+        bool load_values) {
   // Note: Registers rdx xmm0 may be in use for the
   // result check if synchronized method
   Label unlocked, unlock, no_unlock;
@@ -1102,6 +1103,13 @@
 
     bind(no_reserved_zone_enabling);
   }
+  if (load_values) {
+    // We are returning a value type; load its fields into registers
+    super_call_VM_leaf(StubRoutines::load_value_type_fields_in_regs());
+
+    // The call above kills the value in rbx. Reload it.
+    movptr(rbx, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize));
+  }
   leave();                           // remove frame anchor
   pop(ret_addr);                     // get return address
   mov(rsp, rbx);                     // set sp to sender sp
--- a/src/cpu/x86/vm/interp_masm_x86.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/cpu/x86/vm/interp_masm_x86.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -215,7 +215,8 @@
   void remove_activation(TosState state, Register ret_addr,
                          bool throw_monitor_exception = true,
                          bool install_monitor_exception = true,
-                         bool notify_jvmdi = true);
+                         bool notify_jvmdi = true,
+                         bool load_values = false);
   void get_method_counters(Register method, Register mcs, Label& skip);
 
   // Object locking
--- a/src/cpu/x86/vm/interpreterRT_x86_32.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/cpu/x86/vm/interpreterRT_x86_32.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -60,10 +60,6 @@
   box (offset(), jni_offset() + 1);
 }
 
-void InterpreterRuntime::SignatureHandlerGenerator::pass_valuetype() {
-  box (offset(), jni_offset() + 1);
-}
-
 void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) {
   __ movl(temp(), Address(from(), Interpreter::local_offset_in_bytes(from_offset)));
   __ movl(Address(to(), to_offset * wordSize), temp());
@@ -138,13 +134,6 @@
     _from -= Interpreter::stackElementSize;
    }
 
-  virtual void pass_valuetype() {
-    // pass address of from
-    intptr_t from_addr = (intptr_t)(_from + Interpreter::local_offset_in_bytes(0));
-    *_to++ = (*(intptr_t*)from_addr == 0) ? NULL_WORD : from_addr;
-    _from -= Interpreter::stackElementSize;
-  }
-
  public:
   SlowSignatureHandler(methodHandle method, address from, intptr_t* to) :
     NativeSignatureIterator(method) {
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -2616,6 +2616,10 @@
   call_VM_leaf(entry_point, 3);
 }
 
+void MacroAssembler::super_call_VM_leaf(address entry_point) {
+  MacroAssembler::call_VM_leaf_base(entry_point, 1);
+}
+
 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
   pass_arg0(this, arg_0);
   MacroAssembler::call_VM_leaf_base(entry_point, 1);
--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -509,6 +509,15 @@
   return round_to(stack, 2);
 }
 
+const uint SharedRuntime::java_return_convention_max_int = 1;
+const uint SharedRuntime::java_return_convention_max_float = 1;
+int SharedRuntime::java_return_convention(const BasicType *sig_bt,
+                                          VMRegPair *regs,
+                                          int total_args_passed) {
+  Unimplemented();
+  return 0;
+}
+
 // Patch the callers callsite with entry to compiled code if it exists.
 static void patch_callers_callsite(MacroAssembler *masm) {
   Label L;
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -518,6 +518,87 @@
   return round_to(stk_args, 2);
 }
 
+// Same as java_calling_convention() but for multiple return
+// values. There's no way to store them on the stack, so if we don't
+// have enough registers, multiple values can't be returned.
+const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1;
+const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;
+int SharedRuntime::java_return_convention(const BasicType *sig_bt,
+                                          VMRegPair *regs,
+                                          int total_args_passed) {
+  // Create the mapping between argument positions and
+  // registers.
+  static const Register INT_ArgReg[java_return_convention_max_int] = {
+    rax, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0
+  };
+  static const XMMRegister FP_ArgReg[java_return_convention_max_float] = {
+    j_farg0, j_farg1, j_farg2, j_farg3,
+    j_farg4, j_farg5, j_farg6, j_farg7
+  };
+
+
+  uint int_args = 0;
+  uint fp_args = 0;
+
+  for (int i = 0; i < total_args_passed; i++) {
+    switch (sig_bt[i]) {
+    case T_BOOLEAN:
+    case T_CHAR:
+    case T_BYTE:
+    case T_SHORT:
+    case T_INT:
+      if (int_args < Argument::n_int_register_parameters_j+1) {
+        regs[i].set1(INT_ArgReg[int_args]->as_VMReg());
+        int_args++;
+      } else {
+        return -1;
+      }
+      break;
+    case T_VOID:
+      // halves of T_LONG or T_DOUBLE
+      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
+      regs[i].set_bad();
+      break;
+    case T_LONG:
+      assert(sig_bt[i + 1] == T_VOID, "expecting half");
+      // fall through
+    case T_OBJECT:
+    case T_ARRAY:
+    case T_ADDRESS:
+    case T_METADATA:
+      if (int_args < Argument::n_int_register_parameters_j+1) {
+        regs[i].set2(INT_ArgReg[int_args]->as_VMReg());
+        int_args++;
+      } else {
+        return -1;
+      }
+      break;
+    case T_FLOAT:
+      if (fp_args < Argument::n_float_register_parameters_j) {
+        regs[i].set1(FP_ArgReg[fp_args]->as_VMReg());
+        fp_args++;
+      } else {
+        return -1;
+      }
+      break;
+    case T_DOUBLE:
+      assert(sig_bt[i + 1] == T_VOID, "expecting half");
+      if (fp_args < Argument::n_float_register_parameters_j) {
+        regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
+        fp_args++;
+      } else {
+        return -1;
+      }
+      break;
+    default:
+      ShouldNotReachHere();
+      break;
+    }
+  }
+
+  return int_args + fp_args;
+}
+
 // Patch the callers callsite with entry to compiled code if it exists.
 static void patch_callers_callsite(MacroAssembler *masm) {
   Label L;
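
For illustration, a hedged sketch of how this return convention might be queried; it mirrors ValueKlass::return_convention() later in this changeset, and the value type layout shown (one long field, one int field) is assumed for the example:

  // Sketch: query the return convention for a value type with fields
  // (long, int). sig_bt[0] is T_METADATA because the klass pointer is
  // returned alongside the fields (in rax, per INT_ArgReg above).
  BasicType sig_bt[] = { T_METADATA,       // klass of the value type
                         T_LONG, T_VOID,   // 64-bit field + its half
                         T_INT };          // 32-bit field
  VMRegPair regs[4];
  int total = SharedRuntime::java_return_convention(sig_bt, regs, 4);
  if (total <= 0) {
    // Not enough registers: fall back to returning a reference.
  } else {
    // regs[0] = rax (klass); regs[1] and regs[3] hold the fields,
    // regs[2] is the unused half of the long.
  }
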
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -4975,6 +4975,146 @@
     StubRoutines::_fpu_subnormal_bias2[2]= 0x7bff;
   }
 
+  // Called from the interpreter or compiled code either to load the
+  // fields of the value type instance being returned into registers,
+  // or to store the returned field values into a newly allocated
+  // value type instance.
+  address generate_return_value_stub(address destination, const char* name, bool has_res) {
+    // We need to save all registers the calling convention may use
+    // so that the runtime call can read or update them. This must be
+    // kept in sync with SharedRuntime::java_return_convention().
+    enum layout {
+      pad_off = frame::arg_reg_save_area_bytes/BytesPerInt, pad_off_2,
+      rax_off, rax_off_2,
+      j_rarg5_off, j_rarg5_2,
+      j_rarg4_off, j_rarg4_2,
+      j_rarg3_off, j_rarg3_2,
+      j_rarg2_off, j_rarg2_2,
+      j_rarg1_off, j_rarg1_2,
+      j_rarg0_off, j_rarg0_2,
+      j_farg0_off, j_farg0_2,
+      j_farg1_off, j_farg1_2,
+      j_farg2_off, j_farg2_2,
+      j_farg3_off, j_farg3_2,
+      j_farg4_off, j_farg4_2,
+      j_farg5_off, j_farg5_2,
+      j_farg6_off, j_farg6_2,
+      j_farg7_off, j_farg7_2,
+      rbp_off, rbp_off_2,
+      return_off, return_off_2,
+
+      framesize
+    };
+
+    CodeBuffer buffer(name, 1000, 512);
+    MacroAssembler* masm = new MacroAssembler(&buffer);
+
+    int frame_size_in_bytes = round_to(framesize*BytesPerInt, 16);
+    assert(frame_size_in_bytes == framesize*BytesPerInt, "misaligned");
+    int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
+    int frame_size_in_words = frame_size_in_bytes / wordSize;
+
+    OopMapSet *oop_maps = new OopMapSet();
+    OopMap* map = new OopMap(frame_size_in_slots, 0);
+
+    map->set_callee_saved(VMRegImpl::stack2reg(rax_off), rax->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_rarg5_off), j_rarg5->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_rarg4_off), j_rarg4->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_rarg3_off), j_rarg3->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_rarg2_off), j_rarg2->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_rarg1_off), j_rarg1->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_rarg0_off), j_rarg0->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_farg0_off), j_farg0->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_farg1_off), j_farg1->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_farg2_off), j_farg2->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_farg3_off), j_farg3->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_farg4_off), j_farg4->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_farg5_off), j_farg5->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_farg6_off), j_farg6->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_farg7_off), j_farg7->as_VMReg());
+
+    int start = __ offset();
+
+    __ subptr(rsp, frame_size_in_bytes - 8 /* return address*/);
+
+    __ movptr(Address(rsp, rbp_off * BytesPerInt), rbp);
+    __ movdbl(Address(rsp, j_farg7_off * BytesPerInt), j_farg7);
+    __ movdbl(Address(rsp, j_farg6_off * BytesPerInt), j_farg6);
+    __ movdbl(Address(rsp, j_farg5_off * BytesPerInt), j_farg5);
+    __ movdbl(Address(rsp, j_farg4_off * BytesPerInt), j_farg4);
+    __ movdbl(Address(rsp, j_farg3_off * BytesPerInt), j_farg3);
+    __ movdbl(Address(rsp, j_farg2_off * BytesPerInt), j_farg2);
+    __ movdbl(Address(rsp, j_farg1_off * BytesPerInt), j_farg1);
+    __ movdbl(Address(rsp, j_farg0_off * BytesPerInt), j_farg0);
+
+    __ movptr(Address(rsp, j_rarg0_off * BytesPerInt), j_rarg0);
+    __ movptr(Address(rsp, j_rarg1_off * BytesPerInt), j_rarg1);
+    __ movptr(Address(rsp, j_rarg2_off * BytesPerInt), j_rarg2);
+    __ movptr(Address(rsp, j_rarg3_off * BytesPerInt), j_rarg3);
+    __ movptr(Address(rsp, j_rarg4_off * BytesPerInt), j_rarg4);
+    __ movptr(Address(rsp, j_rarg5_off * BytesPerInt), j_rarg5);
+    __ movptr(Address(rsp, rax_off * BytesPerInt), rax);
+
+    int frame_complete = __ offset();
+
+    __ set_last_Java_frame(noreg, noreg, NULL);
+
+    __ mov(c_rarg0, r15_thread);
+    __ mov(c_rarg1, rax);
+
+    __ call(RuntimeAddress(destination));
+
+    // Set an oopmap for the call site.
+
+    oop_maps->add_gc_map( __ offset() - start, map);
+
+    // clear last_Java_sp
+    __ reset_last_Java_frame(false);
+
+    __ movptr(rbp, Address(rsp, rbp_off * BytesPerInt));
+    __ movdbl(j_farg7, Address(rsp, j_farg7_off * BytesPerInt));
+    __ movdbl(j_farg6, Address(rsp, j_farg6_off * BytesPerInt));
+    __ movdbl(j_farg5, Address(rsp, j_farg5_off * BytesPerInt));
+    __ movdbl(j_farg4, Address(rsp, j_farg4_off * BytesPerInt));
+    __ movdbl(j_farg3, Address(rsp, j_farg3_off * BytesPerInt));
+    __ movdbl(j_farg2, Address(rsp, j_farg2_off * BytesPerInt));
+    __ movdbl(j_farg1, Address(rsp, j_farg1_off * BytesPerInt));
+    __ movdbl(j_farg0, Address(rsp, j_farg0_off * BytesPerInt));
+
+    __ movptr(j_rarg0, Address(rsp, j_rarg0_off * BytesPerInt));
+    __ movptr(j_rarg1, Address(rsp, j_rarg1_off * BytesPerInt));
+    __ movptr(j_rarg2, Address(rsp, j_rarg2_off * BytesPerInt));
+    __ movptr(j_rarg3, Address(rsp, j_rarg3_off * BytesPerInt));
+    __ movptr(j_rarg4, Address(rsp, j_rarg4_off * BytesPerInt));
+    __ movptr(j_rarg5, Address(rsp, j_rarg5_off * BytesPerInt));
+    __ movptr(rax, Address(rsp, rax_off * BytesPerInt));
+
+    __ addptr(rsp, frame_size_in_bytes-8);
+
+    // check for pending exceptions
+    Label pending;
+    __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
+    __ jcc(Assembler::notEqual, pending);
+
+    if (has_res) {
+      __ get_vm_result(rax, r15_thread);
+    }
+
+    __ ret(0);
+
+    __ bind(pending);
+
+    __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
+    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+
+    // -------------
+    // make sure all code is generated
+    masm->flush();
+
+    RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, false);
+    return stub->entry_point();
+  }
+
   // Initialization
   void generate_initial() {
     // Generates all stubs and initializes the entry points
@@ -5076,6 +5216,9 @@
         StubRoutines::_dtan = generate_libmTan();
       }
     }
+
+    StubRoutines::_load_value_type_fields_in_regs = generate_return_value_stub(CAST_FROM_FN_PTR(address, SharedRuntime::load_value_type_fields_in_regs), "load_value_type_fields_in_regs", false);
+    StubRoutines::_store_value_type_fields_to_buf = generate_return_value_stub(CAST_FROM_FN_PTR(address, SharedRuntime::store_value_type_fields_to_buf), "store_value_type_fields_to_buf", true);
   }
 
   void generate_all() {
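
As a hedged reading of the stub above (the runtime entries it targets live in sharedRuntime.cpp, which this changeset also touches but whose bodies are not shown here), its control flow is:

  // 1. Reserve a frame and spill rax plus every register that
  //    SharedRuntime::java_return_convention() may hand out (see the
  //    layout enum); an oop map describes the spill slots to the GC.
  // 2. Call destination(r15_thread, rax):
  //    - load_value_type_fields_in_regs: rax holds a buffered value
  //      type oop; the runtime is expected to write the field values
  //      into the spilled register slots.
  //    - store_value_type_fields_to_buf: rax holds a ValueKlass* (or
  //      an already buffered oop); the runtime is expected to allocate
  //      and fill an instance, publishing it via the thread's
  //      vm_result (has_res == true, fetched by get_vm_result below).
  // 3. Restore every register from the (possibly updated) slots.
  // 4. Jump to forward_exception_entry if an exception is pending,
  //    otherwise return.
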
--- a/src/cpu/x86/vm/templateInterpreterGenerator_x86.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/cpu/x86/vm/templateInterpreterGenerator_x86.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -203,9 +203,16 @@
   // and NULL it as marker that esp is now tos until next java call
   __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
 
+  if (state == qtos && ValueTypeReturnedAsFields) {
+    // A value type is being returned. If its fields are in registers
+    // we need to allocate a value type instance and initialize it
+    // with the values of the fields.
+    __ super_call_VM_leaf(StubRoutines::store_value_type_fields_to_buf());
+  }
+
   __ restore_bcp();
   __ restore_locals();
-
+  
   if (state == atos) {
     Register mdp = rbx;
     Register tmp = rcx;
--- a/src/cpu/x86/vm/templateTable_x86.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/cpu/x86/vm/templateTable_x86.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -2626,7 +2626,7 @@
   if (state == itos) {
     __ narrow(rax);
   }
-  __ remove_activation(state, rbcp);
+  __ remove_activation(state, rbcp, true, true, true, state == qtos && ValueTypeReturnedAsFields);
 
   __ jmp(rbcp);
 }
--- a/src/cpu/zero/vm/globals_zero.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/cpu/zero/vm/globals_zero.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -79,6 +79,7 @@
 define_pd_global(bool, PreserveFramePointer, false);
 
 define_pd_global(bool, ValueTypePassFieldsAsArgs, false);
+define_pd_global(bool, ValueTypeReturnedAsFields, false);
 
 // No performance work done here yet.
 define_pd_global(bool, CompactStrings, false);
--- a/src/share/vm/c1/c1_IR.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/c1/c1_IR.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -244,7 +244,7 @@
     bool reexecute = topmost ? should_reexecute() : false;
     bool return_oop = false; // This flag will be ignored since it used only for C2 with escape analysis.
     bool rethrow_exception = false;
-    recorder->describe_scope(pc_offset, methodHandle(), scope()->method(), bci(), reexecute, rethrow_exception, is_method_handle_invoke, return_oop, locvals, expvals, monvals);
+    recorder->describe_scope(pc_offset, methodHandle(), scope()->method(), bci(), reexecute, rethrow_exception, is_method_handle_invoke, return_oop, false, locvals, expvals, monvals);
   }
 };
 
--- a/src/share/vm/code/compiledMethod.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/code/compiledMethod.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -215,7 +215,7 @@
   guarantee(pd != NULL, "scope must be present");
   return new ScopeDesc(this, pd->scope_decode_offset(),
                        pd->obj_decode_offset(), pd->should_reexecute(), pd->rethrow_exception(),
-                       pd->return_oop());
+                       pd->return_oop(), pd->return_vt());
 }
 
 void CompiledMethod::cleanup_inline_caches(bool clean_all/*=false*/) {
--- a/src/share/vm/code/debugInfoRec.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/code/debugInfoRec.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -287,6 +287,7 @@
                                               bool        rethrow_exception,
                                               bool        is_method_handle_invoke,
                                               bool        return_oop,
+                                              bool        return_vt,
                                               DebugToken* locals,
                                               DebugToken* expressions,
                                               DebugToken* monitors) {
@@ -303,6 +304,7 @@
   last_pd->set_rethrow_exception(rethrow_exception);
   last_pd->set_is_method_handle_invoke(is_method_handle_invoke);
   last_pd->set_return_oop(return_oop);
+  last_pd->set_return_vt(return_vt);
 
   // serialize sender stream offest
   stream()->write_int(sender_stream_offset);
--- a/src/share/vm/code/debugInfoRec.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/code/debugInfoRec.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -105,6 +105,7 @@
                       bool        rethrow_exception = false,
                       bool        is_method_handle_invoke = false,
                       bool        return_oop = false,
+                      bool        return_vt  = false,
                       DebugToken* locals      = NULL,
                       DebugToken* expressions = NULL,
                       DebugToken* monitors    = NULL);
--- a/src/share/vm/code/nmethod.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/code/nmethod.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -2159,7 +2159,7 @@
   assert(pd != NULL, "PcDesc must exist");
   for (ScopeDesc* sd = new ScopeDesc(this, pd->scope_decode_offset(),
                                      pd->obj_decode_offset(), pd->should_reexecute(), pd->rethrow_exception(),
-                                     pd->return_oop());
+                                     pd->return_oop(), pd->return_vt());
        !sd->is_top(); sd = sd->sender()) {
     sd->verify();
   }
@@ -2519,7 +2519,7 @@
   if (p != NULL && p->real_pc(this) <= end) {
     return new ScopeDesc(this, p->scope_decode_offset(),
                          p->obj_decode_offset(), p->should_reexecute(), p->rethrow_exception(),
-                         p->return_oop());
+                         p->return_oop(), p->return_vt());
   }
   return NULL;
 }
@@ -2695,7 +2695,7 @@
           }
         }
       }
-      st->print(" {reexecute=%d rethrow=%d return_oop=%d}", sd->should_reexecute(), sd->rethrow_exception(), sd->return_oop());
+      st->print(" {reexecute=%d rethrow=%d return_oop=%d return_vt=%d}", sd->should_reexecute(), sd->rethrow_exception(), sd->return_oop(), sd->return_vt());
     }
 
     // Print all scopes
--- a/src/share/vm/code/pcDesc.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/code/pcDesc.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -43,7 +43,8 @@
     PCDESC_reexecute               = 1 << 0,
     PCDESC_is_method_handle_invoke = 1 << 1,
     PCDESC_return_oop              = 1 << 2,
-    PCDESC_rethrow_exception       = 1 << 3
+    PCDESC_rethrow_exception       = 1 << 3,
+    PCDESC_return_vt               = 1 << 4
   };
 
   int _flags;
@@ -90,6 +91,9 @@
   bool     return_oop()                    const { return (_flags & PCDESC_return_oop) != 0;     }
   void set_return_oop(bool z)                    { set_flag(PCDESC_return_oop, z); }
 
+  bool     return_vt()                     const { return (_flags & PCDESC_return_vt) != 0;     }
+  void set_return_vt(bool z)                     { set_flag(PCDESC_return_vt, z); }
+
   // Returns the real pc
   address real_pc(const CompiledMethod* code) const;
 
--- a/src/share/vm/code/scopeDesc.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/code/scopeDesc.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -30,23 +30,25 @@
 #include "oops/oop.inline.hpp"
 #include "runtime/handles.inline.hpp"
 
-ScopeDesc::ScopeDesc(const CompiledMethod* code, int decode_offset, int obj_decode_offset, bool reexecute, bool rethrow_exception, bool return_oop) {
+ScopeDesc::ScopeDesc(const CompiledMethod* code, int decode_offset, int obj_decode_offset, bool reexecute, bool rethrow_exception, bool return_oop, bool return_vt) {
   _code          = code;
   _decode_offset = decode_offset;
   _objects       = decode_object_values(obj_decode_offset);
   _reexecute     = reexecute;
   _rethrow_exception = rethrow_exception;
   _return_oop    = return_oop;
+  _return_vt     = return_vt;
   decode_body();
 }
 
-ScopeDesc::ScopeDesc(const CompiledMethod* code, int decode_offset, bool reexecute, bool rethrow_exception, bool return_oop) {
+ScopeDesc::ScopeDesc(const CompiledMethod* code, int decode_offset, bool reexecute, bool rethrow_exception, bool return_oop, bool return_vt) {
   _code          = code;
   _decode_offset = decode_offset;
   _objects       = decode_object_values(DebugInformationRecorder::serialized_null);
   _reexecute     = reexecute;
   _rethrow_exception = rethrow_exception;
   _return_oop    = return_oop;
+  _return_vt     = return_vt;
   decode_body();
 }
 
@@ -58,6 +60,7 @@
   _reexecute     = false; //reexecute only applies to the first scope
   _rethrow_exception = false;
   _return_oop    = false;
+  _return_vt     = false;
   decode_body();
 }
 
--- a/src/share/vm/code/scopeDesc.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/code/scopeDesc.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -60,12 +60,12 @@
 class ScopeDesc : public ResourceObj {
  public:
   // Constructor
-  ScopeDesc(const CompiledMethod* code, int decode_offset, int obj_decode_offset, bool reexecute, bool rethrow_exception, bool return_oop);
+  ScopeDesc(const CompiledMethod* code, int decode_offset, int obj_decode_offset, bool reexecute, bool rethrow_exception, bool return_oop, bool return_vt);
 
   // Calls above, giving default value of "serialized_null" to the
   // "obj_decode_offset" argument.  (We don't use a default argument to
   // avoid a .hpp-.hpp dependency.)
-  ScopeDesc(const CompiledMethod* code, int decode_offset, bool reexecute, bool rethrow_exception, bool return_oop);
+  ScopeDesc(const CompiledMethod* code, int decode_offset, bool reexecute, bool rethrow_exception, bool return_oop, bool return_vt);
 
   // JVM state
   Method* method()      const { return _method; }
@@ -73,6 +73,7 @@
   bool should_reexecute() const { return _reexecute; }
   bool rethrow_exception() const { return _rethrow_exception; }
   bool return_oop()       const { return _return_oop; }
+  bool return_vt()        const { return _return_vt; }
 
   GrowableArray<ScopeValue*>*   locals();
   GrowableArray<ScopeValue*>*   expressions();
@@ -98,6 +99,7 @@
   bool          _reexecute;
   bool          _rethrow_exception;
   bool          _return_oop;
+  bool          _return_vt;
 
   // Decoding offsets
   int _decode_offset;
--- a/src/share/vm/jvmci/jvmciCodeInstaller.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/jvmci/jvmciCodeInstaller.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -1095,7 +1095,7 @@
     throw_exception = BytecodeFrame::rethrowException(frame) == JNI_TRUE;
   }
 
-  _debug_recorder->describe_scope(pc_offset, method, NULL, bci, reexecute, throw_exception, false, return_oop,
+  _debug_recorder->describe_scope(pc_offset, method, NULL, bci, reexecute, throw_exception, false, return_oop, false,
                                   locals_token, expressions_token, monitors_token);
 }
 
--- a/src/share/vm/oops/method.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/oops/method.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -46,6 +46,7 @@
 #include "oops/objArrayOop.inline.hpp"
 #include "oops/oop.inline.hpp"
 #include "oops/symbol.hpp"
+#include "oops/valueKlass.hpp"
 #include "prims/jvmtiExport.hpp"
 #include "prims/methodHandles.hpp"
 #include "prims/nativeLookup.hpp"
@@ -467,6 +468,27 @@
   return rtf.type();
 }
 
+#ifdef ASSERT
+// ValueKlass the method is declared to return. This must not
+// safepoint as it is called with references live on the stack at
+// locations the GC is unaware of.
+ValueKlass* Method::returned_value_type(Thread* thread) const {
+  assert(is_returning_vt(), "method return type should be value type");
+  SignatureStream ss(signature());
+  while (!ss.at_return_type()) {
+    ss.next();
+  }
+  Handle class_loader(thread, method_holder()->class_loader());
+  Handle protection_domain(thread, method_holder()->protection_domain());
+  Klass* k = NULL;
+  {
+    NoSafepointVerifier nsv;
+    k = ss.as_klass(class_loader, protection_domain, SignatureStream::ReturnNull, thread);
+  }
+  assert(k != NULL && !thread->has_pending_exception(), "can't resolve klass");
+  return ValueKlass::cast(k);
+}
+#endif
 
 bool Method::is_empty_method() const {
   return  code_size() == 1
--- a/src/share/vm/oops/method.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/oops/method.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -571,7 +571,11 @@
   Symbol* klass_name() const;                    // returns the name of the method holder
   BasicType result_type() const;                 // type of the method result
   bool is_returning_oop() const                  { BasicType r = result_type(); return (r == T_OBJECT || r == T_ARRAY); }
+  bool is_returning_vt() const                   { BasicType r = result_type(); return r == T_VALUETYPE; }
   bool is_returning_fp() const                   { BasicType r = result_type(); return (r == T_FLOAT || r == T_DOUBLE); }
+#ifdef ASSERT
+  ValueKlass* returned_value_type(Thread* thread) const;
+#endif
 
   // Checked exceptions thrown by this method (resolved to mirrors)
   objArrayHandle resolved_checked_exceptions(TRAPS) { return resolved_checked_exceptions_impl(this, THREAD); }
--- a/src/share/vm/oops/valueKlass.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/oops/valueKlass.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "gc/shared/gcLocker.inline.hpp"
 #include "interpreter/interpreter.hpp"
 #include "oops/oop.inline.hpp"
 #include "oops/fieldStreams.hpp"
@@ -30,6 +31,7 @@
 #include "oops/objArrayKlass.hpp"
 #include "oops/valueKlass.hpp"
 #include "oops/valueArrayKlass.hpp"
+#include "runtime/signature.hpp"
 #include "utilities/copy.hpp"
 
 int ValueKlass::first_field_offset() const {
@@ -238,3 +240,264 @@
   value_store(data_for_oop(src()), data_for_oop(value), true, true);
   return value;
 }
+
+// Value type arguments are not passed by reference; instead, each
+// field of the value type is passed as an argument. This helper
+// function collects the fields of the value type (including embedded
+// value types' fields) in a list. Included with each field's type is
+// its offset in the value type: the i2c and c2i adapters need that to
+// load or store fields. Finally, the list of fields is sorted in
+// order of increasing offsets: the adapters and the compiled code
+// need an agreed-upon order of fields.
+//
+// The list of basic types that is returned starts with a T_VALUETYPE
+// and ends with an extra T_VOID. T_VALUETYPE/T_VOID are used as
+// delimiters. Every entry between the two is a field of the value
+// type. If there's an embedded value type in the list, it also starts
+// with a T_VALUETYPE and ends with a T_VOID. This is so we can
+// generate a unique fingerprint for the method's adapters and we can
+// generate the list of basic types from the interpreter point of view
+// (value types passed by reference: iterate on the list until a
+// T_VALUETYPE, drop everything until and including the closing
+// T_VOID) or the compiler point of view (each field of the value
+// types is an argument: drop all T_VALUETYPE/T_VOID from the list).
+GrowableArray<SigEntry> ValueKlass::collect_fields(int base_off) const {
+  GrowableArray<SigEntry> sig_extended;
+  sig_extended.push(SigEntry(T_VALUETYPE, base_off));
+  for (JavaFieldStream fs(this); !fs.done(); fs.next()) {
+    if (fs.access_flags().is_static())  continue;
+    fieldDescriptor& fd = fs.field_descriptor();
+    BasicType bt = fd.field_type();
+    int offset = base_off + fd.offset() - (base_off > 0 ? first_field_offset() : 0);
+    if (bt == T_VALUETYPE) {
+      Symbol* signature = fd.signature();
+      JavaThread* THREAD = JavaThread::current();
+      oop loader = class_loader();
+      oop domain = protection_domain();
+      ResetNoHandleMark rnhm;
+      HandleMark hm;
+      NoSafepointVerifier nsv;
+      Klass* klass = SystemDictionary::resolve_or_null(signature,
+                                                       Handle(THREAD, loader), Handle(THREAD, domain),
+                                                       THREAD);
+      assert(klass != NULL && !HAS_PENDING_EXCEPTION, "lookup shouldn't fail");
+      const GrowableArray<SigEntry>& embedded = ValueKlass::cast(klass)->collect_fields(offset);
+      sig_extended.appendAll(&embedded);
+    } else {
+      sig_extended.push(SigEntry(bt, offset));
+      if (bt == T_LONG || bt == T_DOUBLE) {
+        sig_extended.push(SigEntry(T_VOID, offset));
+      }
+    }
+  }
+  int offset = base_off + size_helper()*HeapWordSize - (base_off > 0 ? first_field_offset() : 0);
+  sig_extended.push(SigEntry(T_VOID, offset)); // hack: use T_VOID to mark end of value type fields
+  if (base_off == 0) {
+    sig_extended.sort(SigEntry::compare);
+  }
+  assert(sig_extended.at(0)._bt == T_VALUETYPE && sig_extended.at(sig_extended.length()-1)._bt == T_VOID, "broken structure");
+  return sig_extended;
+}
+
+// Returns the basic types of the fields, and the registers used to
+// return an instance of this value type in registers, if possible.
+GrowableArray<SigEntry> ValueKlass::return_convention(VMRegPair*& regs, int& nb_fields) const {
+  assert(ValueTypeReturnedAsFields, "inconsistent");
+  const GrowableArray<SigEntry>& sig_vk = collect_fields();
+  nb_fields = SigEntry::count_fields(sig_vk)+1;
+  BasicType* sig_bt = NEW_RESOURCE_ARRAY(BasicType, nb_fields);
+  sig_bt[0] = T_METADATA;
+  SigEntry::fill_sig_bt(sig_vk, sig_bt+1, nb_fields-1, true);
+  regs = NEW_RESOURCE_ARRAY(VMRegPair, nb_fields);
+  int total = SharedRuntime::java_return_convention(sig_bt, regs, nb_fields);
+
+  if (total <= 0) {
+    regs = NULL;
+  }
+  
+  return sig_vk;
+}
+
+// Create handles for all oop fields returned in registers that are
+// going to be live across a safepoint.
+bool ValueKlass::save_oop_results(RegisterMap& reg_map, GrowableArray<Handle>& handles) const {
+  if (ValueTypeReturnedAsFields) {
+    int nb_fields;
+    VMRegPair* regs;
+    const GrowableArray<SigEntry>& sig_vk = return_convention(regs, nb_fields);
+    
+    if (regs != NULL) {
+      regs++;
+      nb_fields--;
+      save_oop_fields(sig_vk, reg_map, regs, handles, nb_fields);
+      return true;
+    }
+  }
+  return false;
+}
+
+// Same as above but with pre-computed return convention
+void ValueKlass::save_oop_fields(const GrowableArray<SigEntry>& sig_vk, RegisterMap& reg_map, const VMRegPair* regs, GrowableArray<Handle>& handles, int nb_fields) const {
+  int j = 0;
+  Thread* thread = Thread::current();
+  for (int i = 0; i < sig_vk.length(); i++) {
+    BasicType bt = sig_vk.at(i)._bt;
+    if (bt == T_OBJECT || bt == T_ARRAY) {
+      int off = sig_vk.at(i)._offset;
+      VMRegPair pair = regs[j];
+      address loc = reg_map.location(pair.first());
+      oop v = *(oop*)loc;
+      assert(v == NULL || v->is_oop(), "not an oop?");
+      assert(Universe::heap()->is_in_or_null(v), "must be heap pointer");
+      handles.push(Handle(thread, v));
+    }
+    if (bt == T_VALUETYPE) {
+      continue;
+    }
+    if (bt == T_VOID &&
+        sig_vk.at(i-1)._bt != T_LONG &&
+        sig_vk.at(i-1)._bt != T_DOUBLE) {
+      continue;
+    }
+    j++;
+  }
+  assert(j == nb_fields, "missed a field?");
+}
+
+// Update oop fields in registers from handles after a safepoint
+void ValueKlass::restore_oop_results(RegisterMap& reg_map, GrowableArray<Handle>& handles) const {
+  assert(ValueTypeReturnedAsFields, "inconsistent");
+  int nb_fields;
+  VMRegPair* regs;
+  const GrowableArray<SigEntry>& sig_vk = return_convention(regs, nb_fields);
+  assert(regs != NULL, "inconsistent");
+
+  regs++;
+  nb_fields--;
+
+  int j = 0;
+  for (int i = 0, k = 0; i < sig_vk.length(); i++) {
+    BasicType bt = sig_vk.at(i)._bt;
+    if (bt == T_OBJECT || bt == T_ARRAY) {
+      int off = sig_vk.at(i)._offset;
+      VMRegPair pair = regs[j];
+      address loc = reg_map.location(pair.first());
+      *(oop*)loc = handles.at(k++)();
+    }
+    if (bt == T_VALUETYPE) {
+      continue;
+    }
+    if (bt == T_VOID &&
+        sig_vk.at(i-1)._bt != T_LONG &&
+        sig_vk.at(i-1)._bt != T_DOUBLE) {
+      continue;
+    }
+    j++;
+  }
+  assert(j == nb_fields, "missed a field?");
+}
+
+// Fields are in registers. Create an instance of the value type and
+// initialize it with the values of the fields.
+oop ValueKlass::realloc_result(const GrowableArray<SigEntry>& sig_vk, const RegisterMap& reg_map, const VMRegPair* regs,
+                               const GrowableArray<Handle>& handles, int nb_fields, TRAPS) {
+  oop new_vt = allocate_instance(CHECK_NULL);
+
+  int j = 0;
+  int k = 0;
+  for (int i = 0; i < sig_vk.length(); i++) {
+    BasicType bt = sig_vk.at(i)._bt;
+    if (bt == T_VALUETYPE) {
+      continue;
+    } 
+    if (bt == T_VOID) {
+      if (sig_vk.at(i-1)._bt == T_LONG ||
+          sig_vk.at(i-1)._bt == T_DOUBLE) {
+        j++;
+      }
+      continue;
+    }
+    int off = sig_vk.at(i)._offset;
+    VMRegPair pair = regs[j];
+    address loc = reg_map.location(pair.first());
+    switch(bt) {
+    case T_BOOLEAN: {
+      jboolean v = *(intptr_t*)loc;
+      *(jboolean*)((address)new_vt + off) = v;
+      break;
+    }
+    case T_CHAR: {
+      jchar v = *(intptr_t*)loc;
+      *(jchar*)((address)new_vt + off) = v;
+      break;
+    }
+    case T_BYTE: {
+      jbyte v = *(intptr_t*)loc;
+      *(jbyte*)((address)new_vt + off) = v;
+      break;
+    }
+    case T_SHORT: {
+      jshort v = *(intptr_t*)loc;
+      *(jshort*)((address)new_vt + off) = v;
+      break;
+    }
+    case T_INT: {
+      jint v = *(intptr_t*)loc;
+      *(jint*)((address)new_vt + off) = v;
+      break;
+    }
+    case T_LONG: {
+#ifdef _LP64
+      jlong v = *(intptr_t*)loc;
+      *(jlong*)((address)new_vt + off) = v;
+#else
+      Unimplemented();
+#endif
+      break;
+    }
+    case T_OBJECT:
+    case T_ARRAY: {
+      Handle handle = handles.at(k++);
+      oop v = handle();
+      if (!UseCompressedOops) {
+        oop* p = (oop*)((address)new_vt + off);
+        oopDesc::store_heap_oop(p, v);
+      } else {
+        narrowOop* p = (narrowOop*)((address)new_vt + off);
+        oopDesc::encode_store_heap_oop(p, v);
+      }
+      break;
+    }
+    case T_FLOAT: {
+      jfloat v = *(jfloat*)loc;
+      *(jfloat*)((address)new_vt + off) = v;
+      break;
+    }
+    case T_DOUBLE: {
+      jdouble v = *(jdouble*)loc;
+      *(jdouble*)((address)new_vt + off) = v;
+      break;
+    }
+    default:
+      ShouldNotReachHere();
+    }
+    j++;
+  }
+  assert(j == nb_fields, "missed a field?");
+  assert(k == handles.length(), "missed an oop?");
+  return new_vt;
+}
+
+ValueKlass* ValueKlass::returned_value_type(const RegisterMap& map) {
+  BasicType bt = T_METADATA;
+  VMRegPair pair;
+  int nb = SharedRuntime::java_return_convention(&bt, &pair, 1);
+  assert(nb == 1, "broken");
+  
+  address loc = map.location(pair.first());
+  intptr_t ptr = *(intptr_t*)loc;
+  if (Universe::heap()->is_in_reserved((void*)ptr)) {
+    return NULL;
+  }
+  return (ValueKlass*)ptr;
+}
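
To make the delimiter scheme of ValueKlass::collect_fields() above concrete, a worked example; the class and its field offsets are assumed for illustration and do not come from this changeset:

  // For a hypothetical value type
  //   __ByValue final class Point { int x; long y; }
  // collect_fields(0) would yield, sorted by offset:
  //
  //   T_VALUETYPE(off =  0)   opening delimiter (base_off)
  //   T_INT      (off = 12)   x
  //   T_LONG     (off = 16)   y
  //   T_VOID     (off = 16)   second half of y
  //   T_VOID     (off = 24)   closing delimiter (end of payload)
  //
  // Interpreter view (value passed by reference): on seeing the
  // T_VALUETYPE, drop everything up to and including the closing
  // T_VOID. Compiler view (one argument per field): drop the
  // T_VALUETYPE/T_VOID entries and keep x and y.
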
--- a/src/share/vm/oops/valueKlass.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/oops/valueKlass.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -114,6 +114,14 @@
   template <bool nv, typename T, class OopClosureType>
   inline void oop_iterate_specialized_bounded(const address oop_addr, OopClosureType* closure, void* lo, void* hi);
 
+  // calling convention support
+  GrowableArray<SigEntry> collect_fields(int base_off = 0) const;
+  GrowableArray<SigEntry> return_convention(VMRegPair*& regs, int& nb_fields) const;
+  void save_oop_fields(const GrowableArray<SigEntry>& sig_vk, RegisterMap& map, const VMRegPair* regs, GrowableArray<Handle>& handles, int nb_fields) const;
+  bool save_oop_results(RegisterMap& map, GrowableArray<Handle>& handles) const;
+  void restore_oop_results(RegisterMap& map, GrowableArray<Handle>& handles) const;
+  oop realloc_result(const GrowableArray<SigEntry>& sig_vk, const RegisterMap& reg_map, const VMRegPair* regs, const GrowableArray<Handle>& handles, int nb_fields, TRAPS);
+  static ValueKlass* returned_value_type(const RegisterMap& reg_map);
 };
 
 #endif /* SHARE_VM_OOPS_VALUEKLASS_HPP */
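
A hedged usage sketch of the new oop-preserving entry points declared above; the caller context (reg_map, vk, and the safepointing operation) is assumed rather than taken from this changeset:

  // Keep oop fields that live in return registers alive across code
  // that may safepoint, e.g. while allocating the buffered instance.
  GrowableArray<Handle> handles;
  bool saved = vk->save_oop_results(reg_map, handles);
  // ... work that may trigger a GC ...
  if (saved) {
    // Write the (possibly moved) oops from the handles back into the
    // register locations recorded in reg_map.
    vk->restore_oop_results(reg_map, handles);
  }
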
--- a/src/share/vm/opto/callGenerator.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/callGenerator.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -117,7 +117,7 @@
  private:
   CallStaticJavaNode* _call_node;
   // Force separate memory and I/O projections for the exceptional
-  // paths to facilitate late inlinig.
+  // paths to facilitate late inlining.
   bool                _separate_io_proj;
 
  public:
@@ -125,6 +125,15 @@
     : CallGenerator(method),
       _separate_io_proj(separate_io_proj)
   {
+    if (method->is_method_handle_intrinsic() &&
+        method->signature()->return_type() == ciEnv::current()->___Value_klass()) {
+      // If that call has not been optimized by the time optimizations
+      // are over, we'll need to add a call to create a value type
+      // instance from the klass returned by the call. Separating
+      // memory and I/O projections for exceptions is required to
+      // perform that graph transformation.
+      _separate_io_proj = true;
+    }
   }
   virtual JVMState* generate(JVMState* jvms);
 
@@ -173,10 +182,15 @@
   kit.set_edges_for_java_call(call, false, _separate_io_proj);
   Node* ret = kit.set_results_for_java_call(call, _separate_io_proj);
   // Check if return value is a value type pointer
-  if (gvn.type(ret)->isa_valuetypeptr()) {
-    // Create ValueTypeNode from the oop and replace the return value
-    Node* vt = ValueTypeNode::make(gvn, kit.merged_memory(), ret);
-    kit.push_node(T_VALUETYPE, vt);
+  const TypeValueTypePtr* vtptr = gvn.type(ret)->isa_valuetypeptr();
+  if (vtptr != NULL) {
+    if (vtptr->klass() != kit.C->env()->___Value_klass()) {
+      // Create ValueTypeNode from the oop and replace the return value
+      Node* vt = ValueTypeNode::make(gvn, kit.merged_memory(), ret);
+      kit.push_node(T_VALUETYPE, vt);
+    } else {
+      kit.push_node(T_VALUETYPE, ret);
+    }
   } else {
     kit.push_node(method()->return_type()->basic_type(), ret);
   }
@@ -429,7 +443,7 @@
     } else {
       if (t->isa_valuetypeptr() && t->is_valuetypeptr()->klass() != C->env()->___Value_klass()) {
         ciValueKlass* vk = t->is_valuetypeptr()->value_type()->value_klass();
-        Node* vt = C->create_vt_node(call, vk, vk, 0, j);
+        Node* vt = C->create_vt_node(call, vk, vk, 0, j, true);
         map->set_argument(jvms, i1, gvn.transform(vt));
         j += vk->value_arg_slots();
       } else {
@@ -470,7 +484,8 @@
 
   // Find the result object
   Node* result = C->top();
-  int   result_size = method()->return_type()->size();
+  ciType* return_type = _inline_cg->method()->return_type();
+  int result_size = return_type->size();
   if (result_size != 0 && !kit.stopped()) {
     result = (result_size == 1) ? kit.pop() : kit.pop_pair();
   }
@@ -478,9 +493,29 @@
   C->set_has_loops(C->has_loops() || _inline_cg->method()->has_loops());
   C->env()->notice_inlined_method(_inline_cg->method());
   C->set_inlining_progress(true);
-
-  if (result->is_ValueType()) {
-    result = result->as_ValueType()->store_to_memory(&kit);
+  
+  if (return_type->is_valuetype() && return_type != C->env()->___Value_klass()) {
+    if (result->is_ValueType()) {
+      if (!call->tf()->returns_value_type_as_fields()) {
+        result = result->as_ValueType()->store_to_memory(&kit);
+      } else {
+        // Return of multiple values (the fields of a value type)
+        ValueTypeNode* vt = result->as_ValueType();
+        vt->replace_call_results(call, C);
+      }
+    } else {
+      assert(result->is_top(), "what else?");
+      for (DUIterator_Fast imax, i = call->fast_outs(imax); i < imax; i++) {
+        ProjNode *pn = call->fast_out(i)->as_Proj();
+        uint con = pn->_con;
+        if (con >= TypeFunc::Parms) {
+          // C->gvn_replace_by(pn, C->top());
+          C->initial_gvn()->hash_delete(pn);
+          pn->set_req(0, C->top());
+          --i; --imax;
+        }
+      }
+    }
   }
 
   kit.replace_call(call, result, true);
@@ -520,16 +555,16 @@
 };
 
 bool LateInlineMHCallGenerator::do_late_inline_check(JVMState* jvms) {
-
-  CallGenerator* cg = for_method_handle_inline(jvms, _caller, method(), _input_not_const);
+  
+  CallGenerator* cg = for_method_handle_inline(jvms, _caller, method(), _input_not_const, AlwaysIncrementalInline);
 
   Compile::current()->print_inlining_update_delayed(this);
 
   if (!_input_not_const) {
     _attempt++;
   }
-
-  if (cg != NULL && cg->is_inline()) {
+  
+  if (cg != NULL && (cg->is_inline() || cg->is_inlined_method_handle_intrinsic(jvms, cg->method()))) {
     assert(!cg->is_late_inline(), "we're doing late inlining");
     _inline_cg = cg;
     Compile::current()->dec_number_of_mh_late_inlines();
@@ -831,7 +866,7 @@
 CallGenerator* CallGenerator::for_method_handle_call(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool delayed_forbidden) {
   assert(callee->is_method_handle_intrinsic(), "for_method_handle_call mismatch");
   bool input_not_const;
-  CallGenerator* cg = CallGenerator::for_method_handle_inline(jvms, caller, callee, input_not_const);
+  CallGenerator* cg = CallGenerator::for_method_handle_inline(jvms, caller, callee, input_not_const, false);
   Compile* C = Compile::current();
   if (cg != NULL) {
     if (!delayed_forbidden && AlwaysIncrementalInline) {
@@ -844,8 +879,8 @@
   ciCallProfile profile = caller->call_profile_at_bci(bci);
   int call_site_count = caller->scale_count(profile.count());
 
-  if (IncrementalInline && call_site_count > 0 &&
-      (input_not_const || !C->inlining_incrementally() || C->over_inlining_cutoff())) {
+  if (IncrementalInline && (AlwaysIncrementalInline ||
+                            (call_site_count > 0 && (input_not_const || !C->inlining_incrementally() || C->over_inlining_cutoff())))) {
     return CallGenerator::for_mh_late_inline(caller, callee, input_not_const);
   } else {
     // Out-of-line call.
@@ -853,7 +888,7 @@
   }
 }
 
-CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool& input_not_const) {
+CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool& input_not_const, bool delayed_forbidden) {
   GraphKit kit(jvms);
   PhaseGVN& gvn = kit.gvn();
   Compile* C = kit.C;
@@ -880,7 +915,10 @@
                                               false /* call_does_dispatch */,
                                               jvms,
                                               true /* allow_inline */,
-                                              PROB_ALWAYS);
+                                              PROB_ALWAYS,
+                                              NULL,
+                                              true,
+                                              delayed_forbidden);
         return cg;
       } else {
         print_inlining_failure(C, callee, jvms->depth() - 1, jvms->bci(),
@@ -964,7 +1002,9 @@
         CallGenerator* cg = C->call_generator(target, vtable_index, call_does_dispatch, jvms,
                                               true /* allow_inline */,
                                               PROB_ALWAYS,
-                                              speculative_receiver_type);
+                                              speculative_receiver_type,
+                                              true,
+                                              delayed_forbidden);
         return cg;
       } else {
         print_inlining_failure(C, callee, jvms->depth() - 1, jvms->bci(),
--- a/src/share/vm/opto/callGenerator.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/callGenerator.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -125,7 +125,7 @@
   static CallGenerator* for_virtual_call(ciMethod* m, int vtable_index);  // virtual, interface
 
   static CallGenerator* for_method_handle_call(  JVMState* jvms, ciMethod* caller, ciMethod* callee, bool delayed_forbidden);
-  static CallGenerator* for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool& input_not_const);
+  static CallGenerator* for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool& input_not_const, bool delayed_forbidden);
 
   // How to generate a replace a direct call with an inline version
   static CallGenerator* for_late_inline(ciMethod* m, CallGenerator* inline_cg);
--- a/src/share/vm/opto/callnode.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/callnode.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -40,6 +40,7 @@
 #include "opto/rootnode.hpp"
 #include "opto/runtime.hpp"
 #include "opto/valuetypenode.hpp"
+#include "runtime/sharedRuntime.hpp"
 
 // Portions of code courtesy of Clifford Click
 
@@ -73,7 +74,7 @@
 
 //------------------------------match------------------------------------------
 // Construct projections for incoming parameters, and their RegMask info
-Node *StartNode::match( const ProjNode *proj, const Matcher *match ) {
+Node *StartNode::match(const ProjNode *proj, const Matcher *match, const RegMask* mask) {
   switch (proj->_con) {
   case TypeFunc::Control:
   case TypeFunc::I_O:
@@ -687,14 +688,21 @@
 }
 #endif
 
-const Type *CallNode::bottom_type() const { return tf()->range(); }
+const Type *CallNode::bottom_type() const { return tf()->range_cc(); }
 const Type* CallNode::Value(PhaseGVN* phase) const {
   if (phase->type(in(0)) == Type::TOP)  return Type::TOP;
-  return tf()->range();
+  return tf()->range_cc();
 }
 
 //------------------------------calling_convention-----------------------------
-void CallNode::calling_convention( BasicType* sig_bt, VMRegPair *parm_regs, uint argcnt ) const {
+void CallNode::calling_convention(BasicType* sig_bt, VMRegPair *parm_regs, uint argcnt) const {
+  if (_entry_point == StubRoutines::store_value_type_fields_to_buf()) {
+    // The call to that stub is a special case: its inputs are
+    // multiple values returned from a call and so it should follow
+    // the return convention.
+    SharedRuntime::java_return_convention(sig_bt, parm_regs, argcnt);
+    return;
+  }
   // Use the standard compiler calling convention
   Matcher::calling_convention( sig_bt, parm_regs, argcnt, true );
 }
@@ -703,29 +711,39 @@
 //------------------------------match------------------------------------------
 // Construct projections for control, I/O, memory-fields, ..., and
 // return result(s) along with their RegMask info
-Node *CallNode::match( const ProjNode *proj, const Matcher *match ) {
-  switch (proj->_con) {
+Node *CallNode::match(const ProjNode *proj, const Matcher *match, const RegMask* mask) {
+  uint con = proj->_con;
+  const TypeTuple *range_cc = tf()->range_cc();
+  if (con >= TypeFunc::Parms) {
+    if (is_CallRuntime()) {
+      if (con == TypeFunc::Parms) {
+        uint ideal_reg = range_cc->field_at(TypeFunc::Parms)->ideal_reg();
+        OptoRegPair regs = match->c_return_value(ideal_reg,true);
+        RegMask rm = RegMask(regs.first());
+        if (OptoReg::is_valid(regs.second())) {
+          rm.Insert(regs.second());
+        }
+        return new MachProjNode(this,con,rm,ideal_reg);
+      } else {
+        assert(con == TypeFunc::Parms+1, "only one return value");
+        assert(range_cc->field_at(TypeFunc::Parms+1) == Type::HALF, "");
+        return new MachProjNode(this,con, RegMask::Empty, (uint)OptoReg::Bad);
+      }
+    } else {
+      // The Call may return multiple values (value type fields): we
+      // create one projection per returned values.
+      assert(con <= TypeFunc::Parms+1 || ValueTypeReturnedAsFields, "only for multi value return");
+      uint ideal_reg = range_cc->field_at(con)->ideal_reg();
+      return new MachProjNode(this, con, mask[con-TypeFunc::Parms], ideal_reg);
+    }
+  }
+
+  switch (con) {
   case TypeFunc::Control:
   case TypeFunc::I_O:
   case TypeFunc::Memory:
     return new MachProjNode(this,proj->_con,RegMask::Empty,MachProjNode::unmatched_proj);
 
-  case TypeFunc::Parms+1:       // For LONG & DOUBLE returns
-    assert(tf()->range()->field_at(TypeFunc::Parms+1) == Type::HALF, "");
-    // 2nd half of doubles and longs
-    return new MachProjNode(this,proj->_con, RegMask::Empty, (uint)OptoReg::Bad);
-
-  case TypeFunc::Parms: {       // Normal returns
-    uint ideal_reg = tf()->range()->field_at(TypeFunc::Parms)->ideal_reg();
-    OptoRegPair regs = is_CallRuntime()
-      ? match->c_return_value(ideal_reg,true)  // Calls into C runtime
-      : match->  return_value(ideal_reg,true); // Calls into compiled Java code
-    RegMask rm = RegMask(regs.first());
-    if( OptoReg::is_valid(regs.second()) )
-      rm.Insert( regs.second() );
-    return new MachProjNode(this,proj->_con,rm,ideal_reg);
-  }
-
   case TypeFunc::ReturnAdr:
   case TypeFunc::FramePtr:
   default:
--- a/src/share/vm/opto/callnode.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/callnode.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -80,7 +80,7 @@
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
   virtual void  calling_convention( BasicType* sig_bt, VMRegPair *parm_reg, uint length ) const;
   virtual const RegMask &in_RegMask(uint) const;
-  virtual Node *match( const ProjNode *proj, const Matcher *m );
+  virtual Node *match(const ProjNode *proj, const Matcher *m, const RegMask* mask);
   virtual uint ideal_reg() const { return 0; }
 #ifndef PRODUCT
   virtual void  dump_spec(outputStream *st) const;
@@ -599,7 +599,7 @@
   virtual uint        cmp( const Node &n ) const;
   virtual uint        size_of() const = 0;
   virtual void        calling_convention( BasicType* sig_bt, VMRegPair *parm_regs, uint argcnt ) const;
-  virtual Node       *match( const ProjNode *proj, const Matcher *m );
+  virtual Node       *match(const ProjNode *proj, const Matcher *m, const RegMask* mask);
   virtual uint        ideal_reg() const { return NotAMachineReg; }
   // Are we guaranteed that this node is a safepoint?  Not true for leaf calls and
   // for some macro nodes whose expansion does not have a safepoint on the fast path.
@@ -625,8 +625,9 @@
   Node *result_cast();
   // Does this node returns pointer?
   bool returns_pointer() const {
-    const TypeTuple *r = tf()->range();
-    return (r->cnt() > TypeFunc::Parms &&
+    const TypeTuple *r = tf()->range_sig();
+    return (!tf()->returns_value_type_as_fields() &&
+            r->cnt() > TypeFunc::Parms &&
             r->field_at(TypeFunc::Parms)->isa_ptr());
   }
 
--- a/src/share/vm/opto/compile.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/compile.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -2681,6 +2681,92 @@
   }
 }
 
+void Compile::value_type_return_from_mh_intrinsic(CallNode *call, Final_Reshape_Counts &frc) {
+  if (ValueTypeReturnedAsFields &&
+      call->is_CallStaticJava() &&
+      call->as_CallStaticJava()->method() != NULL &&
+      call->as_CallStaticJava()->method()->is_method_handle_intrinsic() &&
+      call->proj_out(TypeFunc::Parms) != NULL &&
+      call->proj_out(TypeFunc::Parms)->bottom_type()->isa_valuetypeptr()) {
+    // A value type is returned from the call but we don't know its
+    // type. One of the values being returned is the klass of the
+    // value type. We need to allocate a value type instance of that
+    // type and initialize it with the other values being returned.
+    // This is done by the stub call below, which we add right after
+    // this call.
+    Node* ret = call->proj_out(TypeFunc::Parms);
+    assert(ret->bottom_type()->is_valuetypeptr()->klass() == env()->___Value_klass(), "unexpected return type from MH intrinsic");
+    const TypeFunc* tf = call->_tf;
+    const TypeTuple* domain = OptoRuntime::store_value_type_fields_Type()->domain_cc();
+    const TypeFunc* new_tf = TypeFunc::make(tf->domain_sig(), tf->domain_cc(), tf->range_sig(), domain);
+    call->_tf = new_tf;
+
+    CallProjections projs;
+    call->extract_projections(&projs, true, true);
+    Node* ctl = projs.fallthrough_catchproj;
+    Node* mem = projs.fallthrough_memproj;
+    Node* io = projs.fallthrough_ioproj;
+    Node* ex_ctl = projs.catchall_catchproj;
+    Node* ex_mem = projs.catchall_memproj;
+    Node* ex_io = projs.catchall_ioproj;
+    CallStaticJavaNode* rt_call = new CallStaticJavaNode(OptoRuntime::store_value_type_fields_Type(),
+                                                         StubRoutines::store_value_type_fields_to_buf(),
+                                                         "store_value_type_fields",
+                                                         call->jvms()->bci(),
+                                                         TypePtr::BOTTOM);
+    Node* out_ctl = new ProjNode(rt_call, TypeFunc::Control);
+    Node* out_mem = new ProjNode(rt_call, TypeFunc::Memory);
+    Node* out_io = new ProjNode(rt_call, TypeFunc::I_O);
+    Node* res = new ProjNode(rt_call, TypeFunc::Parms);
+
+    Node* catc = new CatchNode(out_ctl, out_io, 2);
+    Node* norm = new CatchProjNode(catc, CatchProjNode::fall_through_index, CatchProjNode::no_handler_bci);
+    Node* excp = new CatchProjNode(catc, CatchProjNode::catch_all_index,    CatchProjNode::no_handler_bci);
+    Node* r = new RegionNode(3);
+    Node* mem_phi = new PhiNode(r, Type::MEMORY, TypePtr::BOTTOM);
+    Node* io_phi = new PhiNode(r, Type::ABIO);
+    r->init_req(1, excp);
+    mem_phi->init_req(1, out_mem);
+    io_phi->init_req(1, out_io);
+
+    frc._visited.set(norm->_idx);
+    frc._visited.set(excp->_idx);
+
+    ctl->replace_by(norm);
+    mem->replace_by(out_mem);
+    io->replace_by(out_io);
+    ret->replace_by(res);
+    ex_ctl->replace_by(r);
+    ex_mem->replace_by(mem_phi);
+    ex_io->replace_by(io_phi);
+
+    r->init_req(2, ex_ctl);
+    mem_phi->init_req(2, ex_mem);
+    io_phi->init_req(2, ex_io);
+
+    rt_call->init_req(TypeFunc::Control, ctl);
+    rt_call->init_req(TypeFunc::Memory, mem);
+    rt_call->init_req(TypeFunc::I_O, io);
+    rt_call->init_req(TypeFunc::FramePtr, call->in(TypeFunc::FramePtr));
+    rt_call->init_req(TypeFunc::ReturnAdr, call->in(TypeFunc::ReturnAdr));
+
+    rt_call->init_req(TypeFunc::Parms, ret);
+    // We don't know how many values are returned. This assumes the
+    // worst case, that all available registers are used.
+    for (uint i = TypeFunc::Parms+1; i < domain->cnt(); i++) {
+      if (domain->field_at(i) == Type::HALF) {
+        rt_call->init_req(i, top());
+        continue;
+      }
+      Node* proj = new ProjNode(call, i);
+      rt_call->init_req(i, proj);
+    }
+
+    // We can safepoint at this new call
+    add_safepoint_edges(rt_call, call->jvms());
+  }
+}
+
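The rewiring above is essentially a graph splice: every consumer of the call's return-value projection is redirected to the stub call's result projection, and the stub call then consumes the original projection as its input. A minimal standalone sketch of that splice follows, using a toy Node class rather than C2's; only the replace_by pattern is being illustrated.

#include <algorithm>
#include <vector>

// Toy model of the projection splice; not C2's Node class.
struct Node {
  std::vector<Node*> inputs;
  std::vector<Node*> outputs;
  void add_input(Node* n) { inputs.push_back(n); n->outputs.push_back(this); }
  // Redirect every user of 'this' to 'replacement' (cf. Node::replace_by).
  void replace_by(Node* replacement) {
    for (Node* use : outputs) {
      std::replace(use->inputs.begin(), use->inputs.end(),
                   (Node*)this, replacement);
      replacement->outputs.push_back(use);
    }
    outputs.clear();
  }
};

int main() {
  Node call, ret_proj, user, rt_call, res_proj;
  ret_proj.add_input(&call);      // Proj(call, TypeFunc::Parms): the returned oop
  user.add_input(&ret_proj);      // some consumer of that oop
  res_proj.add_input(&rt_call);
  ret_proj.replace_by(&res_proj); // consumers now read the stub call's result
  rt_call.add_input(&ret_proj);   // the stub call consumes the old projection
  return user.inputs[0] == &res_proj ? 0 : 1;
}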
 //------------------------------final_graph_reshaping_impl----------------------
 // Implement items 1-5 from final_graph_reshaping below.
 void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {
@@ -2795,6 +2881,7 @@
         call->set_req( TypeFunc::Parms, x );
       }
     }
+    value_type_return_from_mh_intrinsic(call, frc);
     break;
   }
 
@@ -4618,3 +4705,141 @@
     ni.dump();
   }
 }
+
+// Helper function for enforcing certain bytecodes to reexecute if
+// deoptimization happens
+static bool should_reexecute_implied_by_bytecode(JVMState *jvms, bool is_anewarray) {
+  ciMethod* cur_method = jvms->method();
+  int       cur_bci   = jvms->bci();
+  if (cur_method != NULL && cur_bci != InvocationEntryBci) {
+    Bytecodes::Code code = cur_method->java_code_at_bci(cur_bci);
+    return Interpreter::bytecode_should_reexecute(code) ||
+           (is_anewarray && code == Bytecodes::_multianewarray);
+    // Reexecute _multianewarray bytecode which was replaced with
+    // sequence of [a]newarray. See Parse::do_multianewarray().
+    //
+    // Note: interpreter should not have it set since this optimization
+    // is limited by dimensions and guarded by flag so in some cases
+    // multianewarray() runtime calls will be generated and
+    // the bytecode should not be reexecuted (the stack will not be reset).
+  } else
+    return false;
+}
+
+void Compile::add_safepoint_edges(SafePointNode* call, JVMState* youngest_jvms, bool can_prune_locals, uint stack_slots_not_pruned) {
+  // do not scribble on the input jvms
+  JVMState* out_jvms = youngest_jvms->clone_deep(C);
+  call->set_jvms(out_jvms); // Start jvms list for call node
+
+  // For a known set of bytecodes, the interpreter should reexecute them if
+  // deoptimization happens. We set the reexecute state for them here
+  if (out_jvms->is_reexecute_undefined() && //don't change if already specified
+      should_reexecute_implied_by_bytecode(out_jvms, call->is_AllocateArray())) {
+    out_jvms->set_should_reexecute(true); //NOTE: youngest_jvms not changed
+  }
+
+  // Presize the call:
+  DEBUG_ONLY(uint non_debug_edges = call->req());
+  call->add_req_batch(top(), youngest_jvms->debug_depth());
+  assert(call->req() == non_debug_edges + youngest_jvms->debug_depth(), "");
+
+  // Set up edges so that the call looks like this:
+  //  Call [state:] ctl io mem fptr retadr
+  //       [parms:] parm0 ... parmN
+  //       [root:]  loc0 ... locN stk0 ... stkSP mon0 obj0 ... monN objN
+  //    [...mid:]   loc0 ... locN stk0 ... stkSP mon0 obj0 ... monN objN [...]
+  //       [young:] loc0 ... locN stk0 ... stkSP mon0 obj0 ... monN objN
+  // Note that caller debug info precedes callee debug info.
+
+  // Fill pointer walks backwards from "young:" to "root:" in the diagram above:
+  uint debug_ptr = call->req();
+
+  // Loop over the map input edges associated with jvms, add them
+  // to the call node, & reset all offsets to match call node array.
+  for (JVMState* in_jvms = youngest_jvms; in_jvms != NULL; ) {
+    uint debug_end   = debug_ptr;
+    uint debug_start = debug_ptr - in_jvms->debug_size();
+    debug_ptr = debug_start;  // back up the ptr
+
+    uint p = debug_start;  // walks forward in [debug_start, debug_end)
+    uint j, k, l;
+    SafePointNode* in_map = in_jvms->map();
+    out_jvms->set_map(call);
+
+    if (can_prune_locals) {
+      assert(in_jvms->method() == out_jvms->method(), "sanity");
+      // If the current throw can reach an exception handler in this JVMS,
+      // then we must keep everything live that can reach that handler.
+      // As a quick and dirty approximation, we look for any handlers at all.
+      if (in_jvms->method()->has_exception_handlers()) {
+        can_prune_locals = false;
+      }
+    }
+
+    // Add the Locals
+    k = in_jvms->locoff();
+    l = in_jvms->loc_size();
+    out_jvms->set_locoff(p);
+    if (!can_prune_locals) {
+      for (j = 0; j < l; j++)
+        call->set_req(p++, in_map->in(k+j));
+    } else {
+      p += l;  // already set to top above by add_req_batch
+    }
+
+    // Add the Expression Stack
+    k = in_jvms->stkoff();
+    l = in_jvms->sp();
+    out_jvms->set_stkoff(p);
+    if (!can_prune_locals) {
+      for (j = 0; j < l; j++)
+        call->set_req(p++, in_map->in(k+j));
+    } else if (can_prune_locals && stack_slots_not_pruned != 0) {
+      // Divide stack into {S0,...,S1}, where S0 is set to top.
+      uint s1 = stack_slots_not_pruned;
+      stack_slots_not_pruned = 0;  // for next iteration
+      if (s1 > l)  s1 = l;
+      uint s0 = l - s1;
+      p += s0;  // skip the tops preinstalled by add_req_batch
+      for (j = s0; j < l; j++)
+        call->set_req(p++, in_map->in(k+j));
+    } else {
+      p += l;  // already set to top above by add_req_batch
+    }
+
+    // Add the Monitors
+    k = in_jvms->monoff();
+    l = in_jvms->mon_size();
+    out_jvms->set_monoff(p);
+    for (j = 0; j < l; j++)
+      call->set_req(p++, in_map->in(k+j));
+
+    // Copy any scalar object fields.
+    k = in_jvms->scloff();
+    l = in_jvms->scl_size();
+    out_jvms->set_scloff(p);
+    for (j = 0; j < l; j++)
+      call->set_req(p++, in_map->in(k+j));
+
+    // Finish the new jvms.
+    out_jvms->set_endoff(p);
+
+    assert(out_jvms->endoff()     == debug_end,             "fill ptr must match");
+    assert(out_jvms->depth()      == in_jvms->depth(),      "depth must match");
+    assert(out_jvms->loc_size()   == in_jvms->loc_size(),   "size must match");
+    assert(out_jvms->mon_size()   == in_jvms->mon_size(),   "size must match");
+    assert(out_jvms->scl_size()   == in_jvms->scl_size(),   "size must match");
+    assert(out_jvms->debug_size() == in_jvms->debug_size(), "size must match");
+
+    // Update the two tail pointers in parallel.
+    out_jvms = out_jvms->caller();
+    in_jvms  = in_jvms->caller();
+  }
+
+  assert(debug_ptr == non_debug_edges, "debug info must fit exactly");
+
+  // Test the correctness of JVMState::debug_xxx accessors:
+  assert(call->jvms()->debug_start() == non_debug_edges, "");
+  assert(call->jvms()->debug_end()   == call->req(), "");
+  assert(call->jvms()->debug_depth() == call->req() - non_debug_edges, "");
+}
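The fill-pointer arithmetic above can be checked with a small standalone model: each frame's debug info occupies a contiguous slice, caller frames at lower indices, and walking the JVMState chain backwards must land the pointer exactly on the first debug edge. ToyJVMState below is a stand-in with made-up sizes, not HotSpot's JVMState.

#include <cstdio>

// Stand-in for JVMState; only the sizes matter here.
struct ToyJVMState {
  int locals, stack, monitors, scalars;
  ToyJVMState* caller;
  int debug_size()  const { return locals + stack + monitors + scalars; }
  int debug_depth() const { return debug_size() + (caller ? caller->debug_depth() : 0); }
};

int main() {
  ToyJVMState root  = {4, 1, 0, 0, nullptr};  // caller frame: [root:]
  ToyJVMState young = {2, 3, 0, 0, &root};    // callee frame: [young:]
  int non_debug_edges = 6;                    // ctl io mem fptr retadr + 1 parm
  int debug_ptr = non_debug_edges + young.debug_depth();
  // Walk backwards from the youngest frame, as add_safepoint_edges does;
  // the youngest frame fills the highest slice, the root the lowest.
  for (ToyJVMState* j = &young; j != nullptr; j = j->caller) {
    debug_ptr -= j->debug_size();
    printf("frame fills [%d, %d)\n", debug_ptr, debug_ptr + j->debug_size());
  }
  return debug_ptr == non_debug_edges ? 0 : 1;  // "debug info must fit exactly"
}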
--- a/src/share/vm/opto/compile.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/compile.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -47,6 +47,7 @@
 class AddPNode;
 class Block;
 class Bundle;
+class CallNode;
 class C2Compiler;
 class CallGenerator;
 class CloneMap;
@@ -1278,6 +1279,7 @@
   void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc);
   void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc );
   void eliminate_redundant_card_marks(Node* n);
+  void value_type_return_from_mh_intrinsic(CallNode *call, Final_Reshape_Counts &frc);
 
  public:
 
@@ -1336,7 +1338,9 @@
   void          set_clone_map(Dict* d);
 
   // Create value type node from arguments at method entry
-  Node* create_vt_node(Node* n, ciValueKlass* vk, ciValueKlass* base_vk, int base_offset, int base_input);
+  Node* create_vt_node(Node* n, ciValueKlass* vk, ciValueKlass* base_vk, int base_offset, int base_input, bool in);
+
+  void add_safepoint_edges(SafePointNode* call, JVMState* youngest_jvms, bool can_prune_locals = false, uint stack_slots_not_pruned = 0);
 };
 
 #endif // SHARE_VM_OPTO_COMPILE_HPP
--- a/src/share/vm/opto/divnode.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/divnode.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -1311,7 +1311,7 @@
 
 //------------------------------match------------------------------------------
 // return result(s) along with their RegMask info
-Node *DivModINode::match( const ProjNode *proj, const Matcher *match ) {
+Node *DivModINode::match(const ProjNode *proj, const Matcher *match, const RegMask* mask) {
   uint ideal_reg = proj->ideal_reg();
   RegMask rm;
   if (proj->_con == div_proj_num) {
@@ -1326,7 +1326,7 @@
 
 //------------------------------match------------------------------------------
 // return result(s) along with their RegMask info
-Node *DivModLNode::match( const ProjNode *proj, const Matcher *match ) {
+Node *DivModLNode::match(const ProjNode *proj, const Matcher *match, const RegMask* mask) {
   uint ideal_reg = proj->ideal_reg();
   RegMask rm;
   if (proj->_con == div_proj_num) {
--- a/src/share/vm/opto/divnode.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/divnode.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -165,7 +165,7 @@
   DivModINode( Node *c, Node *dividend, Node *divisor ) : DivModNode(c, dividend, divisor) {}
   virtual int Opcode() const;
   virtual const Type *bottom_type() const { return TypeTuple::INT_PAIR; }
-  virtual Node *match( const ProjNode *proj, const Matcher *m );
+  virtual Node *match(const ProjNode *proj, const Matcher *m, const RegMask* mask);
 
   // Make a divmod and associated projections from a div or mod.
   static DivModINode* make(Node* div_or_mod);
@@ -178,7 +178,7 @@
   DivModLNode( Node *c, Node *dividend, Node *divisor ) : DivModNode(c, dividend, divisor) {}
   virtual int Opcode() const;
   virtual const Type *bottom_type() const { return TypeTuple::LONG_PAIR; }
-  virtual Node *match( const ProjNode *proj, const Matcher *m );
+  virtual Node *match(const ProjNode *proj, const Matcher *m, const RegMask* mask);
 
   // Make a divmod and associated projections from a div or mod.
   static DivModLNode* make(Node* div_or_mod);
--- a/src/share/vm/opto/doCall.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/doCall.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -38,6 +38,7 @@
 #include "opto/rootnode.hpp"
 #include "opto/runtime.hpp"
 #include "opto/subnode.hpp"
+#include "opto/valuetypenode.hpp"
 #include "prims/nativeLookup.hpp"
 #include "runtime/sharedRuntime.hpp"
 
@@ -657,6 +658,21 @@
               push(cast_obj);
             }
           }
+        } else if (rt == T_VALUETYPE) {
+          assert(ct == T_VALUETYPE, "value type expected but got rt=%s, ct=%s", type2name(rt), type2name(ct));
+          if (rtype == C->env()->___Value_klass()) {
+            const Type* sig_type = TypeOopPtr::make_from_klass(ctype->as_klass());
+            Node* retnode = pop();
+            Node* cast = _gvn.transform(new CheckCastPPNode(control(), retnode, sig_type));
+            Node* vt = ValueTypeNode::make(_gvn, merged_memory(), cast);
+            push(vt);
+          } else {
+            assert(ctype == C->env()->___Value_klass(), "unexpected value type klass");
+            Node* retnode = pop();
+            assert(retnode->is_ValueType(), "inconsistent");
+            retnode = retnode->as_ValueType()->store_to_memory(this);
+            push(retnode);
+          }
         } else {
           assert(rt == ct, "unexpected mismatch: rt=%s, ct=%s", type2name(rt), type2name(ct));
           // push a zero; it's better than getting an oop/int mismatch
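The two T_VALUETYPE branches above bridge between the erased __Value return and a concrete value klass in opposite directions: wrap a returned oop into a ValueTypeNode when the call site knows the real type, or buffer a ValueTypeNode to memory when only the erased oop is expected. A sketch of that decision follows; the enums are stand-ins, not ciKlass or Node types.

#include <cassert>

// 'Generic' is the erased __Value return, 'Concrete' a known value klass.
enum class RetKind { Generic, Concrete };
enum class Action  { WrapOopInValueTypeNode, BufferValueTypeToMemory };

Action bridge(RetKind declared, RetKind at_call_site) {
  if (declared == RetKind::Generic) {
    // Callee returns an erased oop; the call site knows the real klass,
    // so cast the oop and rebuild a ValueTypeNode from memory.
    assert(at_call_site == RetKind::Concrete);
    return Action::WrapOopInValueTypeNode;
  }
  // Callee produced a ValueTypeNode, but the call site expects only the
  // erased __Value oop: buffer the value to memory and push the oop.
  assert(at_call_site == RetKind::Generic);
  return Action::BufferValueTypeToMemory;
}

int main() {
  return bridge(RetKind::Generic, RetKind::Concrete)
             == Action::WrapOopInValueTypeNode ? 0 : 1;
}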
--- a/src/share/vm/opto/escape.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/escape.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -370,6 +370,17 @@
           (n->is_CallStaticJava() &&
            n->as_CallStaticJava()->is_boxing_method())) {
         add_call_node(n->as_Call());
+      } else if (n->as_Call()->tf()->returns_value_type_as_fields()) {
+        bool returns_oop = false;
+        for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax && !returns_oop; i++) {
+          ProjNode* pn = n->fast_out(i)->as_Proj();
+          if (pn->_con >= TypeFunc::Parms && pn->bottom_type()->isa_oopptr()) {
+            returns_oop = true;
+          }
+        }
+        if (returns_oop) {
+          add_call_node(n->as_Call());
+        }
       }
     }
     return;
@@ -474,8 +485,10 @@
     }
     case Op_Proj: {
       // we are only interested in the oop result projection from a call
-      if (n->as_Proj()->_con == TypeFunc::Parms && n->in(0)->is_Call() &&
-          n->in(0)->as_Call()->returns_pointer()) {
+      if (n->as_Proj()->_con >= TypeFunc::Parms && n->in(0)->is_Call() &&
+          (n->in(0)->as_Call()->returns_pointer() || n->bottom_type()->isa_oopptr())) {
+        assert((n->as_Proj()->_con == TypeFunc::Parms && n->in(0)->as_Call()->returns_pointer()) ||
+               n->in(0)->as_Call()->tf()->returns_value_type_as_fields(), "what kind of oop return is it?");
         add_local_var_and_edge(n, PointsToNode::NoEscape,
                                n->in(0), delayed_worklist);
       }
@@ -681,8 +694,10 @@
     }
     case Op_Proj: {
       // we are only interested in the oop result projection from a call
-      if (n->as_Proj()->_con == TypeFunc::Parms && n->in(0)->is_Call() &&
-          n->in(0)->as_Call()->returns_pointer()) {
+      if (n->as_Proj()->_con >= TypeFunc::Parms && n->in(0)->is_Call() &&
+          (n->in(0)->as_Call()->returns_pointer()|| n->bottom_type()->isa_oopptr())) {
+        assert((n->as_Proj()->_con == TypeFunc::Parms && n->in(0)->as_Call()->returns_pointer()) ||
+               n->in(0)->as_Call()->tf()->returns_value_type_as_fields(), "what kind of oop return is it?");
         add_local_var_and_edge(n, PointsToNode::NoEscape, n->in(0), NULL);
         break;
       }
@@ -797,7 +812,7 @@
 }
 
 void ConnectionGraph::add_call_node(CallNode* call) {
-  assert(call->returns_pointer(), "only for call which returns pointer");
+  assert(call->returns_pointer() || call->tf()->returns_value_type_as_fields(), "only for call which returns pointer");
   uint call_idx = call->_idx;
   if (call->is_Allocate()) {
     Node* k = call->in(AllocateNode::KlassNode);
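For a call that returns a value type as fields there is no single result projection at TypeFunc::Parms; the connection graph only needs a call node if one of the field projections actually carries an oop, which is what the new loop above checks. A standalone sketch of that scan, with a stand-in projection type:

#include <vector>

// Stand-in for a result projection: its _con and whether its type is an oop.
struct ToyProj { int con; bool is_oop; };
static const int Parms = 5;  // cf. TypeFunc::Parms

bool returns_oop(const std::vector<ToyProj>& projs) {
  for (const ToyProj& p : projs)
    if (p.con >= Parms && p.is_oop)   // any field projection carrying an oop
      return true;
  return false;
}

int main() {
  // klass projection at Parms, an int field, then an oop field
  std::vector<ToyProj> projs = {{Parms, false}, {Parms + 1, false}, {Parms + 2, true}};
  return returns_oop(projs) ? 0 : 1;
}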
--- a/src/share/vm/opto/generateOptoStub.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/generateOptoStub.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -48,7 +48,7 @@
   ResourceMark rm;
 
   const TypeTuple *jdomain = C->tf()->domain_sig();
-  const TypeTuple *jrange  = C->tf()->range();
+  const TypeTuple *jrange  = C->tf()->range_sig();
 
   // The procedure start
   StartNode* start = new StartNode(root(), jdomain);
@@ -286,7 +286,7 @@
                          exit_memory,
                          frameptr(),
                          returnadr());
-    if (C->tf()->range()->cnt() > TypeFunc::Parms)
+    if (C->tf()->range_sig()->cnt() > TypeFunc::Parms)
       ret->add_req( map()->in(TypeFunc::Parms) );
     break;
   case 1:    // This is a fancy tail-call jump.  Jump to computed address.
--- a/src/share/vm/opto/graphKit.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/graphKit.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -815,26 +815,6 @@
 
 #endif //ASSERT
 
-// Helper function for enforcing certain bytecodes to reexecute if
-// deoptimization happens
-static bool should_reexecute_implied_by_bytecode(JVMState *jvms, bool is_anewarray) {
-  ciMethod* cur_method = jvms->method();
-  int       cur_bci   = jvms->bci();
-  if (cur_method != NULL && cur_bci != InvocationEntryBci) {
-    Bytecodes::Code code = cur_method->java_code_at_bci(cur_bci);
-    return Interpreter::bytecode_should_reexecute(code) ||
-           is_anewarray && code == Bytecodes::_multianewarray;
-    // Reexecute _multianewarray bytecode which was replaced with
-    // sequence of [a]newarray. See Parse::do_multianewarray().
-    //
-    // Note: interpreter should not have it set since this optimization
-    // is limited by dimensions and guarded by flag so in some cases
-    // multianewarray() runtime calls will be generated and
-    // the bytecode should not be reexecutes (stack will not be reset).
-  } else
-    return false;
-}
-
 // Helper function for adding JVMState and debug information to node
 void GraphKit::add_safepoint_edges(SafePointNode* call, bool must_throw) {
   // Add the safepoint edges to the call (or other safepoint).
@@ -878,121 +858,7 @@
     stack_slots_not_pruned = 0;
   }
 
-  // do not scribble on the input jvms
-  JVMState* out_jvms = youngest_jvms->clone_deep(C);
-  call->set_jvms(out_jvms); // Start jvms list for call node
-
-  // For a known set of bytecodes, the interpreter should reexecute them if
-  // deoptimization happens. We set the reexecute state for them here
-  if (out_jvms->is_reexecute_undefined() && //don't change if already specified
-      should_reexecute_implied_by_bytecode(out_jvms, call->is_AllocateArray())) {
-    out_jvms->set_should_reexecute(true); //NOTE: youngest_jvms not changed
-  }
-
-  // Presize the call:
-  DEBUG_ONLY(uint non_debug_edges = call->req());
-  call->add_req_batch(top(), youngest_jvms->debug_depth());
-  assert(call->req() == non_debug_edges + youngest_jvms->debug_depth(), "");
-
-  // Set up edges so that the call looks like this:
-  //  Call [state:] ctl io mem fptr retadr
-  //       [parms:] parm0 ... parmN
-  //       [root:]  loc0 ... locN stk0 ... stkSP mon0 obj0 ... monN objN
-  //    [...mid:]   loc0 ... locN stk0 ... stkSP mon0 obj0 ... monN objN [...]
-  //       [young:] loc0 ... locN stk0 ... stkSP mon0 obj0 ... monN objN
-  // Note that caller debug info precedes callee debug info.
-
-  // Fill pointer walks backwards from "young:" to "root:" in the diagram above:
-  uint debug_ptr = call->req();
-
-  // Loop over the map input edges associated with jvms, add them
-  // to the call node, & reset all offsets to match call node array.
-  for (JVMState* in_jvms = youngest_jvms; in_jvms != NULL; ) {
-    uint debug_end   = debug_ptr;
-    uint debug_start = debug_ptr - in_jvms->debug_size();
-    debug_ptr = debug_start;  // back up the ptr
-
-    uint p = debug_start;  // walks forward in [debug_start, debug_end)
-    uint j, k, l;
-    SafePointNode* in_map = in_jvms->map();
-    out_jvms->set_map(call);
-
-    if (can_prune_locals) {
-      assert(in_jvms->method() == out_jvms->method(), "sanity");
-      // If the current throw can reach an exception handler in this JVMS,
-      // then we must keep everything live that can reach that handler.
-      // As a quick and dirty approximation, we look for any handlers at all.
-      if (in_jvms->method()->has_exception_handlers()) {
-        can_prune_locals = false;
-      }
-    }
-
-    // Add the Locals
-    k = in_jvms->locoff();
-    l = in_jvms->loc_size();
-    out_jvms->set_locoff(p);
-    if (!can_prune_locals) {
-      for (j = 0; j < l; j++)
-        call->set_req(p++, in_map->in(k+j));
-    } else {
-      p += l;  // already set to top above by add_req_batch
-    }
-
-    // Add the Expression Stack
-    k = in_jvms->stkoff();
-    l = in_jvms->sp();
-    out_jvms->set_stkoff(p);
-    if (!can_prune_locals) {
-      for (j = 0; j < l; j++)
-        call->set_req(p++, in_map->in(k+j));
-    } else if (can_prune_locals && stack_slots_not_pruned != 0) {
-      // Divide stack into {S0,...,S1}, where S0 is set to top.
-      uint s1 = stack_slots_not_pruned;
-      stack_slots_not_pruned = 0;  // for next iteration
-      if (s1 > l)  s1 = l;
-      uint s0 = l - s1;
-      p += s0;  // skip the tops preinstalled by add_req_batch
-      for (j = s0; j < l; j++)
-        call->set_req(p++, in_map->in(k+j));
-    } else {
-      p += l;  // already set to top above by add_req_batch
-    }
-
-    // Add the Monitors
-    k = in_jvms->monoff();
-    l = in_jvms->mon_size();
-    out_jvms->set_monoff(p);
-    for (j = 0; j < l; j++)
-      call->set_req(p++, in_map->in(k+j));
-
-    // Copy any scalar object fields.
-    k = in_jvms->scloff();
-    l = in_jvms->scl_size();
-    out_jvms->set_scloff(p);
-    for (j = 0; j < l; j++)
-      call->set_req(p++, in_map->in(k+j));
-
-    // Finish the new jvms.
-    out_jvms->set_endoff(p);
-
-    assert(out_jvms->endoff()     == debug_end,             "fill ptr must match");
-    assert(out_jvms->depth()      == in_jvms->depth(),      "depth must match");
-    assert(out_jvms->loc_size()   == in_jvms->loc_size(),   "size must match");
-    assert(out_jvms->mon_size()   == in_jvms->mon_size(),   "size must match");
-    assert(out_jvms->scl_size()   == in_jvms->scl_size(),   "size must match");
-    assert(out_jvms->debug_size() == in_jvms->debug_size(), "size must match");
-
-    // Update the two tail pointers in parallel.
-    out_jvms = out_jvms->caller();
-    in_jvms  = in_jvms->caller();
-  }
-
-  assert(debug_ptr == non_debug_edges, "debug info must fit exactly");
-
-  // Test the correctness of JVMState::debug_xxx accessors:
-  assert(call->jvms()->debug_start() == non_debug_edges, "");
-  assert(call->jvms()->debug_end()   == call->req(), "");
-  assert(call->jvms()->debug_depth() == call->req() - non_debug_edges, "");
+  C->add_safepoint_edges(call, youngest_jvms, can_prune_locals, stack_slots_not_pruned);
 }
 
 bool GraphKit::compute_stack_effects(int& inputs, int& depth) {
@@ -1731,7 +1597,7 @@
         if (domain->field_at(i)->is_valuetypeptr()->klass() != C->env()->___Value_klass()) {
           // We don't pass value type arguments by reference but instead
           // pass each field of the value type
-          idx += vt->set_arguments_for_java_call(call, idx, *this);
+          idx += vt->pass_fields(call, idx, *this);
         } else {
           arg = arg->as_ValueType()->store_to_memory(this);
           call->init_req(idx, arg);
@@ -1794,7 +1660,19 @@
   if (call->method() == NULL ||
       call->method()->return_type()->basic_type() == T_VOID)
         ret = top();
-  else  ret = _gvn.transform(new ProjNode(call, TypeFunc::Parms));
+  else {
+    if (!call->tf()->returns_value_type_as_fields()) {
+      ret = _gvn.transform(new ProjNode(call, TypeFunc::Parms));
+    } else {
+      // Return of multiple values (value type fields): we create a
+      // ValueTypeNode whose fields are projections from the call.
+      const TypeTuple *range_sig = call->tf()->range_sig();
+      const Type* t = range_sig->field_at(TypeFunc::Parms);
+      assert(t->isa_valuetypeptr(), "only value types for multiple return values");
+      ciValueKlass* vk = t->is_valuetypeptr()->value_type()->value_klass();
+      ret = C->create_vt_node(call, vk, vk, 0, TypeFunc::Parms+1, false);
+    }
+  }
 
   // Note:  Since any out-of-line call can produce an exception,
   // we always insert an I_O projection from the call into the result.
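When consuming such a multi-value return, the projection at TypeFunc::Parms is the klass and the field values follow from Parms+1 onward, with HALF slots padding longs and doubles. A sketch of that index walk, using a stand-in Slot enum instead of C2 types:

#include <vector>

// Stand-in slot kinds for the range_cc tuple; not C2 types.
enum class Slot { KlassPtr, Int, Long, Half };

std::vector<int> field_proj_indices(const std::vector<Slot>& range_cc, int parms) {
  std::vector<int> idx;
  for (int i = parms + 1; i < (int)range_cc.size(); i++) {  // parms+0 is the klass
    if (range_cc[i] == Slot::Half) continue;  // second half of a long/double
    idx.push_back(i);                         // one projection per field
  }
  return idx;
}

int main() {
  // klass, int field, long field (+ its HALF pad)
  std::vector<Slot> range_cc = {Slot::KlassPtr, Slot::Int, Slot::Long, Slot::Half};
  return field_proj_indices(range_cc, 0).size() == 2 ? 0 : 1;
}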
--- a/src/share/vm/opto/lcm.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/lcm.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -839,7 +839,7 @@
   regs.Insert(_matcher.c_frame_pointer());
 
   // Set all registers killed and not already defined by the call.
-  uint r_cnt = mcall->tf()->range()->cnt();
+  uint r_cnt = mcall->tf()->range_cc()->cnt();
   int op = mcall->ideal_Opcode();
   MachProjNode *proj = new MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
   map_node_to_block(proj, block);
--- a/src/share/vm/opto/machnode.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/machnode.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -668,8 +668,8 @@
 
 uint MachCallNode::cmp( const Node &n ) const
 { return _tf == ((MachCallNode&)n)._tf; }
-const Type *MachCallNode::bottom_type() const { return tf()->range(); }
-const Type* MachCallNode::Value(PhaseGVN* phase) const { return tf()->range(); }
+const Type *MachCallNode::bottom_type() const { return tf()->range_cc(); }
+const Type* MachCallNode::Value(PhaseGVN* phase) const { return tf()->range_cc(); }
 
 #ifndef PRODUCT
 void MachCallNode::dump_spec(outputStream *st) const {
@@ -681,11 +681,13 @@
 #endif
 
 bool MachCallNode::return_value_is_used() const {
-  if (tf()->range()->cnt() == TypeFunc::Parms) {
+  if (tf()->range_sig()->cnt() == TypeFunc::Parms) {
     // void return
     return false;
   }
 
+  assert(!tf()->returns_value_type_as_fields(), "multiple return values not supported");
+
   // find the projection corresponding to the return value
   for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
     Node *use = fast_out(i);
@@ -701,11 +703,15 @@
 // Because this is used in deoptimization, we want the type info, not the data
 // flow info; the interpreter will "use" things that are dead to the optimizer.
 bool MachCallNode::returns_pointer() const {
-  const TypeTuple *r = tf()->range();
+  const TypeTuple *r = tf()->range_sig();
   return (r->cnt() > TypeFunc::Parms &&
           r->field_at(TypeFunc::Parms)->isa_ptr());
 }
 
+bool MachCallNode::returns_vt() const {
+  return tf()->returns_value_type_as_fields();
+}
+
 //------------------------------Registers--------------------------------------
 const RegMask &MachCallNode::in_RegMask(uint idx) const {
   // Values in the domain use the users calling convention, embodied in the
--- a/src/share/vm/opto/machnode.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/machnode.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -872,6 +872,7 @@
 
   // Similar to cousin class CallNode::returns_pointer
   bool returns_pointer() const;
+  bool returns_vt() const;
 
 #ifndef PRODUCT
   virtual void dump_spec(outputStream *st) const;
--- a/src/share/vm/opto/macro.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/macro.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -1235,7 +1235,7 @@
 
   extract_call_projections(boxing);
 
-  const TypeTuple* r = boxing->tf()->range();
+  const TypeTuple* r = boxing->tf()->range_sig();
   assert(r->cnt() > TypeFunc::Parms, "sanity");
   const TypeInstPtr* t = r->field_at(TypeFunc::Parms)->isa_instptr();
   assert(t != NULL, "sanity");
--- a/src/share/vm/opto/matcher.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/matcher.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -168,6 +168,52 @@
 }
 #endif
 
+// Array of RegMask, one per returned value (a value type instance can
+// be returned as multiple values, one per field)
+RegMask* Matcher::return_values_mask(const TypeTuple *range) {
+  uint cnt = range->cnt() - TypeFunc::Parms;
+  if (cnt == 0) {
+    return NULL;
+  }
+  RegMask* mask = NEW_RESOURCE_ARRAY(RegMask, cnt);
+
+  if (!ValueTypeReturnedAsFields) {
+    // Get ideal-register return type
+    uint ireg = range->field_at(TypeFunc::Parms)->ideal_reg();
+    // Get machine return register
+    OptoRegPair regs = return_value(ireg, false);
+
+    // And mask for same
+    mask[0].Clear();
+    mask[0].Insert(regs.first());
+    if (OptoReg::is_valid(regs.second())) {
+      mask[0].Insert(regs.second());
+    }
+  } else {
+    BasicType *sig_bt = NEW_RESOURCE_ARRAY(BasicType, cnt);
+    VMRegPair *vm_parm_regs = NEW_RESOURCE_ARRAY(VMRegPair, cnt);
+
+    for (uint i = 0; i < cnt; i++) {
+      sig_bt[i] = range->field_at(i+TypeFunc::Parms)->basic_type();
+    }
+
+    int regs = SharedRuntime::java_return_convention(sig_bt, vm_parm_regs, cnt);
+    assert(regs > 0, "should have been tested during graph construction");
+    for (uint i = 0; i < cnt; i++) {
+      mask[i].Clear();
+
+      OptoReg::Name reg1 = OptoReg::as_OptoReg(vm_parm_regs[i].first());
+      if (OptoReg::is_valid(reg1)) {
+        mask[i].Insert(reg1);
+      }
+      OptoReg::Name reg2 = OptoReg::as_OptoReg(vm_parm_regs[i].second());
+      if (OptoReg::is_valid(reg2)) {
+        mask[i].Insert(reg2);
+      }
+    }
+  }
+  return mask;
+}
 
 //---------------------------match---------------------------------------------
 void Matcher::match( ) {
@@ -183,21 +229,10 @@
   _return_addr_mask.Insert(OptoReg::add(return_addr(),1));
 #endif
 
-  // Map a Java-signature return type into return register-value
-  // machine registers for 0, 1 and 2 returned values.
-  const TypeTuple *range = C->tf()->range();
-  if( range->cnt() > TypeFunc::Parms ) { // If not a void function
-    // Get ideal-register return type
-    uint ireg = range->field_at(TypeFunc::Parms)->ideal_reg();
-    // Get machine return register
-    uint sop = C->start()->Opcode();
-    OptoRegPair regs = return_value(ireg, false);
-
-    // And mask for same
-    _return_value_mask = RegMask(regs.first());
-    if( OptoReg::is_valid(regs.second()) )
-      _return_value_mask.Insert(regs.second());
-  }
+  // Map Java-signature return types into return register-value
+  // machine registers.
+  const TypeTuple *range = C->tf()->range_cc();
+  _return_values_mask = return_values_mask(range);
 
   // ---------------
   // Frame Layout
@@ -651,12 +686,11 @@
   // Input RegMask array shared by all Returns.
   // The type for doubles and longs has a count of 2, but
   // there is only 1 returned value
-  uint ret_edge_cnt = TypeFunc::Parms + ((C->tf()->range()->cnt() == TypeFunc::Parms) ? 0 : 1);
+  uint ret_edge_cnt = C->tf()->range_cc()->cnt();
   RegMask *ret_rms  = init_input_masks( ret_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
-  // Returns have 0 or 1 returned values depending on call signature.
-  // Return register is specified by return_value in the AD file.
-  if (ret_edge_cnt > TypeFunc::Parms)
-    ret_rms[TypeFunc::Parms+0] = _return_value_mask;
+  for (i = TypeFunc::Parms; i < ret_edge_cnt; i++) {
+    ret_rms[i] = _return_values_mask[i-TypeFunc::Parms];
+  }
 
   // Input RegMask array shared by all Rethrows.
   uint reth_edge_cnt = TypeFunc::Parms+1;
@@ -1002,7 +1036,11 @@
           } else {                  // Nothing the matcher cares about
             if( n->is_Proj() && n->in(0)->is_Multi()) {       // Projections?
               // Convert to machine-dependent projection
-              m = n->in(0)->as_Multi()->match( n->as_Proj(), this );
+              RegMask* mask = NULL;
+              if (n->in(0)->is_Call()) {
+                mask = return_values_mask(n->in(0)->as_Call()->tf()->range_cc());
+              }
+              m = n->in(0)->as_Multi()->match(n->as_Proj(), this, mask);
 #ifdef ASSERT
               _new2old_map.map(m->_idx, n);
 #endif
@@ -1302,7 +1340,7 @@
     // Since the max-per-method covers the max-per-call-site and debug info
     // is excluded on the max-per-method basis, debug info cannot land in
     // this killed area.
-    uint r_cnt = mcall->tf()->range()->cnt();
+    uint r_cnt = mcall->tf()->range_sig()->cnt();
     MachProjNode *proj = new MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj );
     if (!RegMask::can_represent_arg(OptoReg::Name(out_arg_limit_per_call-1))) {
       C->record_method_not_compilable("unsupported outgoing calling sequence");
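return_values_mask() above reduces to: one RegMask per returned value, with the pair's second register (the high half of a long or double) inserted only when valid. A standalone sketch, using plain ints for registers and a made-up convention instead of SharedRuntime's:

#include <set>
#include <vector>

using ToyRegMask = std::set<int>;              // registers are plain ints here
struct RegPair { int first, second; };         // second == -1 means invalid

std::vector<ToyRegMask> return_values_mask(const std::vector<RegPair>& conv) {
  std::vector<ToyRegMask> mask(conv.size());   // one mask per returned value
  for (size_t i = 0; i < conv.size(); i++) {
    mask[i].insert(conv[i].first);
    if (conv[i].second >= 0)                   // e.g. the high half of a long
      mask[i].insert(conv[i].second);
  }
  return mask;
}

int main() {
  // two returned values: an int in r0, a long in the pair r1:r2
  std::vector<RegPair> conv = {{0, -1}, {1, 2}};
  auto masks = return_values_mask(conv);
  return (masks[0].size() == 1 && masks[1].size() == 2) ? 0 : 1;
}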
--- a/src/share/vm/opto/matcher.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/matcher.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -254,6 +254,8 @@
   // Helper for match
   OptoReg::Name warp_incoming_stk_arg( VMReg reg );
 
+  RegMask* return_values_mask(const TypeTuple *range);
+
   // Transform, then walk.  Does implicit DCE while walking.
   // Name changed from "transform" to avoid it being virtual.
   Node *xform( Node *old_space_node, int Nodes );
@@ -379,7 +381,7 @@
   // Return value register.  On Intel it is EAX.  On Sparc i0/o0.
   static OptoRegPair   return_value(uint ideal_reg, bool is_outgoing);
   static OptoRegPair c_return_value(uint ideal_reg, bool is_outgoing);
-  RegMask                     _return_value_mask;
+  RegMask*             _return_values_mask;
   // Inline Cache Register
   static OptoReg::Name  inline_cache_reg();
   static int            inline_cache_reg_encode();
--- a/src/share/vm/opto/memnode.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/memnode.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -3004,7 +3004,7 @@
 
 //------------------------------match------------------------------------------
 // Construct projections for memory.
-Node *MemBarNode::match( const ProjNode *proj, const Matcher *m ) {
+Node *MemBarNode::match(const ProjNode *proj, const Matcher *m, const RegMask* mask) {
   switch (proj->_con) {
   case TypeFunc::Control:
   case TypeFunc::Memory:
--- a/src/share/vm/opto/memnode.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/memnode.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -1153,7 +1153,7 @@
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
   virtual uint match_edge(uint idx) const { return 0; }
   virtual const Type *bottom_type() const { return TypeTuple::MEMBAR; }
-  virtual Node *match( const ProjNode *proj, const Matcher *m );
+  virtual Node *match(const ProjNode *proj, const Matcher *m, const RegMask* mask);
   // Factory method.  Builds a wide or narrow membar.
   // Optional 'precedent' becomes an extra edge if not null.
   static MemBarNode* make(Compile* C, int opcode,
--- a/src/share/vm/opto/multnode.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/multnode.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -39,7 +39,7 @@
   return RegMask::Empty;
 }
 
-Node *MultiNode::match( const ProjNode *proj, const Matcher *m ) { return proj->clone(); }
+Node *MultiNode::match(const ProjNode *proj, const Matcher *m, const RegMask* mask) { return proj->clone(); }
 
 //------------------------------proj_out---------------------------------------
 // Get a named projection
--- a/src/share/vm/opto/multnode.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/multnode.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -44,7 +44,7 @@
   virtual uint hash() const { return NO_HASH; }  // CFG nodes do not hash
   virtual bool depends_only_on_test() const { return false; }
   virtual const RegMask &out_RegMask() const;
-  virtual Node *match( const ProjNode *proj, const Matcher *m );
+  virtual Node *match(const ProjNode *proj, const Matcher *m, const RegMask* mask);
   virtual uint ideal_reg() const { return NotAMachineReg; }
   ProjNode* proj_out(uint which_proj) const; // Get a named projection
 
--- a/src/share/vm/opto/output.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/output.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -726,6 +726,7 @@
   int safepoint_pc_offset = current_offset;
   bool is_method_handle_invoke = false;
   bool return_oop = false;
+  bool return_vt = false;
 
   // Add the safepoint in the DebugInfoRecorder
   if( !mach->is_MachCall() ) {
@@ -743,9 +744,12 @@
     }
 
     // Check if a call returns an object.
-    if (mcall->returns_pointer()) {
+    if (mcall->returns_pointer() || mcall->returns_vt()) {
       return_oop = true;
     }
+    if (mcall->returns_vt()) {
+      return_vt = true;
+    }
     safepoint_pc_offset += mcall->ret_addr_offset();
     debug_info()->add_safepoint(safepoint_pc_offset, mcall->_oop_map);
   }
@@ -860,7 +864,7 @@
     // Now we can describe the scope.
     methodHandle null_mh;
     bool rethrow_exception = false;
-    debug_info()->describe_scope(safepoint_pc_offset, null_mh, scope_method, jvms->bci(), jvms->should_reexecute(), rethrow_exception, is_method_handle_invoke, return_oop, locvals, expvals, monvals);
+    debug_info()->describe_scope(safepoint_pc_offset, null_mh, scope_method, jvms->bci(), jvms->should_reexecute(), rethrow_exception, is_method_handle_invoke, return_oop, return_vt, locvals, expvals, monvals);
   } // End jvms loop
 
   // Mark the end of the scope set.
--- a/src/share/vm/opto/parse1.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/parse1.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -781,8 +781,8 @@
   _exits.set_all_memory(memphi);
 
   // Add a return value to the exit state.  (Do not push it yet.)
-  if (tf()->range()->cnt() > TypeFunc::Parms) {
-    const Type* ret_type = tf()->range()->field_at(TypeFunc::Parms);
+  if (tf()->range_sig()->cnt() > TypeFunc::Parms) {
+    const Type* ret_type = tf()->range_sig()->field_at(TypeFunc::Parms);
     if (ret_type->isa_int()) {
       BasicType ret_bt = method()->return_type()->basic_type();
       if (ret_bt == T_BOOLEAN ||
@@ -800,15 +800,18 @@
     if (ret_oop_type && !ret_oop_type->klass()->is_loaded()) {
       ret_type = TypeOopPtr::BOTTOM;
     }
-    if (_caller->has_method() && ret_type->isa_valuetypeptr()) {
-      // When inlining, return value type as ValueTypeNode not as oop
+    if ((_caller->has_method() || tf()->returns_value_type_as_fields()) &&
+        ret_type->isa_valuetypeptr() &&
+        ret_type->is_valuetypeptr()->klass() != C->env()->___Value_klass()) {
+      // When inlining, or when returning multiple values: return the
+      // value type as a ValueTypeNode rather than as an oop
       ret_type = ret_type->is_valuetypeptr()->value_type();
     }
     int         ret_size = type2size[ret_type->basic_type()];
     Node*       ret_phi  = new PhiNode(region, ret_type);
     gvn().set_type_bottom(ret_phi);
     _exits.ensure_stack(ret_size);
-    assert((int)(tf()->range()->cnt() - TypeFunc::Parms) == ret_size, "good tf range");
+    assert((int)(tf()->range_sig()->cnt() - TypeFunc::Parms) == ret_size, "good tf range");
     assert(method()->return_type()->size() == ret_size, "tf agrees w/ method");
     _exits.set_argument(0, ret_phi);  // here is where the parser finds it
     // Note:  ret_phi is not yet pushed, until do_exits.
@@ -817,7 +820,7 @@
 
 // Helper function to create a ValueTypeNode from its fields passed as
 // arguments. Fields are passed in order of increasing offsets.
-Node* Compile::create_vt_node(Node* n, ciValueKlass* vk, ciValueKlass* base_vk, int base_offset, int base_input) {
+Node* Compile::create_vt_node(Node* n, ciValueKlass* vk, ciValueKlass* base_vk, int base_offset, int base_input, bool in) {
   assert(base_offset >= 0, "offset in value type always positive");
   PhaseGVN& gvn = *initial_gvn();
   ValueTypeNode* vt = ValueTypeNode::make(gvn, vk);
@@ -826,7 +829,7 @@
     int offset = base_offset + vt->field_offset(i) - (base_offset > 0 ? vk->first_field_offset() : 0);
     if (field_type->is_valuetype()) {
       ciValueKlass* embedded_vk = field_type->as_value_klass();
-      Node* embedded_vt = create_vt_node(n, embedded_vk, base_vk, offset, base_input);
+      Node* embedded_vt = create_vt_node(n, embedded_vk, base_vk, offset, base_input, in);
       vt->set_field_value(i, embedded_vt);
     } else {
       int j = 0; int extra = 0;
@@ -844,10 +847,15 @@
       assert(j != base_vk->nof_nonstatic_fields(), "must find");
       Node* parm = NULL;
       if (n->is_Start()) {
+        assert(in, "return from start?");
         parm = gvn.transform(new ParmNode(n->as_Start(), base_input + j + extra));
       } else {
-        assert(n->is_Call(), "nothing else here");
-        parm = n->in(base_input + j + extra);
+        if (in) {
+          assert(n->is_Call(), "nothing else here");
+          parm = n->in(base_input + j + extra);
+        } else {
+          parm = gvn.transform(new ProjNode(n->as_Call(), base_input + j + extra));
+        }
       }
       vt->set_field_value(i, parm);
       // Record all these guys for later GVN.
@@ -862,7 +870,7 @@
 // unknown caller.  The method & bci will be NULL & InvocationEntryBci.
 JVMState* Compile::build_start_state(StartNode* start, const TypeFunc* tf) {
   int        arg_size_sig = tf->domain_sig()->cnt();
-  int        max_size = MAX2(arg_size_sig, (int)tf->range()->cnt());
+  int        max_size = MAX2(arg_size_sig, (int)tf->range_cc()->cnt());
   JVMState*  jvms     = new (this) JVMState(max_size - TypeFunc::Parms);
   SafePointNode* map  = new SafePointNode(max_size, NULL);
   record_for_igvn(map);
@@ -895,7 +903,7 @@
         const Type* t = tf->domain_sig()->field_at(i);
         if (t->isa_valuetypeptr() && t->is_valuetypeptr()->klass() != C->env()->___Value_klass()) {
           ciValueKlass* vk = t->is_valuetypeptr()->value_type()->value_klass();
-          Node* vt = create_vt_node(start, vk, vk, 0, j);
+          Node* vt = create_vt_node(start, vk, vk, 0, j, true);
           map->init_req(i, gvn.transform(vt));
           j += vk->value_arg_slots();
         } else {
@@ -952,12 +960,23 @@
                              kit.frameptr(),
                              kit.returnadr());
   // Add zero or 1 return values
-  int ret_size = tf()->range()->cnt() - TypeFunc::Parms;
+  int ret_size = tf()->range_sig()->cnt() - TypeFunc::Parms;
   if (ret_size > 0) {
     kit.inc_sp(-ret_size);  // pop the return value(s)
     kit.sync_jvms();
-    ret->add_req(kit.argument(0));
-    // Note:  The second dummy edge is not needed by a ReturnNode.
+    Node* res = kit.argument(0);
+    if (tf()->returns_value_type_as_fields()) {
+      // Multiple return values (value type fields): add as many edges
+      // to the Return node as there are returned values.
+      assert(res->is_ValueType(), "what else supports multi value return");
+      ValueTypeNode* vt = res->as_ValueType();
+      ret->add_req_batch(NULL, tf()->range_cc()->cnt() - TypeFunc::Parms);
+      vt->pass_klass(ret, TypeFunc::Parms, kit);
+      vt->pass_fields(ret, TypeFunc::Parms+1, kit);
+    } else {
+      ret->add_req(res);
+      // Note:  The second dummy edge is not needed by a ReturnNode.
+    }
   }
   // bind it to root
   root()->add_req(ret);
@@ -1114,8 +1133,8 @@
     mms.set_memory(_gvn.transform(mms.memory()));
   }
 
-  if (tf()->range()->cnt() > TypeFunc::Parms) {
-    const Type* ret_type = tf()->range()->field_at(TypeFunc::Parms);
+  if (tf()->range_sig()->cnt() > TypeFunc::Parms) {
+    const Type* ret_type = tf()->range_sig()->field_at(TypeFunc::Parms);
     Node*       ret_phi  = _gvn.transform( _exits.argument(0) );
     if (!_exits.control()->is_top() && _gvn.type(ret_phi)->empty()) {
       // In case of concurrent class loading, the type we set for the
@@ -2294,8 +2313,10 @@
 //------------------------------return_current---------------------------------
 // Append current _map to _exit_return
 void Parse::return_current(Node* value) {
-  if (value != NULL && value->is_ValueType() && !_caller->has_method()) {
-    // Returning from root JVMState, make sure value type is allocated
+  if (value != NULL && value->is_ValueType() && !_caller->has_method() &&
+      !tf()->returns_value_type_as_fields()) {
+    // Returning from root JVMState without multiple returned values,
+    // make sure value type is allocated
     value = value->as_ValueType()->store_to_memory(this);
   }
 
@@ -2305,7 +2326,10 @@
   }
 
   // Do not set_parse_bci, so that return goo is credited to the return insn.
-  set_bci(InvocationEntryBci);
+  // vreturn can trigger an allocation so vreturn can throw. Setting
+  // the bci here breaks exception handling. Commenting this out
+  // doesn't seem to break anything.
+  //  set_bci(InvocationEntryBci);
   if (method()->is_synchronized() && GenerateSynchronizationCode) {
     shared_unlock(_synch_lock->box_node(), _synch_lock->obj_node());
   }
--- a/src/share/vm/opto/runtime.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/runtime.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -1688,3 +1688,30 @@
 
   st->print_raw_cr(tempst.as_string());
 }
+
+const TypeFunc *OptoRuntime::store_value_type_fields_Type() {
+  // create input type (domain)
+  uint total = SharedRuntime::java_return_convention_max_int + SharedRuntime::java_return_convention_max_float*2;
+  const Type **fields = TypeTuple::fields(total);
+  // We don't know the number of returned values and their
+  // types. Assume all registers available to the return convention
+  // are used.
+  fields[TypeFunc::Parms] = TypePtr::BOTTOM;
+  uint i = 1;
+  for (; i < SharedRuntime::java_return_convention_max_int; i++) {
+    fields[TypeFunc::Parms+i] = TypeInt::INT;
+  }
+  for (; i < total; i+=2) {
+    fields[TypeFunc::Parms+i] = Type::DOUBLE;
+    fields[TypeFunc::Parms+i+1] = Type::HALF;
+  }
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + total, fields);
+
+  // create result type (range)
+  fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms+0] = TypeValueTypePtr::NOTNULL;
+
+  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1,fields);
+
+  return TypeFunc::make(domain, range);
+}
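The domain built above is the worst case: one oop slot, every remaining integer return register as INT, and every float return register as a DOUBLE+HALF pair. A standalone check of that slot arithmetic follows; the two max_* constants are invented for the example, not SharedRuntime's real values.

#include <cstdio>

int main() {
  // Stand-ins for SharedRuntime::java_return_convention_max_int/_max_float.
  const unsigned max_int = 6, max_float = 8;
  const unsigned total = max_int + max_float * 2;
  unsigned i = 1;                              // slot 0: TypePtr::BOTTOM (the oop)
  unsigned ints = 0, pairs = 0;
  for (; i < max_int; i++)  ints++;            // TypeInt::INT slots
  for (; i < total; i += 2) pairs++;           // Type::DOUBLE + Type::HALF pairs
  printf("1 oop + %u ints + %u double/half pairs = %u slots\n", ints, pairs, total);
  return (1 + ints + 2 * pairs == total) ? 0 : 1;
}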
--- a/src/share/vm/opto/runtime.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/runtime.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -329,6 +329,8 @@
   static const TypeFunc* dtrace_method_entry_exit_Type();
   static const TypeFunc* dtrace_object_alloc_Type();
 
+  static const TypeFunc* store_value_type_fields_Type();
+
  private:
  static NamedCounter * volatile _named_counters;
 
--- a/src/share/vm/opto/type.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/type.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -591,6 +591,8 @@
 
   TypeNarrowKlass::NULL_PTR = TypeNarrowKlass::make( TypePtr::NULL_PTR );
 
+  TypeValueTypePtr::NOTNULL = EnableValhalla ? TypeValueTypePtr::make(TypePtr::NotNull, current->env()->___Value_klass()->as_value_klass()) : NULL;
+
   mreg2type[Op_Node] = Type::BOTTOM;
   mreg2type[Op_Set ] = 0;
   mreg2type[Op_RegN] = TypeNarrowOop::BOTTOM;
@@ -1914,12 +1916,66 @@
 const TypeTuple *TypeTuple::INT_CC_PAIR;
 const TypeTuple *TypeTuple::LONG_CC_PAIR;
 
+static void collect_value_fields(ciValueKlass* vk, const Type** field_array, uint& pos) {
+  for (int j = 0; j < vk->nof_nonstatic_fields(); j++) {
+    ciField* f = vk->nonstatic_field_at(j);
+    BasicType bt = f->type()->basic_type();
+    assert(bt < T_VALUETYPE && bt >= T_BOOLEAN, "not yet supported");
+    field_array[pos++] = Type::get_const_type(f->type());
+    if (bt == T_LONG || bt == T_DOUBLE) {
+      field_array[pos++] = Type::HALF;
+    }
+  }
+}
+
+// Can a value type instance of this type be returned as multiple
+// return values?
+static bool vt_can_be_returned_as_fields(ciValueKlass* vk) {
+  if (vk == ciEnv::current()->___Value_klass()) {
+    return false;
+  }
+
+  ResourceMark rm;
+  uint args = vk->value_arg_slots() + 1 /* return vk as well */;
+
+  BasicType* sig_bt = NEW_RESOURCE_ARRAY(BasicType, args);
+  VMRegPair* regs = NEW_RESOURCE_ARRAY(VMRegPair, args);
+
+  sig_bt[0] = T_METADATA;
+  for (uint i = 0, j = 1; i < (uint)vk->nof_nonstatic_fields(); i++) {
+    BasicType bt = vk->nonstatic_field_at(i)->layout_type();
+    assert(i+j < args, "out of bounds access");
+    sig_bt[i+j] = bt;
+    if (bt == T_LONG || bt == T_DOUBLE) {
+      j++;
+      assert(i+j < args, "out of bounds access");
+      sig_bt[i+j] = T_VOID;
+    }
+  }
+
+  if (SharedRuntime::java_return_convention(sig_bt, regs, args) <= 0) {
+    return false;
+  }
+
+  return true;
+}
+
 
 //------------------------------make-------------------------------------------
 // Make a TypeTuple from the range of a method signature
-const TypeTuple *TypeTuple::make_range(ciSignature* sig) {
+const TypeTuple *TypeTuple::make_range(ciSignature* sig, bool ret_vt_fields) {
   ciType* return_type = sig->return_type();
-  uint arg_cnt = return_type->size();
+  uint arg_cnt = 0;
+  if (ret_vt_fields) {
+    ret_vt_fields = return_type->is_valuetype() && vt_can_be_returned_as_fields((ciValueKlass*)return_type);
+  }
+  if (ret_vt_fields) {
+    ciValueKlass* vk = (ciValueKlass*)return_type;
+    arg_cnt = vk->value_arg_slots()+1;
+  } else {
+    arg_cnt = return_type->size();
+  }
+
   const Type **field_array = fields(arg_cnt);
   switch (return_type->basic_type()) {
   case T_LONG:
@@ -1931,7 +1987,6 @@
     field_array[TypeFunc::Parms+1] = Type::HALF;
     break;
   case T_OBJECT:
-  case T_VALUETYPE:
   case T_ARRAY:
   case T_BOOLEAN:
   case T_CHAR:
@@ -1941,6 +1996,17 @@
   case T_INT:
     field_array[TypeFunc::Parms] = get_const_type(return_type);
     break;
+  case T_VALUETYPE:
+    if (ret_vt_fields) {
+      ciValueKlass* vk = (ciValueKlass*)return_type;
+      uint pos = TypeFunc::Parms;
+      field_array[pos] = TypeKlassPtr::make(vk);
+      pos++;
+      collect_value_fields(vk, field_array, pos);
+    } else {
+      field_array[TypeFunc::Parms] = get_const_type(return_type);
+    }
+    break;
   case T_VOID:
     break;
   default:
@@ -1949,18 +2015,6 @@
   return (TypeTuple*)(new TypeTuple(TypeFunc::Parms + arg_cnt, field_array))->hashcons();
 }
 
-static void collect_value_fields(ciValueKlass* vk, const Type**& field_array, uint& pos) {
-  for (int j = 0; j < vk->nof_nonstatic_fields(); j++) {
-    ciField* f = vk->nonstatic_field_at(j);
-    BasicType bt = f->type()->basic_type();
-    assert(bt < T_VALUETYPE && bt >= T_BOOLEAN, "not yet supported");
-    field_array[pos++] = Type::get_const_type(f->type());
-    if (bt == T_LONG || bt == T_DOUBLE) {
-      field_array[pos++] = Type::HALF;
-    }
-  }
-}
-
 // Make a TypeTuple from the domain of a method signature
 const TypeTuple *TypeTuple::make_domain(ciInstanceKlass* recv, ciSignature* sig, bool vt_fields_as_args) {
   uint arg_cnt = sig->size();
@@ -4701,6 +4755,7 @@
 
 //=============================================================================
 
+const TypeValueTypePtr* TypeValueTypePtr::NOTNULL;
 //------------------------------make-------------------------------------------
 const TypeValueTypePtr* TypeValueTypePtr::make(const TypeValueType* vt, PTR ptr, ciObject* o, Offset offset, int instance_id, const TypePtr* speculative, int inline_depth) {
   return (TypeValueTypePtr*)(new TypeValueTypePtr(vt, ptr, o, offset, instance_id, speculative, inline_depth))->hashcons();
@@ -4808,6 +4863,22 @@
       ciObject* o = NULL;
       ciObject* this_oop  = const_oop();
       ciObject* tp_oop = tp->const_oop();
+      const TypeValueType* vt = NULL;
+      if (_vt != tp->_vt) {
+        ciKlass* __value_klass = ciEnv::current()->___Value_klass();
+        assert(klass() == __value_klass || tp->klass() == __value_klass, "impossible meet");
+        if (above_centerline(ptr)) {
+          vt = klass() == __value_klass ? tp->_vt : _vt;
+        } else if (above_centerline(this->_ptr) && !above_centerline(tp->_ptr)) {
+          vt = tp->_vt;
+        } else if (above_centerline(tp->_ptr) && !above_centerline(this->_ptr)) {
+          vt = _vt;
+        } else {
+          vt = klass() == __value_klass ? _vt : tp->_vt;
+        }
+      } else {
+        vt = _vt;
+      }
       if (ptr == Constant) {
         if (this_oop != NULL && tp_oop != NULL &&
             this_oop->equals(tp_oop) ) {
@@ -4820,7 +4891,7 @@
           ptr = NotNull;
         }
       }
-      return make(_vt, ptr, o, offset, instance_id, speculative, depth);
+      return make(vt, ptr, o, offset, instance_id, speculative, depth);
     }
     }
 }
@@ -5593,12 +5664,13 @@
 // Convenience common pre-built types.
 
 //------------------------------make-------------------------------------------
-const TypeFunc *TypeFunc::make( const TypeTuple *domain_sig, const TypeTuple* domain_cc, const TypeTuple *range ) {
-  return (TypeFunc*)(new TypeFunc(domain_sig, domain_cc, range))->hashcons();
-}
-
-const TypeFunc *TypeFunc::make( const TypeTuple *domain, const TypeTuple *range ) {
-  return make(domain, domain, range);
+const TypeFunc *TypeFunc::make(const TypeTuple *domain_sig, const TypeTuple* domain_cc,
+                               const TypeTuple *range_sig, const TypeTuple *range_cc) {
+  return (TypeFunc*)(new TypeFunc(domain_sig, domain_cc, range_sig, range_cc))->hashcons();
+}
+
+const TypeFunc *TypeFunc::make(const TypeTuple *domain, const TypeTuple *range) {
+  return make(domain, domain, range, range);
 }
 
 //------------------------------make-------------------------------------------
@@ -5620,8 +5692,9 @@
     domain_sig = TypeTuple::make_domain(method->holder(), method->signature(), false);
     domain_cc = TypeTuple::make_domain(method->holder(), method->signature(), ValueTypePassFieldsAsArgs);
   }
-  const TypeTuple *range  = TypeTuple::make_range(method->signature());
-  tf = TypeFunc::make(domain_sig, domain_cc, range);
+  const TypeTuple *range_sig = TypeTuple::make_range(method->signature(), false);
+  const TypeTuple *range_cc = TypeTuple::make_range(method->signature(), ValueTypeReturnedAsFields);
+  tf = TypeFunc::make(domain_sig, domain_cc, range_sig, range_cc);
   C->set_last_tf(method, tf);  // fill cache
   return tf;
 }
@@ -5659,28 +5732,29 @@
   const TypeFunc *a = (const TypeFunc*)t;
   return _domain_sig == a->_domain_sig &&
     _domain_cc == a->_domain_cc &&
-    _range == a->_range;
+    _range_sig == a->_range_sig &&
+    _range_cc == a->_range_cc;
 }
 
 //------------------------------hash-------------------------------------------
 // Type-specific hashing function.
 int TypeFunc::hash(void) const {
-  return (intptr_t)_domain_sig + (intptr_t)_domain_cc + (intptr_t)_range;
+  return (intptr_t)_domain_sig + (intptr_t)_domain_cc + (intptr_t)_range_sig + (intptr_t)_range_cc;
 }
 
 //------------------------------dump2------------------------------------------
 // Dump Function Type
 #ifndef PRODUCT
 void TypeFunc::dump2( Dict &d, uint depth, outputStream *st ) const {
-  if( _range->cnt() <= Parms )
+  if( _range_sig->cnt() <= Parms )
     st->print("void");
   else {
     uint i;
-    for (i = Parms; i < _range->cnt()-1; i++) {
-      _range->field_at(i)->dump2(d,depth,st);
+    for (i = Parms; i < _range_sig->cnt()-1; i++) {
+      _range_sig->field_at(i)->dump2(d,depth,st);
       st->print("/");
     }
-    _range->field_at(i)->dump2(d,depth,st);
+    _range_sig->field_at(i)->dump2(d,depth,st);
   }
   st->print(" ");
   st->print("( ");
@@ -5713,8 +5787,8 @@
 
 
 BasicType TypeFunc::return_type() const{
-  if (range()->cnt() == TypeFunc::Parms) {
+  if (range_sig()->cnt() == TypeFunc::Parms) {
     return T_VOID;
   }
-  return range()->field_at(TypeFunc::Parms)->basic_type();
-}
+  return range_sig()->field_at(TypeFunc::Parms)->basic_type();
+}
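For the ret_vt_fields case, make_range() flattens the value klass into the tuple: the klass pointer first, then each field, with a HALF pad after every two-slot value, which is what collect_value_fields() produces above. A sketch with stand-in enums instead of ci types:

#include <vector>

// Stand-ins for BasicType and for the tuple's slot kinds; not ci types.
enum class BT   { Int, Long, Double, Object };
enum class Slot { KlassPtr, Field, Half };

std::vector<Slot> make_range_cc(const std::vector<BT>& fields) {
  std::vector<Slot> tuple;
  tuple.push_back(Slot::KlassPtr);          // the value type's klass comes first
  for (BT bt : fields) {
    tuple.push_back(Slot::Field);
    if (bt == BT::Long || bt == BT::Double)
      tuple.push_back(Slot::Half);          // pad for a two-slot value
  }
  return tuple;
}

int main() {
  // value type { int x; double y; }  ->  klass, x, y, HALF
  return make_range_cc({BT::Int, BT::Double}).size() == 4 ? 0 : 1;
}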
--- a/src/share/vm/opto/type.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/type.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -28,6 +28,7 @@
 #include "ci/ciValueKlass.hpp"
 #include "opto/adlcVMDeps.hpp"
 #include "runtime/handles.hpp"
+#include "runtime/sharedRuntime.hpp"
 
 // Portions of code courtesy of Clifford Click
 
@@ -691,7 +692,7 @@
   }
 
   static const TypeTuple *make( uint cnt, const Type **fields );
-  static const TypeTuple *make_range(ciSignature *sig);
+  static const TypeTuple *make_range(ciSignature *sig, bool ret_vt_fields = false);
   static const TypeTuple *make_domain(ciInstanceKlass* recv, ciSignature *sig, bool vt_fields_as_args = false);
 
   // Subroutine call type with space allocated for argument types
@@ -1344,6 +1345,8 @@
   virtual const Type* xmeet_helper(const Type* t) const;
   virtual const Type* xdual() const;
 
+  static const TypeValueTypePtr* NOTNULL;
+
 #ifndef PRODUCT
   virtual void dump2(Dict &d, uint depth, outputStream* st) const; // Specialized per-Type dumping
 #endif
@@ -1402,8 +1405,6 @@
   virtual bool singleton(void) const;    // TRUE if type is a singleton
  private:
 
-  static const TypeKlassPtr* make_from_klass_common(ciKlass* klass, bool klass_change, bool try_for_exact);
-
   ciKlass* _klass;
 
   // Does the type exclude subclasses of the klass?  (Inexact == polymorphic.)
@@ -1417,23 +1418,6 @@
 
   bool  is_loaded() const { return klass()->is_loaded(); }
 
-  // Creates a type given a klass. Correctly handles multi-dimensional arrays
-  // Respects UseUniqueSubclasses.
-  // If the klass is final, the resulting type will be exact.
-  static const TypeKlassPtr* make_from_klass(ciKlass* klass) {
-    return make_from_klass_common(klass, true, false);
-  }
-  // Same as before, but will produce an exact type, even if
-  // the klass is not final, as long as it has exactly one implementation.
-  static const TypeKlassPtr* make_from_klass_unique(ciKlass* klass) {
-    return make_from_klass_common(klass, true, true);
-  }
-  // Same as before, but does not respects UseUniqueSubclasses.
-  // Use this only for creating array element types.
-  static const TypeKlassPtr* make_from_klass_raw(ciKlass* klass) {
-    return make_from_klass_common(klass, false, false);
-  }
-
   // Make a generic (unclassed) pointer to metadata.
   static const TypeKlassPtr* make(PTR ptr, Offset offset);
 
@@ -1589,7 +1573,8 @@
 //------------------------------TypeFunc---------------------------------------
 // Class of Array Types
 class TypeFunc : public Type {
-  TypeFunc(const TypeTuple *domain_sig, const TypeTuple *domain_cc, const TypeTuple *range) : Type(Function), _domain_sig(domain_sig), _domain_cc(domain_cc), _range(range) {}
+  TypeFunc(const TypeTuple *domain_sig, const TypeTuple *domain_cc, const TypeTuple *range_sig, const TypeTuple *range_cc)
+    : Type(Function), _domain_sig(domain_sig), _domain_cc(domain_cc), _range_sig(range_sig), _range_cc(range_cc) {}
   virtual bool eq( const Type *t ) const;
   virtual int  hash() const;             // Type specific hashing
   virtual bool singleton(void) const;    // TRUE if type is a singleton
@@ -1603,7 +1588,11 @@
   // type argument as a list of its fields).
   const TypeTuple* const _domain_sig;
   const TypeTuple* const _domain_cc;
-  const TypeTuple* const _range;      // Range of results
+  // Range of results. Similar to the domains: a value type result can
+  // be returned in registers, in which case _range_cc lists all fields
+  // and is the actual calling convention.
+  const TypeTuple* const _range_sig;
+  const TypeTuple* const _range_cc;
 
 public:
   // Constants are shared among ADLC and VM
@@ -1619,11 +1608,13 @@
   // Accessors:
   const TypeTuple* domain_sig() const { return _domain_sig; }
   const TypeTuple* domain_cc() const { return _domain_cc; }
-  const TypeTuple* range()  const { return _range; }
+  const TypeTuple* range_sig()  const { return _range_sig; }
+  const TypeTuple* range_cc()  const { return _range_cc; }
 
   static const TypeFunc *make(ciMethod* method);
   static const TypeFunc *make(ciSignature signature, const Type* extra);
-  static const TypeFunc *make(const TypeTuple* domain, const TypeTuple* domain_cc, const TypeTuple* range);
+  static const TypeFunc *make(const TypeTuple* domain_sig, const TypeTuple* domain_cc,
+                              const TypeTuple* range_sig, const TypeTuple* range_cc);
   static const TypeFunc *make(const TypeTuple* domain, const TypeTuple* range);
 
   virtual const Type *xmeet( const Type *t ) const;
@@ -1631,6 +1622,8 @@
 
   BasicType return_type() const;
 
+  bool returns_value_type_as_fields() const { return range_sig() != range_cc(); }
+
 #ifndef PRODUCT
   virtual void dump2( Dict &d, uint depth, outputStream *st ) const; // Specialized per-Type dumping
 #endif
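
The range_sig/range_cc pair mirrors the existing domain_sig/domain_cc split: range_sig is the result tuple as the Java signature declares it, while range_cc is the tuple the calling convention actually uses, with a leading klass/oop slot and one entry per field when a value type is returned in registers. A minimal standalone sketch of what returns_value_type_as_fields() boils down to -- the tuple contents here are invented stand-ins, not HotSpot types:

#include <cstdio>
#include <string>
#include <vector>

int main() {
  // Hypothetical method:  MyValue2 m();  with MyValue2 = { int x; long y; }
  // As the signature sees the result: a single reference.
  std::vector<std::string> range_sig = { "oop MyValue2" };
  // As the calling convention sees it when returned as fields: a leading
  // klass/oop slot, then one slot per field (the long takes a half pair).
  std::vector<std::string> range_cc = { "klass tag", "int x", "long y", "half" };
  // TypeFunc::returns_value_type_as_fields() reduces to this comparison:
  printf("returns as fields: %s\n", range_sig != range_cc ? "yes" : "no");
  return 0;
}
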
--- a/src/share/vm/opto/valuetypenode.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/valuetypenode.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -369,7 +369,14 @@
   }
 }
 
-uint ValueTypeNode::set_arguments_for_java_call(CallJavaNode* call, int base_input, const GraphKit& kit, ciValueKlass* base_vk, int base_offset) {
+void ValueTypeNode::pass_klass(Node* n, uint pos, const GraphKit& kit) {
+  ciValueKlass* vk = value_klass();
+  const TypeKlassPtr* tk = TypeKlassPtr::make(vk);
+  Node* arg = kit.makecon(tk);
+  n->init_req(pos, arg);
+}
+
+uint ValueTypeNode::pass_fields(Node* n, int base_input, const GraphKit& kit, ciValueKlass* base_vk, int base_offset) {
   ciValueKlass* vk = value_klass();
   if (base_vk == NULL) {
     base_vk = vk;
@@ -381,7 +388,7 @@
     Node* arg = field_value(i);
     if (f_type->is_valuetype()) {
       ciValueKlass* embedded_vk = f_type->as_value_klass();
-      edges += arg->as_ValueType()->set_arguments_for_java_call(call, base_input, kit, base_vk, offset);
+      edges += arg->as_ValueType()->pass_fields(n, base_input, kit, base_vk, offset);
     } else {
       int j = 0; int extra = 0;
       for (; j < base_vk->nof_nonstatic_fields(); j++) {
@@ -395,11 +402,11 @@
           extra++;
         }
       }
-      call->init_req(base_input + j + extra, arg);
+      n->init_req(base_input + j + extra, arg);
       edges++;
       BasicType bt = f_type->basic_type();
       if (bt == T_LONG || bt == T_DOUBLE) {
-        call->init_req(base_input + j + extra + 1, kit.top());
+        n->init_req(base_input + j + extra + 1, kit.top());
         edges++;
       }
     }
@@ -427,6 +434,37 @@
   return NULL;
 }
 
+// When a call returns multiple values, it has several result
+// projections, one per field. Replacing the result of the call with a
+// value type node (after late inlining) requires finding, for each
+// result projection, the corresponding value type field.
+void ValueTypeNode::replace_call_results(Node* call, Compile* C) {
+  ciValueKlass* vk = value_klass();
+  for (DUIterator_Fast imax, i = call->fast_outs(imax); i < imax; i++) {
+    ProjNode *pn = call->fast_out(i)->as_Proj();
+    uint con = pn->_con;
+    if (con >= TypeFunc::Parms+1) {
+      uint field_nb = con - (TypeFunc::Parms+1);
+      int extra = 0;
+      for (uint j = 0; j < field_nb - extra; j++) {
+        ciField* f = vk->nonstatic_field_at(j);
+        BasicType bt = f->type()->basic_type();
+        if (bt == T_LONG || bt == T_DOUBLE) {
+          extra++;
+        }
+      }
+      ciField* f = vk->nonstatic_field_at(field_nb - extra);
+      Node* field = field_value_by_offset(f->offset(), true);
+
+      C->gvn_replace_by(pn, field);
+      C->initial_gvn()->hash_delete(pn);
+      pn->set_req(0, C->top());
+      --i; --imax;
+    }
+  }
+}
+
 #ifndef PRODUCT
 
 void ValueTypeNode::dump_spec(outputStream* st) const {
--- a/src/share/vm/opto/valuetypenode.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/opto/valuetypenode.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -94,7 +94,11 @@
 
   // Replace ValueTypeNodes in debug info at safepoints with SafePointScalarObjectNodes
   void make_scalar_in_safepoints(Compile* C);
-  uint set_arguments_for_java_call(CallJavaNode* call, int base_input, const GraphKit& kit, ciValueKlass* base_vk = NULL, int base_offset = 0);
+  void pass_klass(Node* n, uint pos, const GraphKit& kit);
+  uint pass_fields(Node* call, int base_input, const GraphKit& kit, ciValueKlass* base_vk = NULL, int base_offset = 0);
+  void replace_call_results(Node* call, Compile* C);
+
   virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
   virtual int Opcode() const;
 
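
pass_fields flattens a value type argument into consecutive call inputs, and a long or double field consumes an extra top() placeholder so the two-slot convention used elsewhere in C2 is preserved. A simplified standalone model of the slot assignment -- the field list is hypothetical, and the real code derives each slot from the field's position in the base value klass rather than counting linearly:

#include <cstdio>

enum BasicType { T_INT, T_LONG, T_FLOAT, T_DOUBLE };

int main() {
  BasicType fields[] = { T_INT, T_LONG, T_FLOAT };  // hypothetical value type
  int slot = 5;                                     // first argument input
  for (BasicType bt : fields) {
    printf("field (bt=%d) -> call input %d\n", (int)bt, slot++);
    if (bt == T_LONG || bt == T_DOUBLE) {
      printf("  top placeholder -> call input %d\n", slot++);  // second half
    }
  }
  return 0;
}
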
--- a/src/share/vm/prims/jvmtiCodeBlobEvents.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/prims/jvmtiCodeBlobEvents.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -264,7 +264,7 @@
 
     address scopes_data = nm->scopes_data_begin();
     for( pcd = nm->scopes_pcs_begin(); pcd < nm->scopes_pcs_end(); ++pcd ) {
-      ScopeDesc sc0(nm, pcd->scope_decode_offset(), pcd->should_reexecute(), pcd->rethrow_exception(), pcd->return_oop());
+      ScopeDesc sc0(nm, pcd->scope_decode_offset(), pcd->should_reexecute(), pcd->rethrow_exception(), pcd->return_oop(), pcd->return_vt());
       ScopeDesc *sd  = &sc0;
       while( !sd->is_top() ) { sd = sd->sender(); }
       int bci = sd->bci();
--- a/src/share/vm/runtime/arguments.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/runtime/arguments.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -2522,6 +2522,11 @@
     warning("ValueTypePassFieldsAsArgs is not supported on this platform");
   }
 
+  if (LP64_ONLY(false &&) !FLAG_IS_DEFAULT(ValueTypeReturnedAsFields)) {
+    FLAG_SET_CMDLINE(bool, ValueTypeReturnedAsFields, false);
+    warning("ValueTypeReturnedAsFields is not supported on this platform");
+  }
+
   if (FLAG_IS_DEFAULT(TieredCompilation)) {
     // C1 has no support for value types
     TieredCompilation = false;
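
The LP64_ONLY(false &&) guard reads backwards at first glance: on 64-bit ports the macro injects "false &&", short-circuiting the condition so the flag stays usable, while on 32-bit ports the check fires and the flag is forced off with a warning. For reference, the shape of the macros involved (as defined in utilities/macros.hpp):

#ifdef _LP64
#define LP64_ONLY(code) code   // 64-bit: condition becomes false && ..., never true
#else
#define LP64_ONLY(code)        // 32-bit: condition is just !FLAG_IS_DEFAULT(...)
#endif
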
--- a/src/share/vm/runtime/deoptimization.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/runtime/deoptimization.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -213,25 +213,46 @@
       // is set during method compilation (see Compile::Process_OopMap_Node()).
       // If the previous frame was popped or if we are dispatching an exception,
       // we don't have an oop result.
-      bool save_oop_result = chunk->at(0)->scope()->return_oop() && !thread->popframe_forcing_deopt_reexecution() && (exec_mode == Unpack_deopt);
-      Handle return_value;
+      ScopeDesc* scope = chunk->at(0)->scope();
+      bool save_oop_result = scope->return_oop() && !thread->popframe_forcing_deopt_reexecution() && (exec_mode == Unpack_deopt);
+      // When multiple values are returned, we must preserve all of
+      // the oop return values.
+      GrowableArray<Handle> return_oops;
+      ValueKlass* vk = NULL;
+      if (save_oop_result) {
+        if (scope->return_vt()) {
+          vk = ValueKlass::returned_value_type(map);
+          if (vk != NULL) {
+            bool success = vk->save_oop_results(map, return_oops);
+            assert(success, "found klass ptr being returned: saving oops can't fail");
+            save_oop_result = false;
+          } else {
+            vk = NULL;
+          }
+        }
+      }
       if (save_oop_result) {
         // Reallocation may trigger GC. If deoptimization happened on return from
         // call which returns oop we need to save it since it is not in oopmap.
         oop result = deoptee.saved_oop_result(&map);
         assert(result == NULL || result->is_oop(), "must be oop");
-        return_value = Handle(thread, result);
+        return_oops.push(Handle(thread, result));
         assert(Universe::heap()->is_in_or_null(result), "must be heap pointer");
         if (TraceDeoptimization) {
           ttyLocker ttyl;
           tty->print_cr("SAVED OOP RESULT " INTPTR_FORMAT " in thread " INTPTR_FORMAT, p2i(result), p2i(thread));
         }
       }
-      if (objects != NULL) {
+      if (objects != NULL || vk != NULL) {
         bool skip_internal = (cm != NULL) && !cm->is_compiled_by_jvmci();
         JRT_BLOCK
-          realloc_failures = realloc_objects(thread, &deoptee, objects, THREAD);
-          reassign_fields(&deoptee, &map, objects, realloc_failures, skip_internal, THREAD);
+          if (vk != NULL) {
+            realloc_failures = realloc_value_type_result(vk, map, return_oops, THREAD);
+          }
+          if (objects != NULL) {
+            realloc_failures = realloc_failures || realloc_objects(thread, &deoptee, objects, THREAD);
+            reassign_fields(&deoptee, &map, objects, realloc_failures, skip_internal, THREAD);
+          }
         JRT_END
 #ifndef PRODUCT
         if (TraceDeoptimization) {
@@ -241,9 +262,10 @@
         }
 #endif
       }
-      if (save_oop_result) {
+      if (save_oop_result || vk != NULL) {
         // Restore result.
-        deoptee.set_saved_oop_result(&map, return_value());
+        assert(return_oops.length() == 1, "no value type");
+        deoptee.set_saved_oop_result(&map, return_oops.pop()());
       }
 #ifndef INCLUDE_JVMCI
     }
@@ -852,6 +874,26 @@
   return failures;
 }
 
+// We're deoptimizing at the return of a call and the value type
+// fields are in registers. When we go back to the interpreter, it
+// expects a reference to a value type instance. Allocate and
+// initialize one from the register values here.
+bool Deoptimization::realloc_value_type_result(ValueKlass* vk, const RegisterMap& map, GrowableArray<Handle>& return_oops, TRAPS) {
+  VMRegPair* regs;
+  int nb_fields;
+  const GrowableArray<SigEntry>& sig_vk = vk->return_convention(regs, nb_fields);
+  regs++;
+  nb_fields--;
+  oop new_vt = vk->realloc_result(sig_vk, map, regs, return_oops, nb_fields, THREAD);
+  if (new_vt == NULL) {
+    CLEAR_PENDING_EXCEPTION;
+    THROW_OOP_(Universe::out_of_memory_error_realloc_objects(), true);
+  }
+  return_oops.clear();
+  return_oops.push(Handle(THREAD, new_vt));
+  return false;
+}
+
 // restore elements of an eliminated type array
 void Deoptimization::reassign_type_array_elements(frame* fr, RegisterMap* reg_map, ObjectValue* sv, typeArrayOop obj, BasicType type) {
   int index = 0;
--- a/src/share/vm/runtime/deoptimization.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/runtime/deoptimization.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -151,6 +151,7 @@
 
   // Support for restoring non-escaping objects
   static bool realloc_objects(JavaThread* thread, frame* fr, GrowableArray<ScopeValue*>* objects, TRAPS);
+  static bool realloc_value_type_result(ValueKlass* vk, const RegisterMap& map, GrowableArray<Handle>& return_oops, TRAPS);
   static void reassign_type_array_elements(frame* fr, RegisterMap* reg_map, ObjectValue* sv, typeArrayOop obj, BasicType type);
   static void reassign_object_array_elements(frame* fr, RegisterMap* reg_map, ObjectValue* sv, objArrayOop obj);
   static void reassign_value_array_elements(frame* fr, RegisterMap* reg_map, ObjectValue* sv, valueArrayOop obj, ValueArrayKlass* vak, TRAPS);
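
Note the regs++ / nb_fields-- dance in realloc_value_type_result above: the return convention reserves its first entry for the klass tag, so code that only consumes fields skips one slot. A standalone sketch of that layout -- the register names and the struct are invented for illustration, not the HotSpot VMRegPair API:

#include <cstdio>

struct RegSlot { const char* reg; };  // stand-in for VMRegPair

int main() {
  // Hypothetical convention for { int x; long y }: tag slot + field slots.
  RegSlot regs[] = { {"rax (klass tag)"}, {"rdx (x)"}, {"rcx (y)"} };
  int nb_fields = 3;
  RegSlot* field_regs = regs + 1;   // regs++   : skip the tag entry
  int fields = nb_fields - 1;       // nb_fields--
  for (int i = 0; i < fields; i++)
    printf("field %d lives in %s\n", i, field_regs[i].reg);
  return 0;
}
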
--- a/src/share/vm/runtime/globals.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/runtime/globals.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -4088,6 +4088,9 @@
   develop_pd(bool, ValueTypePassFieldsAsArgs,                               \
              "Pass each value type field as an argument at calls")          \
                                                                             \
+  develop_pd(bool, ValueTypeReturnedAsFields,                               \
+            "return fields instead of a value type reference")              \
+                                                                            \
   develop(bool, FullGCALotWithValueTypes, false,                            \
           "Force full GCs to stress test handling of value types")          \
 
--- a/src/share/vm/runtime/safepoint.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/runtime/safepoint.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -39,6 +39,7 @@
 #include "memory/universe.inline.hpp"
 #include "oops/oop.inline.hpp"
 #include "oops/symbol.hpp"
+#include "oops/valueKlass.hpp"
 #include "runtime/atomic.hpp"
 #include "runtime/compilationPolicy.hpp"
 #include "runtime/deoptimization.hpp"
@@ -1029,16 +1030,33 @@
   // return point does not mark the return value as an oop (if it is), so
   // it needs a handle here to be updated.
   if( nm->is_at_poll_return(real_return_addr) ) {
+    ResourceMark rm;
     // See if return type is an oop.
-    bool return_oop = nm->method()->is_returning_oop();
-    Handle return_value;
+    Method* method = nm->method();
+    bool return_oop = method->is_returning_oop();
+
+    GrowableArray<Handle> return_values;
+    ValueKlass* vk = NULL;
+    if (!return_oop && method->is_returning_vt()) {
+      // We're at a safepoint at the return of a method that returns
+      // multiple values. We must make sure we preserve the oop values
+      // across the safepoint.
+      vk = ValueKlass::returned_value_type(map);
+      assert(vk == NULL || vk == method->returned_value_type(thread()) ||
+             method->returned_value_type(thread()) == SystemDictionary::___Value_klass(), "Bad value klass");
+      if (vk != NULL && !vk->save_oop_results(map, return_values)) {
+        return_oop = true;
+        vk = NULL;
+      }
+    }
+
     if (return_oop) {
       // The oop result has been saved on the stack together with all
       // the other registers. In order to preserve it over GCs we need
       // to keep it in a handle.
       oop result = caller_fr.saved_oop_result(&map);
       assert(result == NULL || result->is_oop(), "must be oop");
-      return_value = Handle(thread(), result);
+      return_values.push(Handle(thread(), result));
       assert(Universe::heap()->is_in_or_null(result), "must be heap pointer");
     }
 
@@ -1047,7 +1065,10 @@
 
     // restore oop result, if any
     if (return_oop) {
-      caller_fr.set_saved_oop_result(&map, return_value());
+      assert(return_values.length() == 1, "only one return value");
+      caller_fr.set_saved_oop_result(&map, return_values.pop()());
+    } else if (vk != NULL) {
+      vk->restore_oop_results(map, return_values);
     }
   }
 
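
The save/restore pair above follows the standard HotSpot discipline for oops that live only in registers: copy them into handles (GC-visible roots) before anything that can trigger a GC, then write the possibly-moved oops back afterwards. A toy standalone model of that round trip -- the types and the pretend GC are invented for illustration:

#include <cstdio>
#include <vector>

using oop_t = long;                    // stand-in for an oop
static std::vector<oop_t*> g_handles;  // stand-in for the handle area

oop_t* make_handle(oop_t v) { g_handles.push_back(new oop_t(v)); return g_handles.back(); }
void   pretend_gc() { for (oop_t* h : g_handles) *h += 0x1000; }  // "moves" objects

int main() {
  oop_t reg = 0x42;             // oop sitting in a return register
  oop_t* h = make_handle(reg);  // save_oop_results: register -> handle
  pretend_gc();                 // safepoint: only handles get updated
  reg = *h;                     // restore_oop_results: handle -> register
  printf("register now holds %#lx\n", (unsigned long)reg);
  return 0;
}
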
--- a/src/share/vm/runtime/sharedRuntime.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/runtime/sharedRuntime.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -2644,61 +2644,6 @@
   return _adapters->new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, sig_extended);
 }
 
-// Value type arguments are not passed by reference, instead each
-// field of the value type is passed as an argument. This helper
-// function collects the fields of the value types (including embedded
-// value type's fields) in a list. Included with the field's type is
-// the offset of each field in the value type: i2c and c2i adapters
-// need that to load or store fields. Finally, the list of fields is
-// sorted in order of increasing offsets: the adapters and the
-// compiled code need and agreed upon order of fields.
-//
-// The list of basic type that is returned starts with a T_VALUETYPE
-// and ends with an extra T_VOID. T_VALUETYPE/T_VOID are used as
-// delimiters. Every entry between the two is a field of the value
-// type. If there's an embedded value type in the list, it also starts
-// with a T_VALUETYPE and ends with a T_VOID. This is so we can
-// generate a unique fingerprint for the method's adapters and we can
-// generate the list of basic types from the interpreter point of view
-// (value types passed as reference: iterate on the list until a
-// T_VALUETYPE, drop everything until and including the closing
-// T_VOID) or the compiler point of view (each field of the value
-// types is an argument: drop all T_VALUETYPE/T_VOID from the list).
-static GrowableArray<SigEntry> collect_fields(ValueKlass* vk, int base_off = 0) {
-  GrowableArray<SigEntry> sig_extended;
-  sig_extended.push(SigEntry(T_VALUETYPE, base_off));
-  for (JavaFieldStream fs(vk); !fs.done(); fs.next()) {
-    if (fs.access_flags().is_static())  continue;
-    fieldDescriptor& fd = fs.field_descriptor();
-    BasicType bt = fd.field_type();
-    int offset = base_off + fd.offset() - (base_off > 0 ? vk->first_field_offset() : 0);
-    if (bt == T_VALUETYPE) {
-      Symbol* signature = fd.signature();
-      JavaThread* THREAD = JavaThread::current();
-      oop loader = vk->class_loader();
-      oop protection_domain = vk->protection_domain();
-      Klass* klass = SystemDictionary::resolve_or_null(signature,
-                                                       Handle(THREAD, loader), Handle(THREAD, protection_domain),
-                                                       THREAD);
-      assert(klass != NULL && !HAS_PENDING_EXCEPTION, "lookup shouldn't fail");
-      const GrowableArray<SigEntry>& embedded = collect_fields(ValueKlass::cast(klass), offset);
-      sig_extended.appendAll(&embedded);
-    } else {
-      sig_extended.push(SigEntry(bt, offset));
-      if (bt == T_LONG || bt == T_DOUBLE) {
-        sig_extended.push(SigEntry(T_VOID, offset));
-      }
-    }
-  }
-  int offset = base_off + vk->size_helper()*HeapWordSize - (base_off > 0 ? vk->first_field_offset() : 0);
-  sig_extended.push(SigEntry(T_VOID, offset)); // hack: use T_VOID to mark end of value type fields
-  if (base_off == 0) {
-    sig_extended.sort(SigEntry::compare);
-  }
-  assert(sig_extended.at(0)._bt == T_VALUETYPE && sig_extended.at(sig_extended.length()-1)._bt == T_VOID, "broken structure");
-  return sig_extended;
-}
-
 AdapterHandlerEntry* AdapterHandlerLibrary::get_adapter(const methodHandle& method) {
   AdapterHandlerEntry* entry = get_adapter0(method);
   if (method->is_shared()) {
@@ -2765,7 +2710,7 @@
             // fields for the value type.
             sig_extended.push(SigEntry(T_OBJECT));
           } else {
-            const GrowableArray<SigEntry>& sig_vk = collect_fields(vk);
+            const GrowableArray<SigEntry>& sig_vk = vk->collect_fields();
             sig_extended.appendAll(&sig_vk);
           }
         } else {
@@ -2778,10 +2723,13 @@
                                  Handle(THREAD, holder->protection_domain()),
                                  SignatureStream::ReturnNull, THREAD);
           assert(k != NULL && !HAS_PENDING_EXCEPTION, "can resolve klass?");
-          assert(k != SystemDictionary::___Value_klass(), "unsupported");
           ValueKlass* vk = ValueKlass::cast(k);
-          const GrowableArray<SigEntry>& sig_vk = collect_fields(vk);
-          sig_extended.appendAll(&sig_vk);
+          if (vk == SystemDictionary::___Value_klass()) {
+            sig_extended.push(SigEntry(T_OBJECT));
+          } else {
+            const GrowableArray<SigEntry>& sig_vk = vk->collect_fields();
+            sig_extended.appendAll(&sig_vk);
+          }
         } else {
           sig_extended.push(SigEntry(ss.type()));
           if (ss.type() == T_LONG || ss.type() == T_DOUBLE) {
@@ -2791,29 +2739,9 @@
       }
     }
 
-    int values = 0;
-    if (ValueTypePassFieldsAsArgs) {
-      for (int i = 0; i < sig_extended.length(); i++) {
-        if (sig_extended.at(i)._bt == T_VALUETYPE) {
-          values++;
-        }
-      }
-    }
-    int total_args_passed_cc = sig_extended.length() - 2 * values;
+    int total_args_passed_cc = ValueTypePassFieldsAsArgs ? SigEntry::count_fields(sig_extended) : sig_extended.length();
     BasicType* sig_bt_cc = NEW_RESOURCE_ARRAY(BasicType, total_args_passed_cc);
-
-    int j = 0;
-    for (int i = 0; i < sig_extended.length(); i++) {
-      if (!ValueTypePassFieldsAsArgs) {
-        sig_bt_cc[j++] = sig_extended.at(i)._bt;
-      } else if (sig_extended.at(i)._bt != T_VALUETYPE &&
-                 (sig_extended.at(i)._bt != T_VOID ||
-                  sig_extended.at(i-1)._bt == T_LONG ||
-                  sig_extended.at(i-1)._bt == T_DOUBLE)) {
-        sig_bt_cc[j++] = sig_extended.at(i)._bt;
-      }
-    }
-    assert(j == total_args_passed_cc, "bad number of arguments");
+    SigEntry::fill_sig_bt(sig_extended, sig_bt_cc, total_args_passed_cc, ValueTypePassFieldsAsArgs);
 
     int total_args_passed_fp = sig_extended.length();
     BasicType* sig_bt_fp = NEW_RESOURCE_ARRAY(BasicType, total_args_passed_fp);
@@ -3409,3 +3337,170 @@
   }
 }
 JRT_END
+
+// We're returning from an interpreted method: load each field into a
+// register following the calling convention
+JRT_LEAF(void, SharedRuntime::load_value_type_fields_in_regs(JavaThread* thread, oopDesc* res))
+{
+  assert(res->klass()->is_value(), "only value types here");
+  ResourceMark rm;
+  RegisterMap reg_map(thread);
+  frame stubFrame = thread->last_frame();
+  frame callerFrame = stubFrame.sender(&reg_map);
+  assert(callerFrame.is_interpreted_frame(), "should be coming from interpreter");
+
+  ValueKlass* vk = ValueKlass::cast(res->klass());
+
+  VMRegPair* regs;
+  int nb_fields;
+  const GrowableArray<SigEntry>& sig_vk = vk->return_convention(regs, nb_fields);
+
+  if (regs == NULL) {
+    // The fields of the value klass don't fit in registers; bail out.
+    return;
+  }
+
+  int j = 1;
+  for (int i = 0; i < sig_vk.length(); i++) {
+    BasicType bt = sig_vk.at(i)._bt;
+    if (bt == T_VALUETYPE) {
+      continue;
+    }
+    if (bt == T_VOID) {
+      if (sig_vk.at(i-1)._bt == T_LONG ||
+          sig_vk.at(i-1)._bt == T_DOUBLE) {
+        j++;
+      }
+      continue;
+    }
+    int off = sig_vk.at(i)._offset;
+    VMRegPair pair = regs[j];
+    address loc = reg_map.location(pair.first());
+    switch(bt) {
+    case T_BOOLEAN:
+      *(intptr_t*)loc = *(jboolean*)((address)res + off);
+      break;
+    case T_CHAR:
+      *(intptr_t*)loc = *(jchar*)((address)res + off);
+      break;
+    case T_BYTE:
+      *(intptr_t*)loc = *(jbyte*)((address)res + off);
+      break;
+    case T_SHORT:
+      *(intptr_t*)loc = *(jshort*)((address)res + off);
+      break;
+    case T_INT: {
+      jint v = *(jint*)((address)res + off);
+      *(intptr_t*)loc = v;
+      break;
+    }
+    case T_LONG:
+#ifdef _LP64
+      *(intptr_t*)loc = *(jlong*)((address)res + off);
+#else
+      Unimplemented();
+#endif
+      break;
+    case T_OBJECT:
+    case T_ARRAY: {
+      oop v = NULL;
+      if (!UseCompressedOops) {
+        oop* p = (oop*)((address)res + off);
+        v = oopDesc::load_heap_oop(p);
+      } else {
+        narrowOop* p = (narrowOop*)((address)res + off);
+        v = oopDesc::load_decode_heap_oop(p);
+      }
+      *(oop*)loc = v;
+      break;
+    }
+    case T_FLOAT:
+      *(jfloat*)loc = *(jfloat*)((address)res + off);
+      break;
+    case T_DOUBLE:
+      *(jdouble*)loc = *(jdouble*)((address)res + off);
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+    j++;
+  }
+  assert(j == nb_fields, "missed a field?");
+
+#ifdef ASSERT
+  VMRegPair pair = regs[0];
+  address loc = reg_map.location(pair.first());
+  assert(*(oopDesc**)loc == res, "overwritten object");
+#endif
+
+  thread->set_vm_result(res);
+}
+JRT_END
+
+// We've returned to an interpreted method: the interpreter needs a
+// reference to a value type instance. Allocate one and initialize it
+// from the field values left in registers.
+JRT_BLOCK_ENTRY(void, SharedRuntime::store_value_type_fields_to_buf(JavaThread* thread, intptr_t res))
+{
+  ResourceMark rm;
+  RegisterMap reg_map(thread);
+  frame stubFrame = thread->last_frame();
+  frame callerFrame = stubFrame.sender(&reg_map);
+
+#ifdef ASSERT
+  ValueKlass* verif_vk = ValueKlass::returned_value_type(reg_map);
+  javaVFrame* vf = javaVFrame::cast(vframe::new_vframe(&callerFrame, &reg_map, thread));
+  Method* m = vf->method();
+  int bci = vf->bci();
+  Bytecode_invoke inv(m, bci);
+
+  {
+    NoSafepointVerifier nsv;
+    methodHandle callee = inv.static_target(thread);
+    assert(!thread->has_pending_exception(), "call resolution should work");
+    ValueKlass* verif_vk2 = callee->returned_value_type(thread);
+    assert(verif_vk == NULL || verif_vk == verif_vk2 ||
+           verif_vk2 == SystemDictionary::___Value_klass(), "Bad value klass");
+
+  }
+#endif
+
+  if (Universe::heap()->is_in_reserved((void*)res)) {
+    // We're not returning with value type fields in registers (the
+    // calling convention didn't allow it for this value klass)
+    thread->set_vm_result((oopDesc*)res);
+    assert(verif_vk == NULL, "broken calling convention");
+    return;
+  }
+
+  ValueKlass* vk = (ValueKlass*)res;
+  assert(verif_vk == vk, "broken calling convention");
+
+  VMRegPair* regs;
+  int nb_fields;
+  const GrowableArray<SigEntry>& sig_vk = vk->return_convention(regs, nb_fields);
+  assert(regs != NULL, "return convention should allow return as fields");
+
+  regs++;
+  nb_fields--;
+
+  // Allocate handles for every oop field so they stay safe in case a
+  // safepoint is reached during allocation
+  GrowableArray<Handle> handles;
+  vk->save_oop_fields(sig_vk, reg_map, regs, handles, nb_fields);
+
+  // From here on it is safe to safepoint: all oop fields are handlized
+
+  Handle new_vt;
+  JRT_BLOCK;
+  {
+    Thread* THREAD = thread;
+    oop vt = vk->realloc_result(sig_vk, reg_map, regs, handles, nb_fields, CHECK);
+    new_vt = Handle(thread, vt);
+  }
+  JRT_BLOCK_END;
+
+  thread->set_vm_result(new_vt());
+}
+JRT_END
+
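
store_value_type_fields_to_buf relies on a neat discrimination: the return register holds either a heap oop (the convention fell back to returning by reference) or a raw ValueKlass* (fields are in registers), and since metadata never lives inside the Java heap, Universe::heap()->is_in_reserved() tells the two apart. A standalone sketch of the idea, with invented heap bounds:

#include <cstdint>
#include <cstdio>

const uintptr_t HEAP_BASE = 0x100000000ULL;  // hypothetical reserved heap
const uintptr_t HEAP_END  = 0x200000000ULL;

bool in_reserved_heap(uintptr_t p) { return p >= HEAP_BASE && p < HEAP_END; }

void on_return(uintptr_t res) {
  if (in_reserved_heap(res))
    printf("%#lx: oop, value returned by reference\n", (unsigned long)res);
  else
    printf("%#lx: ValueKlass*, fields are in registers\n", (unsigned long)res);
}

int main() {
  on_return(0x140000000ULL);     // inside the heap range -> an oop
  on_return(0x7f0000001000ULL);  // outside it -> a metadata pointer
  return 0;
}
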
--- a/src/share/vm/runtime/sharedRuntime.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/runtime/sharedRuntime.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -37,48 +37,7 @@
 class AdapterHandlerTable;
 class AdapterFingerPrint;
 class vframeStream;
-
-// Used for adapter generation. One SigEntry is used per element of
-// the signature of the method. Value type arguments are treated
-// specially. See comment for collect_fields().
-class SigEntry VALUE_OBJ_CLASS_SPEC {
- public:
-  BasicType _bt;
-  int _offset;
-
-  SigEntry()
-    : _bt(T_ILLEGAL), _offset(-1) {
-  }
-  SigEntry(BasicType bt, int offset)
-    : _bt(bt), _offset(offset) {}
-
-  SigEntry(BasicType bt)
-    : _bt(bt), _offset(-1) {}
-
-  static int compare(SigEntry* e1, SigEntry* e2) {
-    if (e1->_offset != e2->_offset) {
-      return e1->_offset - e2->_offset;
-    }
-    assert((e1->_bt == T_LONG && (e2->_bt == T_LONG || e2->_bt == T_VOID)) ||
-           (e1->_bt == T_DOUBLE && (e2->_bt == T_DOUBLE || e2->_bt == T_VOID)) ||
-           e1->_bt == T_VALUETYPE || e2->_bt == T_VALUETYPE || e1->_bt == T_VOID || e2->_bt == T_VOID, "bad bt");
-    if (e1->_bt == e2->_bt) {
-      assert(e1->_bt == T_VALUETYPE || e1->_bt == T_VOID, "only ones with duplicate offsets");
-      return 0;
-    }
-    if (e1->_bt == T_VOID ||
-        e2->_bt == T_VALUETYPE) {
-      return 1;
-    }
-    if (e1->_bt == T_VALUETYPE ||
-        e2->_bt == T_VOID) {
-      return -1;
-    }
-    ShouldNotReachHere();
-    return 0;
-  }
-};
-
+class SigEntry;
 
 // Runtime is the base class for various runtime interfaces
 // (InterpreterRuntime, CompilerRuntime, etc.). It provides
@@ -123,7 +82,7 @@
   enum { POLL_AT_RETURN,  POLL_AT_LOOP, POLL_AT_VECTOR_LOOP };
   static SafepointBlob* generate_handler_blob(address call_ptr, int poll_type);
   static RuntimeStub*   generate_resolve_blob(address destination, const char* name);
-
+  static RuntimeStub*   generate_return_value_blob(address destination, const char* name);
  public:
   static void generate_stubs(void);
 
@@ -415,6 +374,9 @@
   // will be just above it. (
   // return value is the maximum number of VMReg stack slots the convention will use.
   static int java_calling_convention(const BasicType* sig_bt, VMRegPair* regs, int total_args_passed, int is_outgoing);
+  static int java_return_convention(const BasicType* sig_bt, VMRegPair* regs, int total_args_passed);
+  static const uint java_return_convention_max_int;
+  static const uint java_return_convention_max_float;
 
   static void check_member_name_argument_is_last_argument(const methodHandle& method,
                                                           const BasicType* sig_bt,
@@ -543,6 +505,9 @@
   static address resolve_virtual_call_C    (JavaThread *thread);
   static address resolve_opt_virtual_call_C(JavaThread *thread);
 
+  static void load_value_type_fields_in_regs(JavaThread *thread, oopDesc* res);
+  static void store_value_type_fields_to_buf(JavaThread *thread, intptr_t res);
+
   // arraycopy, the non-leaf version.  (See StubRoutines for all the leaf calls.)
   static void slow_arraycopy_C(oopDesc* src,  jint src_pos,
                                oopDesc* dest, jint dest_pos,
--- a/src/share/vm/runtime/signature.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/runtime/signature.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -533,3 +533,28 @@
       return false;
   }
 }
+
+int SigEntry::count_fields(const GrowableArray<SigEntry>& sig_extended) {
+  int values = 0;
+  for (int i = 0; i < sig_extended.length(); i++) {
+    if (sig_extended.at(i)._bt == T_VALUETYPE) {
+      values++;
+    }
+  }
+  return sig_extended.length() - 2 * values;
+}
+
+void SigEntry::fill_sig_bt(const GrowableArray<SigEntry>& sig_extended, BasicType* sig_bt_cc, int total_args_passed_cc, bool skip_vt) {
+  int j = 0;
+  for (int i = 0; i < sig_extended.length(); i++) {
+    if (!skip_vt) {
+      sig_bt_cc[j++] = sig_extended.at(i)._bt;
+    } else if (sig_extended.at(i)._bt != T_VALUETYPE &&
+               (sig_extended.at(i)._bt != T_VOID ||
+                sig_extended.at(i-1)._bt == T_LONG ||
+                sig_extended.at(i-1)._bt == T_DOUBLE)) {
+      sig_bt_cc[j++] = sig_extended.at(i)._bt;
+    }
+  }
+  assert(j == total_args_passed_cc, "bad number of arguments");
+}
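
For the encoding these helpers consume: every value type in the extended signature opens with a T_VALUETYPE and closes with a T_VOID, a long/double field is followed by a T_VOID half, and count_fields/fill_sig_bt strip exactly the two delimiters per value type. A standalone worked example (simplified BasicType set, offsets omitted):

#include <cassert>
#include <cstdio>
#include <vector>

enum BT { T_INT, T_LONG, T_DOUBLE, T_VALUETYPE, T_VOID };

// Mirrors SigEntry::count_fields: drop two delimiter entries per value type.
static int count_fields(const std::vector<BT>& sig) {
  int values = 0;
  for (BT bt : sig) if (bt == T_VALUETYPE) values++;
  return (int)sig.size() - 2 * values;
}

// Mirrors SigEntry::fill_sig_bt with skip_vt = true: drop the delimiters but
// keep a T_VOID that is the second half of a preceding long/double.
static std::vector<BT> flatten(const std::vector<BT>& sig) {
  std::vector<BT> out;
  for (size_t i = 0; i < sig.size(); i++) {
    if (sig[i] != T_VALUETYPE &&
        (sig[i] != T_VOID || sig[i-1] == T_LONG || sig[i-1] == T_DOUBLE)) {
      out.push_back(sig[i]);
    }
  }
  return out;
}

int main() {
  // Method taking (int, VT{ long }): T_VALUETYPE opens the value type, the
  // long is followed by its T_VOID half, and a final T_VOID closes the VT.
  std::vector<BT> sig = { T_INT, T_VALUETYPE, T_LONG, T_VOID, T_VOID };
  std::vector<BT> cc = flatten(sig);            // { T_INT, T_LONG, T_VOID }
  assert((int)cc.size() == count_fields(sig));  // both say 3 slots
  printf("compiled calling convention uses %zu slots\n", cc.size());
  return 0;
}
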
--- a/src/share/vm/runtime/signature.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/runtime/signature.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -436,4 +436,47 @@
     static bool invalid_name_char(char);
 };
 
+// Used for adapter generation. One SigEntry is used per element of
+// the signature of the method. Value type arguments are treated
+// specially. See comment for ValueKlass::collect_fields().
+class SigEntry VALUE_OBJ_CLASS_SPEC {
+ public:
+  BasicType _bt;
+  int _offset;
+    
+  SigEntry()
+    : _bt(T_ILLEGAL), _offset(-1) {
+  }
+  SigEntry(BasicType bt, int offset)
+    : _bt(bt), _offset(offset) {}
+
+  SigEntry(BasicType bt)
+    : _bt(bt), _offset(-1) {}
+
+  static int compare(SigEntry* e1, SigEntry* e2) {
+    if (e1->_offset != e2->_offset) {
+      return e1->_offset - e2->_offset;
+    }
+    assert((e1->_bt == T_LONG && (e2->_bt == T_LONG || e2->_bt == T_VOID)) ||
+           (e1->_bt == T_DOUBLE && (e2->_bt == T_DOUBLE || e2->_bt == T_VOID)) ||
+           e1->_bt == T_VALUETYPE || e2->_bt == T_VALUETYPE || e1->_bt == T_VOID || e2->_bt == T_VOID, "bad bt");
+    if (e1->_bt == e2->_bt) {
+      assert(e1->_bt == T_VALUETYPE || e1->_bt == T_VOID, "only ones with duplicate offsets");
+      return 0;
+    }
+    if (e1->_bt == T_VOID ||
+        e2->_bt == T_VALUETYPE) {
+      return 1;
+    }
+    if (e1->_bt == T_VALUETYPE ||
+        e2->_bt == T_VOID) {
+      return -1;
+    }
+    ShouldNotReachHere();
+    return 0;
+  }
+  static int count_fields(const GrowableArray<SigEntry>& sig_extended);
+  static void fill_sig_bt(const GrowableArray<SigEntry>& sig_extended, BasicType* sig_bt_cc, int total_args_passed_cc, bool skip_vt);
+};
+
 #endif // SHARE_VM_RUNTIME_SIGNATURE_HPP
--- a/src/share/vm/runtime/stubRoutines.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/runtime/stubRoutines.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -173,6 +173,9 @@
 address StubRoutines::_safefetchN_fault_pc               = NULL;
 address StubRoutines::_safefetchN_continuation_pc        = NULL;
 
+address StubRoutines::_load_value_type_fields_in_regs = NULL;
+address StubRoutines::_store_value_type_fields_to_buf = NULL;
+
 // Initialization
 //
 // Note: to break cycle with universe initialization, stubs are generated in two phases.
--- a/src/share/vm/runtime/stubRoutines.hpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/runtime/stubRoutines.hpp	Thu Jun 08 18:25:59 2017 +0200
@@ -228,6 +228,9 @@
   static address _safefetchN_fault_pc;
   static address _safefetchN_continuation_pc;
 
+  static address _load_value_type_fields_in_regs;
+  static address _store_value_type_fields_to_buf;
+
  public:
   // Initialization/Testing
   static void    initialize1();                            // must happen before universe::genesis
@@ -455,6 +458,9 @@
   static void arrayof_jlong_copy     (HeapWord* src, HeapWord* dest, size_t count);
   static void arrayof_oop_copy       (HeapWord* src, HeapWord* dest, size_t count);
   static void arrayof_oop_copy_uninit(HeapWord* src, HeapWord* dest, size_t count);
+
+  static address load_value_type_fields_in_regs() { return _load_value_type_fields_in_regs; }
+  static address store_value_type_fields_to_buf() { return _store_value_type_fields_to_buf; }
 };
 
 // Safefetch allows to load a value from a location that's not known
--- a/src/share/vm/shark/sharkCacheDecache.cpp	Thu Jun 01 17:13:31 2017 +0200
+++ b/src/share/vm/shark/sharkCacheDecache.cpp	Thu Jun 08 18:25:59 2017 +0200
@@ -159,6 +159,7 @@
     true,
     false,
     false,
+    false,
     debug_info()->create_scope_values(locarray()),
     debug_info()->create_scope_values(exparray()),
     debug_info()->create_monitor_values(monarray()));
--- a/test/compiler/valhalla/valuetypes/ValueTypeTestBench.java	Thu Jun 01 17:13:31 2017 +0200
+++ b/test/compiler/valhalla/valuetypes/ValueTypeTestBench.java	Thu Jun 08 18:25:59 2017 +0200
@@ -33,16 +33,16 @@
  * @run main ClassFileInstaller jdk.test.lib.Platform
  * @run main/othervm -Xbootclasspath/a:. -ea -noverify -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+UnlockExperimentalVMOptions -XX:+WhiteBoxAPI -XX:-TieredCompilation -XX:+VerifyAdapterSharing
- *                   -XX:+EnableValhalla -XX:+EnableMVT -XX:+ValueTypePassFieldsAsArgs -XX:+ValueArrayFlatten
+ *                   -XX:+EnableValhalla -XX:+EnableMVT -XX:+ValueTypePassFieldsAsArgs -XX:+ValueTypeReturnedAsFields -XX:+ValueArrayFlatten
  *                   -XX:ValueArrayElemMaxFlatSize=-1 -XX:ValueArrayElemMaxFlatOops=-1 -XX:+FullGCALotWithValueTypes
  *                   compiler.valhalla.valuetypes.ValueTypeTestBench
  * @run main/othervm -Xbootclasspath/a:. -ea -noverify -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+UnlockExperimentalVMOptions -XX:+WhiteBoxAPI -XX:-TieredCompilation
- *                   -XX:+EnableValhalla -XX:+EnableMVT -XX:-ValueTypePassFieldsAsArgs -XX:-ValueArrayFlatten
+ *                   -XX:+EnableValhalla -XX:+EnableMVT -XX:-ValueTypePassFieldsAsArgs -XX:-ValueTypeReturnedAsFields -XX:-ValueArrayFlatten
  *                   compiler.valhalla.valuetypes.ValueTypeTestBench
  * @run main/othervm -Xbootclasspath/a:. -ea -noverify -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+UnlockExperimentalVMOptions -XX:+WhiteBoxAPI -XX:-TieredCompilation -XX:+AlwaysIncrementalInline
- *                   -XX:+EnableValhalla -XX:+EnableMVT -XX:+ValueTypePassFieldsAsArgs -XX:+ValueArrayFlatten
+ *                   -XX:+EnableValhalla -XX:+EnableMVT -XX:+ValueTypePassFieldsAsArgs -XX:+ValueTypeReturnedAsFields -XX:+ValueArrayFlatten
  *                   -XX:ValueArrayElemMaxFlatSize=-1 -XX:ValueArrayElemMaxFlatOops=-1
  *                   compiler.valhalla.valuetypes.ValueTypeTestBench
  */
@@ -67,7 +67,9 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Hashtable;
+import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import jdk.experimental.value.*;
@@ -301,6 +303,248 @@
     }
 }
 
+// Value type definition to stress test returning a value in registers
+// (uses all registers of the calling convention on x86_64)
+__ByValue final class MyValue3 {
+    final char c;
+    final byte bb;
+    final short s;
+    final int i;
+    final long l;
+    final Object o;
+    final float f1;
+    final double f2;
+    final float f3;
+    final double f4;
+    final float f5;
+    final double f6;
+    final float f7;
+    final double f8;
+
+    private MyValue3(char c,
+                     byte bb,
+                     short s,
+                     int i,
+                     long l,
+                     Object o,
+                     float f1,
+                     double f2,
+                     float f3,
+                     double f4,
+                     float f5,
+                     double f6,
+                     float f7,
+                     double f8) {
+        this.c = c;
+        this.bb = bb;
+        this.s = s;
+        this.i = i;
+        this.l = l;
+        this.o = o;
+        this.f1 = f1;
+        this.f2 = f2;
+        this.f3 = f3;
+        this.f4 = f4;
+        this.f5 = f5;
+        this.f6 = f6;
+        this.f7 = f7;
+        this.f8 = f8;
+    }
+
+    private MyValue3() {
+        this.c = 0;
+        this.bb = 0;
+        this.s = 0;
+        this.i = 0;
+        this.l = 0;
+        this.o = null;
+        this.f1 = 0;
+        this.f2 = 0;
+        this.f3 = 0;
+        this.f4 = 0;
+        this.f5 = 0;
+        this.f6 = 0;
+        this.f7 = 0;
+        this.f8 = 0;
+    }
+
+    @ForceInline
+    __ValueFactory static MyValue3 setC(MyValue3 v, char c) {
+        v.c = c;
+        return v;
+    }
+
+    @ForceInline
+    __ValueFactory static MyValue3 setBB(MyValue3 v, byte bb) {
+        v.bb = bb;
+        return v;
+    }
+
+    @ForceInline
+    __ValueFactory static MyValue3 setS(MyValue3 v, short s) {
+        v.s = s;
+        return v;
+    }
+
+    @ForceInline
+    __ValueFactory static MyValue3 setI(MyValue3 v, int i) {
+        v.i = i;
+        return v;
+    }
+
+    @ForceInline
+    __ValueFactory static MyValue3 setL(MyValue3 v, long l) {
+        v.l = l;
+        return v;
+    }
+
+    @ForceInline
+    __ValueFactory static MyValue3 setO(MyValue3 v, Object o) {
+        v.o = o;
+        return v;
+    }
+
+    @ForceInline
+    __ValueFactory static MyValue3 setF1(MyValue3 v, float f1) {
+        v.f1 = f1;
+        return v;
+    }
+
+    @ForceInline
+    __ValueFactory static MyValue3 setF2(MyValue3 v, double f2) {
+        v.f2 = f2;
+        return v;
+    }
+
+    @ForceInline
+    __ValueFactory static MyValue3 setF3(MyValue3 v, float f3) {
+        v.f3 = f3;
+        return v;
+    }
+
+    @ForceInline
+    __ValueFactory static MyValue3 setF4(MyValue3 v, double f4) {
+        v.f4 = f4;
+        return v;
+    }
+
+    @ForceInline
+    __ValueFactory static MyValue3 setF5(MyValue3 v, float f5) {
+        v.f5 = f5;
+        return v;
+    }
+
+    @ForceInline
+    __ValueFactory static MyValue3 setF6(MyValue3 v, double f6) {
+        v.f6 = f6;
+        return v;
+    }
+
+    @ForceInline
+    __ValueFactory static MyValue3 setF7(MyValue3 v, float f7) {
+        v.f7 = f7;
+        return v;
+    }
+
+    @ForceInline
+    __ValueFactory static MyValue3 setF8(MyValue3 v, double f8) {
+        v.f8 = f8;
+        return v;
+    }
+
+    @ForceInline
+    __ValueFactory public static MyValue3 createDefault() {
+        return __MakeDefault MyValue3();
+    }
+
+    @ForceInline
+    public static MyValue3 create() {
+        java.util.Random r = Utils.getRandomInstance();
+        MyValue3 v = createDefault();
+        v = setC(v, (char)r.nextInt());
+        v = setBB(v, (byte)r.nextInt());
+        v = setS(v, (short)r.nextInt());
+        v = setI(v, r.nextInt());
+        v = setL(v, r.nextLong());
+        v = setO(v, new Object());
+        v = setF1(v, r.nextFloat());
+        v = setF2(v, r.nextDouble());
+        v = setF3(v, r.nextFloat());
+        v = setF4(v, r.nextDouble());
+        v = setF5(v, r.nextFloat());
+        v = setF6(v, r.nextDouble());
+        v = setF7(v, r.nextFloat());
+        v = setF8(v, r.nextDouble());
+        return v;
+    }
+
+    public void verify(MyValue3 other) {
+        Asserts.assertEQ(c, other.c);
+        Asserts.assertEQ(bb, other.bb);
+        Asserts.assertEQ(s, other.s);
+        Asserts.assertEQ(i, other.i);
+        Asserts.assertEQ(l, other.l);
+        Asserts.assertEQ(o, other.o);
+        Asserts.assertEQ(f1, other.f1);
+        Asserts.assertEQ(f2, other.f2);
+        Asserts.assertEQ(f3, other.f3);
+        Asserts.assertEQ(f4, other.f4);
+        Asserts.assertEQ(f5, other.f5);
+        Asserts.assertEQ(f6, other.f6);
+        Asserts.assertEQ(f7, other.f7);
+        Asserts.assertEQ(f8, other.f8);
+    }
+}
+
+// Value type definition with too many fields to return in registers
+__ByValue final class MyValue4 {
+    final MyValue3 v1;
+    final MyValue3 v2;
+
+    private MyValue4(MyValue3 v1, MyValue3 v2) {
+        this.v1 = v1;
+        this.v2 = v2;
+    }
+
+    private MyValue4() {
+        this.v1 = MyValue3.createDefault();
+        this.v2 = MyValue3.createDefault();
+    }
+
+    @ForceInline
+    __ValueFactory static MyValue4 setV1(MyValue4 v, MyValue3 v1) {
+        v.v1 = v1;
+        return v;
+    }
+
+    @ForceInline
+    __ValueFactory static MyValue4 setV2(MyValue4 v, MyValue3 v2) {
+        v.v2 = v2;
+        return v;
+    }
+
+    @ForceInline
+    __ValueFactory public static MyValue4 createDefault() {
+        return __MakeDefault MyValue4();
+    }
+
+    @ForceInline
+    public static MyValue4 create() {
+        MyValue4 v = createDefault();
+        MyValue3 v1 = MyValue3.create();
+        v = setV1(v, v1);
+        MyValue3 v2 = MyValue3.create();
+        v = setV2(v, v2);
+        return v;
+    }
+
+    public void verify(MyValue4 other) {
+        v1.verify(other.v1);
+        v2.verify(other.v2);
+    }
+}
+
+
 public class ValueTypeTestBench {
     // Print ideal graph after execution of each test
     private static final boolean PRINT_GRAPH = true;
@@ -1505,8 +1749,7 @@
      * unboxed in nullvccUnboxLoadLong is always null. Therefore, the
      * compiler generates only the path leading to the corresponding
      * uncommon trap. */
-    @Test(valid = AlwaysIncrementalInlineOff, failOn = RETURN)
-    @Test(valid = AlwaysIncrementalInlineOn, match = {LINKTOSTATIC}, matchCount= {1})
+    @Test(failOn = RETURN)
     public long test61() throws Throwable {
         return (long)nullvccUnboxLoadLongMH.invokeExact();
     }
@@ -1538,8 +1781,7 @@
      * ValueCapableClass1 instance is never null (and therefore not
      * generate a null check). Also, the source and target type match
      * (known at compile time), so no type check is needed either.*/
-    @Test(valid = AlwaysIncrementalInlineOff, failOn = NPE)
-    @Test(valid = AlwaysIncrementalInlineOn, match = {LINKTOSTATIC}, matchCount= {1})
+    @Test(failOn = NPE)
     public long test62() throws Throwable {
         return (long)checkedvccUnboxLoadLongMH.invokeExact();
     }
@@ -1568,8 +1810,7 @@
      * it does not have enough information to determine that the value
      * to be unboxed is not null (or that it is null). The
      * declared type of the */
-    @Test(valid = AlwaysIncrementalInlineOff, match = {NPE}, matchCount = {1})
-    @Test(valid = AlwaysIncrementalInlineOn, match = {LINKTOSTATIC}, matchCount= {1})
+    @Test(match = {NPE}, matchCount = {1})
     public long test63(ValueCapableClass1 vcc) throws Throwable {
         return (long)vccUnboxLoadLongMH.invokeExact(vcc);
     }
@@ -1586,8 +1827,7 @@
     /* Attempt to unbox an object that is not a subclass of the
      * value-capable class derived from the value type specified in
      * the vunbox bytecode. */
-    @Test(valid = AlwaysIncrementalInlineOff, match = {NPE,CCE}, matchCount = {1,1})
-    @Test(valid = AlwaysIncrementalInlineOn, match = {LINKTOSTATIC}, matchCount= {1})
+    @Test(match = {NPE,CCE}, matchCount = {1,1})
     public long test64(Object vcc) throws Throwable {
         return (long)objectUnboxLoadLongMH.invokeExact(vcc);
     }
@@ -1626,8 +1866,7 @@
     /* Generate an if-then-else construct with one path that contains
      * an invalid boxing operation (boxing of a value-type to a
      * non-matching value-capable class).*/
-    @Test(valid = AlwaysIncrementalInlineOff, match = {NPE,CCE}, matchCount = {2,3})
-    @Test(valid = AlwaysIncrementalInlineOn, match = {LINKTOSTATIC}, matchCount= {1})
+    @Test(match = {NPE,CCE}, matchCount = {2,3})
     public long test65(Object obj, boolean warmup) throws Throwable {
         return (long)objectBoxMH.invokeExact(obj, warmup);
     }
@@ -1678,6 +1917,250 @@
                 );
     }
 
+    // Test deoptimization at call return with return value in registers
+    @DontCompile
+    public MyValue2 test66_interp(boolean deopt) {
+        if (deopt) {
+            // uncommon trap
+            WHITE_BOX.deoptimizeMethod(tests.get("ValueTypeTestBench::test66"));
+        }
+        return MyValue2.createWithFieldsInline(rI, true);
+    }
+
+    @Test()
+    public MyValue2 test66(boolean flag) {
+        return test66_interp(flag);
+    }
+
+    @DontCompile
+    public void test66_verifier(boolean warmup) {
+        MyValue2 result = test66(!warmup);
+        MyValue2 v = MyValue2.createWithFieldsInline(rI, true);
+        Asserts.assertEQ(result.hash(), v.hash());
+    }
+
+    // Return value types in registers from interpreter -> compiled
+    final MyValue3 test67_vt = MyValue3.create();
+    @DontCompile
+    public MyValue3 test67_interp() {
+        return test67_vt;
+    }
+
+    MyValue3 test67_vt2;
+    @Test(valid = ValueTypeReturnedAsFieldsOn, failOn = ALLOC + LOAD + TRAP)
+    @Test(valid = ValueTypeReturnedAsFieldsOff)
+    public void test67() {
+        test67_vt2 = test67_interp();
+    }
+
+    @DontCompile
+    public void test67_verifier(boolean warmup) {
+        test67();
+        test67_vt.verify(test67_vt2);
+    }
+
+    // Return value types in registers from compiled -> interpreter
+    final MyValue3 test68_vt = MyValue3.create();
+    @Test(valid = ValueTypeReturnedAsFieldsOn, failOn = ALLOC + STORE + TRAP)
+    @Test(valid = ValueTypeReturnedAsFieldsOff)
+    public MyValue3 test68() {
+        return test68_vt;
+    }
+
+    @DontCompile
+    public void test68_verifier(boolean warmup) {
+        MyValue3 vt = test68();
+        test68_vt.verify(vt);
+    }
+
+    // Return value types in registers from compiled -> compiled
+    final MyValue3 test69_vt = MyValue3.create();
+    @DontInline
+    public MyValue3 test69_comp() {
+        return test69_vt;
+    }
+
+    MyValue3 test69_vt2;
+    @Test(valid = ValueTypeReturnedAsFieldsOn, failOn = ALLOC + LOAD + TRAP)
+    @Test(valid = ValueTypeReturnedAsFieldsOff)
+    public void test69() {
+        test69_vt2 = test69_comp();
+    }
+
+    @DontCompile
+    public void test69_verifier(boolean warmup) throws Exception {
+        Method helper_m = getClass().getDeclaredMethod("test69_comp");
+        if (!warmup && USE_COMPILER && !WHITE_BOX.isMethodCompiled(helper_m, false)) {
+            WHITE_BOX.enqueueMethodForCompilation(helper_m, COMP_LEVEL_FULL_OPTIMIZATION);
+            Asserts.assertTrue(WHITE_BOX.isMethodCompiled(helper_m, false), "test69_comp not compiled");
+        }
+        test69();
+        test69_vt.verify(test69_vt2);
+    }
+
+    // Same tests as above but with a value type that cannot be returned in registers
+
+    // Return value types in registers from interpreter -> compiled
+    final MyValue4 test70_vt = MyValue4.create();
+    @DontCompile
+    public MyValue4 test70_interp() {
+        return test70_vt;
+    }
+
+    MyValue4 test70_vt2;
+    @Test
+    public void test70() {
+        test70_vt2 = test70_interp();
+    }
+
+    @DontCompile
+    public void test70_verifier(boolean warmup) {
+        test70();
+        test70_vt.verify(test70_vt2);
+    }
+
+    // Return value types in registers from compiled -> interpreter
+    final MyValue4 test71_vt = MyValue4.create();
+    @Test
+    public MyValue4 test71() {
+        return test71_vt;
+    }
+
+    @DontCompile
+    public void test71_verifier(boolean warmup) {
+        MyValue4 vt = test71();
+        test71_vt.verify(vt);
+    }
+
+    // Return value types in registers from compiled -> compiled
+    final MyValue4 test72_vt = MyValue4.create();
+    @DontInline
+    public MyValue4 test72_comp() {
+        return test72_vt;
+    }
+
+    MyValue4 test72_vt2;
+    @Test
+    public void test72() {
+        test72_vt2 = test72_comp();
+    }
+
+    @DontCompile
+    public void test72_verifier(boolean warmup) throws Exception {
+        Method helper_m = getClass().getDeclaredMethod("test72_comp");
+        if (!warmup && USE_COMPILER && !WHITE_BOX.isMethodCompiled(helper_m, false)) {
+            WHITE_BOX.enqueueMethodForCompilation(helper_m, COMP_LEVEL_FULL_OPTIMIZATION);
+            Asserts.assertTrue(WHITE_BOX.isMethodCompiled(helper_m, false), "test72_comp not compiled");
+        }
+        test72();
+        test72_vt.verify(test72_vt2);
+    }
+
+    // Return values and method handles tests
+
+    // Everything inlined
+    final MyValue3 test73_vt = MyValue3.create();
+    @ForceInline
+    MyValue3 test73_target() {
+        return test73_vt;
+    }
+
+    static final MethodHandle test73_mh;
+
+    @Test(valid = ValueTypeReturnedAsFieldsOn, failOn = ALLOC + STORE + CALL)
+    @Test(valid = ValueTypeReturnedAsFieldsOff, match = { ALLOC, STORE }, matchCount = { 1, 11 })
+    MyValue3 test73() throws Throwable {
+        return (MyValue3)test73_mh.invokeExact(this);
+    }
+
+    @DontCompile
+    public void test73_verifier(boolean warmup) throws Throwable {
+        MyValue3 vt = test73();
+        test73_vt.verify(vt);
+    }
+
+    // Leaf method not inlined but returned type is known
+    final MyValue3 test74_vt = MyValue3.create();
+    @DontInline
+    MyValue3 test74_target() {
+        return test74_vt;
+    }
+
+    static final MethodHandle test74_mh;
+
+    @Test
+    MyValue3 test74() throws Throwable {
+        return (MyValue3)test74_mh.invokeExact(this);
+    }
+
+    @DontCompile
+    public void test74_verifier(boolean warmup) throws Throwable {
+        Method helper_m = getClass().getDeclaredMethod("test74_target");
+        if (!warmup && USE_COMPILER && !WHITE_BOX.isMethodCompiled(helper_m, false)) {
+            WHITE_BOX.enqueueMethodForCompilation(helper_m, COMP_LEVEL_FULL_OPTIMIZATION);
+            Asserts.assertTrue(WHITE_BOX.isMethodCompiled(helper_m, false), "test74_target not compiled");
+        }
+        MyValue3 vt = test74();
+        test74_vt.verify(vt);
+    }
+
+    // Leaf method not inlined and returned type not known
+    final MyValue3 test75_vt = MyValue3.create();
+    @DontInline
+    MyValue3 test75_target() {
+        return test75_vt;
+    }
+
+    static final MethodHandle test75_mh;
+
+    @Test
+    MyValue3 test75() throws Throwable {
+        return (MyValue3)test75_mh.invokeExact(this);
+    }
+
+    @DontCompile
+    public void test75_verifier(boolean warmup) throws Throwable {
+        // hack so C2 doesn't know the target of the invoke call
+        Class c = Class.forName("java.lang.invoke.DirectMethodHandle");
+        Method m = c.getDeclaredMethod("internalMemberName", Object.class);
+        WHITE_BOX.testSetDontInlineMethod(m, warmup);
+        MyValue3 vt = test75();
+        test75_vt.verify(vt);
+    }
+
+    // Test no result from inlined method for incremental inlining
+    final MyValue3 test76_vt = MyValue3.create();
+    public MyValue3 test76_inlined() {
+        throw new RuntimeException();
+    }
+
+    @Test
+    public MyValue3 test76() {
+        try {
+            return test76_inlined();
+        } catch (RuntimeException ex) {
+            return test76_vt;
+        }
+    }
+
+    @DontCompile
+    public void test76_verifier(boolean warmup) throws Exception {
+        MyValue3 vt = test76();
+        test76_vt.verify(vt);
+    }
+
+    static {
+        try {
+            MethodHandles.Lookup lookup = MethodHandles.lookup();
+            MethodType mt = MethodType.fromMethodDescriptorString("()Qcompiler/valhalla/valuetypes/MyValue3;", ValueTypeTestBench.class.getClassLoader());
+            test73_mh = lookup.findVirtual(ValueTypeTestBench.class, "test73_target", mt);
+            test74_mh = lookup.findVirtual(ValueTypeTestBench.class, "test74_target", mt);
+            test75_mh = lookup.findVirtual(ValueTypeTestBench.class, "test75_target", mt);
+        } catch (NoSuchMethodException|IllegalAccessException e) {
+            throw new RuntimeException("method handle lookup failed");
+        }
+    }
+
     // ========== Test infrastructure ==========
 
     private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox();
@@ -1685,12 +2168,12 @@
     private static final int ValueTypePassFieldsAsArgsOff = 0x2;
     private static final int ValueTypeArrayFlattenOn = 0x4;
     private static final int ValueTypeArrayFlattenOff = 0x8;
-    private static final int AlwaysIncrementalInlineOff = 0x10;
-    private static final int AlwaysIncrementalInlineOn = 0x20;
-    static final int AllFlags = ValueTypePassFieldsAsArgsOn | ValueTypePassFieldsAsArgsOff | ValueTypeArrayFlattenOn | ValueTypeArrayFlattenOff | AlwaysIncrementalInlineOff | AlwaysIncrementalInlineOn;
+    private static final int ValueTypeReturnedAsFieldsOn = 0x10;
+    private static final int ValueTypeReturnedAsFieldsOff = 0x20;
+    static final int AllFlags = ValueTypePassFieldsAsArgsOn | ValueTypePassFieldsAsArgsOff | ValueTypeArrayFlattenOn | ValueTypeArrayFlattenOff | ValueTypeReturnedAsFieldsOn;
     private static final boolean ValueTypePassFieldsAsArgs = (Boolean)WHITE_BOX.getVMFlag("ValueTypePassFieldsAsArgs");
     private static final boolean ValueTypeArrayFlatten = (Boolean)WHITE_BOX.getVMFlag("ValueArrayFlatten");
-    private static final boolean AlwaysIncrementalInline = (Boolean)WHITE_BOX.getVMFlag("AlwaysIncrementalInline");
+    private static final boolean ValueTypeReturnedAsFields = (Boolean)WHITE_BOX.getVMFlag("ValueTypeReturnedAsFields");
     private static final int COMP_LEVEL_ANY = -1;
     private static final int COMP_LEVEL_FULL_OPTIMIZATION = 4;
     private static final Hashtable<String, Method> tests = new Hashtable<String, Method>();
@@ -1715,6 +2198,7 @@
     private static final String LINKTOSTATIC = START + "CallStaticJava" + MID + "linkToStatic" + END;
     private static final String NPE = START + "CallStaticJava" + MID + "null_check" + END;
     private static final String CCE = START + "CallStaticJava" + MID + "class_check" + END;
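+    // Matches any CallStaticJava node, with no constraint on the callee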
+    private static final String CALL = START + "CallStaticJava" + MID + END;
     private static final String SCOBJ = "(.*# ScObj.*" + END;
 
     static {
@@ -1751,15 +2235,17 @@
     }
 
     public static void main(String[] args) throws Throwable {
-        //tests.values().removeIf(p -> !p.getName().equals("test64")); // Run single test
+        //tests.values().removeIf(p -> !p.getName().equals("test74")); // Run single test
         if (args.length == 0) {
             execute_vm("-XX:+IgnoreUnrecognizedVMOptions", "-XX:-BackgroundCompilation",
                     "-XX:+PrintCompilation", "-XX:+PrintInlining", "-XX:+PrintIdeal", "-XX:+PrintOptoAssembly",
+                    "-XX:CICompilerCount=1",
                     "-XX:CompileCommand=quiet", "-XX:CompileCommand=compileonly,compiler.valhalla.valuetypes.ValueTypeTestBench::*",
                     "-XX:CompileCommand=compileonly,compiler.valhalla.valuetypes.MyValue1::*",
                     "-XX:CompileCommand=compileonly,compiler.valhalla.valuetypes.MyValue2::*",
                     "-XX:CompileCommand=compileonly,java.lang.Object::<init>",
-                    "-XX:CompileCommand=inline,java.lang.__Value::hashCode");
+                    "-XX:CompileCommand=inline,java.lang.__Value::hashCode",
+                    "-XX:CompileCommand=compileonly,java.lang.invoke.*::*");
         } else {
             // Execute tests
             ValueTypeTestBench bench = new ValueTypeTestBench();
@@ -1768,24 +2254,41 @@
     }
 
     public static void parseOutput(String output) throws Exception {
-        String split = "b        compiler.valhalla.valuetypes.";
-        String[] compilations = output.split(split);
-        // Print header
-        System.out.println(compilations[0]);
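+        // PrintCompilation lines look like:
+        //   "  123   45    b        compiler.valhalla.valuetypes.ValueTypeTestBench::test74 (10 bytes)"
+        // with optional %/s/!/n attribute columns and an "@ bci" marker for OSR compilations.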
+        Pattern comp_re = Pattern.compile("\\n\\s+\\d+\\s+\\d+\\s+(%| )(s| )(!| )b(n| )\\s+\\S+\\.(?<name>[^.]+::\\S+)\\s+(?<osr>@ \\d+\\s+)?[(]\\d+ bytes[)]\\n");
+        Matcher m = comp_re.matcher(output);
+        Map<String,String> compilations = new LinkedHashMap<>();
+        int prev = 0;
+        String methodName = null;
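+        // Each PrintCompilation header starts a new segment; the text up to the
+        // next header is the graph output for that method's compilation.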
+        while (m.find()) {
+            if (prev == 0) {
+                // Print header
+                System.out.print(output.substring(0, m.start()+1));
+            } else if (methodName != null) {
+                compilations.put(methodName, output.substring(prev, m.start()+1));
+            }
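+            // Skip OSR compilations ("@ bci"); only standard compilations are matched against the test graphs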
+            if (m.group("osr") != null) {
+                methodName = null;
+            } else {
+                methodName = m.group("name");
+            }
+            prev = m.end();
+        }
+        if (prev == 0) {
+            // Print header
+            System.out.print(output);
+        } else if (methodName != null) {
+            compilations.put(methodName, output.substring(prev));
+        }
         // Iterate over compilation output
-        for (String graph : compilations) {
-            String[] lines = graph.split("\\n");
-            if (lines[0].contains("@")) {
-                continue; // Ignore OSR compilations
-            }
-            String testName = lines[0].split(" ")[0];
+        for (Map.Entry<String, String> entry : compilations.entrySet()) {
+            String testName = entry.getKey();
             Method test = tests.get(testName);
             if (test == null) {
                 // Skip helper methods
                 continue;
             }
+            String graph = entry.getValue();
             if (PRINT_GRAPH) {
-                System.out.println("\nGraph for " + graph);
+                System.out.println("\nGraph for " + testName + "\n" + graph);
             }
             // Parse graph using regular expressions to determine if it contains forbidden nodes
             Test[] annos = test.getAnnotationsByType(Test.class);
@@ -1803,10 +2306,10 @@
                 } else if ((a.valid() & ValueTypeArrayFlattenOff) != 0 && !ValueTypeArrayFlatten) {
                     assert anno == null;
                     anno = a;
-                } else if ((a.valid() & AlwaysIncrementalInlineOff) != 0 && !AlwaysIncrementalInline) {
+                } else if ((a.valid() & ValueTypeReturnedAsFieldsOn) != 0 && ValueTypeReturnedAsFields) {
                     assert anno == null;
                     anno = a;
-                } else if ((a.valid() & AlwaysIncrementalInlineOn) != 0 && AlwaysIncrementalInline) {
+                } else if ((a.valid() & ValueTypeReturnedAsFieldsOff) != 0 && !ValueTypeReturnedAsFields) {
                     assert anno == null;
                     anno = a;
                 }