changeset 56235:8cd372dc26ca lworld

valhalla support for AArch64
author dsamersoff
date Tue, 30 Jul 2019 15:04:38 +0200
parents a6fea066f20f
children f0f5751f8239
files src/hotspot/cpu/aarch64/aarch64.ad src/hotspot/cpu/aarch64/abstractInterpreter_aarch64.cpp src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp src/hotspot/cpu/aarch64/frame_aarch64.cpp src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.cpp src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.hpp src/hotspot/cpu/aarch64/gc/shared/modRefBarrierSetAssembler_aarch64.cpp src/hotspot/cpu/aarch64/gc/shared/modRefBarrierSetAssembler_aarch64.hpp src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp src/hotspot/cpu/aarch64/globals_aarch64.hpp src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp src/hotspot/cpu/aarch64/interpreterRT_aarch64.cpp src/hotspot/cpu/aarch64/interpreterRT_aarch64.hpp src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp src/hotspot/cpu/aarch64/templateTable_aarch64.cpp src/hotspot/cpu/aarch64/templateTable_aarch64.hpp src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp src/hotspot/share/runtime/arguments.cpp test/hotspot/jtreg/compiler/valhalla/valuetypes/TestArrays.java test/hotspot/jtreg/compiler/valhalla/valuetypes/TestCallingConvention.java test/hotspot/jtreg/compiler/valhalla/valuetypes/TestCallingConventionC1.java test/hotspot/jtreg/compiler/valhalla/valuetypes/TestIntrinsics.java test/hotspot/jtreg/compiler/valhalla/valuetypes/TestJNICalls.java test/hotspot/jtreg/compiler/valhalla/valuetypes/TestLWorld.java test/hotspot/jtreg/compiler/valhalla/valuetypes/TestNullableArrays.java test/hotspot/jtreg/compiler/valhalla/valuetypes/TestNullableValueTypes.java test/hotspot/jtreg/compiler/valhalla/valuetypes/TestOnStackReplacement.java
diffstat 42 files changed, 1799 insertions(+), 352 deletions(-)
--- a/src/hotspot/cpu/aarch64/aarch64.ad	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/aarch64.ad	Tue Jul 30 15:04:38 2019 +0200
@@ -1753,27 +1753,8 @@
   Compile* C = ra_->C;
   MacroAssembler _masm(&cbuf);
 
-  // n.b. frame size includes space for return pc and rfp
-  const long framesize = C->frame_size_in_bytes();
-  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");
-
-  // insert a nop at the start of the prolog so we can patch in a
-  // branch if we need to invalidate the method later
-  __ nop();
-
-  int bangsize = C->bang_size_in_bytes();
-  if (C->need_stack_bang(bangsize) && UseStackBanging)
-    __ generate_stack_overflow_check(bangsize);
-
-  __ build_frame(framesize);
-
-  if (NotifySimulator) {
-    __ notify(Assembler::method_entry);
-  }
-
-  if (VerifyStackAtCalls) {
-    Unimplemented();
-  }
+  __ verified_entry(C, 0);
+  __ bind(*_verified_entry);
 
   C->set_frame_complete(cbuf.insts_size());
 
@@ -2094,8 +2075,46 @@
   return 4;
 }
 
+//=============================================================================
+#ifndef PRODUCT
+void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
+{
+  st->print_cr("# MachVEPNode");
+  if (!_verified) {
+    st->print_cr("\t load_class");
+  } else {
+    st->print_cr("\t unpack_value_arg");
+  }
+}
+#endif
+
+void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
+{
+  MacroAssembler _masm(&cbuf);
+
+  if (!_verified) {
+    Label skip;
+    __ cmp_klass(j_rarg0, rscratch2, rscratch1);
+    __ br(Assembler::EQ, skip);
+      __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+    __ bind(skip);
+
+  } else {
+    // Unpack value type args passed as oop and then jump to
+    // the verified entry point (skipping the unverified entry).
+    __ unpack_value_args(ra_->C, _receiver_only);
+    __ b(*_verified_entry);
+  }
+}
+
+
+uint MachVEPNode::size(PhaseRegAlloc* ra_) const
+{
+  return MachNode::size(ra_); // too many variables; just compute it the hard way
+}
+
+
 //=============================================================================
-
 #ifndef PRODUCT
 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 {
@@ -2117,9 +2136,11 @@
 {
   // This is the unverified entry point.
   MacroAssembler _masm(&cbuf);
-
+  Label skip;
+
+  // UseCompressedClassPointers logic is handled inside cmp_klass
   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
-  Label skip;
+
   // TODO
   // can we avoid this skip and still use a reloc?
   __ br(Assembler::EQ, skip);
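A note on the entry points above: MachUEPNode emits the unverified entry (the inline-cache klass check), while the new MachVEPNode adds the value-type entry points. In the verified case, unpack_value_args bridges callers that still pass a value-type argument as a single oop into the scalarized calling convention. A minimal sketch of the idea, with an illustrative Point type and helper names (not HotSpot code); note that this patch defines ValueTypePassFieldsAsArgs as false for AArch64 (see globals_aarch64.hpp below), so the scalarized convention is not yet enabled here:

struct Point { int x; int y; };            // an inline type, scalarized as (x, y)

void verified_entry(int x, int y);         // scalarized signature (illustrative)

// Value entry point: a caller that passes the value as an oop lands here; the
// fields are unpacked into the scalarized convention, then control branches to
// the verified entry (skipping the unverified entry).
void value_entry_point(Point* oop_arg) {
  verified_entry(oop_arg->x, oop_arg->y);
}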
@@ -2507,7 +2528,7 @@
 {
   int gps = 0;
   int fps = 0;
-  const TypeTuple *domain = tf->domain();
+  const TypeTuple *domain = tf->domain_cc();
   int max = domain->cnt();
   for (int i = TypeFunc::Parms; i < max; i++) {
     const Type *t = domain->field_at(i);
@@ -8182,7 +8203,7 @@
   ins_pipe(ialu_reg);
 %}
 
-instruct castP2X(iRegLNoSp dst, iRegP src) %{
+instruct castN2X(iRegLNoSp dst, iRegN src) %{
   match(Set dst (CastP2X src));
 
   ins_cost(INSN_COST);
@@ -8197,6 +8218,52 @@
   ins_pipe(ialu_reg);
 %}
 
+instruct castP2X(iRegLNoSp dst, iRegP src) %{
+  match(Set dst (CastP2X src));
+
+  ins_cost(INSN_COST);
+  format %{ "mov $dst, $src\t# ptr -> long" %}
+
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
+    }
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct castN2I(iRegINoSp dst, iRegN src) %{
+  match(Set dst (CastN2I src));
+
+  ins_cost(INSN_COST);
+  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
+
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movw(as_Register($dst$$reg), as_Register($src$$reg));
+    }
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct castI2N(iRegNNoSp dst, iRegI src) %{
+  match(Set dst (CastI2N src));
+
+  ins_cost(INSN_COST);
+  format %{ "movw $dst, $src\t# int -> compressed ptr" %}
+
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movw(as_Register($dst$$reg), as_Register($src$$reg));
+    }
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+
 // Convert oop into int for vectors alignment masking
 instruct convP2I(iRegINoSp dst, iRegP src) %{
   match(Set dst (ConvL2I (CastP2X src)));
@@ -13686,33 +13753,16 @@
 // ============================================================================
 // clearing of an array
 
-instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
-%{
-  match(Set dummy (ClearArray cnt base));
+instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
+%{
+  match(Set dummy (ClearArray (Binary cnt base) val));
   effect(USE_KILL cnt, USE_KILL base);
 
   ins_cost(4 * INSN_COST);
-  format %{ "ClearArray $cnt, $base" %}
-
-  ins_encode %{
-    __ zero_words($base$$Register, $cnt$$Register);
-  %}
-
-  ins_pipe(pipe_class_memory);
-%}
-
-instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
-%{
-  predicate((u_int64_t)n->in(2)->get_long()
-            < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
-  match(Set dummy (ClearArray cnt base));
-  effect(USE_KILL base);
-
-  ins_cost(4 * INSN_COST);
-  format %{ "ClearArray $cnt, $base" %}
-
-  ins_encode %{
-    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
+  format %{ "ClearArray $cnt, $base, $val" %}
+
+  ins_encode %{
+    __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
   %}
 
   ins_pipe(pipe_class_memory);
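The ClearArray change above replaces zero_words with fill_words and threads a fill value through the node: a null-free object array cannot simply be zeroed, since its elements must reference the element type's default value rather than null. A sketch of the assumed fill_words semantics (illustrative, not the HotSpot implementation):

#include <cstdint>
#include <cstddef>

// Fill 'cnt' 64-bit words starting at 'base' with the pattern 'val'.
// zero_words(base, cnt) is then the special case val == 0.
void fill_words(uint64_t* base, size_t cnt, uint64_t val) {
  for (size_t i = 0; i < cnt; i++) {
    base[i] = val;   // e.g. the default-value oop for a null-free object array
  }
}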
--- a/src/hotspot/cpu/aarch64/abstractInterpreter_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/abstractInterpreter_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -47,6 +47,7 @@
     case T_DOUBLE : i = 8; break;
     case T_OBJECT : i = 9; break;
     case T_ARRAY  : i = 9; break;
+    case T_VALUETYPE : i = 10; break;
     default       : ShouldNotReachHere();
   }
   assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers,
--- a/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -118,6 +118,76 @@
 #endif
 }
 
+// Implementation of LoadFlattenedArrayStub
+
+LoadFlattenedArrayStub::LoadFlattenedArrayStub(LIR_Opr array, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) {
+  _array = array;
+  _index = index;
+  _result = result;
+  _scratch_reg = FrameMap::r0_oop_opr;
+  _info = new CodeEmitInfo(info);
+}
+
+void LoadFlattenedArrayStub::emit_code(LIR_Assembler* ce) {
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+  __ bind(_entry);
+  ce->store_parameter(_array->as_register(), 1);
+  ce->store_parameter(_index->as_register(), 0);
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::load_flattened_array_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  if (_result->as_register() != r0) { 
+    __ mov(_result->as_register(), r0);
+  }
+  __ b(_continuation);
+}
+
+
+// Implementation of StoreFlattenedArrayStub
+
+StoreFlattenedArrayStub::StoreFlattenedArrayStub(LIR_Opr array, LIR_Opr index, LIR_Opr value, CodeEmitInfo* info) {
+  _array = array;
+  _index = index;
+  _value = value;
+  _scratch_reg = FrameMap::r0_oop_opr;
+  _info = new CodeEmitInfo(info);
+}
+
+
+void StoreFlattenedArrayStub::emit_code(LIR_Assembler* ce) {
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+  __ bind(_entry);
+  ce->store_parameter(_array->as_register(), 2);
+  ce->store_parameter(_index->as_register(), 1);
+  ce->store_parameter(_value->as_register(), 0);
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::store_flattened_array_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ b(_continuation);
+}
+
+// Implementation of SubstitutabilityCheckStub
+SubstitutabilityCheckStub::SubstitutabilityCheckStub(LIR_Opr left, LIR_Opr right, LIR_Opr result, CodeEmitInfo* info) {
+  _left = left;
+  _right = right;
+  _result = result;
+  _scratch_reg = FrameMap::r0_oop_opr;
+  _info = new CodeEmitInfo(info);
+}
+
+void SubstitutabilityCheckStub::emit_code(LIR_Assembler* ce) {
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+  __ bind(_entry);
+  ce->store_parameter(_left->as_register(), 1);
+  ce->store_parameter(_right->as_register(), 0);
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::substitutability_check_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  if (_result->as_register() != r0) {
+    __ mov(_result->as_register(), r0);
+  }
+  __ b(_continuation);
+}
 
 
 // Implementation of NewInstanceStub
@@ -134,8 +204,6 @@
   _stub_id   = stub_id;
 }
 
-
-
 void NewInstanceStub::emit_code(LIR_Assembler* ce) {
   assert(__ rsp_offset() == 0, "frame size should be fixed");
   __ bind(_entry);
@@ -175,11 +243,12 @@
 
 // Implementation of NewObjectArrayStub
 
-NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info, bool is_value_type) {
   _klass_reg = klass_reg;
   _result = result;
   _length = length;
   _info = new CodeEmitInfo(info);
+  _is_value_type = is_value_type; 
 }
 
 
@@ -188,7 +257,13 @@
   __ bind(_entry);
   assert(_length->as_register() == r19, "length must in r19,");
   assert(_klass_reg->as_register() == r3, "klass_reg must in r3");
-  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id)));
+
+  if (_is_value_type) {
+    __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_value_array_id)));
+  } else {
+    __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id)));
+  }
+
   ce->add_call_info_here(_info);
   ce->verify_oop_map(_info);
   assert(_result->as_register() == r0, "result must in r0");
@@ -196,16 +271,31 @@
 }
 // Implementation of MonitorAccessStubs
 
-MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info)
+MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info, CodeStub* throw_imse_stub, LIR_Opr scratch_reg)
 : MonitorAccessStub(obj_reg, lock_reg)
 {
   _info = new CodeEmitInfo(info);
+  _scratch_reg = scratch_reg;
+  _throw_imse_stub = throw_imse_stub;
+  if (_throw_imse_stub != NULL) {
+    assert(_scratch_reg != LIR_OprFact::illegalOpr, "must be");
+  }
 }
 
 
 void MonitorEnterStub::emit_code(LIR_Assembler* ce) {
   assert(__ rsp_offset() == 0, "frame size should be fixed");
   __ bind(_entry);
+  if (_throw_imse_stub != NULL) {
+    // When we come here, _obj_reg has already been checked to be non-null.
+    __ ldr(rscratch1, Address(_obj_reg->as_register(), oopDesc::mark_offset_in_bytes()));
+    __ mov(rscratch2, markOopDesc::always_locked_pattern);
+    __ andr(rscratch1, rscratch1, rscratch2);
+
+    __ cmp(rscratch1, rscratch2);
+    __ br(Assembler::EQ, *_throw_imse_stub->entry()); // a value object: throw IllegalMonitorStateException
+  }
+
   ce->store_parameter(_obj_reg->as_register(),  1);
   ce->store_parameter(_lock_reg->as_register(), 0);
   Runtime1::StubID enter_id;
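The mark-word test added to MonitorEnterStub::emit_code above prevents synchronizing on a value object: value objects carry the always_locked pattern in their mark word, and monitorenter on them must raise IllegalMonitorStateException. A sketch of the check; the pattern constant is an assumption for illustration (the real one is markOopDesc::always_locked_pattern):

#include <cstdint>

const uintptr_t always_locked_pattern = 0x405;   // assumed value, illustration only

// Mirrors the emitted ldr/andr/cmp sequence: a value object has all pattern bits
// set, so it branches to _throw_imse_stub; ordinary objects proceed to lock.
bool can_lock(uintptr_t mark_word) {
  return (mark_word & always_locked_pattern) != always_locked_pattern;
}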
--- a/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -45,7 +45,7 @@
       Register reg2 = r_2->as_Register();
       assert(reg2 == reg, "must be same register");
       opr = as_long_opr(reg);
-    } else if (type == T_OBJECT || type == T_ARRAY) {
+    } else if (type == T_OBJECT || type == T_ARRAY || type == T_VALUETYPE) {
       opr = as_oop_opr(reg);
     } else if (type == T_METADATA) {
       opr = as_metadata_opr(reg);
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -40,6 +40,7 @@
 #include "gc/shared/collectedHeap.hpp"
 #include "nativeInst_aarch64.hpp"
 #include "oops/objArrayKlass.hpp"
+#include "oops/oop.inline.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "vmreg_aarch64.inline.hpp"
@@ -242,7 +243,7 @@
 
   // build frame
   ciMethod* m = compilation()->method();
-  __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes());
+  __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes(), needs_stack_repair(), NULL); 
 
   // OSR buffer is
   //
@@ -452,7 +453,7 @@
 
   // remove the activation and dispatch to the unwind handler
   __ block_comment("remove_frame and dispatch to the unwind handler");
-  __ remove_frame(initial_frame_size_in_bytes());
+  __ remove_frame(initial_frame_size_in_bytes(), needs_stack_repair());
   __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id)));
 
   // Emit the slow path assembly
@@ -503,8 +504,9 @@
 void LIR_Assembler::return_op(LIR_Opr result) {
   assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == r0, "word returns are in r0,");
 
+  ciMethod* method = compilation()->method();
   // Pop the stack before the safepoint code
-  __ remove_frame(initial_frame_size_in_bytes());
+  __ remove_frame(initial_frame_size_in_bytes(), needs_stack_repair());
 
   if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) {
     __ reserved_stack_check();
@@ -515,6 +517,10 @@
   __ ret(lr);
 }
 
+void LIR_Assembler::store_value_type_fields_to_buf(ciValueKlass* vk) { 
+  __ store_value_type_fields_to_buf(vk);
+}
+
 int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
   address polling_page(os::get_polling_page());
   guarantee(info != NULL, "Shouldn't be NULL");
@@ -562,11 +568,12 @@
       break;
     }
 
+    case T_VALUETYPE:
     case T_OBJECT: {
-        if (patch_code == lir_patch_none) {
+        if (patch_code != lir_patch_none) {
+          jobject2reg_with_patching(dest->as_register(), info);
+        } else {
           jobject2reg(c->as_jobject(), dest->as_register());
-        } else {
-          jobject2reg_with_patching(dest->as_register(), info);
         }
       break;
     }
@@ -608,6 +615,7 @@
 void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) {
   LIR_Const* c = src->as_constant_ptr();
   switch (c->type()) {
+  case T_VALUETYPE: 
   case T_OBJECT:
     {
       if (! c->as_jobject())
@@ -674,6 +682,7 @@
     assert(c->as_jint() == 0, "should be");
     insn = &Assembler::strw;
     break;
+  case T_VALUETYPE: // DMS CHECK: this code differs significantly from x86
   case T_OBJECT:
   case T_ARRAY:
     assert(c->as_jobject() == 0, "should be");
@@ -714,13 +723,13 @@
       return;
     }
     assert(src->is_single_cpu(), "must match");
-    if (src->type() == T_OBJECT) {
+    if (src->type() == T_OBJECT || src->type() == T_VALUETYPE) {
       __ verify_oop(src->as_register());
     }
     move_regs(src->as_register(), dest->as_register());
 
   } else if (dest->is_double_cpu()) {
-    if (src->type() == T_OBJECT || src->type() == T_ARRAY) {
+    if (src->type() == T_OBJECT || src->type() == T_ARRAY || src->type() == T_VALUETYPE) {
       // Surprising to me but we can see move of a long to t_object
       __ verify_oop(src->as_register());
       move_regs(src->as_register(), dest->as_register_lo());
@@ -748,7 +757,7 @@
 
 void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) {
   if (src->is_single_cpu()) {
-    if (type == T_ARRAY || type == T_OBJECT) {
+    if (type == T_ARRAY || type == T_OBJECT || type == T_VALUETYPE) {
       __ str(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix()));
       __ verify_oop(src->as_register());
     } else if (type == T_METADATA || type == T_DOUBLE) {
@@ -786,7 +795,7 @@
     return;
   }
 
-  if (type == T_ARRAY || type == T_OBJECT) {
+  if (type == T_ARRAY || type == T_OBJECT || type == T_VALUETYPE) {
     __ verify_oop(src->as_register());
 
     if (UseCompressedOops && !wide) {
@@ -808,6 +817,7 @@
       break;
     }
 
+    case T_VALUETYPE: // fall through
     case T_ARRAY:   // fall through
     case T_OBJECT:  // fall through
       if (UseCompressedOops && !wide) {
@@ -861,7 +871,7 @@
   assert(dest->is_register(), "should not call otherwise");
 
   if (dest->is_single_cpu()) {
-    if (type == T_ARRAY || type == T_OBJECT) {
+    if (type == T_ARRAY || type == T_OBJECT || type == T_VALUETYPE) {
       __ ldr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
       __ verify_oop(dest->as_register());
     } else if (type == T_METADATA) {
@@ -933,7 +943,7 @@
   LIR_Address* addr = src->as_address_ptr();
   LIR_Address* from_addr = src->as_address_ptr();
 
-  if (addr->base()->type() == T_OBJECT) {
+  if (addr->base()->type() == T_OBJECT || addr->base()->type() == T_VALUETYPE) { 
     __ verify_oop(addr->base()->as_pointer_register());
   }
 
@@ -957,6 +967,7 @@
       break;
     }
 
+    case T_VALUETYPE: // fall through
     case T_ARRAY:   // fall through
     case T_OBJECT:  // fall through
       if (UseCompressedOops && !wide) {
@@ -1011,7 +1022,7 @@
       ShouldNotReachHere();
   }
 
-  if (type == T_ARRAY || type == T_OBJECT) {
+  if (type == T_ARRAY || type == T_OBJECT || type == T_VALUETYPE) {
     if (UseCompressedOops && !wide) {
       __ decode_heap_oop(dest->as_register());
     }
@@ -1022,11 +1033,28 @@
     }
   } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) {
     if (UseCompressedClassPointers) {
+      __ andr(dest->as_register(), dest->as_register(), oopDesc::compressed_klass_mask());
       __ decode_klass_not_null(dest->as_register());
+    } else {
+      __ ubfm(dest->as_register(), dest->as_register(), 0, 63 - oopDesc::storage_props_nof_bits);
     }
   }
 }
 
+void LIR_Assembler::move(LIR_Opr src, LIR_Opr dst) {
+  assert(dst->is_cpu_register(), "must be");
+  assert(dst->type() == src->type(), "must be");
+
+  if (src->is_cpu_register()) {
+    reg2reg(src, dst);
+  } else if (src->is_stack()) {
+    stack2reg(src, dst, dst->type());
+  } else if (src->is_constant()) {
+    const2reg(src, dst, lir_patch_none, NULL);
+  } else {
+    ShouldNotReachHere();
+  }
+}
 
 int LIR_Assembler::array_element_size(BasicType type) const {
   int elem_size = type2aelembytes(type);
@@ -1218,7 +1246,7 @@
   Register len =  op->len()->as_register();
   __ uxtw(len, len);
 
-  if (UseSlowPath ||
+  if (UseSlowPath || op->type() == T_VALUETYPE ||
       (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) ||
       (!UseFastNewTypeArray   && (op->type() != T_OBJECT && op->type() != T_ARRAY))) {
     __ b(*op->stub()->entry());
@@ -1530,6 +1558,122 @@
   }
 }
 
+void LIR_Assembler::emit_opFlattenedArrayCheck(LIR_OpFlattenedArrayCheck* op) {
+  // We are loading/storing an array that *may* be a flattened array (the declared type
+  // Object[], interface[], or VT?[]). If this array is flattened, take the slow path.
+
+  __ load_storage_props(op->tmp()->as_register(), op->array()->as_register());
+  __ tst(op->tmp()->as_register(), ArrayStorageProperties::flattened_value);
+  __ br(Assembler::NE, *op->stub()->entry());
+  if (!op->value()->is_illegal()) {
+    // We are storing into the array.
+    Label skip;
+    __ tst(op->tmp()->as_register(), ArrayStorageProperties::null_free_value);
+    __ br(Assembler::EQ, skip);
+    // The array is not flattened, but it is null_free. If we are storing
+    // a null, take the slow path (which will throw NPE).
+    __ cbz(op->value()->as_register(), *op->stub()->entry());
+    __ bind(skip);
+  }
+
+}
+
+void LIR_Assembler::emit_opNullFreeArrayCheck(LIR_OpNullFreeArrayCheck* op) {
+  // This is called when we use aastore into an array declared as "[LVT;",
+  // where we know VT is not flattenable (due to ValueArrayElemMaxFlatOops, etc).
+  // However, we need to do a NULL check if the actual array is a "[QVT;".
+
+  __ load_storage_props(op->tmp()->as_register(), op->array()->as_register());
+  __ mov(rscratch1, (uint64_t) ArrayStorageProperties::null_free_value);
+  __ cmp(op->tmp()->as_register(), rscratch1);
+}
+
+void LIR_Assembler::emit_opSubstitutabilityCheck(LIR_OpSubstitutabilityCheck* op) {
+  Label L_oops_equal;
+  Label L_oops_not_equal;
+  Label L_end;
+
+  Register left  = op->left()->as_register();
+  Register right = op->right()->as_register();
+
+  __ cmp(left, right);
+  __ br(Assembler::EQ, L_oops_equal);
+
+  // (1) Null check -- if one of the operands is null, the other must not be null (because
+  //     the two references are not equal), so they are not substitutable.
+  //     FIXME: do null check only if the operand is nullable
+  {
+    __ cbz(left, L_oops_not_equal);
+    __ cbz(right, L_oops_not_equal);
+  }
+
+
+  ciKlass* left_klass = op->left_klass();
+  ciKlass* right_klass = op->right_klass();
+
+  // (2) Value object check -- if either of the operands is not a value object,
+  //     they are not substitutable. We do this only if we are not sure that the
+  //     operands are value objects
+  if ((left_klass == NULL || right_klass == NULL) || // The klass is still unloaded, or came from a Phi node.
+      !left_klass->is_valuetype() || !right_klass->is_valuetype()) {
+    Register tmp1  = rscratch1; /* op->tmp1()->as_register(); */
+    Register tmp2  = rscratch2; /* op->tmp2()->as_register(); */
+
+    __ mov(tmp1, (intptr_t)markOopDesc::always_locked_pattern);
+
+    __ ldr(tmp2, Address(left, oopDesc::mark_offset_in_bytes()));
+    __ andr(tmp1, tmp1, tmp2);
+
+    __ ldr(tmp2, Address(right, oopDesc::mark_offset_in_bytes()));
+    __ andr(tmp1, tmp1, tmp2); 
+
+    __ mov(tmp2, (intptr_t)markOopDesc::always_locked_pattern);
+    __ cmp(tmp1, tmp2); 
+    __ br(Assembler::NE, L_oops_not_equal);
+  }
+
+  // (3) Same klass check: if the operands are of different klasses, they are not substitutable.
+  if (left_klass != NULL && left_klass->is_valuetype() && left_klass == right_klass) {
+    // No need to load klass -- the operands are statically known to be the same value klass.
+    __ b(*op->stub()->entry());
+  } else {
+    Register left_klass_op = op->left_klass_op()->as_register();
+    Register right_klass_op = op->right_klass_op()->as_register();
+
+    // DMS CHECK: likely an x86 bug; make sure the AArch64 implementation is correct
+    __ load_klass(left_klass_op, left);
+    __ load_klass(right_klass_op, right);
+    __ cmp(left_klass_op, right_klass_op);
+    __ br(Assembler::EQ, *op->stub()->entry()); // same klass -> do slow check
+    // fall through to L_oops_not_equal
+  }
+
+  __ bind(L_oops_not_equal);
+  move(op->not_equal_result(), op->result_opr());
+  __ b(L_end);
+
+  __ bind(L_oops_equal);
+  move(op->equal_result(), op->result_opr());
+  __ b(L_end);
+
+  // We've returned from the stub. op->result_opr() contains 0x0 IFF the two
+  // operands are not substitutable. (Don't compare against 0x1 in case the
+  // C compiler is naughty)
+  __ bind(*op->stub()->continuation());
+
+  if (op->result_opr()->type() == T_LONG) {
+    __ cbzw(op->result_opr()->as_register(), L_oops_not_equal); // (call_stub() == 0x0) -> not_equal
+  } else {
+    __ cbz(op->result_opr()->as_register(), L_oops_not_equal); // (call_stub() == 0x0) -> not_equal
+  }
+
+  move(op->equal_result(), op->result_opr()); // (call_stub() != 0x0) -> equal
+  // fall-through
+  __ bind(L_end);
+
+}
+
+
 void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) {
   __ cmpxchg(addr, cmpval, newval, Assembler::word, /* acquire*/ true, /* release*/ true, /* weak*/ false, rscratch1);
   __ cset(rscratch1, Assembler::NE);
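The opFlattenedArrayCheck/opNullFreeArrayCheck emitters above key off the array oop's storage properties. A compact sketch of the decision emit_opFlattenedArrayCheck implements; the flag bits are assumptions for illustration (the real ones live in ArrayStorageProperties):

enum StorageProps { flattened_value = 1, null_free_value = 2 };  // assumed bits

// Mirrors the tst/br sequence: flattened arrays always need the runtime stub,
// and a null store into a null-free array diverts to the slow path to throw NPE.
bool needs_slow_path(int props, bool is_store, const void* new_value) {
  if (props & flattened_value) return true;
  if (is_store && (props & null_free_value) && new_value == nullptr) return true;
  return false;
}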
@@ -1940,10 +2084,10 @@
     if (opr2->is_single_cpu()) {
       // cpu register - cpu register
       Register reg2 = opr2->as_register();
-      if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) {
+      if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY || opr1->type() == T_VALUETYPE) {
         __ cmpoop(reg1, reg2);
       } else {
-        assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY, "cmp int, oop?");
+        assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY && opr2->type() != T_VALUETYPE,  "cmp int, oop?");
         __ cmpw(reg1, reg2);
       }
       return;
@@ -1970,6 +2114,7 @@
       case T_ADDRESS:
         imm = opr2->as_constant_ptr()->as_jint();
         break;
+      case T_VALUETYPE:
       case T_OBJECT:
       case T_ARRAY:
         jobject2reg(opr2->as_constant_ptr()->as_jobject(), rscratch1);
@@ -2136,6 +2281,7 @@
       }
       break;
     case T_LONG:
+    case T_VALUETYPE: 
     case T_ADDRESS:
     case T_OBJECT:
       switch (code) {
@@ -2172,6 +2318,7 @@
       break;
     case T_LONG:
     case T_ADDRESS:
+    case T_VALUETYPE:
     case T_OBJECT:
       switch (code) {
       case lir_shl:  __ lsl (dreg, lreg, count); break;
@@ -2216,6 +2363,19 @@
   __ str(rscratch1, Address(sp, offset_from_rsp_in_bytes));
 }
 
+void LIR_Assembler::arraycopy_valuetype_check(Register obj, Register tmp, CodeStub* slow_path, bool is_dest) {
+  __ load_storage_props(tmp, obj);
+  if (is_dest) {
+    // We also take the slow path if the destination is a null_free array, just in case
+    // the source array contains NULLs.
+    __ tst(tmp, ArrayStorageProperties::flattened_value | ArrayStorageProperties::null_free_value);
+  } else {
+    __ tst(tmp, ArrayStorageProperties::flattened_value);
+  }
+  __ br(Assembler::NE, *slow_path->entry());
+}
+
+
 
 // This code replaces a call to arraycopy; no exception may
 // be thrown in this code, they must be thrown in the System.arraycopy
@@ -2235,7 +2395,23 @@
   CodeStub* stub = op->stub();
   int flags = op->flags();
   BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL;
-  if (basic_type == T_ARRAY) basic_type = T_OBJECT;
+  if (basic_type == T_ARRAY || basic_type == T_VALUETYPE) basic_type = T_OBJECT;
+
+  if (flags & LIR_OpArrayCopy::always_slow_path) {
+    __ b(*stub->entry());
+    __ bind(*stub->continuation());
+    return;
+  }
+
+  if (flags & LIR_OpArrayCopy::src_valuetype_check) {
+    arraycopy_valuetype_check(src, tmp, stub, false);
+  }
+
+  if (flags & LIR_OpArrayCopy::dst_valuetype_check) {
+    arraycopy_valuetype_check(dst, tmp, stub, true);
+  }
+
+
 
   // if we don't know anything, just go through the generic arraycopy
   if (default_type == NULL // || basic_type == T_OBJECT
@@ -2904,6 +3080,7 @@
       case T_INT:
       case T_LONG:
       case T_OBJECT:
+      case T_VALUETYPE:
         type = 1;
         break;
       case T_FLOAT:
@@ -3170,6 +3347,7 @@
     xchg = &MacroAssembler::atomic_xchgal;
     add = &MacroAssembler::atomic_addal;
     break;
+  case T_VALUETYPE:
   case T_OBJECT:
   case T_ARRAY:
     if (UseCompressedOops) {
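For reference, emit_opSubstitutabilityCheck above lowers acmp on possible value objects into roughly the following decision procedure (a sketch; Obj and the helper functions are illustrative stand-ins, and the final step is the substitutability_check runtime stub):

struct Obj;                                    // stands in for oopDesc
bool is_value_object(const Obj*);              // assumed helpers, see lead-in
const void* klass_of(const Obj*);
bool runtime_field_compare(const Obj*, const Obj*);

bool substitutable(const Obj* left, const Obj* right) {
  if (left == right)                       return true;    // same reference
  if (left == nullptr || right == nullptr) return false;   // (1) exactly one is null
  if (!is_value_object(left) || !is_value_object(right))
                                           return false;   // (2) identity objects compare by reference
  if (klass_of(left) != klass_of(right))
                                           return false;   // (3) different klasses
  return runtime_field_compare(left, right);               // slow path: compare fields
}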
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp	Tue Jul 30 15:04:38 2019 +0200
@@ -82,5 +82,7 @@
   void store_parameter(Register r, int offset_from_esp_in_words);
   void store_parameter(jint c,     int offset_from_esp_in_words);
   void store_parameter(jobject c,  int offset_from_esp_in_words);
+  void arraycopy_valuetype_check(Register obj, Register tmp, CodeStub* slow_path, bool is_dest);
+  void move(LIR_Opr src, LIR_Opr dst);
 
 #endif // CPU_AARCH64_C1_LIRASSEMBLER_AARCH64_HPP
--- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -35,6 +35,7 @@
 #include "ci/ciArray.hpp"
 #include "ci/ciObjArrayKlass.hpp"
 #include "ci/ciTypeArrayKlass.hpp"
+#include "ci/ciValueKlass.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "vmreg_aarch64.inline.hpp"
@@ -101,6 +102,12 @@
 }
 
 
+void LIRGenerator::init_temps_for_substitutability_check(LIR_Opr& tmp1, LIR_Opr& tmp2) {
+  tmp1 = new_register(T_INT);
+  tmp2 = LIR_OprFact::illegalOpr;
+}
+
+
 //--------- loading items into registers --------------------------------
 
 
@@ -332,7 +339,7 @@
   LIR_Opr lock = new_register(T_INT);
   // Need a scratch register for biased locking
   LIR_Opr scratch = LIR_OprFact::illegalOpr;
-  if (UseBiasedLocking) {
+  if (UseBiasedLocking || x->maybe_valuetype()) {
     scratch = new_register(T_INT);
   }
 
@@ -340,11 +347,17 @@
   if (x->needs_null_check()) {
     info_for_exception = state_for(x);
   }
+
+  CodeStub* throw_imse_stub = 
+      x->maybe_valuetype() ?
+      new SimpleExceptionStub(Runtime1::throw_illegal_monitor_state_exception_id, LIR_OprFact::illegalOpr, state_for(x)) :
+      NULL;
+
   // this CodeEmitInfo must not have the xhandlers because here the
   // object is already locked (xhandlers expect object to be unlocked)
   CodeEmitInfo* info = state_for(x, x->state(), true);
   monitor_enter(obj.result(), lock, syncTempOpr(), scratch,
-                        x->monitor_no(), info_for_exception, info);
+                        x->monitor_no(), info_for_exception, info, throw_imse_stub);
 }
 
 
@@ -1153,6 +1166,22 @@
   __ move(reg, result);
 }
 
+void LIRGenerator::do_NewValueTypeInstance(NewValueTypeInstance* x) {
+  // Mapping to do_NewInstance (same code)
+  CodeEmitInfo* info = state_for(x, x->state());
+  x->set_to_object_type();
+  LIR_Opr reg = result_register_for(x->type());
+  new_instance(reg, x->klass(), x->is_unresolved(),
+             FrameMap::r2_oop_opr,
+             FrameMap::r5_oop_opr,
+             FrameMap::r4_oop_opr,
+             LIR_OprFact::illegalOpr,
+             FrameMap::r3_metadata_opr, info);
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);
+
+}
+
 void LIRGenerator::do_NewTypeArray(NewTypeArray* x) {
   CodeEmitInfo* info = state_for(x, x->state());
 
@@ -1198,13 +1227,18 @@
   length.load_item_force(FrameMap::r19_opr);
   LIR_Opr len = length.result();
 
-  CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info);
-  ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass());
+  ciKlass* obj = (ciKlass*) x->exact_type();
+  CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info, x->is_never_null());
   if (obj == ciEnv::unloaded_ciobjarrayklass()) {
     BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error");
   }
+
   klass2reg_with_patching(klass_reg, obj, patching_info);
-  __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path);
+  if (x->is_never_null()) {
+    __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_VALUETYPE, klass_reg, slow_path);
+  } else {
+    __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path);
+  }
 
   LIR_Opr result = rlock_result(x);
   __ move(reg, result);
@@ -1280,6 +1314,9 @@
   CodeEmitInfo* info_for_exception =
       (x->needs_exception_state() ? state_for(x) :
                                     state_for(x, x->state_before(), true /*ignore_xhandler*/));
+  if (x->is_never_null()) {
+    __ null_check(obj.result(), new CodeEmitInfo(info_for_exception));
+  }
 
   CodeStub* stub;
   if (x->is_incompatible_class_change_check()) {
@@ -1298,10 +1335,13 @@
   if (!x->klass()->is_loaded() || UseCompressedClassPointers) {
     tmp3 = new_register(objectType);
   }
+
+
   __ checkcast(reg, obj.result(), x->klass(),
                new_register(objectType), new_register(objectType), tmp3,
                x->direct_compare(), info_for_exception, patching_info, stub,
-               x->profiled_method(), x->profiled_bci());
+               x->profiled_method(), x->profiled_bci(), x->is_never_null());
+
 }
 
 void LIRGenerator::do_InstanceOf(InstanceOf* x) {
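A note on the is_never_null() guard added to do_CheckCast above: a cast to a null-free (inline) type must reject null before the ordinary subtype check runs, hence the explicit null_check with its own CodeEmitInfo. Sketched behavior, with a hypothetical throw_npe() standing in for the NPE stub:

void throw_npe();                       // hypothetical helper

void checkcast_to_never_null(const void* obj) {
  if (obj == nullptr) {
    throw_npe();                        // null can never be cast to a null-free type
  }
  // ... fall through to the ordinary checkcast type test
}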
--- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -28,6 +28,8 @@
 #include "c1/c1_Runtime1.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "gc/shared/collectedHeap.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "interpreter/interpreter.hpp"
 #include "oops/arrayOop.hpp"
 #include "oops/markOop.hpp"
@@ -83,6 +85,12 @@
   ldr(hdr, Address(obj, hdr_offset));
   // and mark it as unlocked
   orr(hdr, hdr, markOopDesc::unlocked_value);
+
+  if (EnableValhalla && !UseBiasedLocking) {
+    // Mask the always_locked bit so that we take the slow path if the object is a value type
+    andr(hdr, hdr, ~markOopDesc::biased_lock_bit_in_place);
+  }
+
   // save unlocked object header into the displaced header location on the stack
   str(hdr, Address(disp_hdr, 0));
   // test if object header is still the same (i.e. unlocked), and if so, store the
@@ -330,7 +338,9 @@
 }
 
 
-void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) {
+void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes, bool needs_stack_repair, Label* verified_value_entry_label) {
+
   // If we have to make this method not-entrant we'll overwrite its
   // first instruction with a jump.  For this action to be legal we
   // must ensure that this first instruction is a B, BL, NOP, BKPT,
@@ -340,23 +350,52 @@
   // Make sure there is enough stack space for this method's activation.
   // Note that we do this before doing an enter().
   generate_stack_overflow_check(bang_size_in_bytes);
+
+  guarantee(needs_stack_repair == false, "stack repair not supported");
+  if (verified_value_entry_label != NULL) {
+    bind(*verified_value_entry_label);
+  }
+
   MacroAssembler::build_frame(framesize + 2 * wordSize);
   if (NotifySimulator) {
     notify(Assembler::method_entry);
   }
 }
 
-void C1_MacroAssembler::remove_frame(int framesize) {
+void C1_MacroAssembler::remove_frame(int framesize, bool needs_stack_repair) {
+
+  guarantee(needs_stack_repair == false, "stack repair not supported");
+
   MacroAssembler::remove_frame(framesize + 2 * wordSize);
   if (NotifySimulator) {
     notify(Assembler::method_reentry);
   }
 }
 
+void C1_MacroAssembler::verified_value_entry() {
+  if (C1Breakpoint || VerifyFPU || !UseStackBanging) {
+    // Note: this rationale is carried over from x86, where the first instruction
+    // of the verified entry must be 5 bytes long for correct patching by
+    // patch_verified_entry(): C1Breakpoint and VerifyFPU emit a one-byte first
+    // instruction there, as does the one-byte "push(rbp)" when stack banging
+    // code is not generated (see build_frame() above), so a long instruction is
+    // generated first. On AArch64 every instruction is 4 bytes.
+    nop();
+  }
+  
+  // build frame
+  // DMS CHECK: is this a no-op on AArch64?
+  // verify_FPU(0, "method_entry");
+ 
+}
 
-void C1_MacroAssembler::verified_entry() {
+int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature *ces, int frame_size_in_bytes, int bang_size_in_bytes, Label& verified_value_entry_label, bool is_value_ro_entry) {
+  guarantee(false, "Support for ValueTypePassFieldsAsArgs and ValueTypeReturnedAsFields is not implemented");
+  return 0;
 }
 
+
 void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) {
   // rbp, + 0: link
   //     + 1: return address
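Why lock_object masks the biased-lock bit (the EnableValhalla block near the top of this file): the displaced header that the fast path CASes against the mark word is built so the CAS is guaranteed to fail for value objects, diverting them to the slow path, where MonitorEnterStub raises IllegalMonitorStateException. A sketch; the constants are assumptions mirroring markOopDesc:

#include <cstdint>

const uintptr_t unlocked_value           = 0x1;  // assumed
const uintptr_t biased_lock_bit_in_place = 0x4;  // assumed; also set in always_locked_pattern

uintptr_t displaced_header(uintptr_t mark, bool enable_valhalla, bool use_biased_locking) {
  uintptr_t hdr = mark | unlocked_value;
  if (enable_valhalla && !use_biased_locking) {
    // Value objects always have this bit set in their real mark word, so the
    // CAS of 'hdr' against the mark word cannot succeed for them.
    hdr &= ~biased_lock_bit_in_place;
  }
  return hdr;
}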
--- a/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -773,6 +773,7 @@
 
     case new_type_array_id:
     case new_object_array_id:
+    case new_value_array_id:
       {
         Register length   = r19; // Incoming
         Register klass    = r3; // Incoming
@@ -780,9 +781,13 @@
 
         if (id == new_type_array_id) {
           __ set_info("new_type_array", dont_gc_arguments);
-        } else {
+        }
+        else if (id == new_object_array_id) {
           __ set_info("new_object_array", dont_gc_arguments);
         }
+        else { 
+          __ set_info("new_value_array", dont_gc_arguments);
+        }
 
 #ifdef ASSERT
         // assert object type is really an array of the proper kind
@@ -791,9 +796,14 @@
           Register t0 = obj;
           __ ldrw(t0, Address(klass, Klass::layout_helper_offset()));
           __ asrw(t0, t0, Klass::_lh_array_tag_shift);
-          int tag = ((id == new_type_array_id)
-                     ? Klass::_lh_array_tag_type_value
-                     : Klass::_lh_array_tag_obj_value);
+
+          int tag = 0;
+          switch (id) {
+           case new_type_array_id: tag = Klass::_lh_array_tag_type_value; break;
+           case new_object_array_id: tag = Klass::_lh_array_tag_obj_value; break;
+           case new_value_array_id: tag = Klass::_lh_array_tag_vt_value; break;
+           default:  ShouldNotReachHere();
+          }
           __ mov(rscratch1, tag);
           __ cmpw(t0, rscratch1);
           __ br(Assembler::EQ, ok);
@@ -853,6 +863,7 @@
         if (id == new_type_array_id) {
           call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length);
         } else {
+          // Runtime1::new_object_array handles both object and value arrays
           call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length);
         }
 
@@ -888,6 +899,84 @@
       }
       break;
 
+    case buffer_value_args_id:
+    case buffer_value_args_no_receiver_id:
+    { 
+        const char* name = (id == buffer_value_args_id) ?
+          "buffer_value_args" : "buffer_value_args_no_receiver";
+        StubFrame f(sasm, name, dont_gc_arguments);
+        OopMap* map = save_live_registers(sasm, 2);
+        Register method = r1; 
+        address entry = (id == buffer_value_args_id) ?
+          CAST_FROM_FN_PTR(address, buffer_value_args) :
+          CAST_FROM_FN_PTR(address, buffer_value_args_no_receiver);
+        int call_offset = __ call_RT(r0, noreg, entry, method);
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers_except_r0(sasm);
+        __ verify_oop(r0);  // r0: an array of buffered value objects
+     }
+     break;
+
+    case load_flattened_array_id:
+      {
+        StubFrame f(sasm, "load_flattened_array", dont_gc_arguments);
+        OopMap* map = save_live_registers(sasm, 3);
+
+        // Called with store_parameter and not C abi
+
+        f.load_argument(1, r0); // r0: array
+        f.load_argument(0, r1); // r1: index
+        int call_offset = __ call_RT(r0, noreg, CAST_FROM_FN_PTR(address, load_flattened_array), r0, r1);
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers_except_r0(sasm);
+
+        // r0: loaded element at array[index]
+        __ verify_oop(r0);
+      }
+      break;
+
+    case store_flattened_array_id:
+      {
+        StubFrame f(sasm, "store_flattened_array", dont_gc_arguments);
+        OopMap* map = save_live_registers(sasm, 4);
+
+        // Called with store_parameter and not C abi
+
+        f.load_argument(2, r0); // r0: array
+        f.load_argument(1, r1); // r1: index
+        f.load_argument(0, r2); // r2: value
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, store_flattened_array), r0, r1, r2);
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers_except_r0(sasm);
+      }
+      break;
+
+    case substitutability_check_id:
+      {
+        StubFrame f(sasm, "substitutability_check", dont_gc_arguments);
+        OopMap* map = save_live_registers(sasm, 3);
+
+        // Called with store_parameter and not C abi
+
+        f.load_argument(1, r0); // r0: left
+        f.load_argument(0, r1); // r1: right
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, substitutability_check), r0, r1);
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers_except_r0(sasm);
+
+        // r0: are the two operands substitutable
+      }
+      break;
+
     case register_finalizer_id:
       {
         __ set_info("register_finalizer", dont_gc_arguments);
@@ -927,11 +1016,17 @@
       break;
 
     case throw_incompatible_class_change_error_id:
-      { StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments);
+      { StubFrame f(sasm, "throw_incompatible_class_change_exception", dont_gc_arguments);
         oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false);
       }
       break;
 
+    case throw_illegal_monitor_state_exception_id:
+      { StubFrame f(sasm, "throw_illegal_monitor_state_exception", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_illegal_monitor_state_exception), false);
+      }
+      break;
+
     case slow_subtype_check_id:
       {
         // Typical calling sequence:
@@ -1123,8 +1218,10 @@
       }
       break;
 
-
     default:
+      // DMS CHECK: This code should be fixed in the JDK workspace, because it
+      // fails with an assert during VM initialization rather than inserting a
+      // call to unimplemented_entry
       { StubFrame f(sasm, "unimplemented entry", dont_gc_arguments);
         __ mov(r0, (int)id);
         __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), r0);
@@ -1133,6 +1230,8 @@
       break;
     }
   }
+
+
   return oop_maps;
 }
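A note on the "Called with store_parameter and not C abi" convention used by the new stubs above: the compiled caller spills arguments into its outgoing stack slots and the runtime stub reads them back by slot index. The pairing for load_flattened_array, taken directly from this patch:

// Caller side (LoadFlattenedArrayStub::emit_code, c1_CodeStubs_aarch64.cpp):
//   ce->store_parameter(_array->as_register(), 1);   // slot 1
//   ce->store_parameter(_index->as_register(), 0);   // slot 0
// Stub side (load_flattened_array_id above):
//   f.load_argument(1, r0);   // r0: array
//   f.load_argument(0, r1);   // r1: index
// The loaded element comes back in r0, as for an ordinary Java call.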
 
--- a/src/hotspot/cpu/aarch64/frame_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/frame_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -594,6 +594,7 @@
   }
 
   switch (type) {
+    case T_VALUETYPE :
     case T_OBJECT  :
     case T_ARRAY   : {
       oop obj;
--- a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -273,7 +273,12 @@
 }
 
 void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
-                                         Address dst, Register val, Register tmp1, Register tmp2) {
+                                         Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
+
+  if (tmp3 == noreg) {
+    tmp3 = r8;
+  }
+
   // flatten object address if needed
   if (dst.index() == noreg && dst.offset() == 0) {
     if (dst.base() != r3) {
@@ -292,7 +297,7 @@
                        false /* expand_call */);
 
   if (val == noreg) {
-    BarrierSetAssembler::store_at(masm, decorators, type, Address(r3, 0), noreg, noreg, noreg);
+    BarrierSetAssembler::store_at(masm, decorators, type, Address(r3, 0), noreg, noreg, noreg, noreg);
   } else {
     // G1 barrier needs uncompressed oop for region cross check.
     Register new_val = val;
@@ -300,7 +305,7 @@
       new_val = rscratch2;
       __ mov(new_val, val);
     }
-    BarrierSetAssembler::store_at(masm, decorators, type, Address(r3, 0), val, noreg, noreg);
+    BarrierSetAssembler::store_at(masm, decorators, type, Address(r3, 0), val, noreg, noreg, noreg);
     g1_write_barrier_post(masm,
                           r3 /* store_adr */,
                           new_val /* new_val */,
--- a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp	Tue Jul 30 15:04:38 2019 +0200
@@ -57,7 +57,7 @@
                              Register tmp2);
 
   virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
-                            Address dst, Register val, Register tmp1, Register tmp2);
+                            Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
 
 public:
 #ifdef COMPILER1
--- a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -73,7 +73,7 @@
 }
 
 void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
-                                   Address dst, Register val, Register tmp1, Register tmp2) {
+                                   Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
   bool in_heap = (decorators & IN_HEAP) != 0;
   bool in_native = (decorators & IN_NATIVE) != 0;
   switch (type) {
@@ -229,3 +229,21 @@
   }
   __ str(t1, Address(rthread, in_bytes(JavaThread::allocated_bytes_offset())));
 }
+
+void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
+  // DMS CHECK: 8210498: nmethod entry barriers are not implemented
+#if 0
+  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
+  if (bs_nm == NULL) {
+    return;
+  }
+  Label continuation;
+  Address disarmed_addr(rthread, in_bytes(bs_nm->thread_disarmed_offset()));
+  __ align(8);
+  __ ldr(rscratch1, disarmed_addr);
+  __ cbz(rscratch1, continuation);
+  __ blr(RuntimeAddress(StubRoutines::aarch64::method_entry_barrier()));
+  __ bind(continuation);
+#endif
+}
+
--- a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp	Tue Jul 30 15:04:38 2019 +0200
@@ -43,7 +43,7 @@
   virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                        Register dst, Address src, Register tmp1, Register tmp_thread);
   virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
-                        Address dst, Register val, Register tmp1, Register tmp2);
+                        Address dst, Register val, Register tmp1, Register tmp2, Register tmp3 = noreg);
 
   virtual void obj_equals(MacroAssembler* masm,
                           Register obj1, Register obj2);
@@ -72,6 +72,7 @@
     Label&   slow_case                 // continuation point if fast allocation fails
   );
   virtual void barrier_stubs_init() {}
+  virtual void nmethod_entry_barrier(MacroAssembler* masm);
 };
 
 #endif // CPU_AARCH64_GC_SHARED_BARRIERSETASSEMBLER_AARCH64_HPP
--- a/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -91,18 +91,26 @@
 }
 
 void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
-                                                Address dst, Register val, Register tmp1, Register tmp2) {
+                                                Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
   bool in_heap = (decorators & IN_HEAP) != 0;
   bool is_array = (decorators & IS_ARRAY) != 0;
   bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
   bool precise = is_array || on_anonymous;
 
   bool needs_post_barrier = val != noreg && in_heap;
-  BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg);
+  BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg, noreg);
   if (needs_post_barrier) {
     // flatten object address if needed
     if (!precise || (dst.index() == noreg && dst.offset() == 0)) {
-      store_check(masm, dst.base(), dst);
+      if (tmp3 != noreg) {
+        // Called by MacroAssembler::pack_value_helper. We must not corrupt the dst.base() register
+        __ mov(tmp3, dst.base());
+        store_check(masm, tmp3, dst);
+      } else {
+        // It's OK to corrupt the dst.base() register.
+        store_check(masm, dst.base(), dst);
+      }
+
     } else {
       __ lea(r3, dst);
       store_check(masm, r3, dst);
--- a/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.hpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.hpp	Tue Jul 30 15:04:38 2019 +0200
@@ -35,7 +35,7 @@
   virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                 Register start, Register count, Register tmp, RegSet saved_regs);
   virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
-                            Address dst, Register val, Register tmp1, Register tmp2);
+                            Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
 
 };
 
--- a/src/hotspot/cpu/aarch64/gc/shared/modRefBarrierSetAssembler_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/gc/shared/modRefBarrierSetAssembler_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -45,10 +45,10 @@
 }
 
 void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
-                                         Address dst, Register val, Register tmp1, Register tmp2) {
+                                         Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
   if (type == T_OBJECT || type == T_ARRAY) {
-    oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2);
+    oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
   } else {
-    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
+    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
   }
 }
--- a/src/hotspot/cpu/aarch64/gc/shared/modRefBarrierSetAssembler_aarch64.hpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/gc/shared/modRefBarrierSetAssembler_aarch64.hpp	Tue Jul 30 15:04:38 2019 +0200
@@ -40,7 +40,7 @@
                                                 Register start, Register count, Register tmp, RegSet saved_regs) {}
 
   virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
-                            Address dst, Register val, Register tmp1, Register tmp2) = 0;
+                            Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) = 0;
 
 public:
   virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
@@ -48,7 +48,7 @@
   virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
                                   Register start, Register count, Register tmp, RegSet saved_regs);
   virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
-                        Address dst, Register val, Register tmp1, Register tmp2);
+                        Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
 };
 
 #endif // CPU_AARCH64_GC_SHARED_MODREFBARRIERSETASSEMBLER_AARCH64_HPP
--- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -128,7 +128,8 @@
                                         Address dst,
                                         Register val,
                                         Register tmp1,
-                                        Register tmp2) {
+                                        Register tmp2,
+                                        Register tmp3) {
   // Verify value
   if (type == T_OBJECT || type == T_ARRAY) {
     // Note that src could be noreg, which means we
--- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp	Tue Jul 30 15:04:38 2019 +0200
@@ -55,7 +55,8 @@
                         Address dst,
                         Register val,
                         Register tmp1,
-                        Register tmp2);
+                        Register tmp2,
+                        Register tmp3);
 #endif // ASSERT
 
   virtual void arraycopy_prologue(MacroAssembler* masm,
--- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp	Tue Jul 30 15:04:38 2019 +0200
@@ -67,6 +67,7 @@
 define_pd_global(bool, PreserveFramePointer, false);
 
 define_pd_global(bool, ValueTypePassFieldsAsArgs, false);
+define_pd_global(bool, ValueTypeReturnedAsFields, false);
 
 // GC Ergo Flags
 define_pd_global(uintx, CMSYoungGenPerWorker, 64*M);  // default max size of CMS young gen, per GC worker thread
--- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -35,6 +35,7 @@
 #include "oops/markOop.hpp"
 #include "oops/method.hpp"
 #include "oops/methodData.hpp"
+#include "oops/valueKlass.hpp"
 #include "prims/jvmtiExport.hpp"
 #include "prims/jvmtiThreadState.hpp"
 #include "runtime/basicLock.hpp"
@@ -656,6 +657,7 @@
   // get sender esp
   ldr(esp,
       Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize));
+
   if (StackReservedPages > 0) {
     // testing if reserved zone needs to be re-enabled
     Label no_reserved_zone_enabling;
@@ -672,6 +674,7 @@
 
     bind(no_reserved_zone_enabling);
   }
+
   // remove frame anchor
   leave();
   // If we're returning to interpreted code we will shortly be
@@ -725,6 +728,11 @@
     // Save (object->mark() | 1) into BasicLock's displaced header
     str(swap_reg, Address(lock_reg, mark_offset));
 
+    if (EnableValhalla && !UseBiasedLocking) {
+      // Clear the biased-lock bit to force the slow path for is_always_locked
+      // value objects; the bit never occurs naturally when !UseBiasedLocking
+      andr(swap_reg, swap_reg, ~markOopDesc::biased_lock_bit_in_place);
+    }
+
     assert(lock_offset == 0,
            "displached header must be first word in BasicObjectLock");
 
--- a/src/hotspot/cpu/aarch64/interpreterRT_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/interpreterRT_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -255,6 +255,10 @@
   }
 }
 
+void InterpreterRuntime::SignatureHandlerGenerator::pass_valuetype() {
+   pass_object();
+}
+
 void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) {
   // generate code to handle arguments
   iterate(fingerprint);
@@ -348,6 +352,11 @@
     }
   }
 
+  virtual void pass_valuetype() {
+    // values are handled with oops, like objects
+    pass_object();
+  }
+
   virtual void pass_float()
   {
     jint from_obj = *(jint*)(_from+Interpreter::local_offset_in_bytes(0));
--- a/src/hotspot/cpu/aarch64/interpreterRT_aarch64.hpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/interpreterRT_aarch64.hpp	Tue Jul 30 15:04:38 2019 +0200
@@ -44,6 +44,7 @@
   void pass_float();
   void pass_double();
   void pass_object();
+  void pass_valuetype();
 
  public:
   // Creation
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -1309,7 +1309,11 @@
 
 
 void MacroAssembler::verify_oop(Register reg, const char* s) {
-  if (!VerifyOops) return;
+  if (!VerifyOops || VerifyAdapterSharing) {
+    // The address of the code string below confuses VerifyAdapterSharing
+    // because it may differ between otherwise equivalent adapters.
+    return;
+  }
 
   // Pass register number to verify_oop_subroutine
   const char* b = NULL;
@@ -1339,7 +1343,11 @@
 }
 
 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
-  if (!VerifyOops) return;
+  if (!VerifyOops || VerifyAdapterSharing) {
+    // The address of the code string below confuses VerifyAdapterSharing
+    // because it may differ between otherwise equivalent adapters.
+    return;
+  }
 
   const char* b = NULL;
   {
@@ -1442,6 +1450,10 @@
   call_VM_leaf_base(entry_point, 3);
 }
 
+void MacroAssembler::super_call_VM_leaf(address entry_point) {
+  MacroAssembler::call_VM_leaf_base(entry_point, 1);
+}
+
 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
   pass_arg0(this, arg_0);
   MacroAssembler::call_VM_leaf_base(entry_point, 1);
@@ -1491,6 +1503,39 @@
   }
 }
 
+void MacroAssembler::test_klass_is_value(Register klass, Register temp_reg, Label& is_value) {
+  ldrw(temp_reg, Address(klass, Klass::access_flags_offset()));
+  andr(temp_reg, temp_reg, JVM_ACC_VALUE);
+  cbnz(temp_reg, is_value); 
+}
+
+void MacroAssembler::test_field_is_flattenable(Register flags, Register temp_reg, Label& is_flattenable) {
+  (void) temp_reg; // keep signature uniform with x86
+  tbnz(flags, ConstantPoolCacheEntry::is_flattenable_field_shift, is_flattenable);
+}
+
+void MacroAssembler::test_field_is_not_flattenable(Register flags, Register temp_reg, Label& not_flattenable) {
+  (void) temp_reg; // keep signature uniform with x86
+  tbz(flags, ConstantPoolCacheEntry::is_flattenable_field_shift, not_flattenable);
+}
+
+void MacroAssembler::test_field_is_flattened(Register flags, Register temp_reg, Label& is_flattened) {
+  (void) temp_reg; // keep signature uniform with x86
+  tbnz(flags, ConstantPoolCacheEntry::is_flattened_field_shift, is_flattened);
+}
+
+void MacroAssembler::test_flattened_array_oop(Register oop, Register temp_reg, Label& is_flattened_array) {
+  load_storage_props(temp_reg, oop);
+  andr(temp_reg, temp_reg, ArrayStorageProperties::flattened_value);
+  cbnz(temp_reg, is_flattened_array);
+}
+
+void MacroAssembler::test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array) {
+  load_storage_props(temp_reg, oop);
+  andr(temp_reg, temp_reg, ArrayStorageProperties::null_free_value);
+  cbnz(temp_reg, is_null_free_array);
+}
+
 // MacroAssembler protected routines needed to implement
 // public methods
 
@@ -3683,12 +3728,21 @@
   bs->obj_equals(this, obj1, obj2);
 }
 
-void MacroAssembler::load_klass(Register dst, Register src) {
+void MacroAssembler::load_metadata(Register dst, Register src) {
   if (UseCompressedClassPointers) {
     ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+  } else {
+    ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+  }
+}
+
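+// With Valhalla, the klass word also encodes the array storage properties in
+// its upper bits: load_klass masks them off, load_storage_props (below)
+// extracts them.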
+void MacroAssembler::load_klass(Register dst, Register src) {
+  load_metadata(dst, src);
+  if (UseCompressedClassPointers) {
+    andr(dst, dst, oopDesc::compressed_klass_mask());
     decode_klass_not_null(dst);
   } else {
-    ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+    ubfm(dst, dst, 0, 63 - oopDesc::storage_props_nof_bits);
   }
 }
 
@@ -3707,6 +3761,15 @@
   resolve_oop_handle(dst, tmp);
 }
 
+void MacroAssembler::load_storage_props(Register dst, Register src) {
+  load_metadata(dst, src);
+  if (UseCompressedClassPointers) {
+    asrw(dst, dst, oopDesc::narrow_storage_props_shift);
+  } else {
+    asr(dst, dst, oopDesc::wide_storage_props_shift);
+  }
+}
+
 void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) {
   if (UseCompressedClassPointers) {
     ldrw(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
@@ -4024,14 +4087,14 @@
 
 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
                                      Address dst, Register src,
-                                     Register tmp1, Register thread_tmp) {
+                                     Register tmp1, Register thread_tmp, Register tmp3) {
   BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
   decorators = AccessInternal::decorator_fixup(decorators);
   bool as_raw = (decorators & AS_RAW) != 0;
   if (as_raw) {
-    bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp);
+    bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp, tmp3);
   } else {
-    bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp);
+    bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp, tmp3);
   }
 }
 
@@ -4055,13 +4118,13 @@
 }
 
 void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1,
-                                    Register thread_tmp, DecoratorSet decorators) {
-  access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
+                                    Register thread_tmp, Register tmp3, DecoratorSet decorators) {
+  access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp, tmp3);
 }
 
 // Used for storing NULLs.
 void MacroAssembler::store_heap_oop_null(Address dst) {
-  access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg);
+  access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
 }
 
 Address MacroAssembler::allocate_metadata_address(Metadata* obj) {
@@ -5865,3 +5928,39 @@
 
   pop(saved_regs, sp);
 }
+
+// C2 compiled method's prolog code.
+// Moved here from aarch64.ad to support the Valhalla code below.
+void MacroAssembler::verified_entry(Compile* C, int sp_inc) {
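+  // Note: sp_inc (presumably the extra stack adjustment needed by the
+  // scalarized calling convention) is not used yet on AArch64.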
+
+  // n.b. frame size includes space for return pc and rfp
+  const long framesize = C->frame_size_in_bytes();
+  assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment");
+
+  // insert a nop at the start of the prolog so we can patch in a
+  // branch if we need to invalidate the method later
+  nop();
+
+  int bangsize = C->bang_size_in_bytes();
+  if (C->need_stack_bang(bangsize) && UseStackBanging)
+    generate_stack_overflow_check(bangsize);
+
+  build_frame(framesize);
+
+  if (NotifySimulator) {
+    notify(Assembler::method_entry);
+  }
+
+  if (VerifyStackAtCalls) {
+    Unimplemented();
+  }
+}
+
+void MacroAssembler::unpack_value_args(Compile* C, bool receiver_only) {
+  // Called from MachVEP node
+  unimplemented("Support for ValueTypePassFieldsAsArgs and ValueTypeReturnedAsFields is not implemented");
+}
+
+void MacroAssembler::store_value_type_fields_to_buf(ciValueKlass* vk) {
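+  // The returned fields are still live in their return registers; the stub
+  // saves them and calls into the runtime to allocate and fill the buffer.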
+  super_call_VM_leaf(StubRoutines::store_value_type_fields_to_buf());
+}
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp	Tue Jul 30 15:04:38 2019 +0200
@@ -29,6 +29,8 @@
 #include "asm/assembler.hpp"
 #include "oops/compressedOops.hpp"
 
+class ciValueKlass;
+
 // MacroAssembler extends Assembler by frequently used macros.
 //
 // Instructions for which a 'better' code sequence exists depending
@@ -585,6 +587,16 @@
   static bool needs_explicit_null_check(intptr_t offset);
   static bool uses_implicit_null_check(void* address);
 
+  void test_klass_is_value(Register klass, Register temp_reg, Label& is_value);
+
+  void test_field_is_flattenable(Register flags, Register temp_reg, Label& is_flattenable);
+  void test_field_is_not_flattenable(Register flags, Register temp_reg, Label& not_flattenable);
+  void test_field_is_flattened(Register flags, Register temp_reg, Label& is_flattened);
+
+  // Check whether an oop is a flat value type array (via its storage properties)
+  void test_flattened_array_oop(Register oop, Register temp_reg, Label& is_flattened_array);
+  void test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array);
+
   static address target_addr_for_insn(address insn_addr, unsigned insn);
   static address target_addr_for_insn(address insn_addr) {
     unsigned insn = *(unsigned*)insn_addr;
@@ -789,6 +801,9 @@
   void c2bool(Register x);
 
   // oop manipulations
+  void load_metadata(Register dst, Register src);
+  void load_storage_props(Register dst, Register src);
+
   void load_klass(Register dst, Register src);
   void store_klass(Register dst, Register src);
   void cmp_klass(Register oop, Register trial_klass, Register tmp);
@@ -800,7 +815,7 @@
                       Register tmp1, Register tmp_thread);
 
   void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src,
-                       Register tmp1, Register tmp_thread);
+                       Register tmp1, Register tmp_thread, Register tmp3 = noreg);
 
   // Resolves obj for access. Result is placed in the same register.
   // All other registers are preserved.
@@ -812,7 +827,7 @@
   void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg,
                               Register thread_tmp = noreg, DecoratorSet decorators = 0);
   void store_heap_oop(Address dst, Register src, Register tmp1 = noreg,
-                      Register tmp_thread = noreg, DecoratorSet decorators = 0);
+                      Register tmp_thread = noreg, Register tmp3 = noreg, DecoratorSet decorators = 0);
 
   // currently unimplemented
   // Used for storing NULL. All other oop constants should be
@@ -1143,6 +1158,19 @@
 
   void adrp(Register reg1, const Address &dest, unsigned long &byte_offset);
 
+
+  enum RegState {
+     reg_readonly,
+     reg_writable,
+     reg_written
+  };
+
+  void verified_entry(Compile* C, int sp_inc);
+
+  // Unpack all value type arguments passed as oops
+  void unpack_value_args(Compile* C, bool receiver_only);
+  void store_value_type_fields_to_buf(ciValueKlass* vk);
+
   void tableswitch(Register index, jint lowbound, jint highbound,
                    Label &jumptable, Label &jumptable_end, int stride = 1) {
     adr(rscratch1, jumptable);
@@ -1235,6 +1263,8 @@
                      int elem_size);
 
   void fill_words(Register base, Register cnt, Register value);
+  void fill_words(Register base, u_int64_t cnt, Register value);
+
   void zero_words(Register base, u_int64_t cnt);
   void zero_words(Register ptr, Register cnt);
   void zero_dcache_blocks(Register base, Register cnt);
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -26,6 +26,7 @@
 #include "precompiled.hpp"
 #include "asm/macroAssembler.hpp"
 #include "asm/macroAssembler.inline.hpp"
+#include "classfile/symbolTable.hpp"
 #include "code/debugInfoRec.hpp"
 #include "code/icBuffer.hpp"
 #include "code/vtableStubs.hpp"
@@ -288,6 +289,7 @@
     case T_OBJECT:
     case T_ARRAY:
     case T_ADDRESS:
+    case T_VALUETYPE:
       if (int_args < Argument::n_int_register_parameters_j) {
         regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
       } else {
@@ -321,6 +323,90 @@
   return align_up(stk_args, 2);
 }
 
+
+// Not Argument::n_int_register_parameters_j: r1 and r2 are excluded (see below).
+const uint SharedRuntime::java_return_convention_max_int = 6;
+const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;
+
+int SharedRuntime::java_return_convention(const BasicType *sig_bt, VMRegPair *regs, int total_args_passed) {
+
+  // Create the mapping between argument positions and
+  // registers.
+  // r1 and r2 are used to address klasses and states; exclude them from the
+  // return convention to avoid collisions.
+
+  static const Register INT_ArgReg[java_return_convention_max_int] = {
+     r0 /* j_rarg7 */, j_rarg6, j_rarg5, j_rarg4, j_rarg3, j_rarg2
+  };
+
+  static const FloatRegister FP_ArgReg[java_return_convention_max_float] = {
+    j_farg0, j_farg1, j_farg2, j_farg3, j_farg4, j_farg5, j_farg6, j_farg7
+  };
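+
+  // Illustration (not from the original source): for a value type with fields
+  // (int, double, long), the fields would be returned in r0, v0 (j_farg0),
+  // and j_rarg6, assigned in signature order from the tables above.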
+
+  uint int_args = 0;
+  uint fp_args = 0;
+
+  for (int i = 0; i < total_args_passed; i++) {
+    switch (sig_bt[i]) {
+    case T_BOOLEAN:
+    case T_CHAR:
+    case T_BYTE:
+    case T_SHORT:
+    case T_INT:
+      if (int_args < Argument::n_int_register_parameters_j) {
+        regs[i].set1(INT_ArgReg[int_args]->as_VMReg());
+        int_args ++;
+      } else {
+        // Should we have a guarantee here?
+        return -1;
+      }
+      break;
+    case T_VOID:
+      // halves of T_LONG or T_DOUBLE
+      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
+      regs[i].set_bad();
+      break;
+    case T_LONG:
+      assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
+      // fall through
+    case T_OBJECT:
+    case T_ARRAY:
+    case T_ADDRESS:
+      // Should T_METADATA be added to java_calling_convention as well?
+    case T_METADATA:
+    case T_VALUETYPE:
+      if (int_args < Argument::n_int_register_parameters_j) {
+        regs[i].set2(INT_ArgReg[int_args]->as_VMReg());
+        int_args ++;
+      } else {
+        return -1;
+      }
+      break;
+    case T_FLOAT:
+      if (fp_args < Argument::n_float_register_parameters_j) {
+        regs[i].set1(FP_ArgReg[fp_args]->as_VMReg());
+        fp_args ++;
+      } else {
+        return -1;
+      }
+      break;
+    case T_DOUBLE:
+      assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
+      if (fp_args < Argument::n_float_register_parameters_j) {
+        regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
+        fp_args ++;
+      } else {
+        return -1;
+      }
+      break;
+    default:
+      ShouldNotReachHere();
+      break;
+    }
+  }
+
+  return int_args + fp_args;
+}
+
 // Patch the callers callsite with entry to compiled code if it exists.
 static void patch_callers_callsite(MacroAssembler *masm) {
   Label L;
@@ -351,46 +437,18 @@
   __ bind(L);
 }
 
-static void gen_c2i_adapter(MacroAssembler *masm,
-                            int total_args_passed,
-                            int comp_args_on_stack,
-                            const BasicType *sig_bt,
-                            const VMRegPair *regs,
-                            Label& skip_fixup) {
-  // Before we get into the guts of the C2I adapter, see if we should be here
-  // at all.  We've come from compiled code and are attempting to jump to the
-  // interpreter, which means the caller made a static call to get here
-  // (vcalls always get a compiled target if there is one).  Check for a
-  // compiled target.  If there is one, we need to patch the caller's call.
-  patch_callers_callsite(masm);
-
-  __ bind(skip_fixup);
-
-  int words_pushed = 0;
-
-  // Since all args are passed on the stack, total_args_passed *
-  // Interpreter::stackElementSize is the space we need.
-
-  int extraspace = total_args_passed * Interpreter::stackElementSize;
-
-  __ mov(r13, sp);
-
-  // stack is aligned, keep it that way
-  extraspace = align_up(extraspace, 2*wordSize);
-
-  if (extraspace)
-    __ sub(sp, sp, extraspace);
-
-  // Now write the args into the outgoing interpreter space
-  for (int i = 0; i < total_args_passed; i++) {
-    if (sig_bt[i] == T_VOID) {
-      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
-      continue;
-    }
-
-    // offset to start parameters
-    int st_off   = (total_args_passed - i - 1) * Interpreter::stackElementSize;
-    int next_off = st_off - Interpreter::stackElementSize;
+// For each value type argument, sig includes the list of fields of
+// the value type. This utility function computes the number of
+// arguments for the call if value types are passed by reference (the
+// calling convention the interpreter expects).
+static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) {
+  return sig_extended->length();
+}
+
+
+static void gen_c2i_adapter_helper(MacroAssembler* masm, BasicType bt, const VMRegPair& reg_pair, int extraspace, const Address& to) {
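+  // Stores a single incoming (compiled-convention) argument, living either in
+  // a register or in an SP-relative stack slot, into the interpreter's
+  // outgoing stack slot 'to'.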
 
     // Say 4 args:
     // i   st_off
@@ -405,76 +463,122 @@
     // leaves one slot empty and only stores to a single slot. In this case the
     // slot that is occupied is the T_VOID slot. See I said it was confusing.
 
-    VMReg r_1 = regs[i].first();
-    VMReg r_2 = regs[i].second();
+    // int next_off = st_off - Interpreter::stackElementSize;
+
+    VMReg r_1 = reg_pair.first();
+    VMReg r_2 = reg_pair.second();
+
     if (!r_1->is_valid()) {
       assert(!r_2->is_valid(), "");
-      continue;
+      return;
     }
+
     if (r_1->is_stack()) {
       // memory to memory use rscratch1
-      int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size
-                    + extraspace
-                    + words_pushed * wordSize);
+      // DMS CHECK: words_pushed is always 0 and can be removed?
+      // int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace + words_pushed * wordSize);
+      int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace);
       if (!r_2->is_valid()) {
         // sign extend??
         __ ldrw(rscratch1, Address(sp, ld_off));
-        __ str(rscratch1, Address(sp, st_off));
+        __ str(rscratch1, to);
 
       } else {
-
         __ ldr(rscratch1, Address(sp, ld_off));
-
-        // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
-        // T_DOUBLE and T_LONG use two slots in the interpreter
-        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
-          // ld_off == LSW, ld_off+wordSize == MSW
-          // st_off == MSW, next_off == LSW
-          __ str(rscratch1, Address(sp, next_off));
-#ifdef ASSERT
-          // Overwrite the unused slot with known junk
-          __ mov(rscratch1, 0xdeadffffdeadaaaaul);
-          __ str(rscratch1, Address(sp, st_off));
-#endif /* ASSERT */
-        } else {
-          __ str(rscratch1, Address(sp, st_off));
-        }
+        __ str(rscratch1, to);
       }
     } else if (r_1->is_Register()) {
-      Register r = r_1->as_Register();
-      if (!r_2->is_valid()) {
-        // must be only an int (or less ) so move only 32bits to slot
-        // why not sign extend??
-        __ str(r, Address(sp, st_off));
-      } else {
-        // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
-        // T_DOUBLE and T_LONG use two slots in the interpreter
-        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
-          // long/double in gpr
-#ifdef ASSERT
-          // Overwrite the unused slot with known junk
-          __ mov(rscratch1, 0xdeadffffdeadaaabul);
-          __ str(rscratch1, Address(sp, st_off));
-#endif /* ASSERT */
-          __ str(r, Address(sp, next_off));
-        } else {
-          __ str(r, Address(sp, st_off));
-        }
-      }
+      Register r = r_1->as_Register(); 
+      __ str(r, to);
     } else {
       assert(r_1->is_FloatRegister(), "");
       if (!r_2->is_valid()) {
         // only a float use just part of the slot
-        __ strs(r_1->as_FloatRegister(), Address(sp, st_off));
+        __ strs(r_1->as_FloatRegister(), to);
       } else {
-#ifdef ASSERT
-        // Overwrite the unused slot with known junk
-        __ mov(rscratch1, 0xdeadffffdeadaaacul);
-        __ str(rscratch1, Address(sp, st_off));
-#endif /* ASSERT */
-        __ strd(r_1->as_FloatRegister(), Address(sp, next_off));
+        __ strd(r_1->as_FloatRegister(), to);
       }
+    }
+}
+
+static void gen_c2i_adapter(MacroAssembler *masm,
+                            const GrowableArray<SigEntry>* sig_extended,
+                            const VMRegPair *regs,
+                            Label& skip_fixup,
+                            address start,
+                            OopMapSet* oop_maps,
+                            int& frame_complete,
+                            int& frame_size_in_words,
+                            bool alloc_value_receiver) {
+
+  // Before we get into the guts of the C2I adapter, see if we should be here
+  // at all.  We've come from compiled code and are attempting to jump to the
+  // interpreter, which means the caller made a static call to get here
+  // (vcalls always get a compiled target if there is one).  Check for a
+  // compiled target.  If there is one, we need to patch the caller's call.
+  patch_callers_callsite(masm);
+
+  __ bind(skip_fixup);
+
+  bool has_value_argument = false;
+  int words_pushed = 0;
+
+  // Since all args are passed on the stack, total_args_passed *
+  // Interpreter::stackElementSize is the space we need.
+
+  int total_args_passed = compute_total_args_passed_int(sig_extended);
+  int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize;
+
+  // stack is aligned, keep it that way
+  extraspace = align_up(extraspace, 2 * wordSize);
+
+  __ mov(r13, sp);
+
+  if (extraspace)
+    __ sub(sp, sp, extraspace);
+
+  // Now write the args into the outgoing interpreter space
+
+  int ignored = 0, next_vt_arg = 0, next_arg_int = 0;
+  bool has_oop_field = false;
+
+  for (int next_arg_comp = 0; next_arg_comp < total_args_passed; next_arg_comp++) {
+    BasicType bt = sig_extended->at(next_arg_comp)._bt;
+    // offset to start parameters
+    int st_off   = (total_args_passed - next_arg_int - 1) * Interpreter::stackElementSize;
+
+    if (SigEntry::is_reserved_entry(sig_extended, next_arg_comp)) {
+      continue; // Ignore reserved entry
     }
+
+    if (bt == T_VOID) {
+      assert(next_arg_comp > 0 && (sig_extended->at(next_arg_comp - 1)._bt == T_LONG || sig_extended->at(next_arg_comp - 1)._bt == T_DOUBLE), "missing half");
+      next_arg_int++;
+      continue;
+    }
+
+    int next_off = st_off - Interpreter::stackElementSize;
+    int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off;
+
+    gen_c2i_adapter_helper(masm, bt, regs[next_arg_comp], extraspace, Address(sp, offset));
+    next_arg_int++;
+  }
+
+  // If a value type was allocated and initialized, apply a post barrier to all oop fields
+  if (has_value_argument && has_oop_field) {
+    __ push(r13); // save senderSP
+    __ push(r1); // save callee
+    // Allocate argument register save area
+    if (frame::arg_reg_save_area_bytes != 0) {
+      __ sub(sp, sp, frame::arg_reg_save_area_bytes);
+    }
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::apply_post_barriers), rthread, r10);
+    // De-allocate argument register save area
+    if (frame::arg_reg_save_area_bytes != 0) {
+      __ add(sp, sp, frame::arg_reg_save_area_bytes);
+    }
+    __ pop(r1); // restore callee
+    __ pop(r13); // restore sender SP
   }
 
   __ mov(esp, sp); // Interp expects args on caller's expression stack
@@ -483,12 +587,8 @@
   __ br(rscratch1);
 }
 
-
-void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
-                                    int total_args_passed,
-                                    int comp_args_on_stack,
-                                    const BasicType *sig_bt,
-                                    const VMRegPair *regs) {
+void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, int comp_args_on_stack, const GrowableArray<SigEntry>* sig, const VMRegPair *regs) {
   // Note: r13 contains the senderSP on entry. We must preserve it since
   // we may do a i2c -> c2i transition if we lose a race where compiled
@@ -548,10 +648,11 @@
   }
 
   // Cut-out for having no stack args.
-  int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
+  int comp_words_on_stack = 0; 
   if (comp_args_on_stack) {
-    __ sub(rscratch1, sp, comp_words_on_stack * wordSize);
-    __ andr(sp, rscratch1, -16);
+    comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord;
+    __ sub(rscratch1, sp, comp_words_on_stack * wordSize);
+    __ andr(sp, rscratch1, -16);
   }
 
   // Will jump to the compiled code just as if compiled code was doing it.
@@ -570,19 +671,23 @@
   }
 #endif // INCLUDE_JVMCI
 
+  int total_args_passed = sig->length();
+
   // Now generate the shuffle code.
   for (int i = 0; i < total_args_passed; i++) {
-    if (sig_bt[i] == T_VOID) {
-      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
+    BasicType bt = sig->at(i)._bt; 
+
+    assert(bt != T_VALUETYPE, "i2c adapter doesn't unpack value args");
+    if (bt == T_VOID) {
+      assert(i > 0 && (sig->at(i - 1)._bt == T_LONG || sig->at(i - 1)._bt == T_DOUBLE), "missing half");
       continue;
     }
 
     // Pick up 0, 1 or 2 words from SP+offset.
-
-    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
-            "scrambled load targets?");
+    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?");
+
     // Load in argument order going down.
-    int ld_off = (total_args_passed - i - 1)*Interpreter::stackElementSize;
+    int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize;
     // Point to interpreter value (vs. tag)
     int next_off = ld_off - Interpreter::stackElementSize;
     //
@@ -596,7 +701,7 @@
     }
     if (r_1->is_stack()) {
       // Convert stack slot to an SP offset (+ wordSize to account for return address )
-      int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size;
+      int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size;
       if (!r_2->is_valid()) {
         // sign extend???
         __ ldrsw(rscratch2, Address(esp, ld_off));
@@ -613,39 +718,38 @@
         // are accessed as negative so LSW is at LOW address
 
         // ld_off is MSW so get LSW
-        const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
-                           next_off : ld_off;
+        const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : ld_off;
         __ ldr(rscratch2, Address(esp, offset));
         // st_off is LSW (i.e. reg.first())
-        __ str(rscratch2, Address(sp, st_off));
-      }
-    } else if (r_1->is_Register()) {  // Register argument
-      Register r = r_1->as_Register();
-      if (r_2->is_valid()) {
-        //
-        // We are using two VMRegs. This can be either T_OBJECT,
-        // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates
-        // two slots but only uses one for thr T_LONG or T_DOUBLE case
-        // So we must adjust where to pick up the data to match the
-        // interpreter.
-
-        const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
-                           next_off : ld_off;
-
-        // this can be a misaligned move
-        __ ldr(r, Address(esp, offset));
-      } else {
-        // sign extend and use a full word?
-        __ ldrw(r, Address(esp, ld_off));
-      }
-    } else {
-      if (!r_2->is_valid()) {
-        __ ldrs(r_1->as_FloatRegister(), Address(esp, ld_off));
-      } else {
-        __ ldrd(r_1->as_FloatRegister(), Address(esp, next_off));
-      }
-    }
-  }
+        __ str(rscratch2, Address(sp, st_off));
+      }
+    } else if (r_1->is_Register()) {  // Register argument
+      Register r = r_1->as_Register();
+      if (r_2->is_valid()) {
+        //
+        // We are using two VMRegs. This can be either T_OBJECT,
+        // T_ADDRESS, T_LONG, or T_DOUBLE; the interpreter allocates
+        // two slots but only uses one for the T_LONG or T_DOUBLE case,
+        // so we must adjust where to pick up the data to match the
+        // interpreter.
+
+        const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : ld_off;
+
+        // this can be a misaligned move
+        __ ldr(r, Address(esp, offset));
+      } else {
+        // sign extend and use a full word?
+        __ ldrw(r, Address(esp, ld_off));
+      }
+    } else {
+      if (!r_2->is_valid()) {
+        __ ldrs(r_1->as_FloatRegister(), Address(esp, ld_off));
+      } else {
+        __ ldrd(r_1->as_FloatRegister(), Address(esp, next_off));
+      }
+    }
+  }
+
 
   // 6243940 We might end up in handle_wrong_method if
   // the callee is deoptimized as we race thru here. If that
@@ -658,7 +762,6 @@
   // and the vm will find there should this case occur.
 
   __ str(rmethod, Address(rthread, JavaThread::callee_target_offset()));
-
   __ br(rscratch1);
 }
 
@@ -727,32 +830,7 @@
 }
 #endif
 
-// ---------------------------------------------------------------
-AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
-                                                            int total_args_passed,
-                                                            int comp_args_on_stack,
-                                                            const BasicType *sig_bt,
-                                                            const VMRegPair *regs,
-                                                            AdapterFingerPrint* fingerprint) {
-  address i2c_entry = __ pc();
-#ifdef BUILTIN_SIM
-  char *name = NULL;
-  AArch64Simulator *sim = NULL;
-  size_t len = 65536;
-  if (NotifySimulator) {
-    name = NEW_C_HEAP_ARRAY(char, len, mtInternal);
-  }
-
-  if (name) {
-    generate_i2c_adapter_name(name, total_args_passed, sig_bt);
-    sim = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
-    sim->notifyCompile(name, i2c_entry);
-  }
-#endif
-  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
-
-  address c2i_unverified_entry = __ pc();
-  Label skip_fixup;
+static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) {
 
   Label ok;
 
@@ -788,21 +866,77 @@
     __ block_comment("} c2i_unverified_entry");
   }
 
+}
+
+// ---------------------------------------------------------------
+AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
+                                                            int comp_args_on_stack,
+                                                            const GrowableArray<SigEntry>* sig,
+                                                            const VMRegPair* regs,
+                                                            const GrowableArray<SigEntry>* sig_cc,
+                                                            const VMRegPair* regs_cc,
+                                                            const GrowableArray<SigEntry>* sig_cc_ro,
+                                                            const VMRegPair* regs_cc_ro,
+                                                            AdapterFingerPrint* fingerprint,
+                                                            AdapterBlob*& new_adapter) {
+
+  address i2c_entry = __ pc();
+  gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);
+
+  address c2i_unverified_entry = __ pc();
+  Label skip_fixup;
+
+  gen_inline_cache_check(masm, skip_fixup);
+
+  OopMapSet* oop_maps = new OopMapSet();
+  int frame_complete = CodeOffsets::frame_never_safe;
+  int frame_size_in_words = 0;
+
+  // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver)
+  address c2i_value_ro_entry = __ pc();
+  if (regs_cc != regs_cc_ro) {
+    Label unused;
+    gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false);
+    skip_fixup = unused;
+  }
+
+  // Scalarized c2i adapter
   address c2i_entry = __ pc();
 
-#ifdef BUILTIN_SIM
-  if (name) {
-    name[0] = 'c';
-    name[2] = 'i';
-    sim->notifyCompile(name, c2i_entry);
-    FREE_C_HEAP_ARRAY(char, name, mtInternal);
+  // Not implemented
+  // BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  // bs->c2i_entry_barrier(masm);
+
+  gen_c2i_adapter(masm, sig_cc, regs_cc, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, true);
+
+  address c2i_unverified_value_entry = c2i_unverified_entry;
+
+  // Non-scalarized c2i adapter
+  address c2i_value_entry = c2i_entry;
+  if (regs != regs_cc) {
+    Label value_entry_skip_fixup;
+    c2i_unverified_value_entry = __ pc();
+    gen_inline_cache_check(masm, value_entry_skip_fixup);
+
+    c2i_value_entry = __ pc();
+    Label unused;
+    gen_c2i_adapter(masm, sig, regs, value_entry_skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false);
   }
-#endif
-
-  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
 
   __ flush();
-  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
+
+  // The c2i adapter might safepoint and trigger a GC. The caller must make sure that
+  // the GC knows about the location of oop argument locations passed to the c2i adapter.
+
+  bool caller_must_gc_arguments = (regs != regs_cc);
+  new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words + 10, oop_maps, caller_must_gc_arguments);
+
+  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_value_entry, c2i_value_ro_entry, c2i_unverified_entry, c2i_unverified_value_entry);
+
 }
 
 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
@@ -845,6 +979,7 @@
         // fall through
       case T_OBJECT:
       case T_ARRAY:
+      case T_VALUETYPE:
       case T_ADDRESS:
       case T_METADATA:
         if (int_args < Argument::n_int_register_parameters_c) {
@@ -1721,6 +1856,7 @@
           int_args++;
           break;
         }
+      case T_VALUETYPE:
       case T_OBJECT:
         assert(!is_critical_native, "no oop arguments");
         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
@@ -1902,6 +2038,7 @@
     case T_LONG:
       return_type = 1; break;
     case T_ARRAY:
+    case T_VALUETYPE:
     case T_OBJECT:
       return_type = 1; break;
     case T_FLOAT:
@@ -1934,6 +2071,7 @@
     // Result is in v0 we'll save as needed
     break;
   case T_ARRAY:                 // Really a handle
+  case T_VALUETYPE:
   case T_OBJECT:                // Really a handle
       break; // can't de-handlize until after safepoint check
   case T_VOID: break;
@@ -2038,7 +2176,7 @@
   __ reset_last_Java_frame(false);
 
   // Unbox oop result, e.g. JNIHandles::resolve result.
-  if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
+  if (ret_type == T_OBJECT || ret_type == T_ARRAY || ret_type == T_VALUETYPE) {
     __ resolve_jobject(r0, rthread, rscratch2);
   }
 
@@ -3194,3 +3332,108 @@
   _exception_blob =  ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
 }
 #endif // COMPILER2_OR_JVMCI
+
+BufferedValueTypeBlob* SharedRuntime::generate_buffered_value_type_adapter(const ValueKlass* vk) {
+  BufferBlob* buf = BufferBlob::create("value types pack/unpack", 16 * K);
+  CodeBuffer buffer(buf);
+  short buffer_locs[20];
+  buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs,
+                                         sizeof(buffer_locs)/sizeof(relocInfo));
+
+  MacroAssembler _masm(&buffer);
+  MacroAssembler* masm = &_masm;
+
+  const Array<SigEntry>* sig_vk = vk->extended_sig();
+  const Array<VMRegPair>* regs = vk->return_regs();
+
+  int pack_fields_off = __ offset();
+
+  int j = 1;
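+  // j starts at 1: regs->at(0) is assumed to hold the buffered value oop
+  // itself (r0), so the field registers start at index 1.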
+  for (int i = 0; i < sig_vk->length(); i++) {
+    BasicType bt = sig_vk->at(i)._bt;
+    if (bt == T_VALUETYPE) {
+      continue;
+    }
+    if (bt == T_VOID) {
+      if (sig_vk->at(i-1)._bt == T_LONG ||
+          sig_vk->at(i-1)._bt == T_DOUBLE) {
+        j++;
+      }
+      continue;
+    }
+    int off = sig_vk->at(i)._offset;
+    VMRegPair pair = regs->at(j);
+    VMReg r_1 = pair.first();
+    VMReg r_2 = pair.second();
+    Address to(r0, off);
+    if (bt == T_FLOAT) { 
+      __ strs(r_1->as_FloatRegister(), to);
+    } else if (bt == T_DOUBLE) {
+      __ strd(r_1->as_FloatRegister(), to);
+    } else if (bt == T_OBJECT || bt == T_ARRAY) {
+      Register val = r_1->as_Register();
+      assert_different_registers(r0, val);
+      // We don't need barriers because the destination is a newly allocated object.
+      // Also, we cannot use store_heap_oop(to, val) because it uses r8 as tmp.
+      if (UseCompressedOops) {
+        __ encode_heap_oop(val);
+        __ str(val, to);
+      } else {
+        __ str(val, to);
+      }
+    } else {
+      assert(is_java_primitive(bt), "unexpected basic type");
+      assert_different_registers(r0, r_1->as_Register());
+      size_t size_in_bytes = type2aelembytes(bt);
+      __ store_sized_value(to, r_1->as_Register(), size_in_bytes);
+    }
+    j++;
+  }
+  assert(j == regs->length(), "missed a field?");
+
+  __ ret(lr);
+
+  int unpack_fields_off = __ offset();
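+
+  // The inverse of pack_fields above: load each field of the buffered value
+  // at r0 back into its return register.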
+
+  j = 1;
+  for (int i = 0; i < sig_vk->length(); i++) {
+    BasicType bt = sig_vk->at(i)._bt;
+    if (bt == T_VALUETYPE) {
+      continue;
+    }
+    if (bt == T_VOID) {
+      if (sig_vk->at(i-1)._bt == T_LONG ||
+          sig_vk->at(i-1)._bt == T_DOUBLE) {
+        j++;
+      }
+      continue;
+    }
+    int off = sig_vk->at(i)._offset;
+    VMRegPair pair = regs->at(j);
+    VMReg r_1 = pair.first();
+    VMReg r_2 = pair.second();
+    Address from(r0, off);
+    if (bt == T_FLOAT) {
+      __ ldrs(r_1->as_FloatRegister(), from);
+    } else if (bt == T_DOUBLE) {
+      __ ldrd(r_1->as_FloatRegister(), from);
+    } else if (bt == T_OBJECT || bt == T_ARRAY) {
+      assert_different_registers(r0, r_1->as_Register());
+      __ load_heap_oop(r_1->as_Register(), from);
+    } else {
+      assert(is_java_primitive(bt), "unexpected basic type");
+      assert_different_registers(r0, r_1->as_Register());
+
+      size_t size_in_bytes = type2aelembytes(bt);
+      __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN);
+    }
+    j++;
+  }
+  assert(j == regs->length(), "missed a field?");
+
+  __ ret(lr);
+
+  __ flush();
+
+  return BufferedValueTypeBlob::create(&buffer, pack_fields_off, unpack_fields_off);
+}
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -320,7 +320,7 @@
     return_address = __ pc();
 
     // store result depending on type (everything that is not
-    // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
+    // T_OBJECT, T_VALUETYPE, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
     // n.b. this assumes Java returns an integral result in r0
     // and a floating result in j_farg0
     __ ldr(j_rarg2, result);
@@ -328,6 +328,8 @@
     __ ldr(j_rarg1, result_type);
     __ cmp(j_rarg1, (u1)T_OBJECT);
     __ br(Assembler::EQ, is_long);
+    __ cmp(j_rarg1, (u1)T_VALUETYPE);
+    __ br(Assembler::EQ, is_long);
     __ cmp(j_rarg1, (u1)T_LONG);
     __ br(Assembler::EQ, is_long);
     __ cmp(j_rarg1, (u1)T_FLOAT);
@@ -1829,7 +1831,7 @@
     __ align(OptoLoopAlignment);
 
     __ BIND(L_store_element);
-    __ store_heap_oop(__ post(to, UseCompressedOops ? 4 : 8), copied_oop, noreg, noreg, AS_RAW);  // store the oop
+    __ store_heap_oop(__ post(to, UseCompressedOops ? 4 : 8), copied_oop, noreg, noreg, noreg, AS_RAW);  // store the oop 
     __ sub(count, count, 1);
     __ cbz(count, L_do_card_marks);
 
@@ -5650,6 +5652,184 @@
   };
 
 
+  // Call here from the interpreter or compiled code to either load the
+  // fields of a returned value type instance into registers, or to store
+  // returned register values into a newly allocated value type instance.
+  address generate_return_value_stub(address destination, const char* name, bool has_res) {
+
+    // Information about frame layout at time of blocking runtime call.
+    // Note that we only have to preserve callee-saved registers since
+    // the compilers are responsible for supplying a continuation point
+    // if they expect all registers to be preserved.
+    // n.b. aarch64 asserts that frame::arg_reg_save_area_bytes == 0
+    enum layout {
+      rfp_off = 0, rfp_off2,
+
+      j_rarg7_off, j_rarg7_2,
+      j_rarg6_off, j_rarg6_2,
+      j_rarg5_off, j_rarg5_2,
+      j_rarg4_off, j_rarg4_2,
+      j_rarg3_off, j_rarg3_2,
+      j_rarg2_off, j_rarg2_2,
+      j_rarg1_off, j_rarg1_2,
+      j_rarg0_off, j_rarg0_2,
+
+      j_farg0_off, j_farg0_2,
+      j_farg1_off, j_farg1_2,
+      j_farg2_off, j_farg2_2,
+      j_farg3_off, j_farg3_2,
+      j_farg4_off, j_farg4_2,
+      j_farg5_off, j_farg5_2,
+      j_farg6_off, j_farg6_2,
+      j_farg7_off, j_farg7_2,
+ 
+      return_off, return_off2,
+      framesize // inclusive of return address
+    };
+
+    int insts_size = 512;
+    int locs_size  = 64;
+
+    CodeBuffer code(name, insts_size, locs_size);
+    OopMapSet* oop_maps  = new OopMapSet();
+    MacroAssembler* masm = new MacroAssembler(&code);
+
+    address start = __ pc();
+
+    const Address f7_save       (rfp, j_farg7_off * wordSize);
+    const Address f6_save       (rfp, j_farg6_off * wordSize);
+    const Address f5_save       (rfp, j_farg5_off * wordSize);
+    const Address f4_save       (rfp, j_farg4_off * wordSize);
+    const Address f3_save       (rfp, j_farg3_off * wordSize);
+    const Address f2_save       (rfp, j_farg2_off * wordSize);
+    const Address f1_save       (rfp, j_farg1_off * wordSize);
+    const Address f0_save       (rfp, j_farg0_off * wordSize);
+
+    const Address r0_save      (rfp, j_rarg0_off * wordSize);
+    const Address r1_save      (rfp, j_rarg1_off * wordSize);
+    const Address r2_save      (rfp, j_rarg2_off * wordSize);
+    const Address r3_save      (rfp, j_rarg3_off * wordSize);
+    const Address r4_save      (rfp, j_rarg4_off * wordSize);
+    const Address r5_save      (rfp, j_rarg5_off * wordSize);
+    const Address r6_save      (rfp, j_rarg6_off * wordSize);
+    const Address r7_save      (rfp, j_rarg7_off * wordSize);
+
+    // Generate oop map
+    OopMap* map = new OopMap(framesize, 0);
+
+    map->set_callee_saved(VMRegImpl::stack2reg(rfp_off), rfp->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_rarg7_off), j_rarg7->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_rarg6_off), j_rarg6->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_rarg5_off), j_rarg5->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_rarg4_off), j_rarg4->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_rarg3_off), j_rarg3->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_rarg2_off), j_rarg2->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_rarg1_off), j_rarg1->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_rarg0_off), j_rarg0->as_VMReg());
+
+    map->set_callee_saved(VMRegImpl::stack2reg(j_farg0_off), j_farg0->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_farg1_off), j_farg1->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_farg2_off), j_farg2->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_farg3_off), j_farg3->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_farg4_off), j_farg4->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_farg5_off), j_farg5->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_farg6_off), j_farg6->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(j_farg7_off), j_farg7->as_VMReg());
+
+    // This is an inlined and slightly modified version of call_VM
+    // which has the ability to fetch the return PC out of
+    // thread-local storage and also sets up last_Java_sp slightly
+    // differently than the real call_VM
+
+    __ enter(); // Save FP and LR before call
+
+    assert(is_even(framesize/2), "sp not 16-byte aligned");
+
+    // lr and fp are already in place
+    __ sub(sp, rfp, ((unsigned)framesize - 4) << LogBytesPerInt); // prolog
+
+    __ strd(j_farg7, f7_save); 
+    __ strd(j_farg6, f6_save); 
+    __ strd(j_farg5, f5_save); 
+    __ strd(j_farg4, f4_save); 
+    __ strd(j_farg3, f3_save); 
+    __ strd(j_farg2, f2_save); 
+    __ strd(j_farg1, f1_save); 
+    __ strd(j_farg0, f0_save); 
+
+    __ str(j_rarg0, r0_save); 
+    __ str(j_rarg1, r1_save); 
+    __ str(j_rarg2, r2_save); 
+    __ str(j_rarg3, r3_save); 
+    __ str(j_rarg4, r4_save); 
+    __ str(j_rarg5, r5_save); 
+    __ str(j_rarg6, r6_save); 
+    __ str(j_rarg7, r7_save); 
+
+    int frame_complete = __ pc() - start;
+
+    // Set up last_Java_sp and last_Java_fp
+    address the_pc = __ pc();
+    __ set_last_Java_frame(sp, rfp, the_pc, rscratch1);
+
+    // Call runtime
+    __ mov(c_rarg0, rthread);
+    __ mov(c_rarg1, r0);
+
+    BLOCK_COMMENT("call runtime_entry");
+    __ mov(rscratch1, destination);
+    __ blrt(rscratch1, 2 /* number_of_arguments */, 0, 1);
+
+    oop_maps->add_gc_map(the_pc - start, map);
+
+    __ reset_last_Java_frame(false); 
+    __ maybe_isb(); 
+
+    __ ldrd(j_farg7, f7_save); 
+    __ ldrd(j_farg6, f6_save); 
+    __ ldrd(j_farg5, f5_save); 
+    __ ldrd(j_farg4, f4_save); 
+    __ ldrd(j_farg3, f3_save); 
+    __ ldrd(j_farg2, f2_save);
+    __ ldrd(j_farg1, f1_save); 
+    __ ldrd(j_farg0, f0_save); 
+
+    __ ldr(j_rarg0, r0_save); 
+    __ ldr(j_rarg1, r1_save); 
+    __ ldr(j_rarg2, r2_save); 
+    __ ldr(j_rarg3, r3_save); 
+    __ ldr(j_rarg4, r4_save); 
+    __ ldr(j_rarg5, r5_save); 
+    __ ldr(j_rarg6, r6_save); 
+    __ ldr(j_rarg7, r7_save); 
+
+    __ leave();
+
+    // check for pending exceptions
+    Label pending;
+    __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
+    __ cmp(rscratch1, (u1)NULL_WORD);
+    __ br(Assembler::NE, pending);
+
+    if (has_res) {
+      __ get_vm_result(r0, rthread);
+    }
+    __ ret(lr);
+
+    __ bind(pending);
+    __ ldr(r0, Address(rthread, in_bytes(Thread::pending_exception_offset())));
+    __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+
+
+    int frame_size_in_words = (framesize >> (LogBytesPerWord - LogBytesPerInt));
+    RuntimeStub* stub =
+      RuntimeStub::new_runtime_stub(name, &code, frame_complete, frame_size_in_words, oop_maps, false);
+
+    return stub->entry_point();
+  }
+
   // Initialization
   void generate_initial() {
     // Generate initial stubs and initializes the entry points
@@ -5699,6 +5879,12 @@
     if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) {
       StubRoutines::_dcos = generate_dsin_dcos(/* isCos = */ true);
     }
+
+    StubRoutines::_load_value_type_fields_in_regs = 
+         generate_return_value_stub(CAST_FROM_FN_PTR(address, SharedRuntime::load_value_type_fields_in_regs), "load_value_type_fields_in_regs", false);
+    StubRoutines::_store_value_type_fields_to_buf = 
+         generate_return_value_stub(CAST_FROM_FN_PTR(address, SharedRuntime::store_value_type_fields_to_buf), "store_value_type_fields_to_buf", true);
   }
 
   void generate_all() {
--- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -38,6 +38,7 @@
 #include "oops/methodData.hpp"
 #include "oops/method.hpp"
 #include "oops/oop.inline.hpp"
+#include "oops/valueKlass.hpp"
 #include "prims/jvmtiExport.hpp"
 #include "prims/jvmtiThreadState.hpp"
 #include "runtime/arguments.hpp"
@@ -440,6 +441,7 @@
   __ ldr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
   // and NULL it as marker that esp is now tos until next java call
   __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
+
   __ restore_bcp();
   __ restore_locals();
   __ restore_constant_pool_cache();
@@ -566,6 +568,7 @@
   case T_VOID   : /* nothing to do */        break;
   case T_FLOAT  : /* nothing to do */        break;
   case T_DOUBLE : /* nothing to do */        break;
+  case T_VALUETYPE: // fall through (value types are handled with oops)
   case T_OBJECT :
     // retrieve result from frame
     __ ldr(r0, Address(rfp, frame::interpreter_frame_oop_temp_offset*wordSize));
--- a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -147,7 +147,7 @@
                          Register val,
                          DecoratorSet decorators) {
   assert(val == noreg || val == r0, "parameter is just for looks");
-  __ store_heap_oop(dst, val, r10, r1, decorators);
+  __ store_heap_oop(dst, val, r10, r1, noreg, decorators); 
 }
 
 static void do_oop_load(InterpreterMacroAssembler* _masm,
@@ -170,6 +170,7 @@
   Label L_patch_done;
 
   switch (bc) {
+  case Bytecodes::_fast_qputfield:
   case Bytecodes::_fast_aputfield:
   case Bytecodes::_fast_bputfield:
   case Bytecodes::_fast_zputfield:
@@ -745,10 +746,10 @@
   }
   Label ok;
   __ br(Assembler::LO, ok);
-    // ??? convention: move array into r3 for exception message
-  __ mov(r3, array);
-  __ mov(rscratch1, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
-  __ br(rscratch1);
+  // ??? convention: move array into r3 for exception message
+  __ mov(r3, array);
+  __ mov(rscratch1, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
+  __ br(rscratch1);
   __ bind(ok);
 }
 
@@ -808,11 +809,21 @@
   // r0: array
   // r1: index
   index_check(r0, r1); // leaves index in r1, kills rscratch1
-  __ add(r1, r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop);
-  do_oop_load(_masm,
-              Address(r0, r1, Address::uxtw(LogBytesPerHeapOop)),
-              r0,
-              IS_ARRAY);
+  if (ValueArrayFlatten) {
+    Label is_flat_array, done;
+
+    __ test_flattened_array_oop(r0, r8 /*temp*/, is_flat_array); 
+    __ add(r1, r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop);
+    do_oop_load(_masm, Address(r0, r1, Address::uxtw(LogBytesPerHeapOop)), r0, IS_ARRAY);
+
+    __ b(done);
+    __ bind(is_flat_array);
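+    // Flat arrays store values inline, so there is no oop to load; the
+    // runtime call buffers the element into a heap-allocated value instance.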
+    __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::value_array_load), r0, r1);
+    __ bind(done);
+  } else {
+    __ add(r1, r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop);
+    do_oop_load(_masm, Address(r0, r1, Address::uxtw(LogBytesPerHeapOop)), r0, IS_ARRAY);
+  }
 }
 
 void TemplateTable::baload()
@@ -1102,37 +1113,47 @@
   Label is_null, ok_is_subtype, done;
   transition(vtos, vtos);
   // stack: ..., array, index, value
-  __ ldr(r0, at_tos());    // value
+  __ ldr(r0, at_tos());    // value 
   __ ldr(r2, at_tos_p1()); // index
   __ ldr(r3, at_tos_p2()); // array
 
   Address element_address(r3, r4, Address::uxtw(LogBytesPerHeapOop));
 
   index_check(r3, r2);     // kills r1
-  __ add(r4, r2, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop);
+
+  // Compute the element index in r4: index plus array header size, both in
+  // heapOop-sized units (element_address scales r4 by LogBytesPerHeapOop).
+  __ add(r4, r2, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop);
 
   // do array store check - check for NULL value first
   __ cbz(r0, is_null);
 
+  Label  is_flat_array;
+  if (ValueArrayFlatten) {
+    __ test_flattened_array_oop(r3, r8 /*temp*/, is_flat_array);
+  }
+
   // Move subklass into r1
   __ load_klass(r1, r0);
+
   // Move superklass into r0
   __ load_klass(r0, r3);
-  __ ldr(r0, Address(r0,
-                     ObjArrayKlass::element_klass_offset()));
+  __ ldr(r0, Address(r0, ObjArrayKlass::element_klass_offset()));
   // Compress array + index*oopSize + 12 into a single register.  Frees r2.
 
   // Generate subtype check.  Blows r2, r5
   // Superklass in r0.  Subklass in r1.
+
   __ gen_subtype_check(r1, ok_is_subtype);
 
   // Come here on failure
   // object is at TOS
   __ b(Interpreter::_throw_ArrayStoreException_entry);
 
+
   // Come here on success
   __ bind(ok_is_subtype);
 
+
   // Get the value we will store
   __ ldr(r0, at_tos());
   // Now store using the appropriate barrier
@@ -1143,8 +1164,61 @@
   __ bind(is_null);
   __ profile_null_seen(r2);
 
+  if (EnableValhalla) {
+    Label is_null_into_value_array_npe, store_null;
+
+    // No way to store null in flat array
+    __ test_null_free_array_oop(r3, r8, is_null_into_value_array_npe); 
+    __ b(store_null);
+
+    __ bind(is_null_into_value_array_npe);
+    __ b(ExternalAddress(Interpreter::_throw_NullPointerException_entry));
+
+    __ bind(store_null);
+  }
+
   // Store a NULL
   do_oop_store(_masm, element_address, noreg, IS_ARRAY);
+  __ b(done);
+
+  if (EnableValhalla) {
+    Label is_type_ok;
+
+    // store non-null value
+    __ bind(is_flat_array);
+
+    // Simplistic type check...
+    // r0 - value, r2 - index, r3 - array.
+
+    // Profile the not-null value's klass.
+    // Load the value's klass
+    __ load_klass(r1, r0);
+    __ profile_typecheck(r2, r1, r0); // blows r2 and r0
+
+    // A flat value array needs an exact type match:
+    // is "r0 == r1" (array element klass == value klass)?
+
+    // Move element klass into r0
+    __ load_klass(r0, r3);
+    __ ldr(r0, Address(r0, ArrayKlass::element_klass_offset()));
+    __ cmp(r0, r1);
+    __ br(Assembler::EQ, is_type_ok);
+
+    __ profile_typecheck_failed(r2);
+    __ b(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry));
+
+    __ bind(is_type_ok);
+
+    // DMS CHECK: Reload from TOS to be safe, because profile_typecheck blows r2 and r0.
+    // Should we really do it?
+    __ ldr(r1, at_tos());    // value
+    __ mov(r2, r3);          // array; could be ldr(r2, at_tos_p2())
+    __ ldr(r3, at_tos_p1()); // index
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::value_array_store), r1, r2, r3);
+  }
+
 
   // Pop stack arguments
   __ bind(done);
@@ -2021,19 +2095,86 @@
   __ profile_not_taken_branch(r0);
 }
 
-void TemplateTable::if_acmp(Condition cc)
-{
+void TemplateTable::if_acmp(Condition cc) {
   transition(atos, vtos);
   // assume branch is more often taken than not (loops use backward branches)
-  Label not_taken;
+  Label taken, not_taken;
   __ pop_ptr(r1);
+
+  Register is_value_mask = rscratch1;
+  __ mov(is_value_mask, markOopDesc::always_locked_pattern);
+
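+  // ACmpOnValues selects how acmp treats value types:
+  //   1 - set the low bit of a value operand so reference equality always fails,
+  //   2 - compare references, then invert the outcome if the operand is a value,
+  //   3 - full substitutability test via the runtime (below).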
+  if (EnableValhalla && ACmpOnValues == 3) {
+    __ cmp(r1, r0);
+    __ br(Assembler::EQ, (cc == equal) ? taken : not_taken);
+
+    // might be substitutable, test if either r0 or r1 is null
+    __ andr(r2, r0, r1);
+    __ cbz(r2, (cc == equal) ? not_taken : taken);
+
+    // and both are values ?
+    __ ldr(r2, Address(r1, oopDesc::mark_offset_in_bytes()));
+    __ andr(r2, r2, is_value_mask);
+    __ ldr(r4, Address(r0, oopDesc::mark_offset_in_bytes()));
+    __ andr(r4, r4, is_value_mask);
+    __ andr(r2, r2, r4);
+    __ cmp(r2,  is_value_mask);
+    __ br(Assembler::NE, (cc == equal) ? not_taken : taken);
+
+    // same value klass ?
+    __ load_metadata(r2, r1);
+    __ load_metadata(r4, r0);
+    __ cmp(r2, r4);
+    __ br(Assembler::NE, (cc == equal) ? not_taken : taken);
+
+    // Know both are the same type, let's test for substitutability...
+    if (cc == equal) {
+      invoke_is_substitutable(r0, r1, taken, not_taken);
+    } else {
+      invoke_is_substitutable(r0, r1, not_taken, taken);
+    }
+    __ stop("Not reachable");
+  }
+
+  if (EnableValhalla && ACmpOnValues == 1) {
+    Label is_null;
+    __ cbz(r1, is_null);
+    __ ldr(r2, Address(r1, oopDesc::mark_offset_in_bytes()));
+    __ andr(r2, r2, is_value_mask);
+    __ cmp(r2, is_value_mask);
+    __ cset(r2, Assembler::EQ);
+    __ orr(r1, r1, r2);
+    __ bind(is_null);
+  }
+
   __ cmpoop(r1, r0);
+
+  if (EnableValhalla && ACmpOnValues == 2) {
+    __ br(Assembler::NE, (cc == not_equal) ? taken : not_taken);
+    __ cbz(r1, (cc == equal) ? taken : not_taken);
+    __ ldr(r2, Address(r1, oopDesc::mark_offset_in_bytes()));  
+    __ andr(r2, r2, is_value_mask);
+    __ cmp(r2, is_value_mask); 
+    cc = (cc == equal) ? not_equal : equal;
+  }
+
   __ br(j_not(cc), not_taken);
+  __ bind(taken);
   branch(false, false);
   __ bind(not_taken);
   __ profile_not_taken_branch(r0);
 }
 
+void TemplateTable::invoke_is_substitutable(Register aobj, Register bobj,
+                                            Label& is_subst, Label& not_subst) {
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::is_substitutable), aobj, bobj);
+  // The runtime returns the answer in r0; jump to the outcome.
+  __ cbz(r0, not_subst);
+  __ b(is_subst);
+}
+
 void TemplateTable::ret() {
   transition(vtos, vtos);
   // We might be moving to a safepoint.  The thread which calls
@@ -2497,8 +2638,7 @@
 
   // x86 uses a shift and mask or wings it with a shift plus assert
   // the mask is not needed. aarch64 just uses bitfield extract
-  __ ubfxw(flags, raw_flags, ConstantPoolCacheEntry::tos_state_shift,
-           ConstantPoolCacheEntry::tos_state_bits);
+  __ ubfxw(flags, raw_flags, ConstantPoolCacheEntry::tos_state_shift, ConstantPoolCacheEntry::tos_state_bits);
 
   assert(btos == 0, "change code, btos != 0");
   __ cbnz(flags, notByte);
@@ -2533,12 +2673,68 @@
   __ cmp(flags, (u1)atos);
   __ br(Assembler::NE, notObj);
   // atos
-  do_oop_load(_masm, field, r0, IN_HEAP);
-  __ push(atos);
-  if (rc == may_rewrite) {
-    patch_bytecode(Bytecodes::_fast_agetfield, bc, r1);
+  if (!EnableValhalla) {
+    do_oop_load(_masm, field, r0, IN_HEAP);
+    __ push(atos);
+    if (rc == may_rewrite) {
+      patch_bytecode(Bytecodes::_fast_agetfield, bc, r1);
+    }
+    __ b(Done);
+  } else { // Valhalla
+
+    if (is_static) {
+      __ load_heap_oop(r0, field);
+      Label isFlattenable, isUninitialized;
+      // A flattenable static field must not be observed as null, even if it
+      // has not been initialized yet; the runtime call below handles that.
+      __ test_field_is_flattenable(raw_flags, r8 /*temp*/, isFlattenable);
+        // Not flattenable case
+        __ push(atos);
+        __ b(Done);
+      // Flattenable case: must not return null even if uninitialized
+      __ bind(isFlattenable);
+        __ cbz(r0, isUninitialized);
+          __ push(atos);
+          __ b(Done);
+        __ bind(isUninitialized);
+          __ andw(raw_flags, raw_flags, ConstantPoolCacheEntry::field_index_mask);
+          __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::uninitialized_static_value_field), obj, raw_flags);
+          __ verify_oop(r0);
+          __ push(atos);
+          __ b(Done);
+    } else {
+      Label isFlattened, isInitialized, isFlattenable, rewriteFlattenable;
+      __ test_field_is_flattenable(raw_flags, r8 /*temp*/, isFlattenable);
+        // Not flattenable case; also covers the plain object case
+        __ load_heap_oop(r0, field);
+        __ push(atos);
+        if (rc == may_rewrite) {
+          patch_bytecode(Bytecodes::_fast_agetfield, bc, r1);
+        }
+        __ b(Done);
+      __ bind(isFlattenable);
+        __ test_field_is_flattened(raw_flags, r8 /*temp*/, isFlattened);
+          // Not flattened case
+          __ load_heap_oop(r0, field);
+          __ cbnz(r0, isInitialized);
+            __ andw(raw_flags, raw_flags, ConstantPoolCacheEntry::field_index_mask);
+            __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::uninitialized_instance_value_field), obj, raw_flags);
+          __ bind(isInitialized);
+          __ verify_oop(r0);
+          __ push(atos);
+          __ b(rewriteFlattenable);
+        __ bind(isFlattened);
+          __ ldr(r10, Address(cache, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f1_offset())));
+          __ andw(raw_flags, raw_flags, ConstantPoolCacheEntry::field_index_mask);
+          call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::read_flattened_field), obj, raw_flags, r10);
+          __ verify_oop(r0);
+          __ push(atos);
+      __ bind(rewriteFlattenable);
+      if (rc == may_rewrite) {
+        patch_bytecode(Bytecodes::_fast_qgetfield, bc, r1);
+      }
+      __ b(Done);
+    }
   }
-  __ b(Done);
 
   __ bind(notObj);
   __ cmp(flags, (u1)itos);
@@ -2708,6 +2904,7 @@
   const Register obj   = r2;
   const Register off   = r19;
   const Register flags = r0;
+  const Register flags2 = r6;
   const Register bc    = r4;
 
   resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
@@ -2730,6 +2927,8 @@
   Label notByte, notBool, notInt, notShort, notChar,
         notLong, notFloat, notObj, notDouble;
 
+  __ mov(flags2, flags);
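+  // (flags2 preserves the full flags word for the Valhalla flattenable/
+  // flattened checks below; the tos_state extraction destroys flags.)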
+
   // x86 uses a shift and mask or wings it with a shift plus assert
   // the mask is not needed. aarch64 just uses bitfield extract
   __ ubfxw(flags, flags, ConstantPoolCacheEntry::tos_state_shift,  ConstantPoolCacheEntry::tos_state_bits);
@@ -2772,14 +2971,56 @@
 
   // atos
   {
-    __ pop(atos);
-    if (!is_static) pop_and_check_object(obj);
-    // Store into the field
-    do_oop_store(_masm, field, r0, IN_HEAP);
-    if (rc == may_rewrite) {
-      patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no);
-    }
-    __ b(Done);
+    if (!EnableValhalla) {
+      __ pop(atos);
+      if (!is_static) pop_and_check_object(obj);
+      // Store into the field
+      do_oop_store(_masm, field, r0, IN_HEAP);
+      if (rc == may_rewrite) {
+        patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no);
+      }
+      __ b(Done);
+    } else { // Valhalla
+
+      __ pop(atos);
+      if (is_static) {
+        Label notFlattenable;
+        __ test_field_is_not_flattenable(flags2, r8 /*temp*/, notFlattenable);
+        // A flattenable static field cannot hold null
+        __ null_check(r0);
+        __ bind(notFlattenable);
+        do_oop_store(_masm, field, r0, IN_HEAP);
+        __ b(Done);
+      } else {
+        Label isFlattenable, isFlattened, notBuffered, notBuffered2, rewriteNotFlattenable, rewriteFlattenable;
+        __ test_field_is_flattenable(flags2, r8 /*temp*/, isFlattenable);
+        // Not flattenable case; covers not-flattenable values and objects
+        pop_and_check_object(obj);
+        // Store into the field
+        do_oop_store(_masm, field, r0, IN_HEAP);
+        __ bind(rewriteNotFlattenable);
+        if (rc == may_rewrite) {
+          patch_bytecode(Bytecodes::_fast_aputfield, bc, r19, true, byte_no);
+        }
+        __ b(Done);
+        // Implementation of the flattenable semantics
+        __ bind(isFlattenable);
+        __ null_check(r0);
+        __ test_field_is_flattened(flags2, r8 /*temp*/, isFlattened);
+        // Not flattened case
+        pop_and_check_object(obj);
+        // Store into the field
+        do_oop_store(_masm, field, r0, IN_HEAP);
+        __ b(rewriteFlattenable);
+        __ bind(isFlattened);
+        pop_and_check_object(obj);
+        call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::write_flattened_value), r0, off, obj);
+        __ bind(rewriteFlattenable);
+        if (rc == may_rewrite) {
+          patch_bytecode(Bytecodes::_fast_qputfield, bc, r19, true, byte_no);
+        }
+        __ b(Done);
+      }
+    } // Valhalla
   }
 
   __ bind(notObj);
@@ -2919,6 +3160,7 @@
     // to do it for every data type, we use the saved values as the
     // jvalue object.
     switch (bytecode()) {          // load values into the jvalue object
+    case Bytecodes::_fast_qputfield: // fall through
     case Bytecodes::_fast_aputfield: __ push_ptr(r0); break;
     case Bytecodes::_fast_bputfield: // fall through
     case Bytecodes::_fast_zputfield: // fall through
@@ -2945,6 +3187,7 @@
                r19, c_rarg2, c_rarg3);
 
     switch (bytecode()) {             // restore tos values
+    case Bytecodes::_fast_qputfield: // fall through
     case Bytecodes::_fast_aputfield: __ pop_ptr(r0); break;
     case Bytecodes::_fast_bputfield: // fall through
     case Bytecodes::_fast_zputfield: // fall through
@@ -2995,6 +3238,19 @@
 
   // access field
   switch (bytecode()) {
+  case Bytecodes::_fast_qputfield:
+    {
+      Label isFlattened, done;
+      __ null_check(r0);
+      __ test_field_is_flattened(r3, r8 /*temp*/, isFlattened);
+      // Not flattened case
+      do_oop_store(_masm, field, r0, IN_HEAP);
+      __ b(done);
+      __ bind(isFlattened);
+      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::write_flattened_value), r0, r1, r2);
+      __ bind(done);
+    }
+    break;
   case Bytecodes::_fast_aputfield:
     do_oop_store(_masm, field, r0, IN_HEAP);
     break;
@@ -3088,6 +3344,32 @@
 
   // access field
   switch (bytecode()) {
+  case Bytecodes::_fast_qgetfield:
+    {
+      Label isFlattened, isInitialized, Done;
+      // DMS CHECK: the flags are reloaded several times to stay close to the
+      // original code; a single load would do.
+      __ ldrw(r9, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())));
+      __ test_field_is_flattened(r9, r8 /*temp*/, isFlattened);
+        // Not flattened case
+        __ mov(r9, r0);
+        __ load_heap_oop(r0, field);
+        __ cbnz(r0, isInitialized);
+          __ mov(r0, r9);
+          __ ldrw(r9, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())));
+          __ andw(r9, r9, ConstantPoolCacheEntry::field_index_mask);
+          __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::uninitialized_instance_value_field), r0, r9);
+        __ bind(isInitialized);
+        __ verify_oop(r0);
+        __ b(Done);
+      __ bind(isFlattened);
+        __ ldrw(r9, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())));
+        __ andw(r9, r9, ConstantPoolCacheEntry::field_index_mask);
+        __ ldr(r3, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f1_offset())));
+        call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::read_flattened_field), r0, r9, r3);
+        __ verify_oop(r0);
+      __ bind(Done);
+    }
+    break;
   case Bytecodes::_fast_agetfield:
     do_oop_load(_masm, field, r0, IN_HEAP);
     __ verify_oop(r0);
@@ -3644,6 +3926,30 @@
   __ membar(Assembler::StoreStore);
 }
 
+void TemplateTable::defaultvalue() {
+  transition(vtos, atos);
+  __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
+  __ get_constant_pool(c_rarg1);
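+  // The runtime returns the default instance of the value class in r0.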
+  call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::defaultvalue),
+          c_rarg1, c_rarg2);
+  __ verify_oop(r0);
+  // Must prevent reordering of stores for object initialization with stores that publish the new object.
+  __ membar(Assembler::StoreStore);
+}
+
+void TemplateTable::withfield() {
+  transition(vtos, atos);
+  resolve_cache_and_index(f2_byte, c_rarg1 /*cache*/, c_rarg2 /*index*/, sizeof(u2));
+
+  // n.b. unlike x86, cache is now rcpool plus the indexed offset,
+  // so we use rcpool to meet shared code expectations
+
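+  // The runtime returns the new value object in r1 and, in r0, the number
+  // of bytes to pop from the expression stack.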
+  call_VM(r1, CAST_FROM_FN_PTR(address, InterpreterRuntime::withfield), rcpool);
+  __ verify_oop(r1);
+  __ add(esp, esp, r0);
+  __ mov(r0, r1);
+}
+
 void TemplateTable::newarray() {
   transition(itos, atos);
   __ load_unsigned_byte(c_rarg1, at_bcp(1));
@@ -3715,14 +4021,29 @@
   __ bind(ok_is_subtype);
   __ mov(r0, r3); // Restore object in r3
 
+  __ b(done);
+  __ bind(is_null);
+
   // Collect counts on whether this test sees NULLs a lot or not.
   if (ProfileInterpreter) {
-    __ b(done);
-    __ bind(is_null);
     __ profile_null_seen(r2);
-  } else {
-    __ bind(is_null);   // same as 'done'
   }
+
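+  // A null reference cannot be cast to an inline (Q) type: if the CP entry
+  // carries a Q-descriptor, throw NullPointerException instead of succeeding.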
+  if (EnableValhalla) {
+    // Get cpool & tags index
+    __ get_cpool_and_tags(r2, r3); // r2=cpool, r3=tags array
+    __ get_unsigned_2_byte_index_at_bcp(r19, 1); // r19=index
+    // Load the CP tag for the class operand
+    __ add(rscratch1, r3, Array<u1>::base_offset_in_bytes());
+    __ lea(r1, Address(rscratch1, r19));
+    __ ldarb(r1, r1);
+    // See if the CP entry is a Q-descriptor
+    __ andr(r1, r1, JVM_CONSTANT_QDescBit);
+    __ cmp(r1, (u1) JVM_CONSTANT_QDescBit);
+    __ br(Assembler::NE, done);
+    __ b(ExternalAddress(Interpreter::_throw_NullPointerException_entry));
+  }
+
   __ bind(done);
 }
 
--- a/src/hotspot/cpu/aarch64/templateTable_aarch64.hpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.hpp	Tue Jul 30 15:04:38 2019 +0200
@@ -39,4 +39,6 @@
   static void index_check(Register array, Register index);
   static void index_check_without_pop(Register array, Register index);
 
+  static void invoke_is_substitutable(Register aobj, Register bobj, Label& is_subst, Label& not_subst);
+
 #endif // CPU_AARCH64_TEMPLATETABLE_AARCH64_HPP
--- a/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -47,10 +47,10 @@
 extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index);
 #endif
 
-VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
+VtableStub* VtableStubs::create_vtable_stub(int vtable_index, bool caller_is_c1) {
   // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
   const int stub_code_length = code_size_limit(true);
-  VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
+  VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index, caller_is_c1);
   // Can be NULL if there is no free space in the code cache.
   if (s == NULL) {
     return NULL;
@@ -63,6 +63,10 @@
   int       slop_bytes = 0;
   int       slop_delta = 0;
 
+  // No variance was detected in vtable stub sizes; setting index_dependent_slop == 0
+  // will reveal any deviation from this observation.
+  const int index_dependent_slop     = 0;
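+  // Pick the compiled entry point matching the caller's value-type calling
+  // convention: C1- and C2-compiled callers pass value arguments differently.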
+  ByteSize  entry_offset = caller_is_c1 ? Method::from_compiled_value_offset() : Method::from_compiled_value_ro_offset();
+
   ResourceMark    rm;
   CodeBuffer      cb(s->entry_point(), stub_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
@@ -116,7 +120,7 @@
   if (DebugVtables) {
     Label L;
     __ cbz(rmethod, L);
-    __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset()));
+    __ ldr(rscratch1, Address(rmethod, entry_offset));
     __ cbnz(rscratch1, L);
     __ stop("Vtable entry is NULL");
     __ bind(L);
@@ -127,20 +131,21 @@
   // rmethod: Method*
   // r2: receiver
   address ame_addr = __ pc();
-  __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset()));
+  __ ldr(rscratch1, Address(rmethod, entry_offset));
   __ br(rscratch1);
 
   masm->flush();
-  bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0);
+  slop_bytes += index_dependent_slop; // add'l slop for size variance due to large vtable indices
+  bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop);
 
   return s;
 }
 
 
-VtableStub* VtableStubs::create_itable_stub(int itable_index) {
+VtableStub* VtableStubs::create_itable_stub(int itable_index, bool caller_is_c1) {
   // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
   const int stub_code_length = code_size_limit(false);
-  VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
+  VtableStub* s = new(stub_code_length) VtableStub(false, itable_index, caller_is_c1);
   // Can be NULL if there is no free space in the code cache.
   if (s == NULL) {
     return NULL;
@@ -152,6 +157,10 @@
   int       slop_bytes = 0;
   int       slop_delta = 0;
 
+  const int index_dependent_slop = (itable_index == 0) ? 4 :     // index == 0 generates even shorter code.
+                                   (itable_index < 16) ? 3 : 0;  // code size changes when the offset constant grows from 8 to 32 bits (@index == 16).
+  ByteSize  entry_offset = caller_is_c1 ? Method::from_compiled_value_offset() : Method::from_compiled_value_ro_offset();
+
   ResourceMark    rm;
   CodeBuffer      cb(s->entry_point(), stub_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
@@ -221,7 +230,7 @@
   if (DebugVtables) {
     Label L2;
     __ cbz(rmethod, L2);
-    __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset()));
+    __ ldr(rscratch1, Address(rmethod, entry_offset));
     __ cbnz(rscratch1, L2);
     __ stop("compiler entrypoint is null");
     __ bind(L2);
@@ -231,7 +240,7 @@
   // rmethod: Method*
   // j_rarg0: receiver
   address ame_addr = __ pc();
-  __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset()));
+  __ ldr(rscratch1, Address(rmethod, entry_offset));
   __ br(rscratch1);
 
   __ bind(L_no_such_interface);
@@ -244,7 +253,8 @@
   __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
 
   masm->flush();
-  bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0);
+  slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets
+  bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, index_dependent_slop);
 
   return s;
 }
--- a/src/hotspot/share/runtime/arguments.cpp	Sun Jul 28 14:09:02 2019 -0700
+++ b/src/hotspot/share/runtime/arguments.cpp	Tue Jul 30 15:04:38 2019 +0200
@@ -2111,12 +2111,12 @@
 
   status = status && GCArguments::check_args_consistency();
 
-  if (LP64_ONLY(false &&) !FLAG_IS_DEFAULT(ValueTypePassFieldsAsArgs)) {
+  if (AMD64_ONLY(false &&) !FLAG_IS_DEFAULT(ValueTypePassFieldsAsArgs)) {
     FLAG_SET_CMDLINE(ValueTypePassFieldsAsArgs, false);
     warning("ValueTypePassFieldsAsArgs is not supported on this platform");
   }
 
-  if (LP64_ONLY(false &&) !FLAG_IS_DEFAULT(ValueTypeReturnedAsFields)) {
+  if (AMD64_ONLY(false &&) !FLAG_IS_DEFAULT(ValueTypeReturnedAsFields)) {
     FLAG_SET_CMDLINE(ValueTypeReturnedAsFields, false);
     warning("ValueTypeReturnedAsFields is not supported on this platform");
   }
--- a/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestArrays.java	Sun Jul 28 14:09:02 2019 -0700
+++ b/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestArrays.java	Tue Jul 30 15:04:38 2019 +0200
@@ -32,7 +32,7 @@
  * @test
  * @summary Test value type arrays
  * @library /testlibrary /test/lib /compiler/whitebox /
- * @requires os.simpleArch == "x64"
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "aarch64")
  * @compile TestArrays.java
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox jdk.test.lib.Platform
  * @run main/othervm/timeout=300 -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
--- a/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestCallingConvention.java	Sun Jul 28 14:09:02 2019 -0700
+++ b/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestCallingConvention.java	Tue Jul 30 15:04:38 2019 +0200
@@ -32,7 +32,7 @@
  * @test
  * @summary Test value type calling convention optimizations
  * @library /testlibrary /test/lib /compiler/whitebox /
- * @requires os.simpleArch == "x64"
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "aarch64")
  * @compile TestCallingConvention.java
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox jdk.test.lib.Platform
  * @run main/othervm/timeout=300 -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
--- a/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestCallingConventionC1.java	Sun Jul 28 14:09:02 2019 -0700
+++ b/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestCallingConventionC1.java	Tue Jul 30 15:04:38 2019 +0200
@@ -30,7 +30,7 @@
  * @test
  * @summary Test calls from {C1} to {C2, Interpreter}, and vice versa.
  * @library /testlibrary /test/lib /compiler/whitebox /
- * @requires os.simpleArch == "x64"
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "aarch64")
  * @compile TestCallingConventionC1.java
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox jdk.test.lib.Platform
  * @run main/othervm/timeout=300 -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
--- a/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestIntrinsics.java	Sun Jul 28 14:09:02 2019 -0700
+++ b/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestIntrinsics.java	Tue Jul 30 15:04:38 2019 +0200
@@ -36,7 +36,7 @@
  * @summary Test intrinsic support for value types
  * @library /testlibrary /test/lib /compiler/whitebox /
  * @modules java.base/jdk.internal.misc
- * @requires os.simpleArch == "x64"
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "aarch64")
  * @compile TestIntrinsics.java
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox jdk.test.lib.Platform
  * @run main/othervm/timeout=300 -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
--- a/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestJNICalls.java	Sun Jul 28 14:09:02 2019 -0700
+++ b/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestJNICalls.java	Tue Jul 30 15:04:38 2019 +0200
@@ -31,7 +31,7 @@
  * @test
  * @summary Test calling native methods with value type arguments from compiled code.
  * @library /testlibrary /test/lib /compiler/whitebox /
- * @requires os.simpleArch == "x64"
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "aarch64")
  * @compile TestJNICalls.java
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox jdk.test.lib.Platform
  * @run main/othervm/timeout=300 -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
--- a/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestLWorld.java	Sun Jul 28 14:09:02 2019 -0700
+++ b/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestLWorld.java	Tue Jul 30 15:04:38 2019 +0200
@@ -35,7 +35,7 @@
  * @summary Test value types in LWorld.
  * @modules java.base/jdk.experimental.value
  * @library /testlibrary /test/lib /compiler/whitebox /
- * @requires os.simpleArch == "x64"
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "aarch64")
  * @compile TestLWorld.java
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox jdk.test.lib.Platform
  * @run main/othervm/timeout=300 -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
--- a/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestNullableArrays.java	Sun Jul 28 14:09:02 2019 -0700
+++ b/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestNullableArrays.java	Tue Jul 30 15:04:38 2019 +0200
@@ -31,7 +31,7 @@
  * @test
  * @summary Test nullable value type arrays
  * @library /testlibrary /test/lib /compiler/whitebox /
- * @requires os.simpleArch == "x64"
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "aarch64")
  * @compile TestNullableArrays.java
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox jdk.test.lib.Platform
  * @run main/othervm/timeout=300 -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
--- a/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestNullableValueTypes.java	Sun Jul 28 14:09:02 2019 -0700
+++ b/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestNullableValueTypes.java	Tue Jul 30 15:04:38 2019 +0200
@@ -32,7 +32,7 @@
  * @test
  * @summary Test correct handling of nullable value types.
  * @library /testlibrary /test/lib /compiler/whitebox /
- * @requires os.simpleArch == "x64"
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "aarch64")
  * @compile TestNullableValueTypes.java
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox jdk.test.lib.Platform
  * @run main/othervm/timeout=300 -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
--- a/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestOnStackReplacement.java	Sun Jul 28 14:09:02 2019 -0700
+++ b/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestOnStackReplacement.java	Tue Jul 30 15:04:38 2019 +0200
@@ -30,7 +30,7 @@
  * @test
  * @summary Test on stack replacement (OSR) with value types
  * @library /testlibrary /test/lib /compiler/whitebox /
- * @requires os.simpleArch == "x64"
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "aarch64")
  * @compile TestOnStackReplacement.java
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox jdk.test.lib.Platform
  * @run main/othervm/timeout=300 -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions