changeset 52021:b826402f953a lworld

8189802: [lworld] Non-flattened value type array is not initialized Reviewed-by: thartmann
author roland
date Wed, 12 Sep 2018 16:54:40 +0200
parents 75070bacda03
children 418f9790c907
files src/hotspot/cpu/x86/macroAssembler_x86.cpp src/hotspot/cpu/x86/macroAssembler_x86.hpp src/hotspot/cpu/x86/vm_version_x86.cpp src/hotspot/cpu/x86/x86_64.ad src/hotspot/share/oops/instanceKlass.hpp src/hotspot/share/oops/valueKlass.hpp src/hotspot/share/opto/callnode.cpp src/hotspot/share/opto/callnode.hpp src/hotspot/share/opto/graphKit.cpp src/hotspot/share/opto/graphKit.hpp src/hotspot/share/opto/library_call.cpp src/hotspot/share/opto/macro.cpp src/hotspot/share/opto/macro.hpp src/hotspot/share/opto/macroArrayCopy.cpp src/hotspot/share/opto/matcher.cpp src/hotspot/share/opto/memnode.cpp src/hotspot/share/opto/memnode.hpp src/hotspot/share/opto/valuetypenode.cpp test/hotspot/jtreg/compiler/valhalla/valuetypes/TestArrays.java test/hotspot/jtreg/compiler/valhalla/valuetypes/TestIntrinsics.java
diffstat 20 files changed, 524 insertions(+), 249 deletions(-) [+]
line wrap: on
line diff
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Tue Sep 11 10:40:46 2018 -0400
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Wed Sep 12 16:54:40 2018 +0200
@@ -6743,14 +6743,16 @@
 }
 
 // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers
-void MacroAssembler::xmm_clear_mem(Register base, Register cnt, XMMRegister xtmp) {
+void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp) {
   // cnt - number of qwords (8-byte words).
   // base - start address, qword aligned.
   Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
+  movdq(xtmp, val);
   if (UseAVX >= 2) {
-    vpxor(xtmp, xtmp, xtmp, AVX_256bit);
-  } else {
-    pxor(xtmp, xtmp);
+    punpcklqdq(xtmp, xtmp);
+    vinserti128_high(xtmp, xtmp);
+  } else {
+    punpcklqdq(xtmp, xtmp);
   }
   jmp(L_zero_64_bytes);
 
@@ -6794,22 +6796,18 @@
   BIND(L_end);
 }
 
-void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMRegister xtmp, bool is_large) {
+void MacroAssembler::clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp, bool is_large, bool word_copy_only) {
   // cnt - number of qwords (8-byte words).
   // base - start address, qword aligned.
   // is_large - if optimizers know cnt is larger than InitArrayShortSize
   assert(base==rdi, "base register must be edi for rep stos");
-  assert(tmp==rax,   "tmp register must be eax for rep stos");
+  assert(val==rax,   "tmp register must be eax for rep stos");
   assert(cnt==rcx,   "cnt register must be ecx for rep stos");
   assert(InitArrayShortSize % BytesPerLong == 0,
     "InitArrayShortSize should be the multiple of BytesPerLong");
 
   Label DONE;
 
-  if (!is_large || !UseXMMForObjInit) {
-    xorptr(tmp, tmp);
-  }
-
   if (!is_large) {
     Label LOOP, LONG;
     cmpptr(cnt, InitArrayShortSize/BytesPerLong);
@@ -6822,7 +6820,7 @@
 
     // Use individual pointer-sized stores for small counts:
     BIND(LOOP);
-    movptr(Address(base, cnt, Address::times_ptr), tmp);
+    movptr(Address(base, cnt, Address::times_ptr), val);
     decrement(cnt);
     jccb(Assembler::greaterEqual, LOOP);
     jmpb(DONE);
@@ -6831,12 +6829,11 @@
   }
 
   // Use longer rep-prefixed ops for non-small counts:
-  if (UseFastStosb) {
+  if (UseFastStosb && !word_copy_only) {
     shlptr(cnt, 3); // convert to number of bytes
     rep_stosb();
   } else if (UseXMMForObjInit) {
-    movptr(tmp, base);
-    xmm_clear_mem(tmp, cnt, xtmp);
+    xmm_clear_mem(base, cnt, val, xtmp);
   } else {
     NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
     rep_stos();
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp	Tue Sep 11 10:40:46 2018 -0400
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp	Wed Sep 12 16:54:40 2018 +0200
@@ -1606,10 +1606,10 @@
 
   // clear memory of size 'cnt' qwords, starting at 'base';
   // if 'is_large' is set, do not try to produce short loop
-  void clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, bool is_large);
+  void clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp, bool is_large, bool word_copy_only);
 
   // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers
-  void xmm_clear_mem(Register base, Register cnt, XMMRegister xtmp);
+  void xmm_clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp);
 
 #ifdef COMPILER2
   void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp	Tue Sep 11 10:40:46 2018 -0400
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp	Wed Sep 12 16:54:40 2018 +0200
@@ -1409,7 +1409,7 @@
   }
 
   // Use XMM/YMM MOVDQU instruction for Object Initialization
-  if (!UseFastStosb && UseSSE >= 2 && UseUnalignedLoadStores) {
+  if (UseSSE >= 2 && UseUnalignedLoadStores) {
     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
       UseXMMForObjInit = true;
     }
--- a/src/hotspot/cpu/x86/x86_64.ad	Tue Sep 11 10:40:46 2018 -0400
+++ b/src/hotspot/cpu/x86/x86_64.ad	Wed Sep 12 16:54:40 2018 +0200
@@ -10784,15 +10784,14 @@
 
 // =======================================================================
 // fast clearing of an array
-instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
+instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                   Universe dummy, rFlagsReg cr)
 %{
-  predicate(!((ClearArrayNode*)n)->is_large());
-  match(Set dummy (ClearArray cnt base));
-  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
+  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only());
+  match(Set dummy (ClearArray (Binary cnt base) val));
+  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
 
   format %{ $$template
-    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
     $$emit$$"jg      LARGE\n\t"
     $$emit$$"dec     rcx\n\t"
@@ -10806,19 +10805,20 @@
        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
     } else if (UseXMMForObjInit) {
-       $$emit$$"mov     rdi,rax\n\t"
-       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
+       $$emit$$"movdq   $tmp, $val\n\t"
+       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
+       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
        $$emit$$"jmpq    L_zero_64_bytes\n\t"
        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
-       $$emit$$"vmovdqu ymm0,(rax)\n\t"
-       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
+       $$emit$$"vmovdqu $tmp,(rax)\n\t"
+       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
        $$emit$$"add     0x40,rax\n\t"
        $$emit$$"# L_zero_64_bytes:\n\t"
        $$emit$$"sub     0x8,rcx\n\t"
        $$emit$$"jge     L_loop\n\t"
        $$emit$$"add     0x4,rcx\n\t"
        $$emit$$"jl      L_tail\n\t"
-       $$emit$$"vmovdqu ymm0,(rax)\n\t"
+       $$emit$$"vmovdqu $tmp,(rax)\n\t"
        $$emit$$"add     0x20,rax\n\t"
        $$emit$$"sub     0x4,rcx\n\t"
        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
@@ -10837,38 +10837,44 @@
     $$emit$$"# DONE"
   %}
   ins_encode %{
-    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
-                 $tmp$$XMMRegister, false);
+    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
+                 $tmp$$XMMRegister, false, false);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero, 
-                        Universe dummy, rFlagsReg cr)
-%{
-  predicate(((ClearArrayNode*)n)->is_large());
-  match(Set dummy (ClearArray cnt base));
-  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
+instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
+                  Universe dummy, rFlagsReg cr)
+%{
+  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only());
+  match(Set dummy (ClearArray (Binary cnt base) val));
+  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
 
   format %{ $$template
-    if (UseFastStosb) {
-       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
-       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
-       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
-    } else if (UseXMMForObjInit) {
-       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
-       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
+    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
+    $$emit$$"jg      LARGE\n\t"
+    $$emit$$"dec     rcx\n\t"
+    $$emit$$"js      DONE\t# Zero length\n\t"
+    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
+    $$emit$$"dec     rcx\n\t"
+    $$emit$$"jge     LOOP\n\t"
+    $$emit$$"jmp     DONE\n\t"
+    $$emit$$"# LARGE:\n\t"
+    if (UseXMMForObjInit) {
+       $$emit$$"movdq   $tmp, $val\n\t"
+       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
+       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
        $$emit$$"jmpq    L_zero_64_bytes\n\t"
        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
-       $$emit$$"vmovdqu ymm0,(rax)\n\t"
-       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
+       $$emit$$"vmovdqu $tmp,(rax)\n\t"
+       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
        $$emit$$"add     0x40,rax\n\t"
        $$emit$$"# L_zero_64_bytes:\n\t"
        $$emit$$"sub     0x8,rcx\n\t"
        $$emit$$"jge     L_loop\n\t"
        $$emit$$"add     0x4,rcx\n\t"
        $$emit$$"jl      L_tail\n\t"
-       $$emit$$"vmovdqu ymm0,(rax)\n\t"
+       $$emit$$"vmovdqu $tmp,(rax)\n\t"
        $$emit$$"add     0x20,rax\n\t"
        $$emit$$"sub     0x4,rcx\n\t"
        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
@@ -10882,13 +10888,108 @@
        $$emit$$"jge     L_sloop\n\t"
        $$emit$$"# L_end:\n\t"
     } else {
-       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
+       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
+    }
+    $$emit$$"# DONE"
+  %}
+  ins_encode %{
+    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
+                 $tmp$$XMMRegister, false, true);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val, 
+                        Universe dummy, rFlagsReg cr)
+%{
+  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only());
+  match(Set dummy (ClearArray (Binary cnt base) val));
+  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
+
+  format %{ $$template
+    if (UseFastStosb) {
+       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
+       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
+    } else if (UseXMMForObjInit) {
+       $$emit$$"movdq   $tmp, $val\n\t"
+       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
+       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
+       $$emit$$"jmpq    L_zero_64_bytes\n\t"
+       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
+       $$emit$$"vmovdqu $tmp,(rax)\n\t"
+       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
+       $$emit$$"add     0x40,rax\n\t"
+       $$emit$$"# L_zero_64_bytes:\n\t"
+       $$emit$$"sub     0x8,rcx\n\t"
+       $$emit$$"jge     L_loop\n\t"
+       $$emit$$"add     0x4,rcx\n\t"
+       $$emit$$"jl      L_tail\n\t"
+       $$emit$$"vmovdqu $tmp,(rax)\n\t"
+       $$emit$$"add     0x20,rax\n\t"
+       $$emit$$"sub     0x4,rcx\n\t"
+       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
+       $$emit$$"add     0x4,rcx\n\t"
+       $$emit$$"jle     L_end\n\t"
+       $$emit$$"dec     rcx\n\t"
+       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
+       $$emit$$"vmovq   xmm0,(rax)\n\t"
+       $$emit$$"add     0x8,rax\n\t"
+       $$emit$$"dec     rcx\n\t"
+       $$emit$$"jge     L_sloop\n\t"
+       $$emit$$"# L_end:\n\t"
+    } else {
        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
     }
   %}
   ins_encode %{
-    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 
-                 $tmp$$XMMRegister, true);
+    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register, 
+                 $tmp$$XMMRegister, true, false);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val, 
+                        Universe dummy, rFlagsReg cr)
+%{
+  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only());
+  match(Set dummy (ClearArray (Binary cnt base) val));
+  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
+
+  format %{ $$template
+    if (UseXMMForObjInit) {
+       $$emit$$"movdq   $tmp, $val\n\t"
+       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
+       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
+       $$emit$$"jmpq    L_zero_64_bytes\n\t"
+       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
+       $$emit$$"vmovdqu $tmp,(rax)\n\t"
+       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
+       $$emit$$"add     0x40,rax\n\t"
+       $$emit$$"# L_zero_64_bytes:\n\t"
+       $$emit$$"sub     0x8,rcx\n\t"
+       $$emit$$"jge     L_loop\n\t"
+       $$emit$$"add     0x4,rcx\n\t"
+       $$emit$$"jl      L_tail\n\t"
+       $$emit$$"vmovdqu $tmp,(rax)\n\t"
+       $$emit$$"add     0x20,rax\n\t"
+       $$emit$$"sub     0x4,rcx\n\t"
+       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
+       $$emit$$"add     0x4,rcx\n\t"
+       $$emit$$"jle     L_end\n\t"
+       $$emit$$"dec     rcx\n\t"
+       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
+       $$emit$$"vmovq   xmm0,(rax)\n\t"
+       $$emit$$"add     0x8,rax\n\t"
+       $$emit$$"dec     rcx\n\t"
+       $$emit$$"jge     L_sloop\n\t"
+       $$emit$$"# L_end:\n\t"
+    } else {
+       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
+    }
+  %}
+  ins_encode %{
+    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register, 
+                 $tmp$$XMMRegister, true, true);
   %}
   ins_pipe(pipe_slow);
 %}
--- a/src/hotspot/share/oops/instanceKlass.hpp	Tue Sep 11 10:40:46 2018 -0400
+++ b/src/hotspot/share/oops/instanceKlass.hpp	Wed Sep 12 16:54:40 2018 +0200
@@ -1083,6 +1083,8 @@
   JFR_ONLY(DEFINE_KLASS_TRACE_ID_OFFSET;)
   static ByteSize init_thread_offset() { return in_ByteSize(offset_of(InstanceKlass, _init_thread)); }
 
+  static ByteSize adr_valueklass_fixed_block_offset() { return in_ByteSize(offset_of(InstanceKlass, _adr_valueklass_fixed_block)); }
+
   // subclass/subinterface checks
   bool implements_interface(Klass* k) const;
   bool is_same_or_direct_interface(Klass* k) const;
--- a/src/hotspot/share/oops/valueKlass.hpp	Tue Sep 11 10:40:46 2018 -0400
+++ b/src/hotspot/share/oops/valueKlass.hpp	Wed Sep 12 16:54:40 2018 +0200
@@ -107,7 +107,7 @@
 
   address adr_default_value_offset() const {
     assert(_adr_valueklass_fixed_block != NULL, "Should have been initialized");
-    return ((address)_adr_valueklass_fixed_block) + in_bytes(byte_offset_of(ValueKlassFixedBlock, _default_value_offset));
+    return ((address)_adr_valueklass_fixed_block) + in_bytes(default_value_offset_offset());
   }
 
   // static Klass* array_klass_impl(InstanceKlass* this_k, bool or_null, int n, TRAPS);
@@ -241,8 +241,7 @@
   }
 
   static ByteSize default_value_offset_offset() {
-    fatal("Should be re-implemented using the ValueKlassStaticBlock indirection");
-    return in_ByteSize((InstanceKlass::header_size()+2) * wordSize);
+    return byte_offset_of(ValueKlassFixedBlock, _default_value_offset);
   }
 
   void set_default_value_offset(int offset) {
--- a/src/hotspot/share/opto/callnode.cpp	Tue Sep 11 10:40:46 2018 -0400
+++ b/src/hotspot/share/opto/callnode.cpp	Wed Sep 12 16:54:40 2018 +0200
@@ -1420,7 +1420,8 @@
 AllocateNode::AllocateNode(Compile* C, const TypeFunc *atype,
                            Node *ctrl, Node *mem, Node *abio,
                            Node *size, Node *klass_node,
-                           Node* initial_test, ValueTypeBaseNode* value_node)
+                           Node* initial_test,
+                           ValueTypeBaseNode* value_node)
   : CallNode(atype, NULL, TypeRawPtr::BOTTOM)
 {
   init_class_id(Class_Allocate);
@@ -1440,6 +1441,8 @@
   init_req( InitialTest        , initial_test);
   init_req( ALength            , topnode);
   init_req( ValueNode          , value_node);
+  // DefaultValue defaults to NULL
+  // RawDefaultValue defaults to NULL
   C->add_macro_node(this);
 }
 
--- a/src/hotspot/share/opto/callnode.hpp	Tue Sep 11 10:40:46 2018 -0400
+++ b/src/hotspot/share/opto/callnode.hpp	Wed Sep 12 16:54:40 2018 +0200
@@ -868,6 +868,8 @@
     InitialTest,                      // slow-path test (may be constant)
     ALength,                          // array length (or TOP if none)
     ValueNode,
+    DefaultValue,                     // default value in case of non flattened value array
+    RawDefaultValue,                  // same as above but as raw machine word
     ParmLimit
   };
 
@@ -878,6 +880,8 @@
     fields[InitialTest] = TypeInt::BOOL;
     fields[ALength]     = t;  // length (can be a bad length)
     fields[ValueNode]   = Type::BOTTOM;
+    fields[DefaultValue] = TypeInstPtr::NOTNULL;
+    fields[RawDefaultValue] = TypeX_X;
 
     const TypeTuple *domain = TypeTuple::make(ParmLimit, fields);
 
@@ -898,7 +902,8 @@
 
   virtual uint size_of() const; // Size is bigger
   AllocateNode(Compile* C, const TypeFunc *atype, Node *ctrl, Node *mem, Node *abio,
-               Node *size, Node *klass_node, Node *initial_test, ValueTypeBaseNode* value_node = NULL);
+               Node *size, Node *klass_node, Node *initial_test,
+               ValueTypeBaseNode* value_node = NULL);
   // Expansion modifies the JVMState, so we need to clone it
   virtual void  clone_jvms(Compile* C) {
     if (jvms() != NULL) {
@@ -979,13 +984,15 @@
 public:
   AllocateArrayNode(Compile* C, const TypeFunc *atype, Node *ctrl, Node *mem, Node *abio,
                     Node* size, Node* klass_node, Node* initial_test,
-                    Node* count_val
+                    Node* count_val, Node* default_value, Node* raw_default_value
                     )
     : AllocateNode(C, atype, ctrl, mem, abio, size, klass_node,
                    initial_test)
   {
     init_class_id(Class_AllocateArray);
     set_req(AllocateNode::ALength,        count_val);
+    init_req(AllocateNode::DefaultValue,  default_value);
+    init_req(AllocateNode::RawDefaultValue, raw_default_value);
   }
   virtual int Opcode() const;
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
--- a/src/hotspot/share/opto/graphKit.cpp	Tue Sep 11 10:40:46 2018 -0400
+++ b/src/hotspot/share/opto/graphKit.cpp	Wed Sep 12 16:54:40 2018 +0200
@@ -3319,6 +3319,14 @@
   return _gvn.transform(new AndXNode(mark, value_mask));
 }
 
+Node* GraphKit::gen_value_type_test(Node* kls) {  // Returns a CmpI node comparing (access flags of kls & JVM_ACC_VALUE) with 0.
+  Node* flags_addr = basic_plus_adr(kls, in_bytes(Klass::access_flags_offset()));  // address of the access flags field inside kls
+  Node* flags = make_load(NULL, flags_addr, TypeInt::INT, T_INT, MemNode::unordered);
+  Node* is_value = _gvn.transform(new AndINode(flags, intcon(JVM_ACC_VALUE)));  // isolates the JVM_ACC_VALUE bit; zero when the bit is clear
+  Node* cmp = _gvn.transform(new CmpINode(is_value, intcon(0)));  // the caller picks the BoolTest (eq/ne) to apply to this compare
+  return cmp;
+}
+
 // Deoptimize if 'obj' is a value type
 void GraphKit::gen_value_type_guard(Node* obj, int nargs) {
   assert(EnableValhalla, "should only be used if value types are enabled");
@@ -3384,16 +3392,22 @@
   }
 }
 
+Node* GraphKit::gen_lh_array_test(Node* kls, unsigned int lh_value) {  // Returns a CmpI node comparing the shifted layout helper of kls with lh_value.
+  Node* lhp = basic_plus_adr(kls, in_bytes(Klass::layout_helper_offset()));  // address of the layout helper field inside kls
+  Node* layout_val = make_load(NULL, lhp, TypeInt::INT, T_INT, MemNode::unordered);
+  layout_val = _gvn.transform(new RShiftINode(layout_val, intcon(Klass::_lh_array_tag_shift)));  // shift right so the array tag can be compared with a tag constant
+  Node* cmp = _gvn.transform(new CmpINode(layout_val, intcon(lh_value)));  // the caller picks the BoolTest (eq/ne) to apply to this compare
+  return cmp;
+}
+
+
 // Deoptimize if 'ary' is a flattened value type array
 void GraphKit::gen_flattened_array_guard(Node* ary, int nargs) {
   assert(EnableValhalla, "should only be used if value types are enabled");
   if (ValueArrayFlatten) {
     // Cannot statically determine if array is flattened, emit runtime check
     Node* kls = load_object_klass(ary);
-    Node* lhp = basic_plus_adr(kls, in_bytes(Klass::layout_helper_offset()));
-    Node* layout_val = make_load(NULL, lhp, TypeInt::INT, T_INT, MemNode::unordered);
-    layout_val = _gvn.transform(new RShiftINode(layout_val, intcon(Klass::_lh_array_tag_shift)));
-    Node* cmp = _gvn.transform(new CmpINode(layout_val, intcon(Klass::_lh_array_tag_vt_value)));
+    Node* cmp = gen_lh_array_test(kls, Klass::_lh_array_tag_vt_value);
     Node* bol = _gvn.transform(new BoolNode(cmp, BoolTest::ne));
 
     { BuildCutout unless(this, bol, PROB_MAX);
@@ -3802,6 +3816,14 @@
   return set_output_for_allocation(alloc, oop_type, deoptimize_on_exception);
 }
 
+// With compressed oops, the 64-bit init value for non-flattened value
+// arrays is built from two copies of the 32-bit compressed oop (low half | high half).
+static Node* raw_default_for_coops(Node* default_value, GraphKit& kit) {
+  Node* lower = kit.gvn().transform(new CastP2XNode(kit.control(), default_value));  // machine-word view of default_value (callers pass an EncodeP result)
+  Node* upper = kit.gvn().transform(new LShiftLNode(lower, kit.intcon(32)));  // the same word shifted into bits 32..63
+  return kit.gvn().transform(new OrLNode(lower, upper));  // NOTE(review): assumes the upper 32 bits of 'lower' are zero - confirm
+}
+
 //-------------------------------new_array-------------------------------------
 // helper for newarray and anewarray
 // The 'length' parameter is (obviously) the length of the array.
@@ -3958,20 +3980,93 @@
     initial_slow_test = initial_slow_test->as_Bool()->as_int_value(&_gvn);
   }
 
+  const TypeOopPtr* ary_type = _gvn.type(klass_node)->is_klassptr()->as_instance_type();
+  const TypeAryPtr* ary_ptr = ary_type->isa_aryptr();
+  const Type* elem = NULL;
+  ciKlass* elem_klass = NULL;
+  if (ary_ptr != NULL) {
+    elem = ary_ptr->elem();
+    elem_klass = ary_ptr->klass()->as_array_klass()->element_klass();
+  }
+  Node* default_value = NULL;
+  Node* raw_default_value = NULL;
+  if (elem != NULL && elem->make_ptr()) {
+    if (elem_klass != NULL && elem_klass->is_valuetype()) {
+      ciValueKlass* vk = elem_klass->as_value_klass();
+      if (!vk->flatten_array()) {
+        default_value = ValueTypeNode::load_default_oop(gvn(), vk);
+        if (elem->isa_narrowoop()) {
+          default_value = _gvn.transform(new EncodePNode(default_value, elem));
+          raw_default_value = raw_default_for_coops(default_value, *this);
+        } else {
+          raw_default_value = _gvn.transform(new CastP2XNode(control(), default_value));
+        }
+      }
+    }
+  }
+
+  if (EnableValhalla && (!layout_con || elem == NULL || (elem_klass != NULL && elem_klass->is_java_lang_Object() && !ary_type->klass_is_exact()))) {
+    assert(raw_default_value == NULL, "shouldn't be set yet");
+
+    // unknown array type, could be a non-flattened value array that's
+    // initialized to a non-zero default value
+
+    Node* r = new RegionNode(4);
+    Node* phi = new PhiNode(r, TypeX_X);
+
+    Node* cmp = gen_lh_array_test(klass_node, Klass::_lh_array_tag_obj_value);
+    Node* bol = _gvn.transform(new BoolNode(cmp, BoolTest::eq));
+    IfNode* iff = create_and_map_if(control(), bol, PROB_FAIR, COUNT_UNKNOWN);
+    r->init_req(1, _gvn.transform(new IfFalseNode(iff)));
+    phi->init_req(1, MakeConX(0));
+    set_control(_gvn.transform(new IfTrueNode(iff)));
+    Node* k_adr = basic_plus_adr(klass_node, in_bytes(ArrayKlass::element_klass_offset()));
+    Node* elem_klass = _gvn.transform(LoadKlassNode::make(_gvn, control(), immutable_memory(), k_adr, TypeInstPtr::KLASS));
+    cmp = gen_value_type_test(elem_klass);
+    bol = _gvn.transform(new BoolNode(cmp, BoolTest::eq));
+    iff = create_and_map_if(control(), bol, PROB_FAIR, COUNT_UNKNOWN);
+    r->init_req(2, _gvn.transform(new IfTrueNode(iff)));
+    phi->init_req(2, MakeConX(0));
+    set_control(_gvn.transform(new IfFalseNode(iff)));
+
+    Node* adr_fixed_block_addr = basic_plus_adr(elem_klass, in_bytes(InstanceKlass::adr_valueklass_fixed_block_offset()));
+    Node* adr_fixed_block = make_load(control(), adr_fixed_block_addr, TypeRawPtr::NOTNULL, T_ADDRESS, MemNode::unordered);
+
+    Node* default_value_offset_addr = basic_plus_adr(adr_fixed_block, in_bytes(ValueKlass::default_value_offset_offset()));
+    Node* default_value_offset = make_load(control(), default_value_offset_addr, TypeInt::INT, T_INT, MemNode::unordered);
+
+    Node* elem_mirror = load_mirror_from_klass(elem_klass);
+
+    Node* default_value_addr = basic_plus_adr(elem_mirror, ConvI2X(default_value_offset));
+    const TypePtr* adr_type = _gvn.type(default_value_addr)->is_ptr();
+    Node* val = access_load_at(elem_mirror, default_value_addr, adr_type, TypeInstPtr::BOTTOM, T_OBJECT, IN_HEAP);
+
+    if (UseCompressedOops) {
+      val = _gvn.transform(new EncodePNode(val, elem));
+      val = raw_default_for_coops(val, *this);
+    } else {
+      val = _gvn.transform(new CastP2XNode(control(), val));
+    }
+    r->init_req(3, control());
+    phi->init_req(3, val);
+    set_control(_gvn.transform(r));
+    raw_default_value = _gvn.transform(phi);
+  }
+
   // Create the AllocateArrayNode and its result projections
   AllocateArrayNode* alloc
     = new AllocateArrayNode(C, AllocateArrayNode::alloc_type(TypeInt::INT),
                             control(), mem, i_o(),
                             size, klass_node,
                             initial_slow_test,
-                            length);
+                            length, default_value,
+                            raw_default_value);
 
   // Cast to correct type.  Note that the klass_node may be constant or not,
   // and in the latter case the actual array type will be inexact also.
   // (This happens via a non-constant argument to inline_native_newArray.)
   // In any case, the value of klass_node provides the desired array type.
   const TypeInt* length_type = _gvn.find_int_type(length);
-  const TypeOopPtr* ary_type = _gvn.type(klass_node)->is_klassptr()->as_instance_type();
   if (ary_type->isa_aryptr() && length_type != NULL) {
     // Try to get a better type than POS for the size
     ary_type = ary_type->is_aryptr()->cast_to_size(length_type);
@@ -3989,90 +4084,9 @@
     }
   }
 
-  const TypeAryPtr* ary_ptr = ary_type->isa_aryptr();
-  ciKlass* elem_klass = ary_ptr != NULL ? ary_ptr->klass()->as_array_klass()->element_klass() : NULL;
-  if (elem_klass != NULL && elem_klass->is_valuetype()) {
-    ciValueKlass* vk = elem_klass->as_value_klass();
-    if (!vk->flatten_array()) {
-      // Non-flattened value type arrays need to be initialized with default value type oops
-      initialize_value_type_array(javaoop, length, elem_klass->as_value_klass(), nargs);
-      // TODO re-enable once JDK-8189802 is fixed
-      // InitializeNode* init = alloc->initialization();
-      //init->set_complete_with_arraycopy();
-    }
-  } else if (EnableValhalla && (!layout_con || elem_klass == NULL || (elem_klass->is_java_lang_Object() && !ary_type->klass_is_exact()))) {
-    InitializeNode* init = alloc->initialization();
-    init->set_unknown_value();
-  }
-
   return javaoop;
 }
 
-void GraphKit::initialize_value_type_array(Node* array, Node* length, ciValueKlass* vk, int nargs) {
-  // Check for zero length
-  Node* null_ctl = top();
-  null_check_common(length, T_INT, false, &null_ctl, false);
-  if (stopped()) {
-    set_control(null_ctl); // Always zero
-    return;
-  }
-
-  RegionNode* res_ctl = new RegionNode(3);
-  gvn().set_type(res_ctl, Type::CONTROL);
-  record_for_igvn(res_ctl);
-
-  // Length is zero: don't execute initialization loop
-  res_ctl->init_req(1, null_ctl);
-  PhiNode* res_io  = PhiNode::make(res_ctl, i_o(), Type::ABIO);
-  PhiNode* res_mem = PhiNode::make(res_ctl, merged_memory(), Type::MEMORY, TypePtr::BOTTOM);
-  gvn().set_type(res_io, Type::ABIO);
-  gvn().set_type(res_mem, Type::MEMORY);
-  record_for_igvn(res_io);
-  record_for_igvn(res_mem);
-
-  // Length is non-zero: execute a loop that initializes the array with the default value type
-  Node* oop = ValueTypeNode::load_default_oop(gvn(), vk);
-
-  add_predicate(nargs);
-  RegionNode* loop = new RegionNode(3);
-  loop->init_req(1, control());
-  PhiNode* index = PhiNode::make(loop, intcon(0), TypeInt::INT);
-  PhiNode* mem   = PhiNode::make(loop, reset_memory(), Type::MEMORY, TypePtr::BOTTOM);
-
-  gvn().set_type(loop, Type::CONTROL);
-  gvn().set_type(index, TypeInt::INT);
-  gvn().set_type(mem, Type::MEMORY);
-  record_for_igvn(loop);
-  record_for_igvn(index);
-  record_for_igvn(mem);
-
-  // Loop body: initialize array element at 'index'
-  set_control(loop);
-  set_all_memory(mem);
-  Node* adr = array_element_address(array, index, T_OBJECT);
-  const TypeOopPtr* elemtype = TypeInstPtr::make(TypePtr::NotNull, vk);
-  access_store_at(control(), array, adr, TypeAryPtr::OOPS, oop, elemtype, T_VALUETYPE, MemNode::release);
-
-  // Check if we need to execute another loop iteration
-  length = SubI(length, intcon(1));
-  IfNode* iff = create_and_map_if(control(), Bool(CmpI(index, length), BoolTest::lt), PROB_FAIR, COUNT_UNKNOWN);
-
-  // Continue with next iteration
-  loop->init_req(2, IfTrue(iff));
-  index->init_req(2, AddI(index, intcon(1)));
-  mem->init_req(2, merged_memory());
-
-  // Exit loop
-  res_ctl->init_req(2, IfFalse(iff));
-  res_io->set_req(2, i_o());
-  res_mem->set_req(2, reset_memory());
-
-  // Set merged control, IO and memory
-  set_control(res_ctl);
-  set_i_o(res_io);
-  set_all_memory(res_mem);
-}
-
 // The following "Ideal_foo" functions are placed here because they recognize
 // the graph shapes created by the functions immediately above.
 
@@ -4369,3 +4383,12 @@
   // assumption of CCP analysis.
   return _gvn.transform(new CastPPNode(ary, ary_type->cast_to_stable(true)));
 }
+
+//---------------------------load_mirror_from_klass----------------------------
+// Given a klass oop, load its java mirror (a java.lang.Class oop).
+Node* GraphKit::load_mirror_from_klass(Node* klass) {
+  Node* p = basic_plus_adr(klass, in_bytes(Klass::java_mirror_offset()));  // address of the java mirror field inside the klass
+  Node* load = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS, MemNode::unordered);  // first load yields a raw, non-null address (the handle)
+  // mirror = ((OopHandle)mirror)->resolve();
+  return access_load(load, TypeInstPtr::MIRROR, T_OBJECT, IN_NATIVE);  // second load, with the IN_NATIVE decorator, reads the mirror oop through that address
+}
--- a/src/hotspot/share/opto/graphKit.hpp	Tue Sep 11 10:40:46 2018 -0400
+++ b/src/hotspot/share/opto/graphKit.hpp	Wed Sep 12 16:54:40 2018 +0200
@@ -829,9 +829,11 @@
                        Node* *failure_control = NULL );
 
   Node* is_always_locked(Node* obj);
+  Node* gen_value_type_test(Node* kls);
   void gen_value_type_guard(Node* obj, int nargs = 0);
   void gen_value_type_array_guard(Node* ary, Node* obj, Node* elem_klass = NULL);
   void gen_flattened_array_guard(Node* ary, int nargs = 0);
+  Node* gen_lh_array_test(Node* kls, unsigned int lh_value);
 
   Node* gen_subtype_check(Node* subklass, Node* superklass) {
     MergeMemNode* mem = merged_memory();
@@ -861,8 +863,6 @@
   Node* new_array(Node* klass_node, Node* count_val, int nargs,
                   Node* *return_size_val = NULL,
                   bool deoptimize_on_exception = false);
-  // Initialize a non-flattened value type array with default oops
-  void initialize_value_type_array(Node* array, Node* length, ciValueKlass* vk, int nargs);
 
   // java.lang.String helpers
   Node* load_String_length(Node* ctrl, Node* str);
@@ -900,6 +900,8 @@
 
   // Produce new array node of stable type
   Node* cast_array_to_stable(Node* ary, const TypeAryPtr* ary_type);
+
+  Node* load_mirror_from_klass(Node* klass);
 };
 
 // Helper class to support building of control flow branches. Upon
--- a/src/hotspot/share/opto/library_call.cpp	Tue Sep 11 10:40:46 2018 -0400
+++ b/src/hotspot/share/opto/library_call.cpp	Wed Sep 12 16:54:40 2018 +0200
@@ -164,7 +164,6 @@
   void  generate_string_range_check(Node* array, Node* offset,
                                     Node* length, bool char_count);
   Node* generate_current_thread(Node* &tls_output);
-  Node* load_mirror_from_klass(Node* klass);
   Node* load_klass_from_mirror_common(Node* mirror, bool never_see_null,
                                       RegionNode* region, int null_path,
                                       int offset);
@@ -3070,15 +3069,6 @@
   return true;
 }
 
-//---------------------------load_mirror_from_klass----------------------------
-// Given a klass oop, load its java mirror (a java.lang.Class oop).
-Node* LibraryCallKit::load_mirror_from_klass(Node* klass) {
-  Node* p = basic_plus_adr(klass, in_bytes(Klass::java_mirror_offset()));
-  Node* load = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS, MemNode::unordered);
-  // mirror = ((OopHandle)mirror)->resolve();
-  return access_load(load, TypeInstPtr::MIRROR, T_OBJECT, IN_NATIVE);
-}
-
 //-----------------------load_klass_from_mirror_common-------------------------
 // Given a java mirror (a java.lang.Class oop), load its corresponding klass oop.
 // Test the klass oop for null (signifying a primitive Class like Integer.TYPE),
@@ -4316,7 +4306,6 @@
     // We will be completely responsible for initializing this object -
     // mark Initialize node as complete.
     alloc = AllocateNode::Ideal_allocation(alloc_obj, &_gvn);
-    alloc->initialization()->clear_unknown_value();
     // The object was just allocated - there should be no any stores!
     guarantee(alloc != NULL && alloc->maybe_set_complete(&_gvn), "");
     // Mark as complete_with_arraycopy so that on AllocateNode
--- a/src/hotspot/share/opto/macro.cpp	Tue Sep 11 10:40:46 2018 -0400
+++ b/src/hotspot/share/opto/macro.cpp	Wed Sep 12 16:54:40 2018 +0200
@@ -544,6 +544,11 @@
   if (mem != NULL) {
     if (mem == start_mem || mem == alloc_mem) {
       // hit a sentinel, return appropriate 0 value
+      Node* default_value = alloc->in(AllocateNode::DefaultValue);
+      if (default_value != NULL) {
+        return default_value;
+      }
+      assert(alloc->in(AllocateNode::RawDefaultValue) == NULL, "default value may not be null");
       return _igvn.zerocon(ft);
     } else if (mem->is_Store()) {
       return mem->in(MemNode::ValueIn);
@@ -776,6 +781,13 @@
       assert(klass->is_array_klass() && nfields >= 0, "must be an array klass.");
       elem_type = klass->as_array_klass()->element_type();
       basic_elem_type = elem_type->basic_type();
+      if (elem_type->is_valuetype()) {
+        ciValueKlass* vk = elem_type->as_value_klass();
+        if (!vk->flatten_array()) {
+          assert(basic_elem_type == T_VALUETYPE, "unexpected element basic type");
+          basic_elem_type = T_OBJECT;
+        }
+      }
       array_base = arrayOopDesc::base_offset_in_bytes(basic_elem_type);
       element_size = type2aelembytes(basic_elem_type);
       if (klass->is_value_array_klass()) {
@@ -1335,51 +1347,6 @@
   Node *slow_region = NULL;
   Node *toobig_false = ctrl;
 
-  if (!always_slow && (alloc->initialization() == NULL || alloc->initialization()->is_unknown_value())) {
-    const TypeOopPtr* ary_type = _igvn.type(klass_node)->is_klassptr()->as_instance_type();
-    const TypeAryPtr* ary_ptr = ary_type->isa_aryptr();
-
-    ciKlass* elem_klass = NULL;
-    if (ary_ptr != NULL && ary_ptr->klass() != NULL) {
-      elem_klass = ary_ptr->klass()->as_array_klass()->element_klass();
-    }
-
-    if (elem_klass == NULL || (elem_klass->is_java_lang_Object() && !ary_ptr->klass_is_exact()) || elem_klass->is_valuetype()) {
-      // If it's an array of values we must go to the slow path so it is
-      // correctly initialized with default values.
-      Node* fast_region = new RegionNode(3);
-      Node* not_obj_array = ctrl;
-      Node* obj_array = generate_object_array_guard(&not_obj_array, mem, klass_node, NULL);
-
-      fast_region->init_req(1, not_obj_array);
-      slow_region = new RegionNode(1);
-      Node* k_adr = basic_plus_adr(klass_node, klass_node, in_bytes(ArrayKlass::element_klass_offset()));
-      Node* elem_klass = LoadKlassNode::make(_igvn, NULL, C->immutable_memory(), k_adr, TypeInstPtr::KLASS);
-      transform_later(elem_klass);
-      Node* flags = make_load(NULL, mem, elem_klass, in_bytes(Klass::access_flags_offset()), TypeInt::INT, T_INT);
-      Node* is_value_elem = new AndINode(flags, intcon(JVM_ACC_VALUE));
-      transform_later(is_value_elem);
-      Node* cmp = new CmpINode(is_value_elem, _igvn.intcon(0));
-      transform_later(cmp);
-      Node* bol = new BoolNode(cmp, BoolTest::ne);
-      transform_later(bol);
-      IfNode* value_array_iff = new IfNode(obj_array, bol, PROB_MIN, COUNT_UNKNOWN);
-      transform_later(value_array_iff);
-      Node* value_array = new IfTrueNode(value_array_iff);
-      transform_later(value_array);
-      slow_region->add_req(value_array);
-      Node* not_value_array = new IfFalseNode(value_array_iff);
-      transform_later(not_value_array);
-      fast_region->init_req(2, not_value_array);
-      transform_later(fast_region);
-      ctrl = fast_region;
-    }
-    InitializeNode* init = alloc->initialization();
-    if (init != NULL) {
-      init->clear_unknown_value();
-    }
-  }
-
   assert (initial_slow_test == NULL || !always_slow, "arguments must be consistent");
   // generate the initial test if necessary
   if (initial_slow_test != NULL ) {
@@ -1851,6 +1818,8 @@
     // within an Allocate, and then (maybe or maybe not) clear some more later.
     if (!(UseTLAB && ZeroTLAB)) {
       rawmem = ClearArrayNode::clear_memory(control, rawmem, object,
+                                            alloc->in(AllocateNode::DefaultValue),
+                                            alloc->in(AllocateNode::RawDefaultValue),
                                             header_size, size_in_bytes,
                                             &_igvn);
     }
--- a/src/hotspot/share/opto/macro.hpp	Tue Sep 11 10:40:46 2018 -0400
+++ b/src/hotspot/share/opto/macro.hpp	Wed Sep 12 16:54:40 2018 +0200
@@ -149,6 +149,8 @@
   void generate_clear_array(Node* ctrl, MergeMemNode* merge_mem,
                             const TypePtr* adr_type,
                             Node* dest,
+                            Node* val,
+                            Node* raw_val,
                             BasicType basic_elem_type,
                             Node* slice_idx,
                             Node* slice_len,
--- a/src/hotspot/share/opto/macroArrayCopy.cpp	Tue Sep 11 10:40:46 2018 -0400
+++ b/src/hotspot/share/opto/macroArrayCopy.cpp	Wed Sep 12 16:54:40 2018 +0200
@@ -278,13 +278,6 @@
     if (top_dest->klass() == NULL) {
       return false;
     }
-    ciKlass* elem_klass = top_dest->klass()->as_array_klass()->element_klass();
-    if (elem_klass != NULL && elem_klass->is_valuetype()) {
-      ciValueKlass* vk = elem_klass->as_value_klass();
-      if (!vk->flatten_array()) {
-        return false;
-      }
-    }
   }
 
   return ReduceBulkZeroing
@@ -348,6 +341,8 @@
 
   Node* original_dest      = dest;
   bool  dest_uninitialized = false;
+  Node* default_value = NULL;
+  Node* raw_default_value = NULL;
 
   // See if this is the initialization of a newly-allocated array.
   // If so, we will take responsibility here for initializing it to zero.
@@ -368,6 +363,8 @@
     // Also, if this flag is set we make sure that arraycopy interacts properly
     // with G1, eliding pre-barriers. See CR 6627983.
     dest_uninitialized = true;
+    default_value = alloc->in(AllocateNode::DefaultValue);
+    raw_default_value = alloc->in(AllocateNode::RawDefaultValue);
   } else {
     // No zeroing elimination here.
     alloc             = NULL;
@@ -439,7 +436,9 @@
       } else {
         // Clear the whole thing since there are no source elements to copy.
         generate_clear_array(local_ctrl, local_mem,
-                             adr_type, dest, basic_elem_type,
+                             adr_type, dest,
+                             default_value, raw_default_value,
+                             basic_elem_type,
                              intcon(0), NULL,
                              alloc->in(AllocateNode::AllocSize));
         // Use a secondary InitializeNode as raw memory barrier.
@@ -475,7 +474,9 @@
     // If there is a head section that needs zeroing, do it now.
     if (_igvn.find_int_con(dest_offset, -1) != 0) {
       generate_clear_array(*ctrl, mem,
-                           adr_type, dest, basic_elem_type,
+                           adr_type, dest,
+                           default_value, raw_default_value,
+                           basic_elem_type,
                            intcon(0), dest_offset,
                            NULL);
     }
@@ -524,7 +525,9 @@
       *ctrl = tail_ctl;
       if (notail_ctl == NULL) {
         generate_clear_array(*ctrl, mem,
-                             adr_type, dest, basic_elem_type,
+                             adr_type, dest,
+                             default_value, raw_default_value,
+                             basic_elem_type,
                              dest_tail, NULL,
                              dest_size);
       } else {
@@ -534,7 +537,9 @@
         done_ctl->init_req(1, notail_ctl);
         done_mem->init_req(1, mem->memory_at(alias_idx));
         generate_clear_array(*ctrl, mem,
-                             adr_type, dest, basic_elem_type,
+                             adr_type, dest,
+                             default_value, raw_default_value,
+                             basic_elem_type,
                              dest_tail, NULL,
                              dest_size);
         done_ctl->init_req(2, *ctrl);
@@ -712,7 +717,9 @@
 
     if (dest_uninitialized) {
       generate_clear_array(local_ctrl, local_mem,
-                           adr_type, dest, basic_elem_type,
+                           adr_type, dest,
+                           default_value, raw_default_value,
+                           basic_elem_type,
                            intcon(0), NULL,
                            alloc->in(AllocateNode::AllocSize));
     }
@@ -816,6 +823,8 @@
 void PhaseMacroExpand::generate_clear_array(Node* ctrl, MergeMemNode* merge_mem,
                                             const TypePtr* adr_type,
                                             Node* dest,
+                                            Node* val,
+                                            Node* raw_val,
                                             BasicType basic_elem_type,
                                             Node* slice_idx,
                                             Node* slice_len,
@@ -855,12 +864,12 @@
 
   if (start_con >= 0 && end_con >= 0) {
     // Constant start and end.  Simple.
-    mem = ClearArrayNode::clear_memory(ctrl, mem, dest,
+    mem = ClearArrayNode::clear_memory(ctrl, mem, dest, val, raw_val,
                                        start_con, end_con, &_igvn);
   } else if (start_con >= 0 && dest_size != top()) {
     // Constant start, pre-rounded end after the tail of the array.
     Node* end = dest_size;
-    mem = ClearArrayNode::clear_memory(ctrl, mem, dest,
+    mem = ClearArrayNode::clear_memory(ctrl, mem, dest, val, raw_val,
                                        start_con, end, &_igvn);
   } else if (start_con >= 0 && slice_len != top()) {
     // Constant start, non-constant end.  End needs rounding up.
@@ -873,7 +882,7 @@
     end_base += end_round;
     end = transform_later(new AddXNode(end, MakeConX(end_base)) );
     end = transform_later(new AndXNode(end, MakeConX(~end_round)) );
-    mem = ClearArrayNode::clear_memory(ctrl, mem, dest,
+    mem = ClearArrayNode::clear_memory(ctrl, mem, dest, val, raw_val,
                                        start_con, end, &_igvn);
   } else if (start_con < 0 && dest_size != top()) {
     // Non-constant start, pre-rounded end after the tail of the array.
@@ -902,12 +911,18 @@
         // Store a zero to the immediately preceding jint:
         Node* x1 = transform_later(new AddXNode(start, MakeConX(-bump_bit)) );
         Node* p1 = basic_plus_adr(dest, x1);
-        mem = StoreNode::make(_igvn, ctrl, mem, p1, adr_type, intcon(0), T_INT, MemNode::unordered);
+        if (val == NULL) {
+          assert(raw_val == NULL, "val may not be null");
+          mem = StoreNode::make(_igvn, ctrl, mem, p1, adr_type, intcon(0), T_INT, MemNode::unordered);
+        } else {
+          assert(_igvn.type(val)->isa_narrowoop(), "should be narrow oop");
+          mem = new StoreNNode(ctrl, mem, p1, adr_type, val, MemNode::unordered);
+        }
         mem = transform_later(mem);
       }
     }
     Node* end = dest_size; // pre-rounded
-    mem = ClearArrayNode::clear_memory(ctrl, mem, dest,
+    mem = ClearArrayNode::clear_memory(ctrl, mem, dest, raw_val,
                                        start, end, &_igvn);
   } else {
     // Non-constant start, unrounded non-constant end.
--- a/src/hotspot/share/opto/matcher.cpp	Tue Sep 11 10:40:46 2018 -0400
+++ b/src/hotspot/share/opto/matcher.cpp	Wed Sep 12 16:54:40 2018 +0200
@@ -2386,6 +2386,13 @@
         n->del_req(3);
         break;
       }
+      case Op_ClearArray: {
+        Node* pair = new BinaryNode(n->in(2), n->in(3));
+        n->set_req(2, pair);
+        n->set_req(3, n->in(4));
+        n->del_req(4);
+        break;
+      }
       default:
         break;
       }
--- a/src/hotspot/share/opto/memnode.cpp	Tue Sep 11 10:40:46 2018 -0400
+++ b/src/hotspot/share/opto/memnode.cpp	Wed Sep 12 16:54:40 2018 +0200
@@ -1064,6 +1064,11 @@
       // can create new nodes.  Think of it as lazily manifesting
       // virtually pre-existing constants.)
       assert(memory_type() != T_VALUETYPE, "should not be used for value types");
+      Node* default_value = ld_alloc->in(AllocateNode::DefaultValue);
+      if (default_value != NULL) {
+        return default_value;
+      }
+      assert(ld_alloc->in(AllocateNode::RawDefaultValue) == NULL, "default value may not be null");
       return phase->zerocon(memory_type());
     }
 
@@ -1793,6 +1798,7 @@
     assert( off != Type::OffsetBot ||
             // arrays can be cast to Objects
             tp->is_oopptr()->klass()->is_java_lang_Object() ||
+            tp->is_oopptr()->klass() == ciEnv::current()->Class_klass() ||
             // unsafe field access may not have a constant offset
             C->has_unsafe_access(),
             "Field accesses must be precise" );
@@ -2497,6 +2503,7 @@
              phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw ||
              (Opcode() == Op_StoreL && st->Opcode() == Op_StoreI) || // expanded ClearArrayNode
              (Opcode() == Op_StoreI && st->Opcode() == Op_StoreL) || // initialization by arraycopy
+             (Opcode() == Op_StoreL && st->Opcode() == Op_StoreN) ||
              (is_mismatched_access() || st->as_Store()->is_mismatched_access()),
              "no mismatched stores, except on raw memory: %s %s", NodeClassNames[Opcode()], NodeClassNames[st->Opcode()]);
 
@@ -2582,10 +2589,11 @@
   // Store of zero anywhere into a freshly-allocated object?
   // Then the store is useless.
   // (It must already have been captured by the InitializeNode.)
-  if (result == this &&
-      ReduceFieldZeroing && phase->type(val)->is_zero_type()) {
+  if (result == this && ReduceFieldZeroing) {
     // a newly allocated object is already all-zeroes everywhere
-    if (mem->is_Proj() && mem->in(0)->is_Allocate()) {
+    if (mem->is_Proj() && mem->in(0)->is_Allocate() &&
+        (phase->type(val)->is_zero_type() || mem->in(0)->in(AllocateNode::DefaultValue) == val)) {
+      assert(!phase->type(val)->is_zero_type() || mem->in(0)->in(AllocateNode::DefaultValue) == NULL, "storing null to value array is forbidden");
       result = mem;
     }
 
@@ -2598,7 +2606,15 @@
         if (prev_val != NULL && phase->eqv(prev_val, val)) {
           // prev_val and val might differ by a cast; it would be good
           // to keep the more informative of the two.
-          result = mem;
+          if (phase->type(val)->is_zero_type()) {
+            result = mem;
+          } else if (prev_mem->is_Proj() && prev_mem->in(0)->is_Initialize()) {
+            InitializeNode* init = prev_mem->in(0)->as_Initialize();
+            AllocateNode* alloc = init->allocation();
+            if (alloc != NULL && alloc->in(AllocateNode::DefaultValue) == val) {
+              result = mem;
+            }
+          }
         }
       }
     }
@@ -2906,7 +2922,7 @@
   // Length too long; communicate this to matchers and assemblers.
   // Assemblers are responsible to produce fast hardware clears for it.
   if (size > InitArrayShortSize) {
-    return new ClearArrayNode(in(0), in(1), in(2), in(3), true);
+    return new ClearArrayNode(in(0), in(1), in(2), in(3), in(4), true);
   }
   Node *mem = in(1);
   if( phase->type(mem)==Type::TOP ) return NULL;
@@ -2921,14 +2937,14 @@
   if( adr->Opcode() != Op_AddP ) Unimplemented();
   Node *base = adr->in(1);
 
-  Node *zero = phase->makecon(TypeLong::ZERO);
+  Node *val = in(4);
   Node *off  = phase->MakeConX(BytesPerLong);
-  mem = new StoreLNode(in(0),mem,adr,atp,zero,MemNode::unordered,false);
+  mem = new StoreLNode(in(0), mem, adr, atp, val, MemNode::unordered, false);
   count--;
   while( count-- ) {
     mem = phase->transform(mem);
     adr = phase->transform(new AddPNode(base,adr,off));
-    mem = new StoreLNode(in(0),mem,adr,atp,zero,MemNode::unordered,false);
+    mem = new StoreLNode(in(0), mem, adr, atp, val, MemNode::unordered, false);
   }
   return mem;
 }
@@ -2962,6 +2978,8 @@
 //----------------------------clear_memory-------------------------------------
 // Generate code to initialize object storage to zero.
 Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
+                                   Node* val,
+                                   Node* raw_val,
                                    intptr_t start_offset,
                                    Node* end_offset,
                                    PhaseGVN* phase) {
@@ -2972,17 +2990,24 @@
     Node* adr = new AddPNode(dest, dest, phase->MakeConX(offset));
     adr = phase->transform(adr);
     const TypePtr* atp = TypeRawPtr::BOTTOM;
-    mem = StoreNode::make(*phase, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT, MemNode::unordered);
+    if (val != NULL) {
+      assert(phase->type(val)->isa_narrowoop(), "should be narrow oop");
+      mem = new StoreNNode(ctl, mem, adr, atp, val, MemNode::unordered);
+    } else {
+      assert(raw_val == NULL, "val may not be null");
+      mem = StoreNode::make(*phase, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT, MemNode::unordered);
+    }
     mem = phase->transform(mem);
     offset += BytesPerInt;
   }
   assert((offset % unit) == 0, "");
 
   // Initialize the remaining stuff, if any, with a ClearArray.
-  return clear_memory(ctl, mem, dest, phase->MakeConX(offset), end_offset, phase);
+  return clear_memory(ctl, mem, dest, raw_val, phase->MakeConX(offset), end_offset, phase);
 }
 
 Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
+                                   Node* raw_val,
                                    Node* start_offset,
                                    Node* end_offset,
                                    PhaseGVN* phase) {
@@ -3005,11 +3030,16 @@
   // Bulk clear double-words
   Node* zsize = phase->transform(new SubXNode(zend, zbase) );
   Node* adr = phase->transform(new AddPNode(dest, dest, start_offset) );
-  mem = new ClearArrayNode(ctl, mem, zsize, adr, false);
+  if (raw_val == NULL) {
+    raw_val = phase->MakeConX(0);
+  }
+  mem = new ClearArrayNode(ctl, mem, zsize, adr, raw_val, false);
   return phase->transform(mem);
 }
 
 Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
+                                   Node* val,
+                                   Node* raw_val,
                                    intptr_t start_offset,
                                    intptr_t end_offset,
                                    PhaseGVN* phase) {
@@ -3024,14 +3054,20 @@
     done_offset -= BytesPerInt;
   }
   if (done_offset > start_offset) {
-    mem = clear_memory(ctl, mem, dest,
+    mem = clear_memory(ctl, mem, dest, val, raw_val,
                        start_offset, phase->MakeConX(done_offset), phase);
   }
   if (done_offset < end_offset) { // emit the final 32-bit store
     Node* adr = new AddPNode(dest, dest, phase->MakeConX(done_offset));
     adr = phase->transform(adr);
     const TypePtr* atp = TypeRawPtr::BOTTOM;
-    mem = StoreNode::make(*phase, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT, MemNode::unordered);
+    if (val != NULL) {
+      assert(phase->type(val)->isa_narrowoop(), "should be narrow oop");
+      mem = new StoreNNode(ctl, mem, adr, atp, val, MemNode::unordered);
+    } else {
+      assert(raw_val == NULL, "val may not be null");
+      mem = StoreNode::make(*phase, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT, MemNode::unordered);
+    }
     mem = phase->transform(mem);
     done_offset += BytesPerInt;
   }
@@ -3413,7 +3449,6 @@
 
 void InitializeNode::set_complete(PhaseGVN* phase) {
   assert(!is_complete(), "caller responsibility");
-  assert(!is_unknown_value(), "unsupported");
   _is_complete = Complete;
 
   // After this node is complete, it contains a bunch of
@@ -3429,7 +3464,7 @@
 // return false if the init contains any stores already
 bool AllocateNode::maybe_set_complete(PhaseGVN* phase) {
   InitializeNode* init = initialization();
-  if (init == NULL || init->is_complete() || init->is_unknown_value()) {
+  if (init == NULL || init->is_complete()) {
     return false;
   }
   init->remove_extra_zeroes();
@@ -4174,6 +4209,8 @@
         // Do some incremental zeroing on rawmem, in parallel with inits.
         zeroes_done = align_down(zeroes_done, BytesPerInt);
         rawmem = ClearArrayNode::clear_memory(rawctl, rawmem, rawptr,
+                                              allocation()->in(AllocateNode::DefaultValue),
+                                              allocation()->in(AllocateNode::RawDefaultValue),
                                               zeroes_done, zeroes_needed,
                                               phase);
         zeroes_done = zeroes_needed;
@@ -4233,6 +4270,8 @@
     }
     if (zeroes_done < size_limit) {
       rawmem = ClearArrayNode::clear_memory(rawctl, rawmem, rawptr,
+                                            allocation()->in(AllocateNode::DefaultValue),
+                                            allocation()->in(AllocateNode::RawDefaultValue),
                                             zeroes_done, size_in_bytes, phase);
     }
   }
--- a/src/hotspot/share/opto/memnode.hpp	Tue Sep 11 10:40:46 2018 -0400
+++ b/src/hotspot/share/opto/memnode.hpp	Wed Sep 12 16:54:40 2018 +0200
@@ -1092,9 +1092,11 @@
 class ClearArrayNode: public Node {
 private:
   bool _is_large;
+  bool _word_copy_only;
 public:
-  ClearArrayNode( Node *ctrl, Node *arymem, Node *word_cnt, Node *base, bool is_large)
-    : Node(ctrl,arymem,word_cnt,base), _is_large(is_large) {
+  ClearArrayNode( Node *ctrl, Node *arymem, Node *word_cnt, Node *base, Node* val, bool is_large)
+    : Node(ctrl, arymem, word_cnt, base, val), _is_large(is_large),
+      _word_copy_only(val->bottom_type()->isa_long() && (!val->bottom_type()->is_long()->is_con() || val->bottom_type()->is_long()->get_con() != 0)) {
     init_class_id(Class_ClearArray);
   }
   virtual int         Opcode() const;
@@ -1106,20 +1108,26 @@
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
   virtual uint match_edge(uint idx) const;
   bool is_large() const { return _is_large; }
+  bool word_copy_only() const { return _word_copy_only; }
 
   // Clear the given area of an object or array.
   // The start offset must always be aligned mod BytesPerInt.
   // The end offset must always be aligned mod BytesPerLong.
   // Return the new memory.
   static Node* clear_memory(Node* control, Node* mem, Node* dest,
+                            Node* val,
+                            Node* raw_val,
                             intptr_t start_offset,
                             intptr_t end_offset,
                             PhaseGVN* phase);
   static Node* clear_memory(Node* control, Node* mem, Node* dest,
+                            Node* val,
+                            Node* raw_val,
                             intptr_t start_offset,
                             Node* end_offset,
                             PhaseGVN* phase);
   static Node* clear_memory(Node* control, Node* mem, Node* dest,
+                            Node* raw_val,
                             Node* start_offset,
                             Node* end_offset,
                             PhaseGVN* phase);
@@ -1303,7 +1311,6 @@
     Incomplete    = 0,
     Complete      = 1,
     WithArraycopy = 2,
-    UnknownValue  = 4
   };
   int _is_complete;
 
@@ -1341,21 +1348,13 @@
   // An InitializeNode must completed before macro expansion is done.
   // Completion requires that the AllocateNode must be followed by
   // initialization of the new memory to zero, then to any initializers.
-  bool is_complete() { return (_is_complete & ~UnknownValue) != Incomplete; }
+  bool is_complete() { return _is_complete != Incomplete; }
   bool is_complete_with_arraycopy() { return (_is_complete & WithArraycopy) != 0; }
 
   // Mark complete.  (Must not yet be complete.)
   void set_complete(PhaseGVN* phase);
   void set_complete_with_arraycopy() { _is_complete = Complete | WithArraycopy; }
 
-  void set_unknown_value() { assert(_is_complete == Incomplete, "bad state"); _is_complete |= UnknownValue; }
-  bool is_unknown_value() {
-    assert((_is_complete & UnknownValue) == 0 || (_is_complete & ~UnknownValue) == Incomplete, "bad state");
-    return (_is_complete & UnknownValue) != 0;
-  }
-  void clear_unknown_value() { _is_complete &= ~UnknownValue; }
-
-
   bool does_not_escape() { return _does_not_escape; }
   void set_does_not_escape() { _does_not_escape = true; }
 
--- a/src/hotspot/share/opto/valuetypenode.cpp	Tue Sep 11 10:40:46 2018 -0400
+++ b/src/hotspot/share/opto/valuetypenode.cpp	Wed Sep 12 16:54:40 2018 +0200
@@ -496,7 +496,7 @@
   Node* base = gvn.makecon(tip);
   Node* adr = gvn.transform(new AddPNode(base, base, gvn.MakeConX(vk->default_value_offset())));
   const Type* rt = Type::get_const_type(vk)->join_speculative(TypePtr::NOTNULL);
-  return gvn.transform(LoadNode::make(gvn, NULL, gvn.C->immutable_memory(), adr, tip, rt, T_VALUETYPE, MemNode::unordered));
+  return gvn.transform(LoadNode::make(gvn, NULL, gvn.C->immutable_memory(), adr, tip->add_offset(vk->default_value_offset()), rt, T_VALUETYPE, MemNode::unordered));
 }
 
 ValueTypeNode* ValueTypeNode::make_default(PhaseGVN& gvn, ciValueKlass* vk) {
--- a/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestArrays.java	Tue Sep 11 10:40:46 2018 -0400
+++ b/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestArrays.java	Wed Sep 12 16:54:40 2018 +0200
@@ -1452,4 +1452,124 @@
         Object[] result = test63(va, oa);
         verify(verif, result);
     }
+
+    // Test default initialization of value type arrays: small array
+    @Test
+    public MyValue1[] test64() {
+        return new MyValue1[8];
+    }
+
+    @DontCompile
+    public void test64_verifier(boolean warmup) {
+        MyValue1[] va = new MyValue1[8];
+        MyValue1[] var = test64();
+        for (int i = 0; i < 8; ++i) {
+            Asserts.assertEQ(va[i].hashPrimitive(), var[i].hashPrimitive());
+        }
+    }
+
+    // Test default initialization of value type arrays: large array
+    @Test
+    public MyValue1[] test65() {
+        return new MyValue1[32];
+    }
+
+    @DontCompile
+    public void test65_verifier(boolean warmup) {
+        MyValue1[] va = new MyValue1[32];
+        MyValue1[] var = test65();
+        for (int i = 0; i < 32; ++i) {
+            Asserts.assertEQ(va[i].hashPrimitive(), var[i].hashPrimitive());
+        }
+    }
+
+    // Check elimination of the default value init store for an element that is overwritten right after allocation
+    @Test
+    public MyValue1[] test66(MyValue1 vt) {
+        MyValue1[] va = new MyValue1[1];
+        va[0] = vt;
+        return va;
+    }
+
+    @DontCompile
+    public void test66_verifier(boolean warmup) {
+        MyValue1 vt = MyValue1.createWithFieldsDontInline(rI, rL);
+        MyValue1[] va = test66(vt);
+        Asserts.assertEQ(va[0].hashPrimitive(), vt.hashPrimitive());
+    }
+
+    // Zeroing elimination and arraycopy: only 13 of the 16 elements are copied, the tail must still hold the default value
+    @Test
+    public MyValue1[] test67(MyValue1[] src) {
+        MyValue1[] dst = new MyValue1[16];
+        System.arraycopy(src, 0, dst, 0, 13);
+        return dst;
+    }
+
+    @DontCompile
+    public void test67_verifier(boolean warmup) {
+        MyValue1[] va = new MyValue1[16];
+        MyValue1[] var = test67(va);
+        for (int i = 0; i < 16; ++i) {
+            Asserts.assertEQ(va[i].hashPrimitive(), var[i].hashPrimitive());
+        }
+    }
+
+    // A store of the default value into a freshly allocated array can be eliminated
+    @Test
+    public MyValue1[] test68() {
+        MyValue1[] va = new MyValue1[2];
+        va[0] = va[1];
+        return va;
+    }
+
+    @DontCompile
+    public void test68_verifier(boolean warmup) {
+        MyValue1[] va = new MyValue1[2];
+        MyValue1[] var = test68();
+        for (int i = 0; i < 2; ++i) {
+            Asserts.assertEQ(va[i].hashPrimitive(), var[i].hashPrimitive());
+        }
+    }
+
+    // Explicit stores to va[0] and va[3]: the remaining elements require individual stores to init the array
+    @Test
+    public MyValue1[] test69(MyValue1 vt) {
+        MyValue1[] va = new MyValue1[4];
+        va[0] = vt;
+        va[3] = vt;
+        return va;
+    }
+
+    @DontCompile
+    public void test69_verifier(boolean warmup) {
+        MyValue1 vt = MyValue1.createWithFieldsDontInline(rI, rL);
+        MyValue1[] va = new MyValue1[4];
+        va[0] = vt;
+        va[3] = vt;
+        MyValue1[] var = test69(vt);
+        for (int i = 0; i < va.length; ++i) {
+            Asserts.assertEQ(va[i].hashPrimitive(), var[i].hashPrimitive());
+        }
+    }
+
+    // A store of the default value can be eliminated: same as test68,
+    // but the store is farther away from the allocation
+    @Test
+    public MyValue1[] test70(MyValue1[] other) {
+        other[1] = other[0];
+        MyValue1[] va = new MyValue1[2];
+        other[0] = va[1];
+        va[0] = va[1];
+        return va;
+    }
+
+    @DontCompile
+    public void test70_verifier(boolean warmup) {
+        MyValue1[] va = new MyValue1[2];
+        MyValue1[] var = test70(va);
+        for (int i = 0; i < 2; ++i) {
+            Asserts.assertEQ(va[i].hashPrimitive(), var[i].hashPrimitive());
+        }
+    }
 }
--- a/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestIntrinsics.java	Tue Sep 11 10:40:46 2018 -0400
+++ b/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestIntrinsics.java	Wed Sep 12 16:54:40 2018 +0200
@@ -141,18 +141,19 @@
 
     // Test default value type array creation via reflection
     @Test()
-    public void test7(Class<?> componentType, int len, long hash) {
+    public Object[] test7(Class<?> componentType, int len, long hash) {
         Object[] va = (Object[])Array.newInstance(componentType, len);
-        for (int i = 0; i < len; ++i) {
-            Asserts.assertEQ(((MyValue1)va[i]).hashPrimitive(), hash);
-        }
+        return va;
     }
 
     @DontCompile
     public void test7_verifier(boolean warmup) {
         int len = Math.abs(rI) % 42;
         long hash = MyValue1.createDefaultDontInline().hashPrimitive();
-        test7(MyValue1.class, len, hash);
+        Object[] va = test7(MyValue1.class, len, hash);
+        for (int i = 0; i < len; ++i) {
+            Asserts.assertEQ(((MyValue1)va[i]).hashPrimitive(), hash);
+        }
     }
 
     // Class.isInstance