changeset 57416:85f25a764824 lworld

8231561: [lworld] C2 generates inefficient code for acmp
Reviewed-by: roland
author thartmann
date Fri, 11 Oct 2019 14:13:17 +0200
parents 114caf2fa7b9
children a4b4f60e7812
files src/hotspot/cpu/x86/x86_64.ad src/hotspot/share/opto/compile.cpp src/hotspot/share/opto/graphKit.cpp src/hotspot/share/opto/graphKit.hpp src/hotspot/share/opto/lcm.cpp src/hotspot/share/opto/library_call.cpp src/hotspot/share/opto/macroArrayCopy.cpp src/hotspot/share/opto/memnode.cpp src/hotspot/share/opto/memnode.hpp src/hotspot/share/opto/parse2.cpp test/hotspot/jtreg/compiler/valhalla/valuetypes/TestCallingConventionC1.java test/hotspot/jtreg/compiler/valhalla/valuetypes/TestLWorld.java
diffstat 12 files changed, 190 insertions(+), 109 deletions(-)
--- a/src/hotspot/cpu/x86/x86_64.ad	Fri Oct 11 10:39:58 2019 +0200
+++ b/src/hotspot/cpu/x86/x86_64.ad	Fri Oct 11 14:13:17 2019 +0200
@@ -3937,6 +3937,22 @@
   %}
 %}
 
+// Indirect Narrow Oop Operand
+operand indCompressedOop(rRegN reg) %{
+  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(DecodeN reg);
+
+  op_cost(10);
+  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
+  interface(MEMORY_INTER) %{
+    base(0xc); // R12
+    index($reg);
+    scale(0x3);
+    disp(0x0);
+  %}
+%}
+
 // Indirect Narrow Oop Plus Offset Operand
 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 // we can't free r12 even with CompressedOops::base() == NULL.
@@ -4279,7 +4295,7 @@
 
 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
-               indCompressedOopOffset,
+               indCompressedOop, indCompressedOopOffset,
                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
@@ -11878,6 +11894,21 @@
   ins_pipe(ialu_mem_imm);
 %}
 
+// Clear array property bits
+instruct clear_property_bits(rRegN dst, memory mem, immU31 mask, rFlagsReg cr)
+%{
+  match(Set dst (CastI2N (AndI (CastN2I (LoadNKlass mem)) mask)));
+  effect(KILL cr);
+
+  format %{ "movl    $dst, $mem\t# clear property bits\n\t"
+            "andl    $dst, $mask" %}
+  ins_encode %{
+    __ movl($dst$$Register, $mem$$Address);
+    __ andl($dst$$Register, $mask$$constant);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
 // Unsigned compare Instructions; really, same as signed except they
 // produce an rFlagsRegU instead of rFlagsReg.
 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
@@ -12196,7 +12227,7 @@
 %{
   match(Set cr (CmpL (AndL (CastP2X (LoadKlass mem)) src) zero));
 
-  format %{ "testq   $src, $mem" %}
+  format %{ "testq   $src, $mem\t# test array properties" %}
   opcode(0x85);
   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
   ins_pipe(ialu_cr_reg_mem);
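
The two additions above are the x86_64 back end's share of the acmp cleanup: indCompressedOop lets a DecodeN feed a memory operand directly, and clear_property_bits folds the narrow-klass load and the property-bit mask into a single movl/andl pair. As a hedged, stand-alone sketch (not HotSpot code) of the address arithmetic the new operand folds away, assuming CompressedOops::shift() == 3 and the heap base held in R12:

#include <cstdint>

// Decoding a narrow oop with shift 3 and a non-zero heap base is
// base + (narrow << 3); the indCompressedOop operand lets a load use the
// addressing mode [R12 + narrow*8] instead of materializing this value first.
uintptr_t decode_narrow_oop(uintptr_t heap_base /* kept in R12 */, uint32_t narrow_oop) {
  return heap_base + (static_cast<uintptr_t>(narrow_oop) << 3);
}
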
--- a/src/hotspot/share/opto/compile.cpp	Fri Oct 11 10:39:58 2019 +0200
+++ b/src/hotspot/share/opto/compile.cpp	Fri Oct 11 14:13:17 2019 +0200
@@ -3424,10 +3424,13 @@
       assert( !tp || oop_offset_is_sane(tp), "" );
     }
 #endif
-    if (EnableValhalla && (nop == Op_LoadKlass || nop == Op_LoadNKlass)) {
+    if (EnableValhalla &&
+        ((nop == Op_LoadKlass && ((LoadKlassNode*)n)->clear_prop_bits()) ||
+         (nop == Op_LoadNKlass && ((LoadNKlassNode*)n)->clear_prop_bits()))) {
       const TypeKlassPtr* tk = n->bottom_type()->make_ptr()->is_klassptr();
       assert(!tk->klass_is_exact(), "should have been folded");
-      if (tk->klass()->can_be_value_array_klass() && n->as_Mem()->adr_type()->offset() == oopDesc::klass_offset_in_bytes()) {
+      assert(n->as_Mem()->adr_type()->offset() == oopDesc::klass_offset_in_bytes(), "unexpected LoadKlass");
+      if (tk->klass()->can_be_value_array_klass()) {
         // Array load klass needs to filter out property bits (but not
         // GetNullFreePropertyNode or GetFlattenedPropertyNode which
         // needs to extract the storage property bits)
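
With the explicit clear_prop_bits flag, final_graph_reshaping only rewrites klass loads that actually need the ArrayStorageProperties bits removed, while GetNullFreePropertyNode/GetFlattenedPropertyNode keep seeing the raw word. A minimal sketch of the two consumers, with a purely hypothetical mask (the real bit layout is defined by the lworld runtime and not assumed here):

#include <cstdint>

constexpr uint32_t kPropertyBitsMask = 0xC0000000u;  // hypothetical placeholder mask

// A property-clearing klass load masks the storage property bits out so the
// result is usable as a plain (narrow) Klass pointer.
uint32_t clear_property_bits(uint32_t narrow_klass_word) {
  return narrow_klass_word & ~kPropertyBitsMask;
}

// GetNullFreeProperty/GetFlattenedProperty instead extract exactly those bits.
uint32_t extract_property_bits(uint32_t narrow_klass_word) {
  return narrow_klass_word & kPropertyBitsMask;
}
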
--- a/src/hotspot/share/opto/graphKit.cpp	Fri Oct 11 10:39:58 2019 +0200
+++ b/src/hotspot/share/opto/graphKit.cpp	Fri Oct 11 14:13:17 2019 +0200
@@ -1174,12 +1174,12 @@
 }
 
 //-------------------------load_object_klass-----------------------------------
-Node* GraphKit::load_object_klass(Node* obj) {
+Node* GraphKit::load_object_klass(Node* obj, bool clear_prop_bits) {
   // Special-case a fresh allocation to avoid building nodes:
   Node* akls = AllocateNode::Ideal_klass(obj, &_gvn);
   if (akls != NULL)  return akls;
   Node* k_adr = basic_plus_adr(obj, oopDesc::klass_offset_in_bytes());
-  return _gvn.transform(LoadKlassNode::make(_gvn, NULL, immutable_memory(), k_adr, TypeInstPtr::KLASS));
+  return _gvn.transform(LoadKlassNode::make(_gvn, NULL, immutable_memory(), k_adr, TypeInstPtr::KLASS, TypeKlassPtr::OBJECT, clear_prop_bits));
 }
 
 //-------------------------load_array_length-----------------------------------
@@ -3461,8 +3461,10 @@
 Node* GraphKit::is_always_locked(Node* obj) {
   Node* mark_addr = basic_plus_adr(obj, oopDesc::mark_offset_in_bytes());
   Node* mark = make_load(NULL, mark_addr, TypeX_X, TypeX_X->basic_type(), MemNode::unordered);
-  Node* value_mask = _gvn.MakeConX(markWord::always_locked_pattern);
-  return _gvn.transform(new AndXNode(mark, value_mask));
+  Node* mask = _gvn.MakeConX(markWord::always_locked_pattern);
+  Node* andx = _gvn.transform(new AndXNode(mark, mask));
+  Node* cmp = _gvn.transform(new CmpXNode(andx, mask));
+  return _gvn.transform(new BoolNode(cmp, BoolTest::eq));
 }
 
 Node* GraphKit::is_value_mirror(Node* mirror) {
@@ -3472,25 +3474,6 @@
   return _gvn.transform(new BoolNode(cmp, BoolTest::eq));
 }
 
-// Deoptimize if 'obj' is a value type
-void GraphKit::gen_value_type_guard(Node* obj, int nargs) {
-  assert(EnableValhalla, "should only be used if value types are enabled");
-  Node* bol = NULL;
-  if (obj->is_ValueTypeBase()) {
-    bol = intcon(0);
-  } else {
-    Node* is_value = is_always_locked(obj);
-    Node* value_mask = _gvn.MakeConX(markWord::always_locked_pattern);
-    Node* cmp = _gvn.transform(new CmpXNode(is_value, value_mask));
-    bol = _gvn.transform(new BoolNode(cmp, BoolTest::ne));
-  }
-  { BuildCutout unless(this, bol, PROB_MAX);
-    inc_sp(nargs);
-    uncommon_trap(Deoptimization::Reason_class_check,
-                  Deoptimization::Action_none);
-  }
-}
-
 // Check if 'ary' is a null-free value type array
 Node* GraphKit::gen_null_free_array_check(Node* ary) {
   assert(EnableValhalla, "should only be used if value types are enabled");
@@ -3499,9 +3482,9 @@
   const TypePtr* k_adr_type = k_adr->bottom_type()->isa_ptr();
   Node* klass = NULL;
   if (k_adr_type->is_ptr_to_narrowklass()) {
-    klass = _gvn.transform(new LoadNKlassNode(NULL, immutable_memory(), k_adr, TypeInstPtr::KLASS, TypeKlassPtr::OBJECT->make_narrowklass(), MemNode::unordered));
+    klass = _gvn.transform(new LoadNKlassNode(NULL, immutable_memory(), k_adr, TypeInstPtr::KLASS, TypeKlassPtr::OBJECT->make_narrowklass(), MemNode::unordered, true));
   } else {
-    klass = _gvn.transform(new LoadKlassNode(NULL, immutable_memory(), k_adr, TypeInstPtr::KLASS, TypeKlassPtr::OBJECT, MemNode::unordered));
+    klass = _gvn.transform(new LoadKlassNode(NULL, immutable_memory(), k_adr, TypeInstPtr::KLASS, TypeKlassPtr::OBJECT, MemNode::unordered, true));
   }
   Node* null_free = _gvn.transform(new GetNullFreePropertyNode(klass));
   Node* cmp = NULL;
@@ -3520,9 +3503,9 @@
   const TypePtr* k_adr_type = k_adr->bottom_type()->isa_ptr();
   Node* klass = NULL;
   if (k_adr_type->is_ptr_to_narrowklass()) {
-    klass = _gvn.transform(new LoadNKlassNode(NULL, immutable_memory(), k_adr, TypeInstPtr::KLASS, TypeKlassPtr::OBJECT->make_narrowklass(), MemNode::unordered));
+    klass = _gvn.transform(new LoadNKlassNode(NULL, immutable_memory(), k_adr, TypeInstPtr::KLASS, TypeKlassPtr::OBJECT->make_narrowklass(), MemNode::unordered, true));
   } else {
-    klass = _gvn.transform(new LoadKlassNode(NULL, immutable_memory(), k_adr, TypeInstPtr::KLASS, TypeKlassPtr::OBJECT, MemNode::unordered));
+    klass = _gvn.transform(new LoadKlassNode(NULL, immutable_memory(), k_adr, TypeInstPtr::KLASS, TypeKlassPtr::OBJECT, MemNode::unordered, true));
   }
   return _gvn.transform(new GetFlattenedPropertyNode(klass));
 }
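
is_always_locked() now returns a BoolNode rather than the raw AndX result, so callers such as the acmp parsing code can branch on it directly. A minimal sketch of the condition it encodes (the concrete pattern comes from markWord.hpp and is not assumed here):

#include <cstdint>

// An object is treated as a value type iff every bit of always_locked_pattern
// is set in its mark word.
bool is_always_locked(uintptr_t mark_word, uintptr_t always_locked_pattern) {
  return (mark_word & always_locked_pattern) == always_locked_pattern;
}
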
--- a/src/hotspot/share/opto/graphKit.hpp	Fri Oct 11 10:39:58 2019 +0200
+++ b/src/hotspot/share/opto/graphKit.hpp	Fri Oct 11 14:13:17 2019 +0200
@@ -342,7 +342,7 @@
   Node* ConvI2UL(Node* offset);
   Node* ConvL2I(Node* offset);
   // Find out the klass of an object.
-  Node* load_object_klass(Node* object);
+  Node* load_object_klass(Node* object, bool clear_prop_bits = true);
   // Find out the length of an array.
   Node* load_array_length(Node* array);
 
@@ -858,7 +858,6 @@
 
   Node* is_always_locked(Node* obj);
   Node* is_value_mirror(Node* mirror);
-  void gen_value_type_guard(Node* obj, int nargs = 0);
   Node* gen_null_free_array_check(Node* ary);
   Node* gen_flattened_array_test(Node* ary);
   Node* gen_value_array_null_guard(Node* ary, Node* val, int nargs, bool safe_for_replace = false);
--- a/src/hotspot/share/opto/lcm.cpp	Fri Oct 11 10:39:58 2019 +0200
+++ b/src/hotspot/share/opto/lcm.cpp	Fri Oct 11 14:13:17 2019 +0200
@@ -313,7 +313,11 @@
         if( is_decoden ) continue;
       }
       // Block of memory-op input
-      Block *inb = get_block_for_node(mach->in(j));
+      Block* inb = get_block_for_node(mach->in(j));
+      if (mach->in(j)->is_Con() && inb == get_block_for_node(mach)) {
+        // Ignore constant loads scheduled in the same block (we can simply hoist them as well)
+        continue;
+      }
       Block *b = block;          // Start from nul check
       while( b != inb && b->_dom_depth > inb->_dom_depth )
         b = b->_idom;           // search upwards for input
@@ -389,7 +393,28 @@
         }
       }
     }
+  } else {
+    // Hoist constant load inputs as well.
+    for (uint i = 1; i < best->req(); ++i) {
+      Node* n = best->in(i);
+      if (n->is_Con() && get_block_for_node(n) == get_block_for_node(best)) {
+        get_block_for_node(n)->find_remove(n);
+        block->add_inst(n);
+        map_node_to_block(n, block);
+        // Constant loads may kill flags (for example, when XORing a register).
+        // Check for flag-killing projections that also need to be hoisted.
+        for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+          Node* proj = n->fast_out(j);
+          if (proj->is_MachProj()) {
+            get_block_for_node(proj)->find_remove(proj);
+            block->add_inst(proj);
+            map_node_to_block(proj, block);
+          }
+        }
+      }
+    }
   }
+
   // Hoist the memory candidate up to the end of the test block.
   Block *old_block = get_block_for_node(best);
   old_block->find_remove(best);
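
The scheduling change allows an implicit null check to use a memory op whose constant inputs are scheduled in the same block: the constants (and any flag-killing MachProj they carry) are simply hoisted into the test block along with the candidate. A hedged, self-contained toy sketch of that move, using simplified stand-in types rather than HotSpot's Node/Block:

#include <algorithm>
#include <vector>

struct ToyBlock;

struct ToyNode {
  bool is_con = false;                 // constant load (e.g. a materialized klass constant)
  std::vector<ToyNode*> inputs;        // data inputs
  std::vector<ToyNode*> projections;   // e.g. a flags-KILL MachProj
  ToyBlock* block = nullptr;           // block it is currently scheduled in
};

struct ToyBlock {
  std::vector<ToyNode*> insts;
  void remove(ToyNode* n) { insts.erase(std::remove(insts.begin(), insts.end(), n), insts.end()); }
  void add(ToyNode* n)    { insts.push_back(n); n->block = this; }
};

// Hoist 'best' into the null-check block, dragging same-block constant inputs
// (and their flag-killing projections) along so the schedule stays legal.
void hoist_with_constants(ToyNode* best, ToyBlock* test_block) {
  ToyBlock* old_block = best->block;
  for (ToyNode* in : best->inputs) {
    if (in != nullptr && in->is_con && in->block == old_block) {
      in->block->remove(in);
      test_block->add(in);
      for (ToyNode* proj : in->projections) {
        proj->block->remove(proj);
        test_block->add(proj);
      }
    }
  }
  old_block->remove(best);
  test_block->add(best);
}
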
--- a/src/hotspot/share/opto/library_call.cpp	Fri Oct 11 10:39:58 2019 +0200
+++ b/src/hotspot/share/opto/library_call.cpp	Fri Oct 11 14:13:17 2019 +0200
@@ -185,7 +185,6 @@
                                     int modifier_mask, int modifier_bits,
                                     RegionNode* region);
   Node* generate_interface_guard(Node* kls, RegionNode* region);
-  Node* generate_value_guard(Node* kls, RegionNode* region);
 
   enum ArrayKind {
     AnyArray,
@@ -3415,10 +3414,6 @@
   return generate_access_flags_guard(kls, JVM_ACC_INTERFACE, 0, region);
 }
 
-Node* LibraryCallKit::generate_value_guard(Node* kls, RegionNode* region) {
-  return generate_access_flags_guard(kls, JVM_ACC_VALUE, 0, region);
-}
-
 //-------------------------inline_native_Class_query-------------------
 bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) {
   const Type* return_type = TypeInt::BOOL;
@@ -4347,13 +4342,6 @@
   RegionNode* slow_region = new RegionNode(1);
   record_for_igvn(slow_region);
 
-  const TypeOopPtr* obj_type = _gvn.type(obj)->is_oopptr();
-  assert(!obj_type->isa_valuetype() || !obj_type->is_valuetypeptr(), "no value type here");
-  if (is_static && obj_type->can_be_value_type()) {
-    Node* obj_klass = load_object_klass(obj);
-    generate_value_guard(obj_klass, slow_region);
-  }
-
   // If this is a virtual call, we generate a funny guard.  We pull out
   // the vtable entry corresponding to hashCode() from the target object.
   // If the target method which we are calling happens to be the native
@@ -4374,6 +4362,7 @@
   Node* header = make_load(no_ctrl, header_addr, TypeX_X, TypeX_X->basic_type(), MemNode::unordered);
 
   // Test the header to see if it is unlocked.
+  // This also serves as a guard against value types (they have the always_locked_pattern set).
   Node *lock_mask      = _gvn.MakeConX(markWord::biased_lock_mask_in_place);
   Node *lmasked_header = _gvn.transform(new AndXNode(header, lock_mask));
   Node *unlocked_val   = _gvn.MakeConX(markWord::unlocked_value);
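
The explicit value-type guard removed from the hashCode intrinsic is redundant because the existing unlocked-header test already sends value types to the slow path: their mark word always carries always_locked_pattern inside the lock bits, which can never compare equal to unlocked_value. A minimal sketch of that argument (the parameters stand in for the markWord.hpp constants, which are not assumed here):

#include <cstdint>

// The fast inline hashCode path is only taken when the masked header equals
// unlocked_value; a value-type header has always_locked_pattern set within
// the same masked bits, so this returns false for value types.
bool takes_fast_hash_path(uintptr_t header,
                          uintptr_t biased_lock_mask_in_place,
                          uintptr_t unlocked_value) {
  return (header & biased_lock_mask_in_place) == unlocked_value;
}
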
--- a/src/hotspot/share/opto/macroArrayCopy.cpp	Fri Oct 11 10:39:58 2019 +0200
+++ b/src/hotspot/share/opto/macroArrayCopy.cpp	Fri Oct 11 14:13:17 2019 +0200
@@ -203,7 +203,7 @@
   Node* kls = NULL;
   if (_igvn.type(obj_or_klass)->isa_oopptr()) {
     Node* k_adr = basic_plus_adr(obj_or_klass, oopDesc::klass_offset_in_bytes());
-    kls = transform_later(LoadKlassNode::make(_igvn, NULL, C->immutable_memory(), k_adr, TypeInstPtr::KLASS));
+    kls = transform_later(LoadKlassNode::make(_igvn, NULL, C->immutable_memory(), k_adr, TypeInstPtr::KLASS, TypeKlassPtr::OBJECT, /* clear_prop_bits = */ true));
   } else {
     assert(_igvn.type(obj_or_klass)->isa_klassptr(), "what else?");
     kls = obj_or_klass;
--- a/src/hotspot/share/opto/memnode.cpp	Fri Oct 11 10:39:58 2019 +0200
+++ b/src/hotspot/share/opto/memnode.cpp	Fri Oct 11 14:13:17 2019 +0200
@@ -2188,19 +2188,20 @@
 //=============================================================================
 //----------------------------LoadKlassNode::make------------------------------
 // Polymorphic factory method:
-Node* LoadKlassNode::make(PhaseGVN& gvn, Node* ctl, Node* mem, Node* adr, const TypePtr* at, const TypeKlassPtr* tk) {
+Node* LoadKlassNode::make(PhaseGVN& gvn, Node* ctl, Node* mem, Node* adr, const TypePtr* at,
+                          const TypeKlassPtr* tk, bool clear_prop_bits) {
   // sanity check the alias category against the created node type
   const TypePtr *adr_type = adr->bottom_type()->isa_ptr();
   assert(adr_type != NULL, "expecting TypeKlassPtr");
 #ifdef _LP64
   if (adr_type->is_ptr_to_narrowklass()) {
     assert(UseCompressedClassPointers, "no compressed klasses");
-    Node* load_klass = gvn.transform(new LoadNKlassNode(ctl, mem, adr, at, tk->make_narrowklass(), MemNode::unordered));
+    Node* load_klass = gvn.transform(new LoadNKlassNode(ctl, mem, adr, at, tk->make_narrowklass(), MemNode::unordered, clear_prop_bits));
     return new DecodeNKlassNode(load_klass, load_klass->bottom_type()->make_ptr());
   }
 #endif
   assert(!adr_type->is_ptr_to_narrowklass() && !adr_type->is_ptr_to_narrowoop(), "should have got back a narrow oop");
-  return new LoadKlassNode(ctl, mem, adr, at, tk, MemNode::unordered);
+  return new LoadKlassNode(ctl, mem, adr, at, tk, MemNode::unordered, clear_prop_bits);
 }
 
 //------------------------------Value------------------------------------------
--- a/src/hotspot/share/opto/memnode.hpp	Fri Oct 11 10:39:58 2019 +0200
+++ b/src/hotspot/share/opto/memnode.hpp	Fri Oct 11 14:13:17 2019 +0200
@@ -514,29 +514,44 @@
 //------------------------------LoadKlassNode----------------------------------
 // Load a Klass from an object
 class LoadKlassNode : public LoadPNode {
+private:
+  bool _clear_prop_bits; // Clear the ArrayStorageProperties bits
 protected:
   // In most cases, LoadKlassNode does not have the control input set. If the control
   // input is set, it must not be removed (by LoadNode::Ideal()).
   virtual bool can_remove_control() const;
 public:
-  LoadKlassNode(Node *c, Node *mem, Node *adr, const TypePtr *at, const TypeKlassPtr *tk, MemOrd mo)
-    : LoadPNode(c, mem, adr, at, tk, mo) {}
+  LoadKlassNode(Node *c, Node *mem, Node *adr, const TypePtr *at, const TypeKlassPtr *tk, MemOrd mo, bool clear_prop_bits)
+    : LoadPNode(c, mem, adr, at, tk, mo), _clear_prop_bits(clear_prop_bits) {}
+  virtual uint hash() const { return LoadPNode::hash() + _clear_prop_bits; }
+  virtual bool cmp(const Node &n) const {
+    return (_clear_prop_bits == ((LoadKlassNode&)n)._clear_prop_bits) && LoadPNode::cmp(n);
+  }
+  virtual uint size_of() const { return sizeof(*this); }
   virtual int Opcode() const;
   virtual const Type* Value(PhaseGVN* phase) const;
   virtual Node* Identity(PhaseGVN* phase);
   virtual bool depends_only_on_test() const { return true; }
+  bool clear_prop_bits() const { return _clear_prop_bits; }
 
   // Polymorphic factory method:
   static Node* make(PhaseGVN& gvn, Node* ctl, Node* mem, Node* adr, const TypePtr* at,
-                    const TypeKlassPtr* tk = TypeKlassPtr::OBJECT);
+                    const TypeKlassPtr* tk = TypeKlassPtr::OBJECT, bool clear_prop_bits = false);
 };
 
 //------------------------------LoadNKlassNode---------------------------------
 // Load a narrow Klass from an object.
 class LoadNKlassNode : public LoadNNode {
+private:
+  bool _clear_prop_bits; // Clear the ArrayStorageProperties bits
 public:
-  LoadNKlassNode(Node *c, Node *mem, Node *adr, const TypePtr *at, const TypeNarrowKlass *tk, MemOrd mo)
-    : LoadNNode(c, mem, adr, at, tk, mo) {}
+  LoadNKlassNode(Node *c, Node *mem, Node *adr, const TypePtr *at, const TypeNarrowKlass *tk, MemOrd mo, bool clear_prop_bits)
+    : LoadNNode(c, mem, adr, at, tk, mo), _clear_prop_bits(clear_prop_bits) {}
+  virtual uint hash() const { return LoadNNode::hash() + _clear_prop_bits; }
+  virtual bool cmp(const Node &n) const {
+    return (_clear_prop_bits == ((LoadNKlassNode&)n)._clear_prop_bits) && LoadNNode::cmp(n);
+  }
+  virtual uint size_of() const { return sizeof(*this); }
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegN; }
   virtual int store_Opcode() const { return Op_StoreNKlass; }
@@ -545,6 +560,7 @@
   virtual const Type* Value(PhaseGVN* phase) const;
   virtual Node* Identity(PhaseGVN* phase);
   virtual bool depends_only_on_test() const { return true; }
+  bool clear_prop_bits() const { return _clear_prop_bits; }
 };
 
 // Retrieve the null free/flattened property from an array klass. This
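
Because a LoadKlass/LoadNKlass that clears the property bits is not interchangeable with one that leaves them intact, the new _clear_prop_bits flag has to take part in hash() and cmp(); otherwise GVN could common the two variants. A toy sketch of that value-numbering rule, using simplified stand-in types rather than HotSpot's Node API:

#include <cstddef>

struct ToyKlassLoad {
  const void* address;      // stands in for the load's address input
  bool clear_prop_bits;     // stands in for LoadKlassNode::_clear_prop_bits

  // Two loads may only be commoned if both the address and the flag agree.
  size_t hash() const {
    return reinterpret_cast<size_t>(address) + (clear_prop_bits ? 1 : 0);
  }
  bool equals(const ToyKlassLoad& other) const {
    return address == other.address && clear_prop_bits == other.clear_prop_bits;
  }
};
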
--- a/src/hotspot/share/opto/parse2.cpp	Fri Oct 11 10:39:58 2019 +0200
+++ b/src/hotspot/share/opto/parse2.cpp	Fri Oct 11 14:13:17 2019 +0200
@@ -159,9 +159,9 @@
         alloc_obj = _gvn.transform(alloc_obj);
 
         const Type* unknown_value = TypeInstPtr::BOTTOM->cast_to_flat_array();
-        
+
         alloc_obj = _gvn.transform(new CheckCastPPNode(control(), alloc_obj, unknown_value));
-        
+
         ideal.sync_kit(this);
 
         ideal.set(res, alloc_obj);
@@ -1925,7 +1925,7 @@
     return;
   }
 
-  // Substitutability test
+  // Allocate value type operands
   if (a->is_ValueType()) {
     inc_sp(2);
     a = a->as_ValueType()->allocate(this, true)->get_oop();
@@ -1937,19 +1937,17 @@
     dec_sp(2);
   }
 
+  // First, do a normal pointer comparison
   const TypeOopPtr* ta = _gvn.type(a)->isa_oopptr();
   const TypeOopPtr* tb = _gvn.type(b)->isa_oopptr();
-
+  Node* cmp = CmpP(a, b);
+  cmp = optimize_cmp_with_klass(cmp);
   if (ta == NULL || !ta->can_be_value_type_raw() ||
       tb == NULL || !tb->can_be_value_type_raw()) {
-    Node* cmp = CmpP(a, b);
-    cmp = optimize_cmp_with_klass(cmp);
+    // This is sufficient if one of the operands can't be a value type
     do_if(btest, cmp);
     return;
   }
-
-  Node* cmp = CmpP(a, b);
-  cmp = optimize_cmp_with_klass(cmp);
   Node* eq_region = NULL;
   if (btest == BoolTest::eq) {
     do_if(btest, cmp, true);
@@ -1974,7 +1972,8 @@
     }
     set_control(is_not_equal);
   }
-  // Pointers not equal, check for values
+
+  // Pointers are not equal, check if first operand is non-null
   Node* ne_region = new RegionNode(6);
   inc_sp(2);
   Node* null_ctl = top();
@@ -1996,17 +1995,14 @@
     return;
   }
 
+  // First operand is non-null, check if it is a value type
   Node* is_value = is_always_locked(not_null_a);
-  Node* value_mask = _gvn.MakeConX(markWord::always_locked_pattern);
-  Node* is_value_cmp = _gvn.transform(new CmpXNode(is_value, value_mask));
-  Node* is_value_bol = _gvn.transform(new BoolNode(is_value_cmp, BoolTest::ne));
-  IfNode* is_value_iff = create_and_map_if(control(), is_value_bol, PROB_FAIR, COUNT_UNKNOWN);
-  Node* not_value = _gvn.transform(new IfTrueNode(is_value_iff));
-  set_control(_gvn.transform(new IfFalseNode(is_value_iff)));
+  IfNode* is_value_iff = create_and_map_if(control(), is_value, PROB_FAIR, COUNT_UNKNOWN);
+  Node* not_value = _gvn.transform(new IfFalseNode(is_value_iff));
   ne_region->init_req(2, not_value);
-
-  // One of the 2 pointers refers to a value, check if both are of
-  // the same class
+  set_control(_gvn.transform(new IfTrueNode(is_value_iff)));
+
+  // The first operand is a value type, check if the second operand is non-null
   inc_sp(2);
   null_ctl = top();
   Node* not_null_b = null_check_oop(b, &null_ctl, !too_many_traps(Deoptimization::Reason_null_check), false, false);
@@ -2026,8 +2022,11 @@
     }
     return;
   }
-  Node* kls_a = load_object_klass(not_null_a);
-  Node* kls_b = load_object_klass(not_null_b);
+
+  // Check if both operands are of the same class. We don't need to clear the array property
+  // bits in the klass pointer for the cmp because we know that the first operand is a value type.
+  Node* kls_a = load_object_klass(not_null_a, /* clear_prop_bits = */ false);
+  Node* kls_b = load_object_klass(not_null_b, /* clear_prop_bits = */ false);
   Node* kls_cmp = CmpP(kls_a, kls_b);
   Node* kls_bol = _gvn.transform(new BoolNode(kls_cmp, BoolTest::ne));
   IfNode* kls_iff = create_and_map_if(control(), kls_bol, PROB_FAIR, COUNT_UNKNOWN);
@@ -2049,10 +2048,9 @@
     }
     return;
   }
-  // Both are values of the same class, we need to perform a
-  // substitutability test. Delegate to
-  // ValueBootstrapMethods::isSubstitutable().
-
+
+  // Both operands are value types of the same class, so we need to perform a
+  // substitutability test. Delegate to ValueBootstrapMethods::isSubstitutable().
   Node* ne_io_phi = PhiNode::make(ne_region, i_o());
   Node* mem = reset_memory();
   Node* ne_mem_phi = PhiNode::make(ne_region, mem);
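
Taken together, the reworked acmp parsing emits the pointer compare first and only widens the check on the path where a value type can still be involved. A hedged sketch of the overall decision sequence (the helper callbacks are placeholders, not HotSpot functions):

// Logical shape of the emitted acmp sequence for "a == b".
bool acmp(const void* a, const void* b,
          bool (*is_value_type)(const void*),
          bool (*same_klass)(const void*, const void*),
          bool (*is_substitutable)(const void*, const void*)) {
  if (a == b)            return true;   // fast path: plain pointer comparison
  if (a == nullptr)      return false;  // pointers differ and the first operand is null
  if (!is_value_type(a)) return false;  // identity object: the pointer compare decides
  if (b == nullptr)      return false;  // a value type is never equal to null
  if (!same_klass(a, b)) return false;  // klass compare; no property-bit clearing needed
  return is_substitutable(a, b);        // delegate to ValueBootstrapMethods::isSubstitutable()
}
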
--- a/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestCallingConventionC1.java	Fri Oct 11 10:39:58 2019 +0200
+++ b/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestCallingConventionC1.java	Fri Oct 11 14:13:17 2019 +0200
@@ -2272,92 +2272,91 @@
 
     /*** FIXME: disabled due to occassional timeout in mach5 testing. See JDK-8230408
 
-
     // C2->C1 invokeinterface -- C2 calls call Unverified Entry of MyImplVal2X.func1 (compiled by
     //                           C1, with VVEP_RO==VVEP)
     // This test is developed to validate JDK-8230325.
     @Test() @Warmup(0) @OSRCompileOnly
-    public int test115(Intf intf, int a, int b) {
+    public int test107(Intf intf, int a, int b) {
         return intf.func1(a, b);
     }
 
     @ForceCompile
-    public void test115_verifier(boolean warmup) {
+    public void test107_verifier(boolean warmup) {
         Intf intf1 = new MyImplVal1X();
         Intf intf2 = new MyImplVal2X();
 
         for (int i=0; i<1000; i++) {
-            test115(intf1, 123, 456);
+            test107(intf1, 123, 456);
         }
         for (int i=0; i<500000; i++) {
-            // Run enough loops so that test115 will be compiled by C2.
+            // Run enough loops so that test107 will be compiled by C2.
             if (i % 30 == 0) {
                 // This will indirectly call MyImplVal2X.func1, but the call frequency is low, so
-                // test115 will be compiled by C2, but MyImplVal2X.func1 will compiled by C1 only.
-                int result = test115(intf2, 123, 456) + i;
+                // test107 will be compiled by C2, but MyImplVal2X.func1 will be compiled by C1 only.
+                int result = test107(intf2, 123, 456) + i;
                 Asserts.assertEQ(result, intf2.func1(123, 456) + i);
             } else {
-                // Call test115 with a mix of intf1 and intf2, so C2 will use a virtual call (not an optimized call)
-                // for the invokeinterface bytecode in test115.
-                test115(intf1, 123, 456);
+                // Call test107 with a mix of intf1 and intf2, so C2 will use a virtual call (not an optimized call)
+                // for the invokeinterface bytecode in test107.
+                test107(intf1, 123, 456);
             }
         }
     }
 
-    // Same as test115, except we call MyImplVal2X.func2 (compiled by C1, VVEP_RO != VVEP)
+    // Same as test107, except we call MyImplVal2X.func2 (compiled by C1, VVEP_RO != VVEP)
     @Test() @Warmup(0) @OSRCompileOnly
-    public int test116(Intf intf, int a, int b) {
+    public int test108(Intf intf, int a, int b) {
         return intf.func2(a, b, pointField);
     }
 
     @ForceCompile
-    public void test116_verifier(boolean warmup) {
+    public void test108_verifier(boolean warmup) {
         Intf intf1 = new MyImplVal1X();
         Intf intf2 = new MyImplVal2X();
 
         for (int i=0; i<1000; i++) {
-            test116(intf1, 123, 456);
+            test108(intf1, 123, 456);
         }
         for (int i=0; i<500000; i++) {
-            // Run enough loops so that test116 will be compiled by C2.
+            // Run enough loops so that test108 will be compiled by C2.
             if (i % 30 == 0) {
                 // This will indirectly call MyImplVal2X.func2, but the call frequency is low, so
-                // test116 will be compiled by C2, but MyImplVal2X.func2 will compiled by C1 only.
-                int result = test116(intf2, 123, 456) + i;
+                // test108 will be compiled by C2, but MyImplVal2X.func2 will be compiled by C1 only.
+                int result = test108(intf2, 123, 456) + i;
                 Asserts.assertEQ(result, intf2.func2(123, 456, pointField) + i);
             } else {
-                // Call test116 with a mix of intf1 and intf2, so C2 will use a virtual call (not an optimized call)
-                // for the invokeinterface bytecode in test116.
-                test116(intf1, 123, 456);
+                // Call test108 with a mix of intf1 and intf2, so C2 will use a virtual call (not an optimized call)
+                // for the invokeinterface bytecode in test108.
+                test108(intf1, 123, 456);
             }
         }
     }
 
     // Same as test115, except we call MyImplPojo3.func2 (compiled by C1, VVEP_RO == VEP)
     @Test() @Warmup(0) @OSRCompileOnly
-    public int test117(Intf intf, int a, int b) {
+    public int test109(Intf intf, int a, int b) {
         return intf.func2(a, b, pointField);
     }
 
     @ForceCompile
-    public void test117_verifier(boolean warmup) {
+    public void test109_verifier(boolean warmup) {
         Intf intf1 = new MyImplPojo0();
         Intf intf2 = new MyImplPojo3();
 
         for (int i=0; i<1000; i++) {
-            test117(intf1, 123, 456);
+            test109(intf1, 123, 456);
         }
         for (int i=0; i<500000; i++) {
-            // Run enough loops so that test117 will be compiled by C2.
+            // Run enough loops so that test109 will be compiled by C2.
             if (i % 30 == 0) {
                 // This will indirectly call MyImplPojo3.func2, but the call frequency is low, so
-                // test117 will be compiled by C2, but MyImplPojo3.func2 will compiled by C1 only.
-                int result = test117(intf2, 123, 456) + i;
+                // test109 will be compiled by C2, but MyImplPojo3.func2 will be compiled by C1 only.
+                int result = test109(intf2, 123, 456) + i;
                 Asserts.assertEQ(result, intf2.func2(123, 456, pointField) + i);
             } else {
-                // Call test117 with a mix of intf1 and intf2, so C2 will use a virtual call (not an optimized call)
-                // for the invokeinterface bytecode in test117.
-                test117(intf1, 123, 456);
+                // Call test109 with a mix of intf1 and intf2, so C2 will use a virtual call (not an optimized call)
+                // for the invokeinterface bytecode in test109.
+                test109(intf1, 123, 456);
             }
         }
     }
--- a/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestLWorld.java	Fri Oct 11 10:39:58 2019 +0200
+++ b/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestLWorld.java	Fri Oct 11 14:13:17 2019 +0200
@@ -2351,4 +2351,41 @@
         int result = test94(array);
         Asserts.assertEquals(result, 0x42 * 2);
     }
+
+    // Test that no code for clearing the array klass property bits is emitted for acmp
+    // because when loading the klass, we already know that the operand is a value type.
+    @Warmup(10000)
+    @Test(failOn = STORAGE_PROPERTY_CLEARING)
+    public boolean test95(Object o1, Object o2) {
+        return o1 == o2;
+    }
+
+    @DontCompile
+    public void test95_verifier(boolean warmup) {
+        Object o1 = new Object();
+        Object o2 = new Object();
+        Asserts.assertTrue(test95(o1, o1));
+        Asserts.assertTrue(test95(null, null));
+        Asserts.assertFalse(test95(o1, null));
+        Asserts.assertFalse(test95(o1, o2));
+    }
+
+    // Same as test95 but operands are never null
+    @Warmup(10000)
+    @Test(failOn = STORAGE_PROPERTY_CLEARING)
+    public boolean test96(Object o1, Object o2) {
+        return o1 == o2;
+    }
+
+    @DontCompile
+    public void test96_verifier(boolean warmup) {
+        Object o1 = new Object();
+        Object o2 = new Object();
+        Asserts.assertTrue(test96(o1, o1));
+        Asserts.assertFalse(test96(o1, o2));
+        if (!warmup) {
+            Asserts.assertTrue(test96(null, null));
+            Asserts.assertFalse(test96(o1, null));
+        }
+    }
 }