diff src/share/vm/opto/macro.cpp @ 423:a1980da045cc

6462850: generate biased locking code in C2 ideal graph Summary: Inline biased locking code in C2 ideal graph during macro nodes expansion Reviewed-by: never
author kvn
date Fri, 07 Nov 2008 09:29:38 -0800
parents f8199438385b
children 87559db65269
line wrap: on
line diff
--- a/src/share/vm/opto/macro.cpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/share/vm/opto/macro.cpp	Fri Nov 07 09:29:38 2008 -0800
@@ -82,16 +82,31 @@
   }
 }
 
-Node* PhaseMacroExpand::opt_iff(Node* region, Node* iff) {
-  IfNode *opt_iff = transform_later(iff)->as_If();
+Node* PhaseMacroExpand::opt_bits_test(Node* ctrl, Node* region, int edge, Node* word, int mask, int bits, bool return_fast_path) {
+  Node* cmp;
+  if (mask != 0) {
+    Node* and_node = transform_later(new (C, 3) AndXNode(word, MakeConX(mask)));
+    cmp = transform_later(new (C, 3) CmpXNode(and_node, MakeConX(bits)));
+  } else {
+    cmp = word;
+  }
+  Node* bol = transform_later(new (C, 2) BoolNode(cmp, BoolTest::ne));
+  IfNode* iff = new (C, 2) IfNode( ctrl, bol, PROB_MIN, COUNT_UNKNOWN );
+  transform_later(iff);
 
-  // Fast path taken; set region slot 2
-  Node *fast_taken = transform_later( new (C, 1) IfFalseNode(opt_iff) );
-  region->init_req(2,fast_taken); // Capture fast-control
+  // Fast path taken.
+  Node *fast_taken = transform_later( new (C, 1) IfFalseNode(iff) );
 
   // Fast path not-taken, i.e. slow path
-  Node *slow_taken = transform_later( new (C, 1) IfTrueNode(opt_iff) );
-  return slow_taken;
+  Node *slow_taken = transform_later( new (C, 1) IfTrueNode(iff) );
+
+  if (return_fast_path) {
+    region->init_req(edge, slow_taken); // Capture slow-control
+    return fast_taken;
+  } else {
+    region->init_req(edge, fast_taken); // Capture fast-control
+    return slow_taken;
+  }
 }
 
 //--------------------copy_predefined_input_for_runtime_call--------------------
@@ -854,7 +869,7 @@
 
 Node* PhaseMacroExpand::make_load(Node* ctl, Node* mem, Node* base, int offset, const Type* value_type, BasicType bt) {
   Node* adr = basic_plus_adr(base, offset);
-  const TypePtr* adr_type = TypeRawPtr::BOTTOM;
+  const TypePtr* adr_type = adr->bottom_type()->is_ptr();
   Node* value = LoadNode::make(_igvn, ctl, mem, adr, adr_type, value_type, bt);
   transform_later(value);
   return value;
@@ -1583,12 +1598,194 @@
   Node* flock = lock->fastlock_node();
 
   // Make the merge point
-  Node *region = new (C, 3) RegionNode(3);
+  Node *region;
+  Node *mem_phi;
+  Node *slow_path;
 
-  Node *bol = transform_later(new (C, 2) BoolNode(flock,BoolTest::ne));
-  Node *iff = new (C, 2) IfNode( ctrl, bol, PROB_MIN, COUNT_UNKNOWN );
-  // Optimize test; set region slot 2
-  Node *slow_path = opt_iff(region,iff);
+  if (UseOptoBiasInlining) {
+    /*
+     *  See the full descrition in MacroAssembler::biased_locking_enter().
+     *
+     *  if( (mark_word & biased_lock_mask) == biased_lock_pattern ) {
+     *    // The object is biased.
+     *    proto_node = klass->prototype_header;
+     *    o_node = thread | proto_node;
+     *    x_node = o_node ^ mark_word;
+     *    if( (x_node & ~age_mask) == 0 ) { // Biased to the current thread ?
+     *      // Done.
+     *    } else {
+     *      if( (x_node & biased_lock_mask) != 0 ) {
+     *        // The klass's prototype header is no longer biased.
+     *        cas(&mark_word, mark_word, proto_node)
+     *        goto cas_lock;
+     *      } else {
+     *        // The klass's prototype header is still biased.
+     *        if( (x_node & epoch_mask) != 0 ) { // Expired epoch?
+     *          old = mark_word;
+     *          new = o_node;
+     *        } else {
+     *          // Different thread or anonymous biased.
+     *          old = mark_word & (epoch_mask | age_mask | biased_lock_mask);
+     *          new = thread | old;
+     *        }
+     *        // Try to rebias.
+     *        if( cas(&mark_word, old, new) == 0 ) {
+     *          // Done.
+     *        } else {
+     *          goto slow_path; // Failed.
+     *        }
+     *      }
+     *    }
+     *  } else {
+     *    // The object is not biased.
+     *    cas_lock:
+     *    if( FastLock(obj) == 0 ) {
+     *      // Done.
+     *    } else {
+     *      slow_path:
+     *      OptoRuntime::complete_monitor_locking_Java(obj);
+     *    }
+     *  }
+     */
+
+    region  = new (C, 5) RegionNode(5);
+    // create a Phi for the memory state
+    mem_phi = new (C, 5) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
+
+    Node* fast_lock_region  = new (C, 3) RegionNode(3);
+    Node* fast_lock_mem_phi = new (C, 3) PhiNode( fast_lock_region, Type::MEMORY, TypeRawPtr::BOTTOM);
+
+    // First, check mark word for the biased lock pattern.
+    Node* mark_node = make_load(ctrl, mem, obj, oopDesc::mark_offset_in_bytes(), TypeX_X, TypeX_X->basic_type());
+
+    // Get fast path - mark word has the biased lock pattern.
+    ctrl = opt_bits_test(ctrl, fast_lock_region, 1, mark_node,
+                         markOopDesc::biased_lock_mask_in_place,
+                         markOopDesc::biased_lock_pattern, true);
+    // fast_lock_region->in(1) is set to slow path.
+    fast_lock_mem_phi->init_req(1, mem);
+
+    // Now check that the lock is biased to the current thread and has
+    // the same epoch and bias as Klass::_prototype_header.
+
+    // Special-case a fresh allocation to avoid building nodes:
+    Node* klass_node = AllocateNode::Ideal_klass(obj, &_igvn);
+    if (klass_node == NULL) {
+      Node* k_adr = basic_plus_adr(obj, oopDesc::klass_offset_in_bytes());
+      klass_node = transform_later( LoadKlassNode::make(_igvn, mem, k_adr, _igvn.type(k_adr)->is_ptr()) );
+      klass_node->init_req(0, ctrl);
+    }
+    Node *proto_node = make_load(ctrl, mem, klass_node, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), TypeX_X, TypeX_X->basic_type());
+
+    Node* thread = transform_later(new (C, 1) ThreadLocalNode());
+    Node* cast_thread = transform_later(new (C, 2) CastP2XNode(ctrl, thread));
+    Node* o_node = transform_later(new (C, 3) OrXNode(cast_thread, proto_node));
+    Node* x_node = transform_later(new (C, 3) XorXNode(o_node, mark_node));
+
+    // Get slow path - mark word does NOT match the value.
+    Node* not_biased_ctrl =  opt_bits_test(ctrl, region, 3, x_node,
+                                      (~markOopDesc::age_mask_in_place), 0);
+    // region->in(3) is set to fast path - the object is biased to the current thread.
+    mem_phi->init_req(3, mem);
+
+
+    // Mark word does NOT match the value (thread | Klass::_prototype_header).
+
+
+    // First, check biased pattern.
+    // Get fast path - _prototype_header has the same biased lock pattern.
+    ctrl =  opt_bits_test(not_biased_ctrl, fast_lock_region, 2, x_node,
+                          markOopDesc::biased_lock_mask_in_place, 0, true);
+
+    not_biased_ctrl = fast_lock_region->in(2); // Slow path
+    // fast_lock_region->in(2) - the prototype header is no longer biased
+    // and we have to revoke the bias on this object.
+    // We are going to try to reset the mark of this object to the prototype
+    // value and fall through to the CAS-based locking scheme.
+    Node* adr = basic_plus_adr(obj, oopDesc::mark_offset_in_bytes());
+    Node* cas = new (C, 5) StoreXConditionalNode(not_biased_ctrl, mem, adr,
+                                                 proto_node, mark_node);
+    transform_later(cas);
+    Node* proj = transform_later( new (C, 1) SCMemProjNode(cas));
+    fast_lock_mem_phi->init_req(2, proj);
+
+
+    // Second, check epoch bits.
+    Node* rebiased_region  = new (C, 3) RegionNode(3);
+    Node* old_phi = new (C, 3) PhiNode( rebiased_region, TypeX_X);
+    Node* new_phi = new (C, 3) PhiNode( rebiased_region, TypeX_X);
+
+    // Get slow path - mark word does NOT match epoch bits.
+    Node* epoch_ctrl =  opt_bits_test(ctrl, rebiased_region, 1, x_node,
+                                      markOopDesc::epoch_mask_in_place, 0);
+    // The epoch of the current bias is not valid, attempt to rebias the object
+    // toward the current thread.
+    rebiased_region->init_req(2, epoch_ctrl);
+    old_phi->init_req(2, mark_node);
+    new_phi->init_req(2, o_node);
+
+    // rebiased_region->in(1) is set to fast path.
+    // The epoch of the current bias is still valid but we know
+    // nothing about the owner; it might be set or it might be clear.
+    Node* cmask   = MakeConX(markOopDesc::biased_lock_mask_in_place |
+                             markOopDesc::age_mask_in_place |
+                             markOopDesc::epoch_mask_in_place);
+    Node* old = transform_later(new (C, 3) AndXNode(mark_node, cmask));
+    cast_thread = transform_later(new (C, 2) CastP2XNode(ctrl, thread));
+    Node* new_mark = transform_later(new (C, 3) OrXNode(cast_thread, old));
+    old_phi->init_req(1, old);
+    new_phi->init_req(1, new_mark);
+
+    transform_later(rebiased_region);
+    transform_later(old_phi);
+    transform_later(new_phi);
+
+    // Try to acquire the bias of the object using an atomic operation.
+    // If this fails we will go in to the runtime to revoke the object's bias.
+    cas = new (C, 5) StoreXConditionalNode(rebiased_region, mem, adr,
+                                           new_phi, old_phi);
+    transform_later(cas);
+    proj = transform_later( new (C, 1) SCMemProjNode(cas));
+
+    // Get slow path - Failed to CAS.
+    not_biased_ctrl = opt_bits_test(rebiased_region, region, 4, cas, 0, 0);
+    mem_phi->init_req(4, proj);
+    // region->in(4) is set to fast path - the object is rebiased to the current thread.
+
+    // Failed to CAS.
+    slow_path  = new (C, 3) RegionNode(3);
+    Node *slow_mem = new (C, 3) PhiNode( slow_path, Type::MEMORY, TypeRawPtr::BOTTOM);
+
+    slow_path->init_req(1, not_biased_ctrl); // Capture slow-control
+    slow_mem->init_req(1, proj);
+
+    // Call CAS-based locking scheme (FastLock node).
+
+    transform_later(fast_lock_region);
+    transform_later(fast_lock_mem_phi);
+
+    // Get slow path - FastLock failed to lock the object.
+    ctrl = opt_bits_test(fast_lock_region, region, 2, flock, 0, 0);
+    mem_phi->init_req(2, fast_lock_mem_phi);
+    // region->in(2) is set to fast path - the object is locked to the current thread.
+
+    slow_path->init_req(2, ctrl); // Capture slow-control
+    slow_mem->init_req(2, fast_lock_mem_phi);
+
+    transform_later(slow_path);
+    transform_later(slow_mem);
+    // Reset lock's memory edge.
+    lock->set_req(TypeFunc::Memory, slow_mem);
+
+  } else {
+    region  = new (C, 3) RegionNode(3);
+    // create a Phi for the memory state
+    mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
+
+    // Optimize test; set region slot 2
+    slow_path = opt_bits_test(ctrl, region, 2, flock, 0, 0);
+    mem_phi->init_req(2, mem);
+  }
 
   // Make slow path call
   CallNode *call = make_slow_call( (CallNode *) lock, OptoRuntime::complete_monitor_enter_Type(), OptoRuntime::complete_monitor_locking_Java(), NULL, slow_path, obj, box );
@@ -1614,16 +1811,11 @@
   transform_later(region);
   _igvn.subsume_node(_fallthroughproj, region);
 
-  // create a Phi for the memory state
-  Node *mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
-  Node *memproj = transform_later( new (C, 1) ProjNode(call, TypeFunc::Memory) );
+  Node *memproj = transform_later( new(C, 1) ProjNode(call, TypeFunc::Memory) );
   mem_phi->init_req(1, memproj );
-  mem_phi->init_req(2, mem);
   transform_later(mem_phi);
-    _igvn.hash_delete(_memproj_fallthrough);
+  _igvn.hash_delete(_memproj_fallthrough);
   _igvn.subsume_node(_memproj_fallthrough, mem_phi);
-
-
 }
 
 //------------------------------expand_unlock_node----------------------
@@ -1637,14 +1829,31 @@
   // No need for a null check on unlock
 
   // Make the merge point
-  RegionNode *region = new (C, 3) RegionNode(3);
+  Node *region;
+  Node *mem_phi;
+
+  if (UseOptoBiasInlining) {
+    // Check for biased locking unlock case, which is a no-op.
+    // See the full descrition in MacroAssembler::biased_locking_exit().
+    region  = new (C, 4) RegionNode(4);
+    // create a Phi for the memory state
+    mem_phi = new (C, 4) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
+    mem_phi->init_req(3, mem);
+
+    Node* mark_node = make_load(ctrl, mem, obj, oopDesc::mark_offset_in_bytes(), TypeX_X, TypeX_X->basic_type());
+    ctrl = opt_bits_test(ctrl, region, 3, mark_node,
+                         markOopDesc::biased_lock_mask_in_place,
+                         markOopDesc::biased_lock_pattern);
+  } else {
+    region  = new (C, 3) RegionNode(3);
+    // create a Phi for the memory state
+    mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
+  }
 
   FastUnlockNode *funlock = new (C, 3) FastUnlockNode( ctrl, obj, box );
   funlock = transform_later( funlock )->as_FastUnlock();
-  Node *bol = transform_later(new (C, 2) BoolNode(funlock,BoolTest::ne));
-  Node *iff = new (C, 2) IfNode( ctrl, bol, PROB_MIN, COUNT_UNKNOWN );
   // Optimize test; set region slot 2
-  Node *slow_path = opt_iff(region,iff);
+  Node *slow_path = opt_bits_test(ctrl, region, 2, funlock, 0, 0);
 
   CallNode *call = make_slow_call( (CallNode *) unlock, OptoRuntime::complete_monitor_exit_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), "complete_monitor_unlocking_C", slow_path, obj, box );
 
@@ -1666,16 +1875,12 @@
   transform_later(region);
   _igvn.subsume_node(_fallthroughproj, region);
 
-  // create a Phi for the memory state
-  Node *mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
   Node *memproj = transform_later( new(C, 1) ProjNode(call, TypeFunc::Memory) );
   mem_phi->init_req(1, memproj );
   mem_phi->init_req(2, mem);
   transform_later(mem_phi);
-    _igvn.hash_delete(_memproj_fallthrough);
+  _igvn.hash_delete(_memproj_fallthrough);
   _igvn.subsume_node(_memproj_fallthrough, mem_phi);
-
-
 }
 
 //------------------------------expand_macro_nodes----------------------