comparison src/share/vm/opto/macro.cpp @ 423:a1980da045cc

6462850: generate biased locking code in C2 ideal graph Summary: Inline biased locking code in C2 ideal graph during macro nodes expansion Reviewed-by: never
author kvn
date Fri, 07 Nov 2008 09:29:38 -0800
parents f8199438385b
children 87559db65269
comparison
equal deleted inserted replaced
21:8a0fe71c4a99 22:a6a872b9c027
80 jvms->set_scloff(jvms->scloff()+jvms_adj); 80 jvms->set_scloff(jvms->scloff()+jvms_adj);
81 jvms->set_endoff(jvms->endoff()+jvms_adj); 81 jvms->set_endoff(jvms->endoff()+jvms_adj);
82 } 82 }
83 } 83 }
84 84
85 Node* PhaseMacroExpand::opt_iff(Node* region, Node* iff) { 85 Node* PhaseMacroExpand::opt_bits_test(Node* ctrl, Node* region, int edge, Node* word, int mask, int bits, bool return_fast_path) {
86 IfNode *opt_iff = transform_later(iff)->as_If(); 86 Node* cmp;
87 87 if (mask != 0) {
88 // Fast path taken; set region slot 2 88 Node* and_node = transform_later(new (C, 3) AndXNode(word, MakeConX(mask)));
89 Node *fast_taken = transform_later( new (C, 1) IfFalseNode(opt_iff) ); 89 cmp = transform_later(new (C, 3) CmpXNode(and_node, MakeConX(bits)));
90 region->init_req(2,fast_taken); // Capture fast-control 90 } else {
91 cmp = word;
92 }
93 Node* bol = transform_later(new (C, 2) BoolNode(cmp, BoolTest::ne));
94 IfNode* iff = new (C, 2) IfNode( ctrl, bol, PROB_MIN, COUNT_UNKNOWN );
95 transform_later(iff);
96
97 // Fast path taken.
98 Node *fast_taken = transform_later( new (C, 1) IfFalseNode(iff) );
91 99
92 // Fast path not-taken, i.e. slow path 100 // Fast path not-taken, i.e. slow path
93 Node *slow_taken = transform_later( new (C, 1) IfTrueNode(opt_iff) ); 101 Node *slow_taken = transform_later( new (C, 1) IfTrueNode(iff) );
94 return slow_taken; 102
103 if (return_fast_path) {
104 region->init_req(edge, slow_taken); // Capture slow-control
105 return fast_taken;
106 } else {
107 region->init_req(edge, fast_taken); // Capture fast-control
108 return slow_taken;
109 }
95 } 110 }
96 111
97 //--------------------copy_predefined_input_for_runtime_call-------------------- 112 //--------------------copy_predefined_input_for_runtime_call--------------------
98 void PhaseMacroExpand::copy_predefined_input_for_runtime_call(Node * ctrl, CallNode* oldcall, CallNode* call) { 113 void PhaseMacroExpand::copy_predefined_input_for_runtime_call(Node * ctrl, CallNode* oldcall, CallNode* call) {
99 // Set fixed predefined input arguments 114 // Set fixed predefined input arguments
852 } 867 }
853 868
854 869
855 Node* PhaseMacroExpand::make_load(Node* ctl, Node* mem, Node* base, int offset, const Type* value_type, BasicType bt) { 870 Node* PhaseMacroExpand::make_load(Node* ctl, Node* mem, Node* base, int offset, const Type* value_type, BasicType bt) {
856 Node* adr = basic_plus_adr(base, offset); 871 Node* adr = basic_plus_adr(base, offset);
857 const TypePtr* adr_type = TypeRawPtr::BOTTOM; 872 const TypePtr* adr_type = adr->bottom_type()->is_ptr();
858 Node* value = LoadNode::make(_igvn, ctl, mem, adr, adr_type, value_type, bt); 873 Node* value = LoadNode::make(_igvn, ctl, mem, adr, adr_type, value_type, bt);
859 transform_later(value); 874 transform_later(value);
860 return value; 875 return value;
861 } 876 }
862 877
1581 Node* obj = lock->obj_node(); 1596 Node* obj = lock->obj_node();
1582 Node* box = lock->box_node(); 1597 Node* box = lock->box_node();
1583 Node* flock = lock->fastlock_node(); 1598 Node* flock = lock->fastlock_node();
1584 1599
1585 // Make the merge point 1600 // Make the merge point
1586 Node *region = new (C, 3) RegionNode(3); 1601 Node *region;
1587 1602 Node *mem_phi;
1588 Node *bol = transform_later(new (C, 2) BoolNode(flock,BoolTest::ne)); 1603 Node *slow_path;
1589 Node *iff = new (C, 2) IfNode( ctrl, bol, PROB_MIN, COUNT_UNKNOWN ); 1604
1590 // Optimize test; set region slot 2 1605 if (UseOptoBiasInlining) {
1591 Node *slow_path = opt_iff(region,iff); 1606 /*
1607 * See the full description in MacroAssembler::biased_locking_enter().
1608 *
1609 * if( (mark_word & biased_lock_mask) == biased_lock_pattern ) {
1610 * // The object is biased.
1611 * proto_node = klass->prototype_header;
1612 * o_node = thread | proto_node;
1613 * x_node = o_node ^ mark_word;
1614 * if( (x_node & ~age_mask) == 0 ) { // Biased to the current thread ?
1615 * // Done.
1616 * } else {
1617 * if( (x_node & biased_lock_mask) != 0 ) {
1618 * // The klass's prototype header is no longer biased.
1619 * cas(&mark_word, mark_word, proto_node)
1620 * goto cas_lock;
1621 * } else {
1622 * // The klass's prototype header is still biased.
1623 * if( (x_node & epoch_mask) != 0 ) { // Expired epoch?
1624 * old = mark_word;
1625 * new = o_node;
1626 * } else {
1627 * // Different thread or anonymous biased.
1628 * old = mark_word & (epoch_mask | age_mask | biased_lock_mask);
1629 * new = thread | old;
1630 * }
1631 * // Try to rebias.
1632 * if( cas(&mark_word, old, new) == 0 ) {
1633 * // Done.
1634 * } else {
1635 * goto slow_path; // Failed.
1636 * }
1637 * }
1638 * }
1639 * } else {
1640 * // The object is not biased.
1641 * cas_lock:
1642 * if( FastLock(obj) == 0 ) {
1643 * // Done.
1644 * } else {
1645 * slow_path:
1646 * OptoRuntime::complete_monitor_locking_Java(obj);
1647 * }
1648 * }
1649 */
1650
1651 region = new (C, 5) RegionNode(5);
1652 // create a Phi for the memory state
1653 mem_phi = new (C, 5) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
1654
1655 Node* fast_lock_region = new (C, 3) RegionNode(3);
1656 Node* fast_lock_mem_phi = new (C, 3) PhiNode( fast_lock_region, Type::MEMORY, TypeRawPtr::BOTTOM);
1657
1658 // First, check mark word for the biased lock pattern.
1659 Node* mark_node = make_load(ctrl, mem, obj, oopDesc::mark_offset_in_bytes(), TypeX_X, TypeX_X->basic_type());
1660
1661 // Get fast path - mark word has the biased lock pattern.
1662 ctrl = opt_bits_test(ctrl, fast_lock_region, 1, mark_node,
1663 markOopDesc::biased_lock_mask_in_place,
1664 markOopDesc::biased_lock_pattern, true);
1665 // fast_lock_region->in(1) is set to slow path.
1666 fast_lock_mem_phi->init_req(1, mem);
1667
1668 // Now check that the lock is biased to the current thread and has
1669 // the same epoch and bias as Klass::_prototype_header.
1670
1671 // Special-case a fresh allocation to avoid building nodes:
1672 Node* klass_node = AllocateNode::Ideal_klass(obj, &_igvn);
1673 if (klass_node == NULL) {
1674 Node* k_adr = basic_plus_adr(obj, oopDesc::klass_offset_in_bytes());
1675 klass_node = transform_later( LoadKlassNode::make(_igvn, mem, k_adr, _igvn.type(k_adr)->is_ptr()) );
1676 klass_node->init_req(0, ctrl);
1677 }
1678 Node *proto_node = make_load(ctrl, mem, klass_node, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), TypeX_X, TypeX_X->basic_type());
1679
1680 Node* thread = transform_later(new (C, 1) ThreadLocalNode());
1681 Node* cast_thread = transform_later(new (C, 2) CastP2XNode(ctrl, thread));
1682 Node* o_node = transform_later(new (C, 3) OrXNode(cast_thread, proto_node));
1683 Node* x_node = transform_later(new (C, 3) XorXNode(o_node, mark_node));
1684
1685 // Get slow path - mark word does NOT match the value.
1686 Node* not_biased_ctrl = opt_bits_test(ctrl, region, 3, x_node,
1687 (~markOopDesc::age_mask_in_place), 0);
1688 // region->in(3) is set to fast path - the object is biased to the current thread.
1689 mem_phi->init_req(3, mem);
1690
1691
1692 // Mark word does NOT match the value (thread | Klass::_prototype_header).
1693
1694
1695 // First, check biased pattern.
1696 // Get fast path - _prototype_header has the same biased lock pattern.
1697 ctrl = opt_bits_test(not_biased_ctrl, fast_lock_region, 2, x_node,
1698 markOopDesc::biased_lock_mask_in_place, 0, true);
1699
1700 not_biased_ctrl = fast_lock_region->in(2); // Slow path
1701 // fast_lock_region->in(2) - the prototype header is no longer biased
1702 // and we have to revoke the bias on this object.
1703 // We are going to try to reset the mark of this object to the prototype
1704 // value and fall through to the CAS-based locking scheme.
1705 Node* adr = basic_plus_adr(obj, oopDesc::mark_offset_in_bytes());
1706 Node* cas = new (C, 5) StoreXConditionalNode(not_biased_ctrl, mem, adr,
1707 proto_node, mark_node);
1708 transform_later(cas);
1709 Node* proj = transform_later( new (C, 1) SCMemProjNode(cas));
1710 fast_lock_mem_phi->init_req(2, proj);
1711
1712
1713 // Second, check epoch bits.
1714 Node* rebiased_region = new (C, 3) RegionNode(3);
1715 Node* old_phi = new (C, 3) PhiNode( rebiased_region, TypeX_X);
1716 Node* new_phi = new (C, 3) PhiNode( rebiased_region, TypeX_X);
1717
1718 // Get slow path - mark word does NOT match epoch bits.
1719 Node* epoch_ctrl = opt_bits_test(ctrl, rebiased_region, 1, x_node,
1720 markOopDesc::epoch_mask_in_place, 0);
1721 // The epoch of the current bias is not valid, attempt to rebias the object
1722 // toward the current thread.
1723 rebiased_region->init_req(2, epoch_ctrl);
1724 old_phi->init_req(2, mark_node);
1725 new_phi->init_req(2, o_node);
1726
1727 // rebiased_region->in(1) is set to fast path.
1728 // The epoch of the current bias is still valid but we know
1729 // nothing about the owner; it might be set or it might be clear.
1730 Node* cmask = MakeConX(markOopDesc::biased_lock_mask_in_place |
1731 markOopDesc::age_mask_in_place |
1732 markOopDesc::epoch_mask_in_place);
1733 Node* old = transform_later(new (C, 3) AndXNode(mark_node, cmask));
1734 cast_thread = transform_later(new (C, 2) CastP2XNode(ctrl, thread));
1735 Node* new_mark = transform_later(new (C, 3) OrXNode(cast_thread, old));
1736 old_phi->init_req(1, old);
1737 new_phi->init_req(1, new_mark);
1738
1739 transform_later(rebiased_region);
1740 transform_later(old_phi);
1741 transform_later(new_phi);
1742
1743 // Try to acquire the bias of the object using an atomic operation.
1744 // If this fails we will go in to the runtime to revoke the object's bias.
1745 cas = new (C, 5) StoreXConditionalNode(rebiased_region, mem, adr,
1746 new_phi, old_phi);
1747 transform_later(cas);
1748 proj = transform_later( new (C, 1) SCMemProjNode(cas));
1749
1750 // Get slow path - Failed to CAS.
1751 not_biased_ctrl = opt_bits_test(rebiased_region, region, 4, cas, 0, 0);
1752 mem_phi->init_req(4, proj);
1753 // region->in(4) is set to fast path - the object is rebiased to the current thread.
1754
1755 // Failed to CAS.
1756 slow_path = new (C, 3) RegionNode(3);
1757 Node *slow_mem = new (C, 3) PhiNode( slow_path, Type::MEMORY, TypeRawPtr::BOTTOM);
1758
1759 slow_path->init_req(1, not_biased_ctrl); // Capture slow-control
1760 slow_mem->init_req(1, proj);
1761
1762 // Call CAS-based locking scheme (FastLock node).
1763
1764 transform_later(fast_lock_region);
1765 transform_later(fast_lock_mem_phi);
1766
1767 // Get slow path - FastLock failed to lock the object.
1768 ctrl = opt_bits_test(fast_lock_region, region, 2, flock, 0, 0);
1769 mem_phi->init_req(2, fast_lock_mem_phi);
1770 // region->in(2) is set to fast path - the object is locked to the current thread.
1771
1772 slow_path->init_req(2, ctrl); // Capture slow-control
1773 slow_mem->init_req(2, fast_lock_mem_phi);
1774
1775 transform_later(slow_path);
1776 transform_later(slow_mem);
1777 // Reset lock's memory edge.
1778 lock->set_req(TypeFunc::Memory, slow_mem);
1779
1780 } else {
1781 region = new (C, 3) RegionNode(3);
1782 // create a Phi for the memory state
1783 mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
1784
1785 // Optimize test; set region slot 2
1786 slow_path = opt_bits_test(ctrl, region, 2, flock, 0, 0);
1787 mem_phi->init_req(2, mem);
1788 }
1592 1789
1593 // Make slow path call 1790 // Make slow path call
1594 CallNode *call = make_slow_call( (CallNode *) lock, OptoRuntime::complete_monitor_enter_Type(), OptoRuntime::complete_monitor_locking_Java(), NULL, slow_path, obj, box ); 1791 CallNode *call = make_slow_call( (CallNode *) lock, OptoRuntime::complete_monitor_enter_Type(), OptoRuntime::complete_monitor_locking_Java(), NULL, slow_path, obj, box );
1595 1792
1596 extract_call_projections(call); 1793 extract_call_projections(call);
1612 region->init_req(1, slow_ctrl); 1809 region->init_req(1, slow_ctrl);
1613 // region inputs are now complete 1810 // region inputs are now complete
1614 transform_later(region); 1811 transform_later(region);
1615 _igvn.subsume_node(_fallthroughproj, region); 1812 _igvn.subsume_node(_fallthroughproj, region);
1616 1813
1617 // create a Phi for the memory state 1814 Node *memproj = transform_later( new(C, 1) ProjNode(call, TypeFunc::Memory) );
1618 Node *mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
1619 Node *memproj = transform_later( new (C, 1) ProjNode(call, TypeFunc::Memory) );
1620 mem_phi->init_req(1, memproj ); 1815 mem_phi->init_req(1, memproj );
1621 mem_phi->init_req(2, mem);
1622 transform_later(mem_phi); 1816 transform_later(mem_phi);
1623 _igvn.hash_delete(_memproj_fallthrough); 1817 _igvn.hash_delete(_memproj_fallthrough);
1624 _igvn.subsume_node(_memproj_fallthrough, mem_phi); 1818 _igvn.subsume_node(_memproj_fallthrough, mem_phi);
1625
1626
1627 } 1819 }
1628 1820
1629 //------------------------------expand_unlock_node---------------------- 1821 //------------------------------expand_unlock_node----------------------
1630 void PhaseMacroExpand::expand_unlock_node(UnlockNode *unlock) { 1822 void PhaseMacroExpand::expand_unlock_node(UnlockNode *unlock) {
1631 1823
1635 Node* box = unlock->box_node(); 1827 Node* box = unlock->box_node();
1636 1828
1637 // No need for a null check on unlock 1829 // No need for a null check on unlock
1638 1830
1639 // Make the merge point 1831 // Make the merge point
1640 RegionNode *region = new (C, 3) RegionNode(3); 1832 Node *region;
1833 Node *mem_phi;
1834
1835 if (UseOptoBiasInlining) {
1836 // Check for biased locking unlock case, which is a no-op.
1837 // See the full description in MacroAssembler::biased_locking_exit().
1838 region = new (C, 4) RegionNode(4);
1839 // create a Phi for the memory state
1840 mem_phi = new (C, 4) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
1841 mem_phi->init_req(3, mem);
1842
1843 Node* mark_node = make_load(ctrl, mem, obj, oopDesc::mark_offset_in_bytes(), TypeX_X, TypeX_X->basic_type());
1844 ctrl = opt_bits_test(ctrl, region, 3, mark_node,
1845 markOopDesc::biased_lock_mask_in_place,
1846 markOopDesc::biased_lock_pattern);
1847 } else {
1848 region = new (C, 3) RegionNode(3);
1849 // create a Phi for the memory state
1850 mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
1851 }
1641 1852
1642 FastUnlockNode *funlock = new (C, 3) FastUnlockNode( ctrl, obj, box ); 1853 FastUnlockNode *funlock = new (C, 3) FastUnlockNode( ctrl, obj, box );
1643 funlock = transform_later( funlock )->as_FastUnlock(); 1854 funlock = transform_later( funlock )->as_FastUnlock();
1644 Node *bol = transform_later(new (C, 2) BoolNode(funlock,BoolTest::ne));
1645 Node *iff = new (C, 2) IfNode( ctrl, bol, PROB_MIN, COUNT_UNKNOWN );
1646 // Optimize test; set region slot 2 1855 // Optimize test; set region slot 2
1647 Node *slow_path = opt_iff(region,iff); 1856 Node *slow_path = opt_bits_test(ctrl, region, 2, funlock, 0, 0);
1648 1857
1649 CallNode *call = make_slow_call( (CallNode *) unlock, OptoRuntime::complete_monitor_exit_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), "complete_monitor_unlocking_C", slow_path, obj, box ); 1858 CallNode *call = make_slow_call( (CallNode *) unlock, OptoRuntime::complete_monitor_exit_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), "complete_monitor_unlocking_C", slow_path, obj, box );
1650 1859
1651 extract_call_projections(call); 1860 extract_call_projections(call);
1652 1861
1664 region->init_req(1, slow_ctrl); 1873 region->init_req(1, slow_ctrl);
1665 // region inputs are now complete 1874 // region inputs are now complete
1666 transform_later(region); 1875 transform_later(region);
1667 _igvn.subsume_node(_fallthroughproj, region); 1876 _igvn.subsume_node(_fallthroughproj, region);
1668 1877
1669 // create a Phi for the memory state
1670 Node *mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
1671 Node *memproj = transform_later( new(C, 1) ProjNode(call, TypeFunc::Memory) ); 1878 Node *memproj = transform_later( new(C, 1) ProjNode(call, TypeFunc::Memory) );
1672 mem_phi->init_req(1, memproj ); 1879 mem_phi->init_req(1, memproj );
1673 mem_phi->init_req(2, mem); 1880 mem_phi->init_req(2, mem);
1674 transform_later(mem_phi); 1881 transform_later(mem_phi);
1675 _igvn.hash_delete(_memproj_fallthrough); 1882 _igvn.hash_delete(_memproj_fallthrough);
1676 _igvn.subsume_node(_memproj_fallthrough, mem_phi); 1883 _igvn.subsume_node(_memproj_fallthrough, mem_phi);
1677
1678
1679 } 1884 }
1680 1885
1681 //------------------------------expand_macro_nodes---------------------- 1886 //------------------------------expand_macro_nodes----------------------
1682 // Returns true if a failure occurred. 1887 // Returns true if a failure occurred.
1683 bool PhaseMacroExpand::expand_macro_nodes() { 1888 bool PhaseMacroExpand::expand_macro_nodes() {