changeset 7801:8e93f8517960

8063137: Never-taken branches should be pruned when GWT LambdaForms are shared Reviewed-by: jrose, kvn
author vlivanov
date Thu, 29 Jan 2015 10:25:59 -0800
parents c1a17f52db53
children ea802a4750aa
files src/share/vm/ci/ciMethod.cpp src/share/vm/ci/ciMethod.hpp src/share/vm/classfile/vmSymbols.hpp src/share/vm/opto/classes.hpp src/share/vm/opto/compile.cpp src/share/vm/opto/graphKit.hpp src/share/vm/opto/library_call.cpp src/share/vm/opto/opaquenode.cpp src/share/vm/opto/opaquenode.hpp src/share/vm/opto/parse.hpp src/share/vm/opto/parse2.cpp
diffstat 11 files changed, 185 insertions(+), 28 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/vm/ci/ciMethod.cpp	Wed Jan 28 07:55:27 2015 +0100
+++ b/src/share/vm/ci/ciMethod.cpp	Thu Jan 29 10:25:59 2015 -0800
@@ -70,7 +70,8 @@
 // Loaded method.
 ciMethod::ciMethod(methodHandle h_m, ciInstanceKlass* holder) :
   ciMetadata(h_m()),
-  _holder(holder)
+  _holder(holder),
+  _has_injected_profile(false)
 {
   assert(h_m() != NULL, "no null method");
 
@@ -168,7 +169,8 @@
   _liveness(               NULL),
   _can_be_statically_bound(false),
   _method_blocks(          NULL),
-  _method_data(            NULL)
+  _method_data(            NULL),
+  _has_injected_profile(   false)
 #if defined(COMPILER2) || defined(SHARK)
   ,
   _flow(                   NULL),
--- a/src/share/vm/ci/ciMethod.hpp	Wed Jan 28 07:55:27 2015 +0100
+++ b/src/share/vm/ci/ciMethod.hpp	Thu Jan 29 10:25:59 2015 -0800
@@ -79,6 +79,7 @@
   bool _is_c1_compilable;
   bool _is_c2_compilable;
   bool _can_be_statically_bound;
+  bool _has_injected_profile;
 
   // Lazy fields, filled in on demand
   address              _code;
@@ -286,6 +287,9 @@
   int instructions_size();
   int scale_count(int count, float prof_factor = 1.);  // make MDO count commensurate with IIC
 
+  bool has_injected_profile() const { return _has_injected_profile;     }
+  void set_injected_profile(bool x) {        _has_injected_profile = x; }
+
   // Stack walking support
   bool is_ignored_by_security_stack_walk() const;
 
--- a/src/share/vm/classfile/vmSymbols.hpp	Wed Jan 28 07:55:27 2015 +0100
+++ b/src/share/vm/classfile/vmSymbols.hpp	Thu Jan 29 10:25:59 2015 -0800
@@ -243,7 +243,6 @@
   template(returnType_name,                           "returnType")                               \
   template(signature_name,                            "signature")                                \
   template(slot_name,                                 "slot")                                     \
-  template(selectAlternative_name,                    "selectAlternative")                        \
                                                                                                   \
   /* Support for annotations (JDK 1.5 and above) */                                               \
                                                                                                   \
@@ -295,8 +294,7 @@
   template(setTarget_signature,                       "(Ljava/lang/invoke/MethodHandle;)V")       \
   NOT_LP64(  do_alias(intptr_signature,               int_signature)  )                           \
   LP64_ONLY( do_alias(intptr_signature,               long_signature) )                           \
-  template(selectAlternative_signature, "(ZLjava/lang/invoke/MethodHandle;Ljava/lang/invoke/MethodHandle;)Ljava/lang/invoke/MethodHandle;") \
-                                                                      \
+                                                                                                  \
   /* common method and field names */                                                             \
   template(object_initializer_name,                   "<init>")                                   \
   template(class_initializer_name,                    "<clinit>")                                 \
@@ -868,6 +866,12 @@
    do_name(     fullFence_name,                                  "fullFence")                                           \
    do_alias(    fullFence_signature,                              void_method_signature)                                \
                                                                                                                         \
+  /* Custom branch frequencies profiling support for JSR292 */                                                          \
+  do_class(java_lang_invoke_MethodHandleImpl,               "java/lang/invoke/MethodHandleImpl")                        \
+  do_intrinsic(_profileBoolean, java_lang_invoke_MethodHandleImpl, profileBoolean_name, profileBoolean_signature,    F_S)  \
+   do_name(     profileBoolean_name,                               "profileBoolean")                                     \
+   do_signature(profileBoolean_signature,                           "(Z[I)Z")                                            \
+                                                                                                                        \
   /* unsafe memory references (there are a lot of them...) */                                                           \
   do_signature(getObject_signature,       "(Ljava/lang/Object;J)Ljava/lang/Object;")                                    \
   do_signature(putObject_signature,       "(Ljava/lang/Object;JLjava/lang/Object;)V")                                   \
--- a/src/share/vm/opto/classes.hpp	Wed Jan 28 07:55:27 2015 +0100
+++ b/src/share/vm/opto/classes.hpp	Thu Jan 29 10:25:59 2015 -0800
@@ -200,6 +200,7 @@
 macro(Opaque1)
 macro(Opaque2)
 macro(Opaque3)
+macro(ProfileBoolean)
 macro(OrI)
 macro(OrL)
 macro(OverflowAddI)
--- a/src/share/vm/opto/compile.cpp	Wed Jan 28 07:55:27 2015 +0100
+++ b/src/share/vm/opto/compile.cpp	Thu Jan 29 10:25:59 2015 -0800
@@ -3105,6 +3105,7 @@
   default:
     assert( !n->is_Call(), "" );
     assert( !n->is_Mem(), "" );
+    assert( nop != Op_ProfileBoolean, "should be eliminated during IGVN");
     break;
   }
 
@@ -3321,6 +3322,9 @@
 bool Compile::too_many_traps(ciMethod* method,
                              int bci,
                              Deoptimization::DeoptReason reason) {
+  if (method->has_injected_profile()) {
+    return false;
+  }
   ciMethodData* md = method->method_data();
   if (md->is_empty()) {
     // Assume the trap has not occurred, or that it occurred only
@@ -3370,6 +3374,9 @@
 bool Compile::too_many_recompiles(ciMethod* method,
                                   int bci,
                                   Deoptimization::DeoptReason reason) {
+  if (method->has_injected_profile()) {
+    return false;
+  }
   ciMethodData* md = method->method_data();
   if (md->is_empty()) {
     // Assume the trap has not occurred, or that it occurred only
--- a/src/share/vm/opto/graphKit.hpp	Wed Jan 28 07:55:27 2015 +0100
+++ b/src/share/vm/opto/graphKit.hpp	Thu Jan 29 10:25:59 2015 -0800
@@ -714,6 +714,15 @@
                   klass, reason_string, must_throw, keep_exact_action);
   }
 
+  // Bail out to the interpreter and keep exact action (avoid switching to Action_none).
+  void uncommon_trap_exact(Deoptimization::DeoptReason reason,
+                           Deoptimization::DeoptAction action,
+                           ciKlass* klass = NULL, const char* reason_string = NULL,
+                           bool must_throw = false) {
+    uncommon_trap(Deoptimization::make_trap_request(reason, action),
+                  klass, reason_string, must_throw, /*keep_exact_action=*/true);
+  }
+
   // SP when bytecode needs to be reexecuted.
   virtual int reexecute_sp() { return sp(); }
 
--- a/src/share/vm/opto/library_call.cpp	Wed Jan 28 07:55:27 2015 +0100
+++ b/src/share/vm/opto/library_call.cpp	Thu Jan 29 10:25:59 2015 -0800
@@ -41,6 +41,7 @@
 #include "opto/movenode.hpp"
 #include "opto/mulnode.hpp"
 #include "opto/narrowptrnode.hpp"
+#include "opto/opaquenode.hpp"
 #include "opto/parse.hpp"
 #include "opto/runtime.hpp"
 #include "opto/subnode.hpp"
@@ -287,6 +288,8 @@
   bool inline_updateBytesCRC32();
   bool inline_updateByteBufferCRC32();
   bool inline_multiplyToLen();
+
+  bool inline_profileBoolean();
 };
 
 
@@ -900,6 +903,9 @@
   case vmIntrinsics::_updateByteBufferCRC32:
     return inline_updateByteBufferCRC32();
 
+  case vmIntrinsics::_profileBoolean:
+    return inline_profileBoolean();
+
   default:
     // If you get here, it may be that someone has added a new intrinsic
     // to the list in vmSymbols.hpp without implementing it here.
@@ -5867,3 +5873,47 @@
 
   return instof_false;  // even if it is NULL
 }
+
+bool LibraryCallKit::inline_profileBoolean() {
+  Node* counts = argument(1);
+  const TypeAryPtr* ary = NULL;
+  ciArray* aobj = NULL;
+  if (counts->is_Con()
+      && (ary = counts->bottom_type()->isa_aryptr()) != NULL
+      && (aobj = ary->const_oop()->as_array()) != NULL
+      && (aobj->length() == 2)) {
+    // Profile is int[2] where [0] and [1] correspond to false and true value occurrences respectively.
+    jint false_cnt = aobj->element_value(0).as_int();
+    jint  true_cnt = aobj->element_value(1).as_int();
+
+    method()->set_injected_profile(true);
+
+    if (C->log() != NULL) {
+      C->log()->elem("observe source='profileBoolean' false='%d' true='%d'",
+                     false_cnt, true_cnt);
+    }
+
+    if (false_cnt + true_cnt == 0) {
+      // According to profile, never executed.
+      uncommon_trap_exact(Deoptimization::Reason_intrinsic,
+                          Deoptimization::Action_reinterpret);
+      return true;
+    }
+    // Stop profiling.
+    // MethodHandleImpl::profileBoolean() has profiling logic in it's bytecode.
+    // By replacing method's body with profile data (represented as ProfileBooleanNode
+    // on IR level) we effectively disable profiling.
+    // It enables full speed execution once optimized code is generated.
+    Node* profile = _gvn.transform(new ProfileBooleanNode(argument(0), false_cnt, true_cnt));
+    C->record_for_igvn(profile);
+    set_result(profile);
+    return true;
+  } else {
+    // Continue profiling.
+    // Profile data isn't available at the moment. So, execute method's bytecode version.
+    // Usually, when GWT LambdaForms are profiled it means that a stand-alone nmethod
+    // is compiled and counters aren't available since corresponding MethodHandle
+    // isn't a compile-time constant.
+    return false;
+  }
+}
--- a/src/share/vm/opto/opaquenode.cpp	Wed Jan 28 07:55:27 2015 +0100
+++ b/src/share/vm/opto/opaquenode.cpp	Thu Jan 29 10:25:59 2015 -0800
@@ -60,4 +60,27 @@
   return (&n == this);          // Always fail except on self
 }
 
+//=============================================================================
 
+uint ProfileBooleanNode::hash() const { return NO_HASH; }
+uint ProfileBooleanNode::cmp( const Node &n ) const {
+  return (&n == this);
+}
+
+Node *ProfileBooleanNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+  if (can_reshape && _delay_removal) {
+    _delay_removal = false;
+    return this;
+  } else {
+    return NULL;
+  }
+}
+
+Node *ProfileBooleanNode::Identity( PhaseTransform *phase ) {
+  if (_delay_removal) {
+    return this;
+  } else {
+    assert(_consumed, "profile should be consumed before elimination");
+    return in(1);
+  }
+}
--- a/src/share/vm/opto/opaquenode.hpp	Wed Jan 28 07:55:27 2015 +0100
+++ b/src/share/vm/opto/opaquenode.hpp	Thu Jan 29 10:25:59 2015 -0800
@@ -87,5 +87,31 @@
   bool rtm_opt() const { return (_opt == RTM_OPT); }
 };
 
+//------------------------------ProfileBooleanNode-------------------------------
+// A node represents value profile for a boolean during parsing.
+// Once parsing is over, the node goes away (during IGVN).
+// It is used to override branch frequencies from MDO (see has_injected_profile in parse2.cpp).
+class ProfileBooleanNode : public Node {
+  uint _false_cnt;
+  uint _true_cnt;
+  bool _consumed;
+  bool _delay_removal;
+  virtual uint hash() const ;                  // { return NO_HASH; }
+  virtual uint cmp( const Node &n ) const;
+  public:
+  ProfileBooleanNode(Node *n, uint false_cnt, uint true_cnt) : Node(0, n),
+          _false_cnt(false_cnt), _true_cnt(true_cnt), _delay_removal(true), _consumed(false) {}
+
+  uint false_count() const { return _false_cnt; }
+  uint  true_count() const { return  _true_cnt; }
+
+  void consume() { _consumed = true;  }
+
+  virtual int Opcode() const;
+  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+  virtual Node *Identity(PhaseTransform *phase);
+  virtual const Type *bottom_type() const { return TypeInt::BOOL; }
+};
+
 #endif // SHARE_VM_OPTO_OPAQUENODE_HPP
 
--- a/src/share/vm/opto/parse.hpp	Wed Jan 28 07:55:27 2015 +0100
+++ b/src/share/vm/opto/parse.hpp	Thu Jan 29 10:25:59 2015 -0800
@@ -555,8 +555,8 @@
   void do_jsr();
   void do_ret();
 
-  float   dynamic_branch_prediction(float &cnt);
-  float   branch_prediction(float &cnt, BoolTest::mask btest, int target_bci);
+  float   dynamic_branch_prediction(float &cnt, BoolTest::mask btest, Node* test);
+  float   branch_prediction(float &cnt, BoolTest::mask btest, int target_bci, Node* test);
   bool    seems_never_taken(float prob) const;
   bool    path_is_suitable_for_uncommon_trap(float prob) const;
   bool    seems_stable_comparison() const;
--- a/src/share/vm/opto/parse2.cpp	Wed Jan 28 07:55:27 2015 +0100
+++ b/src/share/vm/opto/parse2.cpp	Thu Jan 29 10:25:59 2015 -0800
@@ -37,6 +37,7 @@
 #include "opto/matcher.hpp"
 #include "opto/memnode.hpp"
 #include "opto/mulnode.hpp"
+#include "opto/opaquenode.hpp"
 #include "opto/parse.hpp"
 #include "opto/runtime.hpp"
 #include "runtime/deoptimization.hpp"
@@ -763,35 +764,64 @@
   merge_common(target, pnum);
 }
 
+static bool has_injected_profile(BoolTest::mask btest, Node* test, int& taken, int& not_taken) {
+  if (btest != BoolTest::eq && btest != BoolTest::ne) {
+    // Only ::eq and ::ne are supported for profile injection.
+    return false;
+  }
+  if (test->is_Cmp() &&
+      test->in(1)->Opcode() == Op_ProfileBoolean) {
+    ProfileBooleanNode* profile = (ProfileBooleanNode*)test->in(1);
+    int false_cnt = profile->false_count();
+    int  true_cnt = profile->true_count();
+
+    // Counts matching depends on the actual test operation (::eq or ::ne).
+    // No need to scale the counts because profile injection was designed
+    // to feed exact counts into VM.
+    taken     = (btest == BoolTest::eq) ? false_cnt :  true_cnt;
+    not_taken = (btest == BoolTest::eq) ?  true_cnt : false_cnt;
+
+    profile->consume();
+    return true;
+  }
+  return false;
+}
 //--------------------------dynamic_branch_prediction--------------------------
 // Try to gather dynamic branch prediction behavior.  Return a probability
 // of the branch being taken and set the "cnt" field.  Returns a -1.0
 // if we need to use static prediction for some reason.
-float Parse::dynamic_branch_prediction(float &cnt) {
+float Parse::dynamic_branch_prediction(float &cnt, BoolTest::mask btest, Node* test) {
   ResourceMark rm;
 
   cnt  = COUNT_UNKNOWN;
 
-  // Use MethodData information if it is available
-  // FIXME: free the ProfileData structure
-  ciMethodData* methodData = method()->method_data();
-  if (!methodData->is_mature())  return PROB_UNKNOWN;
-  ciProfileData* data = methodData->bci_to_data(bci());
-  if (!data->is_JumpData())  return PROB_UNKNOWN;
+  int     taken = 0;
+  int not_taken = 0;
 
-  // get taken and not taken values
-  int     taken = data->as_JumpData()->taken();
-  int not_taken = 0;
-  if (data->is_BranchData()) {
-    not_taken = data->as_BranchData()->not_taken();
+  bool use_mdo = !has_injected_profile(btest, test, taken, not_taken);
+
+  if (use_mdo) {
+    // Use MethodData information if it is available
+    // FIXME: free the ProfileData structure
+    ciMethodData* methodData = method()->method_data();
+    if (!methodData->is_mature())  return PROB_UNKNOWN;
+    ciProfileData* data = methodData->bci_to_data(bci());
+    if (!data->is_JumpData())  return PROB_UNKNOWN;
+
+    // get taken and not taken values
+    taken = data->as_JumpData()->taken();
+    not_taken = 0;
+    if (data->is_BranchData()) {
+      not_taken = data->as_BranchData()->not_taken();
+    }
+
+    // scale the counts to be commensurate with invocation counts:
+    taken = method()->scale_count(taken);
+    not_taken = method()->scale_count(not_taken);
   }
 
-  // scale the counts to be commensurate with invocation counts:
-  taken = method()->scale_count(taken);
-  not_taken = method()->scale_count(not_taken);
-
   // Give up if too few (or too many, in which case the sum will overflow) counts to be meaningful.
-  // We also check that individual counters are positive first, overwise the sum can become positive.
+  // We also check that individual counters are positive first, otherwise the sum can become positive.
   if (taken < 0 || not_taken < 0 || taken + not_taken < 40) {
     if (C->log() != NULL) {
       C->log()->elem("branch target_bci='%d' taken='%d' not_taken='%d'", iter().get_dest(), taken, not_taken);
@@ -841,8 +871,9 @@
 //-----------------------------branch_prediction-------------------------------
 float Parse::branch_prediction(float& cnt,
                                BoolTest::mask btest,
-                               int target_bci) {
-  float prob = dynamic_branch_prediction(cnt);
+                               int target_bci,
+                               Node* test) {
+  float prob = dynamic_branch_prediction(cnt, btest, test);
   // If prob is unknown, switch to static prediction
   if (prob != PROB_UNKNOWN)  return prob;
 
@@ -932,7 +963,7 @@
   Block* next_block   = successor_for_bci(iter().next_bci());
 
   float cnt;
-  float prob = branch_prediction(cnt, btest, target_bci);
+  float prob = branch_prediction(cnt, btest, target_bci, c);
   if (prob == PROB_UNKNOWN) {
     // (An earlier version of do_ifnull omitted this trap for OSR methods.)
 #ifndef PRODUCT
@@ -1013,7 +1044,7 @@
   Block* next_block   = successor_for_bci(iter().next_bci());
 
   float cnt;
-  float prob = branch_prediction(cnt, btest, target_bci);
+  float prob = branch_prediction(cnt, btest, target_bci, c);
   float untaken_prob = 1.0 - prob;
 
   if (prob == PROB_UNKNOWN) {