changeset 2203:5d8f5a6dced7

7020403: Add AdvancedCompilationPolicy for tiered Summary: This implements adaptive tiered compilation policy. Reviewed-by: kvn, never
author iveresov
date Fri, 04 Mar 2011 15:14:16 -0800
parents 8c9c9ee30d71
children 8ec5e1f45ea1
files src/share/vm/oops/methodKlass.cpp src/share/vm/oops/methodOop.hpp src/share/vm/runtime/advancedThresholdPolicy.cpp src/share/vm/runtime/advancedThresholdPolicy.hpp src/share/vm/runtime/arguments.cpp src/share/vm/runtime/compilationPolicy.cpp
diffstat 6 files changed, 696 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/vm/oops/methodKlass.cpp	Thu Mar 03 23:31:45 2011 -0800
+++ b/src/share/vm/oops/methodKlass.cpp	Fri Mar 04 15:14:16 2011 -0800
@@ -103,6 +103,12 @@
   m->backedge_counter()->init();
   m->clear_number_of_breakpoints();
 
+#ifdef TIERED
+  m->set_rate(0);
+  m->set_prev_event_count(0);
+  m->set_prev_time(0);
+#endif
+
   assert(m->is_parsable(), "must be parsable here.");
   assert(m->size() == size, "wrong size for object");
   // We should not publish an uprasable object's reference
--- a/src/share/vm/oops/methodOop.hpp	Thu Mar 03 23:31:45 2011 -0800
+++ b/src/share/vm/oops/methodOop.hpp	Fri Mar 04 15:14:16 2011 -0800
@@ -84,6 +84,11 @@
 // | invocation_counter                                   |
 // | backedge_counter                                     |
 // |------------------------------------------------------|
+// |           prev_time (tiered only, 64 bit wide)       |
+// |                                                      |
+// |------------------------------------------------------|
+// |                  rate (tiered)                       |
+// |------------------------------------------------------|
 // | code                           (pointer)             |
 // | i2i                            (pointer)             |
 // | adapter                        (pointer)             |
@@ -124,6 +129,11 @@
   InvocationCounter _invocation_counter;         // Incremented before each activation of the method - used to trigger frequency-based optimizations
   InvocationCounter _backedge_counter;           // Incremented before each backedge taken - used to trigger frequencey-based optimizations
 
+#ifdef TIERED
+  jlong             _prev_time;                   // Previous time the rate was acquired
+  float             _rate;                        // Events (invocation and backedge counter increments) per millisecond
+#endif
+
 #ifndef PRODUCT
   int               _compiled_invocation_count;  // Number of nmethod invocations so far (for perf. debugging)
 #endif
@@ -304,6 +314,17 @@
   InvocationCounter* invocation_counter() { return &_invocation_counter; }
   InvocationCounter* backedge_counter()   { return &_backedge_counter; }
 
+#ifdef TIERED
+  // We are reusing interpreter_invocation_count as a holder for the previous event count!
+  // We can do that since interpreter_invocation_count is not used in tiered.
+  int prev_event_count() const                   { return _interpreter_invocation_count;  }
+  void set_prev_event_count(int count)           { _interpreter_invocation_count = count; }
+  jlong prev_time() const                        { return _prev_time; }
+  void set_prev_time(jlong time)                 { _prev_time = time; }
+  float rate() const                             { return _rate; }
+  void set_rate(float rate)                      { _rate = rate; }
+#endif
+
   int invocation_count();
   int backedge_count();
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/runtime/advancedThresholdPolicy.cpp	Fri Mar 04 15:14:16 2011 -0800
@@ -0,0 +1,450 @@
+/*
+* Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved.
+* ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
+*/
+
+#include "precompiled.hpp"
+#include "runtime/advancedThresholdPolicy.hpp"
+#include "runtime/simpleThresholdPolicy.inline.hpp"
+
+#ifdef TIERED
+// Print the policy-specific tail of an event line: event rate and both threshold scales.
+void AdvancedThresholdPolicy::print_specific(EventType type, methodHandle mh, methodHandle imh,
+                                             int bci, CompLevel level) {
+  tty->print(" rate: ");
+  if (mh->prev_time() != 0) tty->print("%f", mh->rate());
+  else tty->print("n/a");  // prev_time() is still 0, so no rate sample was recorded
+
+  const double k3 = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback);
+  const double k4 = threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback);
+  tty->print(" k: %.2lf,%.2lf", k3, k4);
+}
+
+void AdvancedThresholdPolicy::initialize() {  // policy setup: compiler thread counts, inlining defaults, start time
+  // Turn on ergonomic compiler count selection, but only if both count flags still have their defaults
+  if (FLAG_IS_DEFAULT(CICompilerCountPerCPU) && FLAG_IS_DEFAULT(CICompilerCount)) {
+    FLAG_SET_DEFAULT(CICompilerCountPerCPU, true);
+  }
+  int count = CICompilerCount;  // used as is unless per-CPU sizing replaces it below
+  if (CICompilerCountPerCPU) {
+    // Simple log n seems to grow too slowly for tiered, try something faster: log n * log log n
+    int log_cpu = log2_intptr(os::active_processor_count());
+    int loglog_cpu = log2_intptr(MAX2(log_cpu, 1));
+    count = MAX2(log_cpu * loglog_cpu, 1) * 3 / 2;
+  }
+
+  set_c1_count(MAX2(count / 3, 1));          // a third of the threads for C1, at least one
+  set_c2_count(MAX2(count - count / 3, 1));  // the remainder for C2, at least one
+
+  // Some inlining tuning: override InlineSmallCode on x86 and SPARC while it still has its default
+#ifdef X86
+  if (FLAG_IS_DEFAULT(InlineSmallCode)) {
+    FLAG_SET_DEFAULT(InlineSmallCode, 2000);
+  }
+#endif
+
+#ifdef SPARC
+  if (FLAG_IS_DEFAULT(InlineSmallCode)) {
+    FLAG_SET_DEFAULT(InlineSmallCode, 2500);
+  }
+#endif
+
+  // Reference point that update_rate() falls back to when a method has no previous rate sample.
+  set_start_time(os::javaTimeMillis());
+}
+
+// update_rate() is called from select_task() (compile queue locked) and from submit_compile().
+void AdvancedThresholdPolicy::update_rate(jlong t, methodOop m) {
+  if (is_old(m)) {
+    // We don't remove old methods from the queue,
+    // so we can just zero the rate.
+    m->set_rate(0);
+    return;
+  }
+
+  // We don't update the rate if we've just come out of a safepoint.
+  // delta_s is the time since last safepoint in milliseconds.
+  jlong delta_s = t - SafepointSynchronize::end_of_last_safepoint();
+  jlong delta_t = t - (m->prev_time() != 0 ? m->prev_time() : start_time()); // ms since the last measurement, or since start_time() if there was none
+  // How many events were there since the last time?
+  int event_count = m->invocation_count() + m->backedge_count();
+  int delta_e = event_count - m->prev_event_count();
+
+  // We should have been running for at least TieredRateUpdateMinTime ms since the last safepoint.
+  if (delta_s >= TieredRateUpdateMinTime) {
+    // And we must've taken the previous point at least TieredRateUpdateMinTime ms before.
+    if (delta_t >= TieredRateUpdateMinTime && delta_e > 0) {
+      m->set_prev_time(t);
+      m->set_prev_event_count(event_count);
+      m->set_rate((float)delta_e / (float)delta_t); // Rate is events per millisecond
+    } else
+      if (delta_t > TieredRateUpdateMaxTime && delta_e == 0) {
+        // If nothing happened for more than TieredRateUpdateMaxTime ms, zero the rate. Don't modify prev values.
+        m->set_rate(0);
+      }
+  }
+}
+
+// Returns true when m has produced no new events for more than `timeout` milliseconds,
+// measured both from its last rate sample and from the last safepoint. See select_task().
+bool AdvancedThresholdPolicy::is_stale(jlong t, jlong timeout, methodOop m) {
+  const jlong since_safepoint = t - SafepointSynchronize::end_of_last_safepoint();
+  const jlong since_sample = t - m->prev_time();
+  if (since_sample <= timeout || since_safepoint <= timeout) {
+    // Too soon after the last sample or the last safepoint to judge.
+    return false;
+  }
+  // Stale only if the combined counters have not moved since the last sample.
+  const int events_now = m->invocation_count() + m->backedge_count();
+  return events_now == m->prev_event_count();
+}
+
+// Old methods (a counter past its fixed limit) are never dropped from the compile queue; see select_task().
+bool AdvancedThresholdPolicy::is_old(methodOop method) {
+  if (method->invocation_count() > 50000) return true;
+  return method->backedge_count() > 500000;
+}
+
+double AdvancedThresholdPolicy::weight(methodOop method) {  // queue priority; multiplied in double because the int counter product can overflow
+  return (double)(method->rate() + 1) * (method->invocation_count() + 1) * (method->backedge_count() + 1);
+}
+
+// Ordering used by select_task(): returns true if x should be compiled before y.
+// A higher highest_comp_level() wins outright; equal levels are decided by weight().
+bool AdvancedThresholdPolicy::compare_methods(methodOop x, methodOop y) {
+  const int x_level = x->highest_comp_level();
+  const int y_level = y->highest_comp_level();
+  if (x_level > y_level) {
+    return true;   // recompilation after deopt
+  }
+  if (x_level < y_level) {
+    return false;
+  }
+  return weight(x) > weight(y);
+}
+
+// True when the MDO counter deltas already satisfy the level 3 call predicate at scale 1.
+bool AdvancedThresholdPolicy::is_method_profiled(methodOop method) {
+  methodDataOop profile = method->method_data();
+  if (profile == NULL) {
+    return false;  // no MDO, so nothing has been profiled
+  }
+  const int invocations = profile->invocation_count_delta();
+  const int backedges = profile->backedge_count_delta();
+  return call_predicate_helper<CompLevel_full_profile>(invocations, backedges, 1);
+}
+
+// Pick the next task to compile. Called with the queue locked and with at least one element.
+CompileTask* AdvancedThresholdPolicy::select_task(CompileQueue* compile_queue) {
+  CompileTask *max_task = NULL;
+  methodOop max_method = NULL;  // always assigned together with max_task
+  jlong t = os::javaTimeMillis();
+  // Iterate through the queue and find the task that compare_methods() ranks first.
+  for (CompileTask* task = compile_queue->first(); task != NULL;) {
+    // Remember the successor now: the current task may be removed and freed below.
+    CompileTask* next_task = task->next();
+    methodOop method = (methodOop)JNIHandles::resolve(task->method_handle());
+    update_rate(t, method);
+    if (max_task == NULL) {  // the first task only seeds the maximum; it is not checked for staleness
+      max_task = task;
+      max_method = method;
+    } else {
+      // If a method has been stale for some time, remove it from the queue.
+      if (is_stale(t, TieredCompileTaskTimeout, method) && !is_old(method)) {
+        if (PrintTieredEvents) {
+          print_event(KILL, method, method, task->osr_bci(), (CompLevel)task->comp_level());
+        }
+        CompileTaskWrapper ctw(task); // Frees the task
+        compile_queue->remove(task);
+        method->clear_queued_for_compilation();
+        task = next_task;
+        continue;
+      }
+
+      // Select the method that should be compiled first
+      if (compare_methods(method, max_method)) {
+        max_task = task;
+        max_method = method;
+      }
+    }
+    task = next_task;
+  }
+
+  if (max_task->comp_level() == CompLevel_full_profile && is_method_profiled(max_method)) {
+    max_task->set_comp_level(CompLevel_limited_profile);  // already profiled enough: compile at level 2 instead of level 3
+    if (PrintTieredEvents) {
+      print_event(UPDATE, max_method, max_method, max_task->osr_bci(), (CompLevel)max_task->comp_level());
+    }
+  }
+
+  return max_task;
+}
+
+double AdvancedThresholdPolicy::threshold_scale(CompLevel level, int feedback_k) {
+  // The scale grows by one for every feedback_k queued tasks per compiler thread of this level.
+  const double queued = CompileBroker::queue_size(level);
+  const int threads = compiler_count(level);
+  return queued / (feedback_k * threads) + 1;
+}
+
+// Call and loop predicates decide whether a method should move to a higher
+// compilation level (pointers to these member functions are passed to
+// common()). This one is the loop (backedge) variant.
+// Tier?LoadFeedback is the coefficient that determines how many methods
+// per compiler thread can be in the queue before the threshold values
+// double.
+bool AdvancedThresholdPolicy::loop_predicate(int i, int b, CompLevel cur_level) {
+  if (cur_level == CompLevel_none || cur_level == CompLevel_limited_profile) {
+    // Levels 0 and 2: scale by the load on the level 3 compile queue.
+    const double scale = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback);
+    return loop_predicate_helper<CompLevel_none>(i, b, scale);
+  }
+
+  if (cur_level == CompLevel_full_profile) {
+    // Level 3: scale by the load on the level 4 compile queue.
+    const double scale = threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback);
+    return loop_predicate_helper<CompLevel_full_profile>(i, b, scale);
+  }
+
+  return true;  // any other level passes unconditionally
+}
+
+bool AdvancedThresholdPolicy::call_predicate(int i, int b, CompLevel cur_level) {
+  // Invocation-based counterpart of loop_predicate(); the scale reflects compile queue load.
+  if (cur_level == CompLevel_none || cur_level == CompLevel_limited_profile) {
+    const double scale = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback);
+    return call_predicate_helper<CompLevel_none>(i, b, scale);
+  }
+
+  if (cur_level == CompLevel_full_profile) {
+    const double scale = threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback);
+    return call_predicate_helper<CompLevel_full_profile>(i, b, scale);
+  }
+
+  // Every other level passes unconditionally.
+  return true;
+}
+
+// Decide whether an interpreted method should start profiling (get an MDO) before any
+// compiled version arrives. Only considered while the C2 queue is within the Tier3DelayOn
+// limit; uses the level 0 predicates scaled by Tier0ProfilingStartPercentage.
+bool AdvancedThresholdPolicy::should_create_mdo(methodOop method, CompLevel cur_level) {
+  if (cur_level != CompLevel_none) return false;
+  if (CompileBroker::queue_size(CompLevel_full_optimization) >
+      Tier3DelayOn * compiler_count(CompLevel_full_optimization)) {
+    return false;  // the C2 queue is over the Tier3DelayOn limit
+  }
+  const int invocations = method->invocation_count();
+  const int backedges = method->backedge_count();
+  const double scale = Tier0ProfilingStartPercentage / 100.0;
+  return call_predicate_helper<CompLevel_none>(invocations, backedges, scale) || loop_predicate_helper<CompLevel_none>(invocations, backedges, scale);
+}
+
+// Build an MDO for mh unless it already has one or is native, abstract or an accessor.
+void AdvancedThresholdPolicy::create_mdo(methodHandle mh, TRAPS) {
+  const bool not_profilable = mh->is_native() || mh->is_abstract() || mh->is_accessor();
+  if (not_profilable || mh->method_data() != NULL) return;
+
+  methodOopDesc::build_interpreter_method_data(mh, THREAD);
+  if (HAS_PENDING_EXCEPTION) {
+    CLEAR_PENDING_EXCEPTION;  // any exception raised while building the MDO is discarded
+  }
+}
+
+
+/*
+ * Method states:
+ *   0 - interpreter (CompLevel_none)
+ *   1 - pure C1 (CompLevel_simple)
+ *   2 - C1 with invocation and backedge counting (CompLevel_limited_profile)
+ *   3 - C1 with full profiling (CompLevel_full_profile)
+ *   4 - C2 (CompLevel_full_optimization)
+ *
+ * Common state transition patterns:
+ * a. 0 -> 3 -> 4.
+ *    The most common path. But note that even in this straightforward case
+ *    profiling can start at level 0 and finish at level 3.
+ *
+ * b. 0 -> 2 -> 3 -> 4.
+ *    This case occurs when the load on C2 is deemed too high. So, instead of transitioning
+ *    into state 3 directly and over-profiling while a method is in the C2 queue we transition to
+ *    level 2 and wait until the load on C2 decreases. This path is disabled for OSRs.
+ *
+ * c. 0 -> (3->2) -> 4.
+ *    In this case we enqueue a method for compilation at level 3, but the C1 queue is long enough
+ *    to enable the profiling to fully occur at level 0. In this case we change the compilation level
+ *    of the method to 2, because it'll allow it to run much faster without full profiling while c2
+ *    is compiling.
+ *
+ * d. 0 -> 3 -> 1 or 0 -> 2 -> 1.
+ *    After a method was once compiled with C1 it can be identified as trivial and be compiled to
+ *    level 1. These transitions can also occur if a method can't be compiled with C2 but can with C1.
+ *
+ * e. 0 -> 4.
+ *    This can happen if a method fails C1 compilation (it will still be profiled in the interpreter)
+ *    or because of a deopt that didn't require reprofiling (compilation won't happen in this case because
+ *    the compiled version already exists).
+ *
+ * Note that since state 0 can be reached from any other state via deoptimization different loops
+ * are possible.
+ *
+ */
+
+// Common transition function. Given a predicate p, determines the level a method currently at cur_level should move to.
+CompLevel AdvancedThresholdPolicy::common(Predicate p, methodOop method, CompLevel cur_level) {
+  if (is_trivial(method)) return CompLevel_simple;  // trivial methods always go to level 1
+
+  CompLevel next_level = cur_level;  // default: stay where we are
+  int i = method->invocation_count();
+  int b = method->backedge_count();
+
+  switch(cur_level) {
+  case CompLevel_none:
+    // If we were at full profile level, would we switch to full opt?
+    if (common(p, method, CompLevel_full_profile) == CompLevel_full_optimization) {
+      next_level = CompLevel_full_optimization;
+    } else if ((this->*p)(i, b, cur_level)) {
+      // C1-generated fully profiled code is about 30% slower than the limited profile
+      // code that has only invocation and backedge counters. The observation is that
+      // if C2 queue is large enough we can spend too much time in the fully profiled code
+      // while waiting for C2 to pick the method from the queue. To alleviate this problem
+      // we introduce a feedback on the C2 queue size. If the C2 queue is sufficiently long
+      // we choose to compile a limited profiled version and then recompile with full profiling
+      // when the load on C2 goes down.
+      if (CompileBroker::queue_size(CompLevel_full_optimization) >
+          Tier3DelayOn * compiler_count(CompLevel_full_optimization)) {
+        next_level = CompLevel_limited_profile;
+      } else {
+        next_level = CompLevel_full_profile;
+      }
+    }
+    break;
+  case CompLevel_limited_profile:
+    if (is_method_profiled(method)) {
+      // Special case: we got here because this method was fully profiled in the interpreter.
+      next_level = CompLevel_full_optimization;
+    } else {
+      methodDataOop mdo = method->method_data();
+      if (mdo != NULL) {  // without an MDO the method stays at level 2
+        if (mdo->would_profile()) {
+          if (CompileBroker::queue_size(CompLevel_full_optimization) <=
+              Tier3DelayOff * compiler_count(CompLevel_full_optimization) &&
+              (this->*p)(i, b, cur_level)) {
+            next_level = CompLevel_full_profile;  // C2 queue is back within the Tier3DelayOff limit
+          }
+        } else {
+          next_level = CompLevel_full_optimization;  // would_profile() is false: presumably level 3 has nothing to add
+        }
+      }
+    }
+    break;
+  case CompLevel_full_profile:
+    {
+      methodDataOop mdo = method->method_data();
+      if (mdo != NULL) {  // without an MDO the method stays at level 3
+        if (mdo->would_profile()) {
+          int mdo_i = mdo->invocation_count_delta();  // at level 3 the predicate is fed the MDO counter deltas,
+          int mdo_b = mdo->backedge_count_delta();    // not the methodOop counters read into i and b above
+          if ((this->*p)(mdo_i, mdo_b, cur_level)) {
+            next_level = CompLevel_full_optimization;
+          }
+        } else {
+          next_level = CompLevel_full_optimization;  // would_profile() is false: go to level 4 without checking the predicate
+        }
+      }
+    }
+    break;
+  }
+  return next_level;
+}
+
+// Determine if a method should be compiled with a normal entry point at a different level.
+CompLevel AdvancedThresholdPolicy::call_event(methodOop method,  CompLevel cur_level) {
+  const CompLevel highest_osr = (CompLevel) method->highest_osr_comp_level();
+  const CompLevel proposed = common(&AdvancedThresholdPolicy::call_predicate, method, cur_level);
+
+  const bool osr_is_ahead =
+      highest_osr == CompLevel_full_optimization && cur_level == CompLevel_full_profile;
+  if (!osr_is_ahead) {
+    // Never propose less than the level an OSR version has already reached.
+    return MAX2(highest_osr, proposed);
+  }
+
+  // A C2 OSR version exists while the regular entry is still at level 3. Equalize the
+  // levels by raising the regular entry, so that not every invocation ends in an OSR;
+  // this is done once the MDO has recorded at least one invocation.
+  methodDataOop mdo = method->method_data();
+  guarantee(mdo != NULL, "MDO should not be NULL");
+  if (mdo->invocation_count() >= 1) return CompLevel_full_optimization;
+  return proposed;
+}
+
+// Determine the level at which a given method should be OSR compiled.
+CompLevel AdvancedThresholdPolicy::loop_event(methodOop method, CompLevel cur_level) {
+  if (cur_level != CompLevel_none) {
+    return common(&AdvancedThresholdPolicy::loop_predicate, method, cur_level);
+  }
+  // In the interpreter: a live OSR method means that we deopted to the interpreter for
+  // the transition, so return the level that OSR method has already reached.
+  const CompLevel live_osr = (CompLevel)method->highest_osr_comp_level();
+  return (live_osr > CompLevel_none)
+      ? live_osr
+      : common(&AdvancedThresholdPolicy::loop_predicate, method, cur_level);
+}
+
+// Refresh the method's event rate, then hand the request to the compile broker.
+void AdvancedThresholdPolicy::submit_compile(methodHandle mh, int bci, CompLevel level, TRAPS) {
+  const int hot_count = (bci != InvocationEntryBci) ? mh->backedge_count() : mh->invocation_count();
+  update_rate(os::javaTimeMillis(), mh());
+  CompileBroker::compile_method(mh, bci, level, mh, hot_count, "tiered", THREAD);
+}
+
+
+// Handle the invocation event: maybe start profiling, then maybe request a compile.
+void AdvancedThresholdPolicy::method_invocation_event(methodHandle mh, methodHandle imh,
+                                                      CompLevel level, TRAPS) {
+  if (should_create_mdo(mh(), level)) create_mdo(mh, THREAD);
+
+  // Nothing to decide when compilation is off or a regular-entry compile is already queued.
+  if (!is_compilation_enabled()) return;
+  if (CompileBroker::compilation_is_in_queue(mh, InvocationEntryBci)) return;
+
+  const CompLevel target = call_event(mh(), level);
+  if (target == level) return;
+  compile(mh, InvocationEntryBci, target, THREAD);
+}
+
+// Handle the back branch event. Notice that we can compile the method
+// with a regular entry from here; at most one compile is requested per event.
+void AdvancedThresholdPolicy::method_back_branch_event(methodHandle mh, methodHandle imh,
+                                                       int bci, CompLevel level, TRAPS) {
+  if (should_create_mdo(mh(), level)) {
+    create_mdo(mh, THREAD);
+  }
+
+  // If an OSR compile for this bci is already queued (or compilation is disabled), quickly bail out.
+  if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh, bci)) {
+    // Use loop event as an opportunity to also check there's been
+    // enough calls.
+    CompLevel cur_level = comp_level(mh());
+    CompLevel next_level = call_event(mh(), cur_level);
+    CompLevel next_osr_level = loop_event(mh(), level);  // note: evaluated at the level passed in, not cur_level
+    if (next_osr_level  == CompLevel_limited_profile) {
+      next_osr_level = CompLevel_full_profile; // OSRs are supposed to be for very hot methods.
+    }
+    next_level = MAX2(next_level,
+                      next_osr_level < CompLevel_full_optimization ? next_osr_level : cur_level);  // a level 4 OSR proposal does not raise the regular entry
+    bool is_compiling = false;
+    if (next_level != cur_level) {
+      compile(mh, InvocationEntryBci, next_level, THREAD);
+      is_compiling = true;
+    }
+
+    // Do the OSR version, unless a regular-entry compile was just requested above
+    if (!is_compiling && next_osr_level != level) {
+      compile(mh, bci, next_osr_level, THREAD);
+    }
+  }
+}
+
+#endif // TIERED
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/runtime/advancedThresholdPolicy.hpp	Fri Mar 04 15:14:16 2011 -0800
@@ -0,0 +1,207 @@
+/*
+* Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved.
+* ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
+*/
+
+#ifndef SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP
+#define SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP
+
+#include "runtime/simpleThresholdPolicy.hpp"
+
+#ifdef TIERED
+class CompileTask;
+class CompileQueue;
+
+/*
+ *  The system supports 5 execution levels:
+ *  * level 0 - interpreter
+ *  * level 1 - C1 with full optimization (no profiling)
+ *  * level 2 - C1 with invocation and backedge counters
+ *  * level 3 - C1 with full profiling (level 2 + MDO)
+ *  * level 4 - C2
+ *
+ * Levels 0, 2 and 3 periodically notify the runtime about the current value of the counters
+ * (invocation counters and backedge counters). The frequency of these notifications is
+ * different at each level. These notifications are used by the policy to decide what transition
+ * to make.
+ *
+ * Execution starts at level 0 (interpreter), then the policy can decide either to compile the
+ * method at level 3 or level 2. The decision is based on the following factors:
+ *    1. The length of the C2 queue determines the next level. The observation is that level 2
+ * is generally faster than level 3 by about 30%, therefore we would want to minimize the time
+ * a method spends at level 3. We should only spend the time at level 3 that is necessary to get
+ * adequate profiling. So, if the C2 queue is long enough it is more beneficial to go first to
+ * level 2, because if we transitioned to level 3 we would be stuck there until our C2 compile
+ * request makes its way through the long queue. When the load on C2 recedes we are going to
+ * recompile at level 3 and start gathering profiling information.
+ *    2. The length of C1 queue is used to dynamically adjust the thresholds, so as to introduce
+ * additional filtering if the compiler is overloaded. The rationale is that by the time a
+ * method gets compiled it can become unused, so it doesn't make sense to put too much onto the
+ * queue.
+ *
+ * After profiling is completed at level 3 the transition is made to level 4. Again, the length
+ * of the C2 queue is used as a feedback to adjust the thresholds.
+ *
+ * After the first C1 compile some basic information is determined about the code like the number
+ * of the blocks and the number of the loops. Based on that it can be decided that a method
+ * is trivial and compiling it with C1 will yield the same code. In this case the method is
+ * compiled at level 1 instead of 4.
+ *
+ * We also support profiling at level 0. If C1 is slow enough to produce the level 3 version of
+ * the code and the C2 queue is sufficiently small we can decide to start profiling in the
+ * interpreter (and continue profiling in the compiled code once the level 3 version arrives).
+ * If the profiling at level 0 is fully completed before level 3 version is produced, a level 2
+ * version is compiled instead in order to run faster waiting for a level 4 version.
+ *
+ * Compile queues are implemented as priority queues - for each method in the queue we compute
+ * the event rate (the number of invocation and backedge counter increments per unit of time).
+ * When getting an element off the queue we pick the one with the largest rate. Maintaining the
+ * rate also allows us to remove stale methods (the ones that got on the queue but stopped
+ * being used shortly after that).
+*/
+
+/* Command line options:
+ * - Tier?InvokeNotifyFreqLog and Tier?BackedgeNotifyFreqLog control the frequency of method
+ *   invocation and backedge notifications. Basically every n-th invocation or backedge a mutator thread
+ *   makes a call into the runtime.
+ *
+ * - Tier?CompileThreshold, Tier?BackEdgeThreshold, Tier?MinInvocationThreshold control
+ *   compilation thresholds.
+ *   Level 2 thresholds are not used and are provided for option-compatibility and potential future use.
+ *   Other thresholds work as follows:
+ *
+ *   Transition from interpreter (level 0) to C1 with full profiling (level 3) happens when
+ *   the following predicate is true (X is the level):
+ *
+ *   i > TierXInvocationThreshold * s || (i > TierXMinInvocationThreshold * s  && i + b > TierXCompileThreshold * s),
+ *
+ *   where $i$ is the number of method invocations, $b$ number of backedges and $s$ is the scaling
+ *   coefficient that will be discussed further.
+ *   The intuition is to equalize the time that is spent profiling each method.
+ *   The same predicate is used to control the transition from level 3 to level 4 (C2). It should be
+ *   noted though that the thresholds are relative. Moreover i and b for the 0->3 transition come
+ *   from methodOop and for 3->4 transition they come from MDO (since profiled invocations are
+ *   counted separately).
+ *
+ *   OSR transitions are controlled simply with b > TierXBackEdgeThreshold * s predicates.
+ *
+ * - Tier?LoadFeedback options are used to automatically scale the predicates described above depending
+ *   on the compiler load. The scaling coefficients are computed as follows:
+ *
+ *   s = queue_size_X / (TierXLoadFeedback * compiler_count_X) + 1,
+ *
+ *   where queue_size_X is the current size of the compiler queue of level X, and compiler_count_X
+ *   is the number of level X compiler threads.
+ *
+ *   Basically these parameters describe how many methods should be in the compile queue
+ *   per compiler thread before the scaling coefficient increases by one.
+ *
+ *   This feedback provides the mechanism to automatically control the flow of compilation requests
+ *   depending on the machine speed, mutator load and other external factors.
+ *
+ * - Tier3DelayOn and Tier3DelayOff parameters control another important feedback loop.
+ *   Consider the following observation: a method compiled with full profiling (level 3)
+ *   is about 30% slower than a method at level 2 (just invocation and backedge counters, no MDO).
+ *   Normally, the following transitions will occur: 0->3->4. The problem arises when the C2 queue
+ *   gets congested and the 3->4 transition is delayed. While the method is in the C2 queue it continues
+ *   executing at level 3 for much longer time than is required by the predicate and at suboptimal speed.
+ *   The idea is to dynamically change the behavior of the system in such a way that if a substantial
+ *   load on C2 is detected we would first do the 0->2 transition allowing a method to run faster.
+ *   Then, when the load decreases, 2->3 transitions are allowed again.
+ *
+ *   Tier3Delay* parameters control this switching mechanism.
+ *   Tier3DelayOn is the number of methods in the C2 queue per compiler thread after which the policy
+ *   no longer does 0->3 transitions but does 0->2 transitions instead.
+ *   Tier3DelayOff switches the original behavior back when the number of methods in the C2 queue
+ *   per compiler thread falls below the specified amount.
+ *   The hysteresis is necessary to avoid jitter.
+ *
+ * - TieredCompileTaskTimeout is the amount of time an idle method can spend in the compile queue.
+ *   Basically, since we use the event rate d(i + b)/dt as a value of priority when selecting a method to
+ *   compile from the compile queue, we also can detect stale methods for which the rate has been
+ *   0 for some time in the same iteration. Stale methods can appear in the queue when an application
+ *   abruptly changes its behavior.
+ *
+ * - TieredStopAtLevel, is used mostly for testing. It allows to bypass the policy logic and stick
+ *   to a given level. For example it's useful to set TieredStopAtLevel = 1 in order to compile everything
+ *   with pure c1.
+ *
+ * - Tier0ProfilingStartPercentage allows the interpreter to start profiling when the inequalities in the
+ *   0->3 predicate are already exceeded by the given percentage but the level 3 version of the
+ *   method is still not ready. We can even go directly from level 0 to 4 if c1 doesn't produce a compiled
+ *   version in time. This reduces the overall transition to level 4 and decreases the startup time.
+ *   Note that this behavior is also guarded by the Tier3Delay mechanism: when the c2 queue is too long
+ *   there is no reason to start profiling prematurely.
+ *
+ * - TieredRateUpdateMinTime and TieredRateUpdateMaxTime are parameters of the rate computation.
+ *   Basically, the rate is not computed more frequently than TieredRateUpdateMinTime and is considered
+ *   to be zero if no events occurred in TieredRateUpdateMaxTime.
+ */
+
+
+class AdvancedThresholdPolicy : public SimpleThresholdPolicy {
+  jlong _start_time;  // set in initialize(); update_rate() uses it when a method has no previous sample
+
+  // Call and loop predicates determine whether a transition to a higher compilation
+  // level should be performed (pointers to predicate functions are passed to common()).
+  // Predicates also take compiler load into account.
+  typedef bool (AdvancedThresholdPolicy::*Predicate)(int i, int b, CompLevel cur_level);
+  bool call_predicate(int i, int b, CompLevel cur_level);
+  bool loop_predicate(int i, int b, CompLevel cur_level);
+  // Common transition function. Given a predicate determines if a method should transition to another level.
+  CompLevel common(Predicate p, methodOop method, CompLevel cur_level);
+  // Transition functions.
+  // call_event determines if a method should be compiled at a different
+  // level with a regular invocation entry.
+  CompLevel call_event(methodOop method, CompLevel cur_level);
+  // loop_event checks if a method should be OSR compiled at a different
+  // level.
+  CompLevel loop_event(methodOop method, CompLevel cur_level);
+  // Has a method been around for a long time?
+  // We don't remove old methods from the compile queue even if they have
+  // very low activity (see select_task()).
+  inline bool is_old(methodOop method);
+  // Was a given method inactive for a given number of milliseconds?
+  // If it was, we would remove it from the queue (see select_task()).
+  inline bool is_stale(jlong t, jlong timeout, methodOop m);
+  // Compute the weight of the method for the compilation scheduling
+  inline double weight(methodOop method);
+  // Apply heuristics and return true if x should be compiled before y
+  inline bool compare_methods(methodOop x, methodOop y);
+  // Compute event rate for a given method. The rate is the number of events (invocations + backedges)
+  // per millisecond.
+  inline void update_rate(jlong t, methodOop m);
+  // Compute threshold scaling coefficient
+  inline double threshold_scale(CompLevel level, int feedback_k);
+  // If a method is old enough and is still in the interpreter we would want to
+  // start profiling without waiting for the compiled method to arrive. This function
+  // determines whether we should do that.
+  inline bool should_create_mdo(methodOop method, CompLevel cur_level);
+  // Create MDO if necessary.
+  void create_mdo(methodHandle mh, TRAPS);
+  // Is method profiled enough?
+  bool is_method_profiled(methodOop method);
+
+protected:
+  void print_specific(EventType type, methodHandle mh, methodHandle imh, int bci, CompLevel level);
+
+  void set_start_time(jlong t) { _start_time = t;    }
+  jlong start_time() const     { return _start_time; }
+
+  // Submit a given method for compilation (and update the rate).
+  virtual void submit_compile(methodHandle mh, int bci, CompLevel level, TRAPS);
+  // event() from SimpleThresholdPolicy would call these.
+  virtual void method_invocation_event(methodHandle method, methodHandle inlinee,
+                                       CompLevel level, TRAPS);
+  virtual void method_back_branch_event(methodHandle method, methodHandle inlinee,
+                                        int bci, CompLevel level, TRAPS);
+public:
+  AdvancedThresholdPolicy() : _start_time(0) { }
+  // Select task is called by CompileBroker. We should return a task or NULL.
+  virtual CompileTask* select_task(CompileQueue* compile_queue);
+  virtual void initialize();
+};
+
+#endif // TIERED
+
+#endif // SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP
--- a/src/share/vm/runtime/arguments.cpp	Thu Mar 03 23:31:45 2011 -0800
+++ b/src/share/vm/runtime/arguments.cpp	Fri Mar 04 15:14:16 2011 -0800
@@ -1026,8 +1026,9 @@
 }
 
 void Arguments::set_tiered_flags() {
+  // With tiered, set default policy to AdvancedThresholdPolicy, which is 3.
   if (FLAG_IS_DEFAULT(CompilationPolicyChoice)) {
-    FLAG_SET_DEFAULT(CompilationPolicyChoice, 2);
+    FLAG_SET_DEFAULT(CompilationPolicyChoice, 3);
   }
   if (CompilationPolicyChoice < 2) {
     vm_exit_during_initialization(
--- a/src/share/vm/runtime/compilationPolicy.cpp	Thu Mar 03 23:31:45 2011 -0800
+++ b/src/share/vm/runtime/compilationPolicy.cpp	Fri Mar 04 15:14:16 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,6 +32,7 @@
 #include "oops/methodOop.hpp"
 #include "oops/oop.inline.hpp"
 #include "prims/nativeLookup.hpp"
+#include "runtime/advancedThresholdPolicy.hpp"
 #include "runtime/compilationPolicy.hpp"
 #include "runtime/frame.hpp"
 #include "runtime/handles.inline.hpp"
@@ -72,8 +73,15 @@
     Unimplemented();
 #endif
     break;
+  case 3:
+#ifdef TIERED
+    CompilationPolicy::set_policy(new AdvancedThresholdPolicy());
+#else
+    Unimplemented();
+#endif
+    break;
   default:
-    fatal("CompilationPolicyChoice must be in the range: [0-2]");
+    fatal("CompilationPolicyChoice must be in the range: [0-3]");
   }
   CompilationPolicy::policy()->initialize();
 }