changeset 6068:2328dac1da27

Merge
author amurillo
date Fri, 28 Feb 2014 09:30:20 -0800
parents 2dd7abe7b841 16c705d792be
children 3812c088b945 ef7328717719 3c6ae9109a86 9c9f4dac029b
files
diffstat 178 files changed, 3659 insertions(+), 3519 deletions(-) [+]
line wrap: on
line diff
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/Block.java	Tue Feb 25 23:59:04 2014 -0800
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/Block.java	Fri Feb 28 09:30:20 2014 -0800
@@ -48,7 +48,7 @@
     preOrderField = new CIntField(type.getCIntegerField("_pre_order"), 0);
     domDepthField = new CIntField(type.getCIntegerField("_dom_depth"), 0);
     idomField = type.getAddressField("_idom");
-    freqField = type.getJFloatField("_freq");
+    freqField = type.getJDoubleField("_freq");
   }
 
   private static AddressField nodesField;
@@ -57,7 +57,7 @@
   private static CIntField preOrderField;
   private static CIntField domDepthField;
   private static AddressField idomField;
-  private static JFloatField freqField;
+  private static JDoubleField freqField;
 
   public Block(Address addr) {
     super(addr);
@@ -67,8 +67,8 @@
     return (int)preOrderField.getValue(getAddress());
   }
 
-  public float freq() {
-    return (float)freqField.getValue(getAddress());
+  public double freq() {
+    return (double)freqField.getValue(getAddress());
   }
 
   public Node_List nodes() {
--- a/make/excludeSrc.make	Tue Feb 25 23:59:04 2014 -0800
+++ b/make/excludeSrc.make	Fri Feb 28 09:30:20 2014 -0800
@@ -86,7 +86,7 @@
 	concurrentMark.cpp concurrentMarkThread.cpp dirtyCardQueue.cpp g1AllocRegion.cpp \
 	g1BlockOffsetTable.cpp g1CardCounts.cpp g1CollectedHeap.cpp g1CollectorPolicy.cpp \
 	g1ErgoVerbose.cpp g1GCPhaseTimes.cpp g1HRPrinter.cpp g1HotCardCache.cpp g1Log.cpp \
-	g1MMUTracker.cpp g1MarkSweep.cpp g1MemoryPool.cpp g1MonitoringSupport.cpp \
+	g1MMUTracker.cpp g1MarkSweep.cpp g1MemoryPool.cpp g1MonitoringSupport.cpp g1OopClosures.cpp \
 	g1RemSet.cpp g1RemSetSummary.cpp g1SATBCardTableModRefBS.cpp g1_globals.cpp heapRegion.cpp \
 	g1BiasedArray.cpp heapRegionRemSet.cpp heapRegionSeq.cpp heapRegionSet.cpp heapRegionSets.cpp \
 	ptrQueue.cpp satbQueue.cpp sparsePRT.cpp survRateGroup.cpp vm_operations_g1.cpp \
--- a/make/windows/makefiles/projectcreator.make	Tue Feb 25 23:59:04 2014 -0800
+++ b/make/windows/makefiles/projectcreator.make	Fri Feb 28 09:30:20 2014 -0800
@@ -72,6 +72,7 @@
         -ignorePath arm \
         -ignorePath ppc \
         -ignorePath zero \
+        -ignorePath aix \
         -hidePath .hg
 
 
--- a/src/cpu/ppc/vm/ppc.ad	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/cpu/ppc/vm/ppc.ad	Fri Feb 28 09:30:20 2014 -0800
@@ -2360,18 +2360,6 @@
   return RegMask();
 }
 
-const RegMask Matcher::mathExactI_result_proj_mask() {
-  return RARG4_BITS64_REG_mask();
-}
-
-const RegMask Matcher::mathExactL_result_proj_mask() {
-  return RARG4_BITS64_REG_mask();
-}
-
-const RegMask Matcher::mathExactI_flags_proj_mask() {
-  return INT_FLAGS_mask();
-}
-
 %}
 
 //----------ENCODING BLOCK-----------------------------------------------------
@@ -7572,16 +7560,6 @@
 //----------Arithmetic Instructions--------------------------------------------
 // Addition Instructions
 
-// PPC has no instruction setting overflow of 32-bit integer.
-//instruct addExactI_rReg(rarg4RegI dst, rRegI src, flagsReg cr) %{
-//  match(AddExactI dst src);
-//  effect(DEF cr);
-//
-//  format %{ "ADD     $dst, $dst, $src \t// addExact int, sets $cr" %}
-//  ins_encode( enc_add(dst, dst, src) );
-//  ins_pipe(pipe_class_default);
-//%}
-
 // Register Addition
 instruct addI_reg_reg(iRegIdst dst, iRegIsrc_iRegL2Isrc src1, iRegIsrc_iRegL2Isrc src2) %{
   match(Set dst (AddI src1 src2));
--- a/src/cpu/sparc/vm/frame_sparc.inline.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/cpu/sparc/vm/frame_sparc.inline.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -237,6 +237,10 @@
 inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const {
   return (ConstantPoolCache**)sp_addr_at( LcpoolCache->sp_offset_in_saved_window());
 }
+
+inline oop* frame::interpreter_frame_temp_oop_addr() const {
+  return (oop *)(fp() + interpreter_frame_oop_temp_offset);
+}
 #endif // CC_INTERP
 
 
--- a/src/cpu/sparc/vm/sparc.ad	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/cpu/sparc/vm/sparc.ad	Fri Feb 28 09:30:20 2014 -0800
@@ -2037,19 +2037,6 @@
   return L7_REGP_mask();
 }
 
-const RegMask Matcher::mathExactI_result_proj_mask() {
-  return G1_REGI_mask();
-}
-
-const RegMask Matcher::mathExactL_result_proj_mask() {
-  return G1_REGL_mask();
-}
-
-const RegMask Matcher::mathExactI_flags_proj_mask() {
-  return INT_FLAGS_mask();
-}
-
-
 %}
 
 
--- a/src/cpu/x86/vm/frame_x86.inline.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/cpu/x86/vm/frame_x86.inline.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -247,6 +247,10 @@
   }
 }
 
+inline oop* frame::interpreter_frame_temp_oop_addr() const {
+  return (oop *)(fp() + interpreter_frame_oop_temp_offset);
+}
+
 #endif /* CC_INTERP */
 
 inline int frame::pd_oop_map_offset_adjustment() const {
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -98,217 +98,6 @@
   return Address::make_array(adr);
 }
 
-int MacroAssembler::biased_locking_enter(Register lock_reg,
-                                         Register obj_reg,
-                                         Register swap_reg,
-                                         Register tmp_reg,
-                                         bool swap_reg_contains_mark,
-                                         Label& done,
-                                         Label* slow_case,
-                                         BiasedLockingCounters* counters) {
-  assert(UseBiasedLocking, "why call this otherwise?");
-  assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
-  assert_different_registers(lock_reg, obj_reg, swap_reg);
-
-  if (PrintBiasedLockingStatistics && counters == NULL)
-    counters = BiasedLocking::counters();
-
-  bool need_tmp_reg = false;
-  if (tmp_reg == noreg) {
-    need_tmp_reg = true;
-    tmp_reg = lock_reg;
-  } else {
-    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
-  }
-  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
-  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
-  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
-  Address saved_mark_addr(lock_reg, 0);
-
-  // Biased locking
-  // See whether the lock is currently biased toward our thread and
-  // whether the epoch is still valid
-  // Note that the runtime guarantees sufficient alignment of JavaThread
-  // pointers to allow age to be placed into low bits
-  // First check to see whether biasing is even enabled for this object
-  Label cas_label;
-  int null_check_offset = -1;
-  if (!swap_reg_contains_mark) {
-    null_check_offset = offset();
-    movl(swap_reg, mark_addr);
-  }
-  if (need_tmp_reg) {
-    push(tmp_reg);
-  }
-  movl(tmp_reg, swap_reg);
-  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
-  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
-  if (need_tmp_reg) {
-    pop(tmp_reg);
-  }
-  jcc(Assembler::notEqual, cas_label);
-  // The bias pattern is present in the object's header. Need to check
-  // whether the bias owner and the epoch are both still current.
-  // Note that because there is no current thread register on x86 we
-  // need to store off the mark word we read out of the object to
-  // avoid reloading it and needing to recheck invariants below. This
-  // store is unfortunate but it makes the overall code shorter and
-  // simpler.
-  movl(saved_mark_addr, swap_reg);
-  if (need_tmp_reg) {
-    push(tmp_reg);
-  }
-  get_thread(tmp_reg);
-  xorl(swap_reg, tmp_reg);
-  if (swap_reg_contains_mark) {
-    null_check_offset = offset();
-  }
-  movl(tmp_reg, klass_addr);
-  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
-  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
-  if (need_tmp_reg) {
-    pop(tmp_reg);
-  }
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
-  }
-  jcc(Assembler::equal, done);
-
-  Label try_revoke_bias;
-  Label try_rebias;
-
-  // At this point we know that the header has the bias pattern and
-  // that we are not the bias owner in the current epoch. We need to
-  // figure out more details about the state of the header in order to
-  // know what operations can be legally performed on the object's
-  // header.
-
-  // If the low three bits in the xor result aren't clear, that means
-  // the prototype header is no longer biased and we have to revoke
-  // the bias on this object.
-  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
-  jcc(Assembler::notZero, try_revoke_bias);
-
-  // Biasing is still enabled for this data type. See whether the
-  // epoch of the current bias is still valid, meaning that the epoch
-  // bits of the mark word are equal to the epoch bits of the
-  // prototype header. (Note that the prototype header's epoch bits
-  // only change at a safepoint.) If not, attempt to rebias the object
-  // toward the current thread. Note that we must be absolutely sure
-  // that the current epoch is invalid in order to do this because
-  // otherwise the manipulations it performs on the mark word are
-  // illegal.
-  testl(swap_reg, markOopDesc::epoch_mask_in_place);
-  jcc(Assembler::notZero, try_rebias);
-
-  // The epoch of the current bias is still valid but we know nothing
-  // about the owner; it might be set or it might be clear. Try to
-  // acquire the bias of the object using an atomic operation. If this
-  // fails we will go in to the runtime to revoke the object's bias.
-  // Note that we first construct the presumed unbiased header so we
-  // don't accidentally blow away another thread's valid bias.
-  movl(swap_reg, saved_mark_addr);
-  andl(swap_reg,
-       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
-  if (need_tmp_reg) {
-    push(tmp_reg);
-  }
-  get_thread(tmp_reg);
-  orl(tmp_reg, swap_reg);
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
-  if (need_tmp_reg) {
-    pop(tmp_reg);
-  }
-  // If the biasing toward our thread failed, this means that
-  // another thread succeeded in biasing it toward itself and we
-  // need to revoke that bias. The revocation will occur in the
-  // interpreter runtime in the slow case.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
-  }
-  if (slow_case != NULL) {
-    jcc(Assembler::notZero, *slow_case);
-  }
-  jmp(done);
-
-  bind(try_rebias);
-  // At this point we know the epoch has expired, meaning that the
-  // current "bias owner", if any, is actually invalid. Under these
-  // circumstances _only_, we are allowed to use the current header's
-  // value as the comparison value when doing the cas to acquire the
-  // bias in the current epoch. In other words, we allow transfer of
-  // the bias from one thread to another directly in this situation.
-  //
-  // FIXME: due to a lack of registers we currently blow away the age
-  // bits in this situation. Should attempt to preserve them.
-  if (need_tmp_reg) {
-    push(tmp_reg);
-  }
-  get_thread(tmp_reg);
-  movl(swap_reg, klass_addr);
-  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
-  movl(swap_reg, saved_mark_addr);
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
-  if (need_tmp_reg) {
-    pop(tmp_reg);
-  }
-  // If the biasing toward our thread failed, then another thread
-  // succeeded in biasing it toward itself and we need to revoke that
-  // bias. The revocation will occur in the runtime in the slow case.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
-  }
-  if (slow_case != NULL) {
-    jcc(Assembler::notZero, *slow_case);
-  }
-  jmp(done);
-
-  bind(try_revoke_bias);
-  // The prototype mark in the klass doesn't have the bias bit set any
-  // more, indicating that objects of this data type are not supposed
-  // to be biased any more. We are going to try to reset the mark of
-  // this object to the prototype value and fall through to the
-  // CAS-based locking scheme. Note that if our CAS fails, it means
-  // that another thread raced us for the privilege of revoking the
-  // bias of this particular object, so it's okay to continue in the
-  // normal locking code.
-  //
-  // FIXME: due to a lack of registers we currently blow away the age
-  // bits in this situation. Should attempt to preserve them.
-  movl(swap_reg, saved_mark_addr);
-  if (need_tmp_reg) {
-    push(tmp_reg);
-  }
-  movl(tmp_reg, klass_addr);
-  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
-  if (need_tmp_reg) {
-    pop(tmp_reg);
-  }
-  // Fall through to the normal CAS-based lock, because no matter what
-  // the result of the above CAS, some thread must have succeeded in
-  // removing the bias bit from the object's header.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
-  }
-
-  bind(cas_label);
-
-  return null_check_offset;
-}
 void MacroAssembler::call_VM_leaf_base(address entry_point,
                                        int number_of_arguments) {
   call(RuntimeAddress(entry_point));
@@ -726,165 +515,6 @@
   return array;
 }
 
-int MacroAssembler::biased_locking_enter(Register lock_reg,
-                                         Register obj_reg,
-                                         Register swap_reg,
-                                         Register tmp_reg,
-                                         bool swap_reg_contains_mark,
-                                         Label& done,
-                                         Label* slow_case,
-                                         BiasedLockingCounters* counters) {
-  assert(UseBiasedLocking, "why call this otherwise?");
-  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
-  assert(tmp_reg != noreg, "tmp_reg must be supplied");
-  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
-  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
-  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
-  Address saved_mark_addr(lock_reg, 0);
-
-  if (PrintBiasedLockingStatistics && counters == NULL)
-    counters = BiasedLocking::counters();
-
-  // Biased locking
-  // See whether the lock is currently biased toward our thread and
-  // whether the epoch is still valid
-  // Note that the runtime guarantees sufficient alignment of JavaThread
-  // pointers to allow age to be placed into low bits
-  // First check to see whether biasing is even enabled for this object
-  Label cas_label;
-  int null_check_offset = -1;
-  if (!swap_reg_contains_mark) {
-    null_check_offset = offset();
-    movq(swap_reg, mark_addr);
-  }
-  movq(tmp_reg, swap_reg);
-  andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
-  cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
-  jcc(Assembler::notEqual, cas_label);
-  // The bias pattern is present in the object's header. Need to check
-  // whether the bias owner and the epoch are both still current.
-  load_prototype_header(tmp_reg, obj_reg);
-  orq(tmp_reg, r15_thread);
-  xorq(tmp_reg, swap_reg);
-  andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
-  }
-  jcc(Assembler::equal, done);
-
-  Label try_revoke_bias;
-  Label try_rebias;
-
-  // At this point we know that the header has the bias pattern and
-  // that we are not the bias owner in the current epoch. We need to
-  // figure out more details about the state of the header in order to
-  // know what operations can be legally performed on the object's
-  // header.
-
-  // If the low three bits in the xor result aren't clear, that means
-  // the prototype header is no longer biased and we have to revoke
-  // the bias on this object.
-  testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
-  jcc(Assembler::notZero, try_revoke_bias);
-
-  // Biasing is still enabled for this data type. See whether the
-  // epoch of the current bias is still valid, meaning that the epoch
-  // bits of the mark word are equal to the epoch bits of the
-  // prototype header. (Note that the prototype header's epoch bits
-  // only change at a safepoint.) If not, attempt to rebias the object
-  // toward the current thread. Note that we must be absolutely sure
-  // that the current epoch is invalid in order to do this because
-  // otherwise the manipulations it performs on the mark word are
-  // illegal.
-  testq(tmp_reg, markOopDesc::epoch_mask_in_place);
-  jcc(Assembler::notZero, try_rebias);
-
-  // The epoch of the current bias is still valid but we know nothing
-  // about the owner; it might be set or it might be clear. Try to
-  // acquire the bias of the object using an atomic operation. If this
-  // fails we will go in to the runtime to revoke the object's bias.
-  // Note that we first construct the presumed unbiased header so we
-  // don't accidentally blow away another thread's valid bias.
-  andq(swap_reg,
-       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
-  movq(tmp_reg, swap_reg);
-  orq(tmp_reg, r15_thread);
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgq(tmp_reg, Address(obj_reg, 0));
-  // If the biasing toward our thread failed, this means that
-  // another thread succeeded in biasing it toward itself and we
-  // need to revoke that bias. The revocation will occur in the
-  // interpreter runtime in the slow case.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
-  }
-  if (slow_case != NULL) {
-    jcc(Assembler::notZero, *slow_case);
-  }
-  jmp(done);
-
-  bind(try_rebias);
-  // At this point we know the epoch has expired, meaning that the
-  // current "bias owner", if any, is actually invalid. Under these
-  // circumstances _only_, we are allowed to use the current header's
-  // value as the comparison value when doing the cas to acquire the
-  // bias in the current epoch. In other words, we allow transfer of
-  // the bias from one thread to another directly in this situation.
-  //
-  // FIXME: due to a lack of registers we currently blow away the age
-  // bits in this situation. Should attempt to preserve them.
-  load_prototype_header(tmp_reg, obj_reg);
-  orq(tmp_reg, r15_thread);
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgq(tmp_reg, Address(obj_reg, 0));
-  // If the biasing toward our thread failed, then another thread
-  // succeeded in biasing it toward itself and we need to revoke that
-  // bias. The revocation will occur in the runtime in the slow case.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
-  }
-  if (slow_case != NULL) {
-    jcc(Assembler::notZero, *slow_case);
-  }
-  jmp(done);
-
-  bind(try_revoke_bias);
-  // The prototype mark in the klass doesn't have the bias bit set any
-  // more, indicating that objects of this data type are not supposed
-  // to be biased any more. We are going to try to reset the mark of
-  // this object to the prototype value and fall through to the
-  // CAS-based locking scheme. Note that if our CAS fails, it means
-  // that another thread raced us for the privilege of revoking the
-  // bias of this particular object, so it's okay to continue in the
-  // normal locking code.
-  //
-  // FIXME: due to a lack of registers we currently blow away the age
-  // bits in this situation. Should attempt to preserve them.
-  load_prototype_header(tmp_reg, obj_reg);
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgq(tmp_reg, Address(obj_reg, 0));
-  // Fall through to the normal CAS-based lock, because no matter what
-  // the result of the above CAS, some thread must have succeeded in
-  // removing the bias bit from the object's header.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
-  }
-
-  bind(cas_label);
-
-  return null_check_offset;
-}
-
 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
   Label L, E;
 
@@ -1360,9 +990,16 @@
 
 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
   pushf();
-  if (os::is_MP())
-    lock();
-  incrementl(counter_addr);
+  if (reachable(counter_addr)) {
+    if (os::is_MP())
+      lock();
+    incrementl(as_Address(counter_addr));
+  } else {
+    lea(rscratch1, counter_addr);
+    if (os::is_MP())
+      lock();
+    incrementl(Address(rscratch1, 0));
+  }
   popf();
 }
 
@@ -1393,6 +1030,234 @@
   }
 }
 
+int MacroAssembler::biased_locking_enter(Register lock_reg,
+                                         Register obj_reg,
+                                         Register swap_reg,
+                                         Register tmp_reg,
+                                         bool swap_reg_contains_mark,
+                                         Label& done,
+                                         Label* slow_case,
+                                         BiasedLockingCounters* counters) {
+  assert(UseBiasedLocking, "why call this otherwise?");
+  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
+  LP64_ONLY( assert(tmp_reg != noreg, "tmp_reg must be supplied"); )
+  bool need_tmp_reg = false;
+  if (tmp_reg == noreg) {
+    need_tmp_reg = true;
+    tmp_reg = lock_reg;
+    assert_different_registers(lock_reg, obj_reg, swap_reg);
+  } else {
+    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
+  }
+  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
+  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
+  Address saved_mark_addr(lock_reg, 0);
+
+  if (PrintBiasedLockingStatistics && counters == NULL) {
+    counters = BiasedLocking::counters();
+  }
+  // Biased locking
+  // See whether the lock is currently biased toward our thread and
+  // whether the epoch is still valid
+  // Note that the runtime guarantees sufficient alignment of JavaThread
+  // pointers to allow age to be placed into low bits
+  // First check to see whether biasing is even enabled for this object
+  Label cas_label;
+  int null_check_offset = -1;
+  if (!swap_reg_contains_mark) {
+    null_check_offset = offset();
+    movptr(swap_reg, mark_addr);
+  }
+  if (need_tmp_reg) {
+    push(tmp_reg);
+  }
+  movptr(tmp_reg, swap_reg);
+  andptr(tmp_reg, markOopDesc::biased_lock_mask_in_place);
+  cmpptr(tmp_reg, markOopDesc::biased_lock_pattern);
+  if (need_tmp_reg) {
+    pop(tmp_reg);
+  }
+  jcc(Assembler::notEqual, cas_label);
+  // The bias pattern is present in the object's header. Need to check
+  // whether the bias owner and the epoch are both still current.
+#ifndef _LP64
+  // Note that because there is no current thread register on x86_32 we
+  // need to store off the mark word we read out of the object to
+  // avoid reloading it and needing to recheck invariants below. This
+  // store is unfortunate but it makes the overall code shorter and
+  // simpler.
+  movptr(saved_mark_addr, swap_reg);
+#endif
+  if (need_tmp_reg) {
+    push(tmp_reg);
+  }
+  if (swap_reg_contains_mark) {
+    null_check_offset = offset();
+  }
+  load_prototype_header(tmp_reg, obj_reg);
+#ifdef _LP64
+  orptr(tmp_reg, r15_thread);
+  xorptr(tmp_reg, swap_reg);
+  Register header_reg = tmp_reg;
+#else
+  xorptr(tmp_reg, swap_reg);
+  get_thread(swap_reg);
+  xorptr(swap_reg, tmp_reg);
+  Register header_reg = swap_reg;
+#endif
+  andptr(header_reg, ~((int) markOopDesc::age_mask_in_place));
+  if (need_tmp_reg) {
+    pop(tmp_reg);
+  }
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->biased_lock_entry_count_addr()));
+  }
+  jcc(Assembler::equal, done);
+
+  Label try_revoke_bias;
+  Label try_rebias;
+
+  // At this point we know that the header has the bias pattern and
+  // that we are not the bias owner in the current epoch. We need to
+  // figure out more details about the state of the header in order to
+  // know what operations can be legally performed on the object's
+  // header.
+
+  // If the low three bits in the xor result aren't clear, that means
+  // the prototype header is no longer biased and we have to revoke
+  // the bias on this object.
+  testptr(header_reg, markOopDesc::biased_lock_mask_in_place);
+  jccb(Assembler::notZero, try_revoke_bias);
+
+  // Biasing is still enabled for this data type. See whether the
+  // epoch of the current bias is still valid, meaning that the epoch
+  // bits of the mark word are equal to the epoch bits of the
+  // prototype header. (Note that the prototype header's epoch bits
+  // only change at a safepoint.) If not, attempt to rebias the object
+  // toward the current thread. Note that we must be absolutely sure
+  // that the current epoch is invalid in order to do this because
+  // otherwise the manipulations it performs on the mark word are
+  // illegal.
+  testptr(header_reg, markOopDesc::epoch_mask_in_place);
+  jccb(Assembler::notZero, try_rebias);
+
+  // The epoch of the current bias is still valid but we know nothing
+  // about the owner; it might be set or it might be clear. Try to
+  // acquire the bias of the object using an atomic operation. If this
+  // fails we will go in to the runtime to revoke the object's bias.
+  // Note that we first construct the presumed unbiased header so we
+  // don't accidentally blow away another thread's valid bias.
+  NOT_LP64( movptr(swap_reg, saved_mark_addr); )
+  andptr(swap_reg,
+         markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
+  if (need_tmp_reg) {
+    push(tmp_reg);
+  }
+#ifdef _LP64
+  movptr(tmp_reg, swap_reg);
+  orptr(tmp_reg, r15_thread);
+#else
+  get_thread(tmp_reg);
+  orptr(tmp_reg, swap_reg);
+#endif
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
+  if (need_tmp_reg) {
+    pop(tmp_reg);
+  }
+  // If the biasing toward our thread failed, this means that
+  // another thread succeeded in biasing it toward itself and we
+  // need to revoke that bias. The revocation will occur in the
+  // interpreter runtime in the slow case.
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
+  }
+  if (slow_case != NULL) {
+    jcc(Assembler::notZero, *slow_case);
+  }
+  jmp(done);
+
+  bind(try_rebias);
+  // At this point we know the epoch has expired, meaning that the
+  // current "bias owner", if any, is actually invalid. Under these
+  // circumstances _only_, we are allowed to use the current header's
+  // value as the comparison value when doing the cas to acquire the
+  // bias in the current epoch. In other words, we allow transfer of
+  // the bias from one thread to another directly in this situation.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  if (need_tmp_reg) {
+    push(tmp_reg);
+  }
+  load_prototype_header(tmp_reg, obj_reg);
+#ifdef _LP64
+  orptr(tmp_reg, r15_thread);
+#else
+  get_thread(swap_reg);
+  orptr(tmp_reg, swap_reg);
+  movptr(swap_reg, saved_mark_addr);
+#endif
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
+  if (need_tmp_reg) {
+    pop(tmp_reg);
+  }
+  // If the biasing toward our thread failed, then another thread
+  // succeeded in biasing it toward itself and we need to revoke that
+  // bias. The revocation will occur in the runtime in the slow case.
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
+  }
+  if (slow_case != NULL) {
+    jcc(Assembler::notZero, *slow_case);
+  }
+  jmp(done);
+
+  bind(try_revoke_bias);
+  // The prototype mark in the klass doesn't have the bias bit set any
+  // more, indicating that objects of this data type are not supposed
+  // to be biased any more. We are going to try to reset the mark of
+  // this object to the prototype value and fall through to the
+  // CAS-based locking scheme. Note that if our CAS fails, it means
+  // that another thread raced us for the privilege of revoking the
+  // bias of this particular object, so it's okay to continue in the
+  // normal locking code.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  NOT_LP64( movptr(swap_reg, saved_mark_addr); )
+  if (need_tmp_reg) {
+    push(tmp_reg);
+  }
+  load_prototype_header(tmp_reg, obj_reg);
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
+  if (need_tmp_reg) {
+    pop(tmp_reg);
+  }
+  // Fall through to the normal CAS-based lock, because no matter what
+  // the result of the above CAS, some thread must have succeeded in
+  // removing the bias bit from the object's header.
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
+  }
+
+  bind(cas_label);
+
+  return null_check_offset;
+}
+
 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
   assert(UseBiasedLocking, "why call this otherwise?");
 
@@ -1408,6 +1273,620 @@
   jcc(Assembler::equal, done);
 }
 
+#ifdef COMPILER2
+// Fast_Lock and Fast_Unlock used by C2
+
+// Because the transitions from emitted code to the runtime
+// monitorenter/exit helper stubs are so slow it's critical that
+// we inline both the stack-locking fast-path and the inflated fast path.
+//
+// See also: cmpFastLock and cmpFastUnlock.
+//
+// What follows is a specialized inline transliteration of the code
+// in slow_enter() and slow_exit().  If we're concerned about I$ bloat
+// another option would be to emit TrySlowEnter and TrySlowExit methods
+// at startup-time.  These methods would accept arguments as
+// (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
+// indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
+// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
+// In practice, however, the # of lock sites is bounded and is usually small.
+// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
+// if the processor uses simple bimodal branch predictors keyed by EIP
+// Since the helper routines would be called from multiple synchronization
+// sites.
+//
+// An even better approach would be write "MonitorEnter()" and "MonitorExit()"
+// in java - using j.u.c and unsafe - and just bind the lock and unlock sites
+// to those specialized methods.  That'd give us a mostly platform-independent
+// implementation that the JITs could optimize and inline at their pleasure.
+// Done correctly, the only time we'd need to cross to native could would be
+// to park() or unpark() threads.  We'd also need a few more unsafe operators
+// to (a) prevent compiler-JIT reordering of non-volatile accesses, and
+// (b) explicit barriers or fence operations.
+//
+// TODO:
+//
+// *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
+//    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
+//    Given TLAB allocation, Self is usually manifested in a register, so passing it into
+//    the lock operators would typically be faster than reifying Self.
+//
+// *  Ideally I'd define the primitives as:
+//       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
+//       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
+//    Unfortunately ADLC bugs prevent us from expressing the ideal form.
+//    Instead, we're stuck with a rather awkward and brittle register assignments below.
+//    Furthermore the register assignments are overconstrained, possibly resulting in
+//    sub-optimal code near the synchronization site.
+//
+// *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
+//    Alternately, use a better sp-proximity test.
+//
+// *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
+//    Either one is sufficient to uniquely identify a thread.
+//    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
+//
+// *  Intrinsify notify() and notifyAll() for the common cases where the
+//    object is locked by the calling thread but the waitlist is empty.
+//    avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
+//
+// *  use jccb and jmpb instead of jcc and jmp to improve code density.
+//    But beware of excessive branch density on AMD Opterons.
+//
+// *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
+//    or failure of the fast-path.  If the fast-path fails then we pass
+//    control to the slow-path, typically in C.  In Fast_Lock and
+//    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
+//    will emit a conditional branch immediately after the node.
+//    So we have branches to branches and lots of ICC.ZF games.
+//    Instead, it might be better to have C2 pass a "FailureLabel"
+//    into Fast_Lock and Fast_Unlock.  In the case of success, control
+//    will drop through the node.  ICC.ZF is undefined at exit.
+//    In the case of failure, the node will branch directly to the
+//    FailureLabel
+
+
+// obj: object to lock
+// box: on-stack box address (displaced header location) - KILLED
+// rax,: tmp -- KILLED
+// scr: tmp -- KILLED
+void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg, BiasedLockingCounters* counters) {
+  // Ensure the register assignents are disjoint
+  guarantee (objReg != boxReg, "");
+  guarantee (objReg != tmpReg, "");
+  guarantee (objReg != scrReg, "");
+  guarantee (boxReg != tmpReg, "");
+  guarantee (boxReg != scrReg, "");
+  guarantee (tmpReg == rax, "");
+
+  if (counters != NULL) {
+    atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()));
+  }
+  if (EmitSync & 1) {
+      // set box->dhw = unused_mark (3)
+      // Force all sync thru slow-path: slow_enter() and slow_exit()
+      movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
+      cmpptr (rsp, (int32_t)NULL_WORD);
+  } else
+  if (EmitSync & 2) {
+      Label DONE_LABEL ;
+      if (UseBiasedLocking) {
+         // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
+         biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
+      }
+
+      movptr(tmpReg, Address(objReg, 0));           // fetch markword
+      orptr (tmpReg, 0x1);
+      movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS
+      if (os::is_MP()) {
+        lock();
+      }
+      cmpxchgptr(boxReg, Address(objReg, 0));       // Updates tmpReg
+      jccb(Assembler::equal, DONE_LABEL);
+      // Recursive locking
+      subptr(tmpReg, rsp);
+      andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
+      movptr(Address(boxReg, 0), tmpReg);
+      bind(DONE_LABEL);
+  } else {
+    // Possible cases that we'll encounter in fast_lock
+    // ------------------------------------------------
+    // * Inflated
+    //    -- unlocked
+    //    -- Locked
+    //       = by self
+    //       = by other
+    // * biased
+    //    -- by Self
+    //    -- by other
+    // * neutral
+    // * stack-locked
+    //    -- by self
+    //       = sp-proximity test hits
+    //       = sp-proximity test generates false-negative
+    //    -- by other
+    //
+
+    Label IsInflated, DONE_LABEL;
+
+    // it's stack-locked, biased or neutral
+    // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
+    // order to reduce the number of conditional branches in the most common cases.
+    // Beware -- there's a subtle invariant that fetch of the markword
+    // at [FETCH], below, will never observe a biased encoding (*101b).
+    // If this invariant is not held we risk exclusion (safety) failure.
+    if (UseBiasedLocking && !UseOptoBiasInlining) {
+      biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, counters);
+    }
+
+    movptr(tmpReg, Address(objReg, 0));          // [FETCH]
+    testl (tmpReg, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
+    jccb  (Assembler::notZero, IsInflated);
+
+    // Attempt stack-locking ...
+    orptr (tmpReg, 0x1);
+    movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
+    if (os::is_MP()) {
+      lock();
+    }
+    cmpxchgptr(boxReg, Address(objReg, 0));      // Updates tmpReg
+    if (counters != NULL) {
+      cond_inc32(Assembler::equal,
+                 ExternalAddress((address)counters->fast_path_entry_count_addr()));
+    }
+    jccb(Assembler::equal, DONE_LABEL);
+
+    // Recursive locking
+    subptr(tmpReg, rsp);
+    andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
+    movptr(Address(boxReg, 0), tmpReg);
+    if (counters != NULL) {
+      cond_inc32(Assembler::equal,
+                 ExternalAddress((address)counters->fast_path_entry_count_addr()));
+    }
+    jmpb(DONE_LABEL);
+
+    bind(IsInflated);
+#ifndef _LP64
+    // The object is inflated.
+    //
+    // TODO-FIXME: eliminate the ugly use of manifest constants:
+    //   Use markOopDesc::monitor_value instead of "2".
+    //   use markOop::unused_mark() instead of "3".
+    // The tmpReg value is an objectMonitor reference ORed with
+    // markOopDesc::monitor_value (2).   We can either convert tmpReg to an
+    // objectmonitor pointer by masking off the "2" bit or we can just
+    // use tmpReg as an objectmonitor pointer but bias the objectmonitor
+    // field offsets with "-2" to compensate for and annul the low-order tag bit.
+    //
+    // I use the latter as it avoids AGI stalls.
+    // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
+    // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
+    //
+    #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
+
+    // boxReg refers to the on-stack BasicLock in the current frame.
+    // We'd like to write:
+    //   set box->_displaced_header = markOop::unused_mark().  Any non-0 value suffices.
+    // This is convenient but results a ST-before-CAS penalty.  The following CAS suffers
+    // additional latency as we have another ST in the store buffer that must drain.
+
+    if (EmitSync & 8192) {
+       movptr(Address(boxReg, 0), 3);            // results in ST-before-CAS penalty
+       get_thread (scrReg);
+       movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2]
+       movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
+       if (os::is_MP()) {
+         lock();
+       }
+       cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    } else
+    if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
+       movptr(scrReg, boxReg);
+       movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
+
+       // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
+       if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
+          // prefetchw [eax + Offset(_owner)-2]
+          prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       }
+
+       if ((EmitSync & 64) == 0) {
+         // Optimistic form: consider XORL tmpReg,tmpReg
+         movptr(tmpReg, NULL_WORD);
+       } else {
+         // Can suffer RTS->RTO upgrades on shared or cold $ lines
+         // Test-And-CAS instead of CAS
+         movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));   // rax, = m->_owner
+         testptr(tmpReg, tmpReg);                   // Locked ?
+         jccb  (Assembler::notZero, DONE_LABEL);
+       }
+
+       // Appears unlocked - try to swing _owner from null to non-null.
+       // Ideally, I'd manifest "Self" with get_thread and then attempt
+       // to CAS the register containing Self into m->Owner.
+       // But we don't have enough registers, so instead we can either try to CAS
+       // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
+       // we later store "Self" into m->Owner.  Transiently storing a stack address
+       // (rsp or the address of the box) into  m->owner is harmless.
+       // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
+       if (os::is_MP()) {
+         lock();
+       }
+       cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
+       jccb  (Assembler::notZero, DONE_LABEL);
+       get_thread (scrReg);                    // beware: clobbers ICCs
+       movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg);
+       xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
+
+       // If the CAS fails we can either retry or pass control to the slow-path.
+       // We use the latter tactic.
+       // Pass the CAS result in the icc.ZFlag into DONE_LABEL
+       // If the CAS was successful ...
+       //   Self has acquired the lock
+       //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
+       // Intentional fall-through into DONE_LABEL ...
+    } else {
+       movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark()));  // results in ST-before-CAS penalty
+       movptr(boxReg, tmpReg);
+
+       // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
+       if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
+          // prefetchw [eax + Offset(_owner)-2]
+          prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       }
+
+       if ((EmitSync & 64) == 0) {
+         // Optimistic form
+         xorptr  (tmpReg, tmpReg);
+       } else {
+         // Can suffer RTS->RTO upgrades on shared or cold $ lines
+         movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));   // rax, = m->_owner
+         testptr(tmpReg, tmpReg);                   // Locked ?
+         jccb  (Assembler::notZero, DONE_LABEL);
+       }
+
+       // Appears unlocked - try to swing _owner from null to non-null.
+       // Use either "Self" (in scr) or rsp as thread identity in _owner.
+       // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
+       get_thread (scrReg);
+       if (os::is_MP()) {
+         lock();
+       }
+       cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+
+       // If the CAS fails we can either retry or pass control to the slow-path.
+       // We use the latter tactic.
+       // Pass the CAS result in the icc.ZFlag into DONE_LABEL
+       // If the CAS was successful ...
+       //   Self has acquired the lock
+       //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
+       // Intentional fall-through into DONE_LABEL ...
+    }
+#else // _LP64
+    // It's inflated
+
+    // TODO: someday avoid the ST-before-CAS penalty by
+    // relocating (deferring) the following ST.
+    // We should also think about trying a CAS without having
+    // fetched _owner.  If the CAS is successful we may
+    // avoid an RTO->RTS upgrade on the $line.
+
+    // Without cast to int32_t a movptr will destroy r10 which is typically obj
+    movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
+
+    mov    (boxReg, tmpReg);
+    movptr (tmpReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    testptr(tmpReg, tmpReg);
+    jccb   (Assembler::notZero, DONE_LABEL);
+
+    // It's inflated and appears unlocked
+    if (os::is_MP()) {
+      lock();
+    }
+    cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    // Intentional fall-through into DONE_LABEL ...
+
+#endif
+
+    // DONE_LABEL is a hot target - we'd really like to place it at the
+    // start of cache line by padding with NOPs.
+    // See the AMD and Intel software optimization manuals for the
+    // most efficient "long" NOP encodings.
+    // Unfortunately none of our alignment mechanisms suffice.
+    bind(DONE_LABEL);
+
+    // At DONE_LABEL the icc ZFlag is set as follows ...
+    // Fast_Unlock uses the same protocol.
+    // ZFlag == 1 -> Success
+    // ZFlag == 0 -> Failure - force control through the slow-path
+  }
+}
+
+// obj: object to unlock
+// box: box address (displaced header location), killed.  Must be EAX.
+// tmp: killed, cannot be obj nor box.
+//
+// Some commentary on balanced locking:
+//
+// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
+// Methods that don't have provably balanced locking are forced to run in the
+// interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
+// The interpreter provides two properties:
+// I1:  At return-time the interpreter automatically and quietly unlocks any
+//      objects acquired the current activation (frame).  Recall that the
+//      interpreter maintains an on-stack list of locks currently held by
+//      a frame.
+// I2:  If a method attempts to unlock an object that is not held by the
+//      the frame the interpreter throws IMSX.
+//
+// Lets say A(), which has provably balanced locking, acquires O and then calls B().
+// B() doesn't have provably balanced locking so it runs in the interpreter.
+// Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
+// is still locked by A().
+//
+// The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
+// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
+// should not be unlocked by "normal" java-level locking and vice-versa.  The specification
+// doesn't specify what will occur if a program engages in such mixed-mode locking, however.
+
+void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
+  guarantee (objReg != boxReg, "");
+  guarantee (objReg != tmpReg, "");
+  guarantee (boxReg != tmpReg, "");
+  guarantee (boxReg == rax, "");
+
+  if (EmitSync & 4) {
+    // Disable - inhibit all inlining.  Force control through the slow-path
+    cmpptr (rsp, 0);
+  } else
+  if (EmitSync & 8) {
+    Label DONE_LABEL;
+    if (UseBiasedLocking) {
+       biased_locking_exit(objReg, tmpReg, DONE_LABEL);
+    }
+    // Classic stack-locking code ...
+    // Check whether the displaced header is 0
+    //(=> recursive unlock)
+    movptr(tmpReg, Address(boxReg, 0));
+    testptr(tmpReg, tmpReg);
+    jccb(Assembler::zero, DONE_LABEL);
+    // If not recursive lock, reset the header to displaced header
+    if (os::is_MP()) {
+      lock();
+    }
+    cmpxchgptr(tmpReg, Address(objReg, 0));   // Uses RAX which is box
+    bind(DONE_LABEL);
+  } else {
+    Label DONE_LABEL, Stacked, CheckSucc;
+
+    // Critically, the biased locking test must have precedence over
+    // and appear before the (box->dhw == 0) recursive stack-lock test.
+    if (UseBiasedLocking && !UseOptoBiasInlining) {
+       biased_locking_exit(objReg, tmpReg, DONE_LABEL);
+    }
+
+    cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
+    movptr(tmpReg, Address(objReg, 0));             // Examine the object's markword
+    jccb  (Assembler::zero, DONE_LABEL);            // 0 indicates recursive stack-lock
+
+    testptr(tmpReg, 0x02);                          // Inflated?
+    jccb  (Assembler::zero, Stacked);
+
+    // It's inflated.
+    // Despite our balanced locking property we still check that m->_owner == Self
+    // as java routines or native JNI code called by this thread might
+    // have released the lock.
+    // Refer to the comments in synchronizer.cpp for how we might encode extra
+    // state in _succ so we can avoid fetching EntryList|cxq.
+    //
+    // I'd like to add more cases in fast_lock() and fast_unlock() --
+    // such as recursive enter and exit -- but we have to be wary of
+    // I$ bloat, T$ effects and BP$ effects.
+    //
+    // If there's no contention try a 1-0 exit.  That is, exit without
+    // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
+    // we detect and recover from the race that the 1-0 exit admits.
+    //
+    // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
+    // before it STs null into _owner, releasing the lock.  Updates
+    // to data protected by the critical section must be visible before
+    // we drop the lock (and thus before any other thread could acquire
+    // the lock and observe the fields protected by the lock).
+    // IA32's memory-model is SPO, so STs are ordered with respect to
+    // each other and there's no need for an explicit barrier (fence).
+    // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
+#ifndef _LP64
+    get_thread (boxReg);
+    if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
+      // prefetchw [ebx + Offset(_owner)-2]
+      prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    }
+
+    // Note that we could employ various encoding schemes to reduce
+    // the number of loads below (currently 4) to just 2 or 3.
+    // Refer to the comments in synchronizer.cpp.
+    // In practice the chain of fetches doesn't seem to impact performance, however.
+    if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
+       // Attempt to reduce branch density - AMD's branch predictor.
+       xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
+       orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
+       orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
+       jccb  (Assembler::notZero, DONE_LABEL);
+       movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
+       jmpb  (DONE_LABEL);
+    } else {
+       xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
+       jccb  (Assembler::notZero, DONE_LABEL);
+       movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
+       orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
+       jccb  (Assembler::notZero, CheckSucc);
+       movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
+       jmpb  (DONE_LABEL);
+    }
+
+    // The Following code fragment (EmitSync & 65536) improves the performance of
+    // contended applications and contended synchronization microbenchmarks.
+    // Unfortunately the emission of the code - even though not executed - causes regressions
+    // in scimark and jetstream, evidently because of $ effects.  Replacing the code
+    // with an equal number of never-executed NOPs results in the same regression.
+    // We leave it off by default.
+
+    if ((EmitSync & 65536) != 0) {
+       Label LSuccess, LGoSlowPath ;
+
+       bind  (CheckSucc);
+
+       // Optional pre-test ... it's safe to elide this
+       if ((EmitSync & 16) == 0) {
+          cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
+          jccb  (Assembler::zero, LGoSlowPath);
+       }
+
+       // We have a classic Dekker-style idiom:
+       //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
+       // There are a number of ways to implement the barrier:
+       // (1) lock:andl &m->_owner, 0
+       //     is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
+       //     LOCK: ANDL [ebx+Offset(_Owner)-2], 0
+       //     Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
+       // (2) If supported, an explicit MFENCE is appealing.
+       //     In older IA32 processors MFENCE is slower than lock:add or xchg
+       //     particularly if the write-buffer is full as might be the case if
+       //     if stores closely precede the fence or fence-equivalent instruction.
+       //     In more modern implementations MFENCE appears faster, however.
+       // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
+       //     The $lines underlying the top-of-stack should be in M-state.
+       //     The locked add instruction is serializing, of course.
+       // (4) Use xchg, which is serializing
+       //     mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
+       // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
+       //     The integer condition codes will tell us if succ was 0.
+       //     Since _succ and _owner should reside in the same $line and
+       //     we just stored into _owner, it's likely that the $line
+       //     remains in M-state for the lock:orl.
+       //
+       // We currently use (3), although it's likely that switching to (2)
+       // is correct for the future.
+
+       movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
+       if (os::is_MP()) {
+          if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
+            mfence();
+          } else {
+            lock (); addptr(Address(rsp, 0), 0);
+          }
+       }
+       // Ratify _succ remains non-null
+       cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0);
+       jccb  (Assembler::notZero, LSuccess);
+
+       xorptr(boxReg, boxReg);                  // box is really EAX
+       if (os::is_MP()) { lock(); }
+       cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       jccb  (Assembler::notEqual, LSuccess);
+       // Since we're low on registers we installed rsp as a placeholding in _owner.
+       // Now install Self over rsp.  This is safe as we're transitioning from
+       // non-null to non=null
+       get_thread (boxReg);
+       movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg);
+       // Intentional fall-through into LGoSlowPath ...
+
+       bind  (LGoSlowPath);
+       orptr(boxReg, 1);                      // set ICC.ZF=0 to indicate failure
+       jmpb  (DONE_LABEL);
+
+       bind  (LSuccess);
+       xorptr(boxReg, boxReg);                 // set ICC.ZF=1 to indicate success
+       jmpb  (DONE_LABEL);
+    }
+
+    bind (Stacked);
+    // It's not inflated and it's not recursively stack-locked and it's not biased.
+    // It must be stack-locked.
+    // Try to reset the header to displaced header.
+    // The "box" value on the stack is stable, so we can reload
+    // and be assured we observe the same value as above.
+    movptr(tmpReg, Address(boxReg, 0));
+    if (os::is_MP()) {
+      lock();
+    }
+    cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
+    // Intention fall-thru into DONE_LABEL
+
+    // DONE_LABEL is a hot target - we'd really like to place it at the
+    // start of cache line by padding with NOPs.
+    // See the AMD and Intel software optimization manuals for the
+    // most efficient "long" NOP encodings.
+    // Unfortunately none of our alignment mechanisms suffice.
+    if ((EmitSync & 65536) == 0) {
+       bind (CheckSucc);
+    }
+#else // _LP64
+    // It's inflated
+    movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    xorptr(boxReg, r15_thread);
+    orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
+    jccb  (Assembler::notZero, DONE_LABEL);
+    movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
+    orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
+    jccb  (Assembler::notZero, CheckSucc);
+    movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD);
+    jmpb  (DONE_LABEL);
+
+    if ((EmitSync & 65536) == 0) {
+      Label LSuccess, LGoSlowPath ;
+      bind  (CheckSucc);
+      cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
+      jccb  (Assembler::zero, LGoSlowPath);
+
+      // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
+      // the explicit ST;MEMBAR combination, but masm doesn't currently support
+      // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
+      // are all faster when the write buffer is populated.
+      movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD);
+      if (os::is_MP()) {
+         lock (); addl (Address(rsp, 0), 0);
+      }
+      cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
+      jccb  (Assembler::notZero, LSuccess);
+
+      movptr (boxReg, (int32_t)NULL_WORD);                   // box is really EAX
+      if (os::is_MP()) { lock(); }
+      cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+      jccb  (Assembler::notEqual, LSuccess);
+      // Intentional fall-through into slow-path
+
+      bind  (LGoSlowPath);
+      orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
+      jmpb  (DONE_LABEL);
+
+      bind  (LSuccess);
+      testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
+      jmpb  (DONE_LABEL);
+    }
+
+    bind  (Stacked);
+    movptr(tmpReg, Address (boxReg, 0));      // re-fetch
+    if (os::is_MP()) { lock(); }
+    cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
+
+    if (EmitSync & 65536) {
+       bind (CheckSucc);
+    }
+#endif
+    bind(DONE_LABEL);
+    // Avoid branch to branch on AMD processors
+    if (EmitSync & 32768) {
+       nop();
+    }
+  }
+}
+#endif // COMPILER2
+
 void MacroAssembler::c2bool(Register x) {
   // implements x == 0 ? 0 : 1
   // note: must only look at least-significant byte of x
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -651,7 +651,12 @@
                            Label& done, Label* slow_case = NULL,
                            BiasedLockingCounters* counters = NULL);
   void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
-
+#ifdef COMPILER2
+  // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
+  // See full desription in macroAssembler_x86.cpp.
+  void fast_lock(Register obj, Register box, Register tmp, Register scr, BiasedLockingCounters* counters);
+  void fast_unlock(Register obj, Register box, Register tmp);
+#endif
 
   Condition negate_condition(Condition cond);
 
--- a/src/cpu/x86/vm/x86_32.ad	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/cpu/x86/vm/x86_32.ad	Fri Feb 28 09:30:20 2014 -0800
@@ -1542,19 +1542,6 @@
   return EBP_REG_mask();
 }
 
-const RegMask Matcher::mathExactI_result_proj_mask() {
-  return EAX_REG_mask();
-}
-
-const RegMask Matcher::mathExactL_result_proj_mask() {
-  ShouldNotReachHere();
-  return RegMask();
-}
-
-const RegMask Matcher::mathExactI_flags_proj_mask() {
-  return INT_FLAGS_mask();
-}
-
 // Returns true if the high 32 bits of the value is known to be zero.
 bool is_operand_hi32_zero(Node* n) {
   int opc = n->Opcode();
@@ -2918,542 +2905,6 @@
     emit_d8    (cbuf,0 );
   %}
 
-
-  // Because the transitions from emitted code to the runtime
-  // monitorenter/exit helper stubs are so slow it's critical that
-  // we inline both the stack-locking fast-path and the inflated fast path.
-  //
-  // See also: cmpFastLock and cmpFastUnlock.
-  //
-  // What follows is a specialized inline transliteration of the code
-  // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
-  // another option would be to emit TrySlowEnter and TrySlowExit methods
-  // at startup-time.  These methods would accept arguments as
-  // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
-  // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
-  // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
-  // In practice, however, the # of lock sites is bounded and is usually small.
-  // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
-  // if the processor uses simple bimodal branch predictors keyed by EIP
-  // Since the helper routines would be called from multiple synchronization
-  // sites.
-  //
-  // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
-  // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
-  // to those specialized methods.  That'd give us a mostly platform-independent
-  // implementation that the JITs could optimize and inline at their pleasure.
-  // Done correctly, the only time we'd need to cross to native could would be
-  // to park() or unpark() threads.  We'd also need a few more unsafe operators
-  // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
-  // (b) explicit barriers or fence operations.
-  //
-  // TODO:
-  //
-  // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
-  //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
-  //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
-  //    the lock operators would typically be faster than reifying Self.
-  //
-  // *  Ideally I'd define the primitives as:
-  //       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
-  //       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
-  //    Unfortunately ADLC bugs prevent us from expressing the ideal form.
-  //    Instead, we're stuck with a rather awkward and brittle register assignments below.
-  //    Furthermore the register assignments are overconstrained, possibly resulting in
-  //    sub-optimal code near the synchronization site.
-  //
-  // *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
-  //    Alternately, use a better sp-proximity test.
-  //
-  // *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
-  //    Either one is sufficient to uniquely identify a thread.
-  //    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
-  //
-  // *  Intrinsify notify() and notifyAll() for the common cases where the
-  //    object is locked by the calling thread but the waitlist is empty.
-  //    avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
-  //
-  // *  use jccb and jmpb instead of jcc and jmp to improve code density.
-  //    But beware of excessive branch density on AMD Opterons.
-  //
-  // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
-  //    or failure of the fast-path.  If the fast-path fails then we pass
-  //    control to the slow-path, typically in C.  In Fast_Lock and
-  //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
-  //    will emit a conditional branch immediately after the node.
-  //    So we have branches to branches and lots of ICC.ZF games.
-  //    Instead, it might be better to have C2 pass a "FailureLabel"
-  //    into Fast_Lock and Fast_Unlock.  In the case of success, control
-  //    will drop through the node.  ICC.ZF is undefined at exit.
-  //    In the case of failure, the node will branch directly to the
-  //    FailureLabel
-
-
-  // obj: object to lock
-  // box: on-stack box address (displaced header location) - KILLED
-  // rax,: tmp -- KILLED
-  // scr: tmp -- KILLED
-  enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{
-
-    Register objReg = as_Register($obj$$reg);
-    Register boxReg = as_Register($box$$reg);
-    Register tmpReg = as_Register($tmp$$reg);
-    Register scrReg = as_Register($scr$$reg);
-
-    // Ensure the register assignents are disjoint
-    guarantee (objReg != boxReg, "") ;
-    guarantee (objReg != tmpReg, "") ;
-    guarantee (objReg != scrReg, "") ;
-    guarantee (boxReg != tmpReg, "") ;
-    guarantee (boxReg != scrReg, "") ;
-    guarantee (tmpReg == as_Register(EAX_enc), "") ;
-
-    MacroAssembler masm(&cbuf);
-
-    if (_counters != NULL) {
-      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
-    }
-    if (EmitSync & 1) {
-        // set box->dhw = unused_mark (3)
-        // Force all sync thru slow-path: slow_enter() and slow_exit() 
-        masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;             
-        masm.cmpptr (rsp, (int32_t)0) ;                        
-    } else 
-    if (EmitSync & 2) { 
-        Label DONE_LABEL ;           
-        if (UseBiasedLocking) {
-           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
-           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
-        }
-
-        masm.movptr(tmpReg, Address(objReg, 0)) ;          // fetch markword 
-        masm.orptr (tmpReg, 0x1);
-        masm.movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS 
-        if (os::is_MP()) { masm.lock();  }
-        masm.cmpxchgptr(boxReg, Address(objReg, 0));          // Updates tmpReg
-        masm.jcc(Assembler::equal, DONE_LABEL);
-        // Recursive locking
-        masm.subptr(tmpReg, rsp);
-        masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );
-        masm.movptr(Address(boxReg, 0), tmpReg);
-        masm.bind(DONE_LABEL) ; 
-    } else {  
-      // Possible cases that we'll encounter in fast_lock 
-      // ------------------------------------------------
-      // * Inflated
-      //    -- unlocked
-      //    -- Locked
-      //       = by self
-      //       = by other
-      // * biased
-      //    -- by Self
-      //    -- by other
-      // * neutral
-      // * stack-locked
-      //    -- by self
-      //       = sp-proximity test hits
-      //       = sp-proximity test generates false-negative
-      //    -- by other
-      //
-
-      Label IsInflated, DONE_LABEL, PopDone ;
-
-      // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
-      // order to reduce the number of conditional branches in the most common cases.
-      // Beware -- there's a subtle invariant that fetch of the markword
-      // at [FETCH], below, will never observe a biased encoding (*101b).
-      // If this invariant is not held we risk exclusion (safety) failure.
-      if (UseBiasedLocking && !UseOptoBiasInlining) {
-        masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
-      }
-
-      masm.movptr(tmpReg, Address(objReg, 0)) ;         // [FETCH]
-      masm.testptr(tmpReg, 0x02) ;                      // Inflated v (Stack-locked or neutral)
-      masm.jccb  (Assembler::notZero, IsInflated) ;
-
-      // Attempt stack-locking ...
-      masm.orptr (tmpReg, 0x1);
-      masm.movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
-      if (os::is_MP()) { masm.lock();  }
-      masm.cmpxchgptr(boxReg, Address(objReg, 0));           // Updates tmpReg
-      if (_counters != NULL) {
-        masm.cond_inc32(Assembler::equal,
-                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
-      }
-      masm.jccb (Assembler::equal, DONE_LABEL);
-
-      // Recursive locking
-      masm.subptr(tmpReg, rsp);
-      masm.andptr(tmpReg, 0xFFFFF003 );
-      masm.movptr(Address(boxReg, 0), tmpReg);
-      if (_counters != NULL) {
-        masm.cond_inc32(Assembler::equal,
-                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
-      }
-      masm.jmp  (DONE_LABEL) ;
-
-      masm.bind (IsInflated) ;
-
-      // The object is inflated.
-      //
-      // TODO-FIXME: eliminate the ugly use of manifest constants:
-      //   Use markOopDesc::monitor_value instead of "2".
-      //   use markOop::unused_mark() instead of "3".
-      // The tmpReg value is an objectMonitor reference ORed with
-      // markOopDesc::monitor_value (2).   We can either convert tmpReg to an
-      // objectmonitor pointer by masking off the "2" bit or we can just
-      // use tmpReg as an objectmonitor pointer but bias the objectmonitor
-      // field offsets with "-2" to compensate for and annul the low-order tag bit.
-      //
-      // I use the latter as it avoids AGI stalls.
-      // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
-      // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
-      //
-      #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
-
-      // boxReg refers to the on-stack BasicLock in the current frame.
-      // We'd like to write:
-      //   set box->_displaced_header = markOop::unused_mark().  Any non-0 value suffices.
-      // This is convenient but results a ST-before-CAS penalty.  The following CAS suffers
-      // additional latency as we have another ST in the store buffer that must drain.
-
-      if (EmitSync & 8192) { 
-         masm.movptr(Address(boxReg, 0), 3) ;            // results in ST-before-CAS penalty
-         masm.get_thread (scrReg) ; 
-         masm.movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2] 
-         masm.movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
-         if (os::is_MP()) { masm.lock(); } 
-         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
-      } else 
-      if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
-         masm.movptr(scrReg, boxReg) ; 
-         masm.movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2] 
-
-         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
-         if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
-            // prefetchw [eax + Offset(_owner)-2]
-            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
-         }
-
-         if ((EmitSync & 64) == 0) {
-           // Optimistic form: consider XORL tmpReg,tmpReg
-           masm.movptr(tmpReg, NULL_WORD) ; 
-         } else { 
-           // Can suffer RTS->RTO upgrades on shared or cold $ lines
-           // Test-And-CAS instead of CAS
-           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
-           masm.testptr(tmpReg, tmpReg) ;                   // Locked ? 
-           masm.jccb  (Assembler::notZero, DONE_LABEL) ;                   
-         }
-
-         // Appears unlocked - try to swing _owner from null to non-null.
-         // Ideally, I'd manifest "Self" with get_thread and then attempt
-         // to CAS the register containing Self into m->Owner.
-         // But we don't have enough registers, so instead we can either try to CAS
-         // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
-         // we later store "Self" into m->Owner.  Transiently storing a stack address
-         // (rsp or the address of the box) into  m->owner is harmless.
-         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
-         if (os::is_MP()) { masm.lock();  }
-         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
-         masm.movptr(Address(scrReg, 0), 3) ;          // box->_displaced_header = 3
-         masm.jccb  (Assembler::notZero, DONE_LABEL) ; 
-         masm.get_thread (scrReg) ;                    // beware: clobbers ICCs
-         masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ; 
-         masm.xorptr(boxReg, boxReg) ;                 // set icc.ZFlag = 1 to indicate success
-                       
-         // If the CAS fails we can either retry or pass control to the slow-path.  
-         // We use the latter tactic.  
-         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
-         // If the CAS was successful ...
-         //   Self has acquired the lock
-         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
-         // Intentional fall-through into DONE_LABEL ...
-      } else {
-         masm.movptr(Address(boxReg, 0), 3) ;       // results in ST-before-CAS penalty
-         masm.movptr(boxReg, tmpReg) ; 
-
-         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
-         if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
-            // prefetchw [eax + Offset(_owner)-2]
-            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
-         }
-
-         if ((EmitSync & 64) == 0) {
-           // Optimistic form
-           masm.xorptr  (tmpReg, tmpReg) ; 
-         } else { 
-           // Can suffer RTS->RTO upgrades on shared or cold $ lines
-           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
-           masm.testptr(tmpReg, tmpReg) ;                   // Locked ? 
-           masm.jccb  (Assembler::notZero, DONE_LABEL) ;                   
-         }
-
-         // Appears unlocked - try to swing _owner from null to non-null.
-         // Use either "Self" (in scr) or rsp as thread identity in _owner.
-         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
-         masm.get_thread (scrReg) ;
-         if (os::is_MP()) { masm.lock(); }
-         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
-
-         // If the CAS fails we can either retry or pass control to the slow-path.
-         // We use the latter tactic.
-         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
-         // If the CAS was successful ...
-         //   Self has acquired the lock
-         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
-         // Intentional fall-through into DONE_LABEL ...
-      }
-
-      // DONE_LABEL is a hot target - we'd really like to place it at the
-      // start of cache line by padding with NOPs.
-      // See the AMD and Intel software optimization manuals for the
-      // most efficient "long" NOP encodings.
-      // Unfortunately none of our alignment mechanisms suffice.
-      masm.bind(DONE_LABEL);
-
-      // Avoid branch-to-branch on AMD processors
-      // This appears to be superstition.
-      if (EmitSync & 32) masm.nop() ;
-
-
-      // At DONE_LABEL the icc ZFlag is set as follows ...
-      // Fast_Unlock uses the same protocol.
-      // ZFlag == 1 -> Success
-      // ZFlag == 0 -> Failure - force control through the slow-path
-    }
-  %}
-
-  // obj: object to unlock
-  // box: box address (displaced header location), killed.  Must be EAX.
-  // rbx,: killed tmp; cannot be obj nor box.
-  //
-  // Some commentary on balanced locking:
-  //
-  // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
-  // Methods that don't have provably balanced locking are forced to run in the
-  // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
-  // The interpreter provides two properties:
-  // I1:  At return-time the interpreter automatically and quietly unlocks any
-  //      objects acquired the current activation (frame).  Recall that the
-  //      interpreter maintains an on-stack list of locks currently held by
-  //      a frame.
-  // I2:  If a method attempts to unlock an object that is not held by the
-  //      the frame the interpreter throws IMSX.
-  //
-  // Lets say A(), which has provably balanced locking, acquires O and then calls B().
-  // B() doesn't have provably balanced locking so it runs in the interpreter.
-  // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
-  // is still locked by A().
-  //
-  // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
-  // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
-  // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
-  // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
-
-  enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{
-
-    Register objReg = as_Register($obj$$reg);
-    Register boxReg = as_Register($box$$reg);
-    Register tmpReg = as_Register($tmp$$reg);
-
-    guarantee (objReg != boxReg, "") ;
-    guarantee (objReg != tmpReg, "") ;
-    guarantee (boxReg != tmpReg, "") ;
-    guarantee (boxReg == as_Register(EAX_enc), "") ;
-    MacroAssembler masm(&cbuf);
-
-    if (EmitSync & 4) {
-      // Disable - inhibit all inlining.  Force control through the slow-path
-      masm.cmpptr (rsp, 0) ; 
-    } else 
-    if (EmitSync & 8) {
-      Label DONE_LABEL ;
-      if (UseBiasedLocking) {
-         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
-      }
-      // classic stack-locking code ...
-      masm.movptr(tmpReg, Address(boxReg, 0)) ;
-      masm.testptr(tmpReg, tmpReg) ;
-      masm.jcc   (Assembler::zero, DONE_LABEL) ;
-      if (os::is_MP()) { masm.lock(); }
-      masm.cmpxchgptr(tmpReg, Address(objReg, 0));          // Uses EAX which is box
-      masm.bind(DONE_LABEL);
-    } else {
-      Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
-
-      // Critically, the biased locking test must have precedence over
-      // and appear before the (box->dhw == 0) recursive stack-lock test.
-      if (UseBiasedLocking && !UseOptoBiasInlining) {
-         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
-      }
-      
-      masm.cmpptr(Address(boxReg, 0), 0) ;            // Examine the displaced header
-      masm.movptr(tmpReg, Address(objReg, 0)) ;       // Examine the object's markword
-      masm.jccb  (Assembler::zero, DONE_LABEL) ;      // 0 indicates recursive stack-lock
-
-      masm.testptr(tmpReg, 0x02) ;                     // Inflated? 
-      masm.jccb  (Assembler::zero, Stacked) ;
-
-      masm.bind  (Inflated) ;
-      // It's inflated.
-      // Despite our balanced locking property we still check that m->_owner == Self
-      // as java routines or native JNI code called by this thread might
-      // have released the lock.
-      // Refer to the comments in synchronizer.cpp for how we might encode extra
-      // state in _succ so we can avoid fetching EntryList|cxq.
-      //
-      // I'd like to add more cases in fast_lock() and fast_unlock() --
-      // such as recursive enter and exit -- but we have to be wary of
-      // I$ bloat, T$ effects and BP$ effects.
-      //
-      // If there's no contention try a 1-0 exit.  That is, exit without
-      // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
-      // we detect and recover from the race that the 1-0 exit admits.
-      //
-      // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
-      // before it STs null into _owner, releasing the lock.  Updates
-      // to data protected by the critical section must be visible before
-      // we drop the lock (and thus before any other thread could acquire
-      // the lock and observe the fields protected by the lock).
-      // IA32's memory-model is SPO, so STs are ordered with respect to
-      // each other and there's no need for an explicit barrier (fence).
-      // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
-
-      masm.get_thread (boxReg) ;
-      if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
-        // prefetchw [ebx + Offset(_owner)-2]
-        masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
-      }
-
-      // Note that we could employ various encoding schemes to reduce
-      // the number of loads below (currently 4) to just 2 or 3.
-      // Refer to the comments in synchronizer.cpp.
-      // In practice the chain of fetches doesn't seem to impact performance, however.
-      if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
-         // Attempt to reduce branch density - AMD's branch predictor.
-         masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  
-         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
-         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
-         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
-         masm.jccb  (Assembler::notZero, DONE_LABEL) ; 
-         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
-         masm.jmpb  (DONE_LABEL) ; 
-      } else { 
-         masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  
-         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
-         masm.jccb  (Assembler::notZero, DONE_LABEL) ; 
-         masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
-         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
-         masm.jccb  (Assembler::notZero, CheckSucc) ; 
-         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
-         masm.jmpb  (DONE_LABEL) ; 
-      }
-
-      // The Following code fragment (EmitSync & 65536) improves the performance of
-      // contended applications and contended synchronization microbenchmarks.
-      // Unfortunately the emission of the code - even though not executed - causes regressions
-      // in scimark and jetstream, evidently because of $ effects.  Replacing the code
-      // with an equal number of never-executed NOPs results in the same regression.
-      // We leave it off by default.
-
-      if ((EmitSync & 65536) != 0) {
-         Label LSuccess, LGoSlowPath ;
-
-         masm.bind  (CheckSucc) ;
-
-         // Optional pre-test ... it's safe to elide this
-         if ((EmitSync & 16) == 0) { 
-            masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; 
-            masm.jccb  (Assembler::zero, LGoSlowPath) ; 
-         }
-
-         // We have a classic Dekker-style idiom:
-         //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
-         // There are a number of ways to implement the barrier:
-         // (1) lock:andl &m->_owner, 0
-         //     is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
-         //     LOCK: ANDL [ebx+Offset(_Owner)-2], 0
-         //     Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
-         // (2) If supported, an explicit MFENCE is appealing.
-         //     In older IA32 processors MFENCE is slower than lock:add or xchg
-         //     particularly if the write-buffer is full as might be the case if
-         //     if stores closely precede the fence or fence-equivalent instruction.
-         //     In more modern implementations MFENCE appears faster, however.
-         // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
-         //     The $lines underlying the top-of-stack should be in M-state.
-         //     The locked add instruction is serializing, of course.
-         // (4) Use xchg, which is serializing
-         //     mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
-         // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
-         //     The integer condition codes will tell us if succ was 0.
-         //     Since _succ and _owner should reside in the same $line and
-         //     we just stored into _owner, it's likely that the $line
-         //     remains in M-state for the lock:orl.
-         //
-         // We currently use (3), although it's likely that switching to (2)
-         // is correct for the future.
-            
-         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
-         if (os::is_MP()) { 
-            if (VM_Version::supports_sse2() && 1 == FenceInstruction) { 
-              masm.mfence();
-            } else { 
-              masm.lock () ; masm.addptr(Address(rsp, 0), 0) ; 
-            }
-         }
-         // Ratify _succ remains non-null
-         masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; 
-         masm.jccb  (Assembler::notZero, LSuccess) ; 
-
-         masm.xorptr(boxReg, boxReg) ;                  // box is really EAX
-         if (os::is_MP()) { masm.lock(); }
-         masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
-         masm.jccb  (Assembler::notEqual, LSuccess) ;
-         // Since we're low on registers we installed rsp as a placeholding in _owner.
-         // Now install Self over rsp.  This is safe as we're transitioning from
-         // non-null to non=null
-         masm.get_thread (boxReg) ;
-         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
-         // Intentional fall-through into LGoSlowPath ...
-
-         masm.bind  (LGoSlowPath) ; 
-         masm.orptr(boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
-         masm.jmpb  (DONE_LABEL) ; 
-
-         masm.bind  (LSuccess) ; 
-         masm.xorptr(boxReg, boxReg) ;                 // set ICC.ZF=1 to indicate success
-         masm.jmpb  (DONE_LABEL) ; 
-      }
-
-      masm.bind (Stacked) ;
-      // It's not inflated and it's not recursively stack-locked and it's not biased.
-      // It must be stack-locked.
-      // Try to reset the header to displaced header.
-      // The "box" value on the stack is stable, so we can reload
-      // and be assured we observe the same value as above.
-      masm.movptr(tmpReg, Address(boxReg, 0)) ;
-      if (os::is_MP()) {   masm.lock();    }
-      masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
-      // Intention fall-thru into DONE_LABEL
-
-
-      // DONE_LABEL is a hot target - we'd really like to place it at the
-      // start of cache line by padding with NOPs.
-      // See the AMD and Intel software optimization manuals for the
-      // most efficient "long" NOP encodings.
-      // Unfortunately none of our alignment mechanisms suffice.
-      if ((EmitSync & 65536) == 0) {
-         masm.bind (CheckSucc) ;
-      }
-      masm.bind(DONE_LABEL);
-
-      // Avoid branch to branch on AMD processors
-      if (EmitSync & 32768) { masm.nop() ; }
-    }
-  %}
-
-
   enc_class enc_pop_rdx() %{
     emit_opcode(cbuf,0x5A);
   %}
@@ -7545,44 +6996,6 @@
 //----------Arithmetic Instructions--------------------------------------------
 //----------Addition Instructions----------------------------------------------
 
-instruct addExactI_eReg(eAXRegI dst, rRegI src, eFlagsReg cr)
-%{
-  match(AddExactI dst src);
-  effect(DEF cr);
-
-  format %{ "ADD    $dst, $src\t# addExact int" %}
-  ins_encode %{
-    __ addl($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct addExactI_eReg_imm(eAXRegI dst, immI src, eFlagsReg cr)
-%{
-  match(AddExactI dst src);
-  effect(DEF cr);
-
-  format %{ "ADD    $dst, $src\t# addExact int" %}
-  ins_encode %{
-    __ addl($dst$$Register, $src$$constant);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct addExactI_eReg_mem(eAXRegI dst, memory src, eFlagsReg cr)
-%{
-  match(AddExactI dst (LoadI src));
-  effect(DEF cr);
-
-  ins_cost(125);
-  format %{ "ADD    $dst,$src\t# addExact int" %}
-  ins_encode %{
-    __ addl($dst$$Register, $src$$Address);
-  %}
-  ins_pipe( ialu_reg_mem );
-%}
-
-
 // Integer Addition Instructions
 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (AddI dst src));
@@ -7892,43 +7305,6 @@
 
 //----------Subtraction Instructions-------------------------------------------
 
-instruct subExactI_eReg(eAXRegI dst, rRegI src, eFlagsReg cr)
-%{
-  match(SubExactI dst src);
-  effect(DEF cr);
-
-  format %{ "SUB    $dst, $src\t# subExact int" %}
-  ins_encode %{
-    __ subl($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct subExactI_eReg_imm(eAXRegI dst, immI src, eFlagsReg cr)
-%{
-  match(SubExactI dst src);
-  effect(DEF cr);
-
-  format %{ "SUB    $dst, $src\t# subExact int" %}
-  ins_encode %{
-    __ subl($dst$$Register, $src$$constant);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct subExactI_eReg_mem(eAXRegI dst, memory src, eFlagsReg cr)
-%{
-  match(SubExactI dst (LoadI src));
-  effect(DEF cr);
-
-  ins_cost(125);
-  format %{ "SUB    $dst,$src\t# subExact int" %}
-  ins_encode %{
-    __ subl($dst$$Register, $src$$Address);
-  %}
-  ins_pipe( ialu_reg_mem );
-%}
-
 // Integer Subtraction Instructions
 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (SubI dst src));
@@ -7997,17 +7373,6 @@
   ins_pipe( ialu_reg );
 %}
 
-instruct negExactI_eReg(eAXRegI dst, eFlagsReg cr) %{
-  match(NegExactI dst);
-  effect(DEF cr);
-
-  format %{ "NEG    $dst\t# negExact int"%}
-  ins_encode %{
-    __ negl($dst$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
 //----------Multiplication/Division Instructions-------------------------------
 // Integer Multiplication Instructions
 // Multiply Register
@@ -8219,46 +7584,6 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct mulExactI_eReg(eAXRegI dst, rRegI src, eFlagsReg cr)
-%{
-  match(MulExactI dst src);
-  effect(DEF cr);
-
-  ins_cost(300);
-  format %{ "IMUL   $dst, $src\t# mulExact int" %}
-  ins_encode %{
-    __ imull($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg_alu0);
-%}
-
-instruct mulExactI_eReg_imm(eAXRegI dst, rRegI src, immI imm, eFlagsReg cr)
-%{
-  match(MulExactI src imm);
-  effect(DEF cr);
-
-  ins_cost(300);
-  format %{ "IMUL   $dst, $src, $imm\t# mulExact int" %}
-  ins_encode %{
-    __ imull($dst$$Register, $src$$Register, $imm$$constant);
-  %}
-  ins_pipe(ialu_reg_reg_alu0);
-%}
-
-instruct mulExactI_eReg_mem(eAXRegI dst, memory src, eFlagsReg cr)
-%{
-  match(MulExactI dst (LoadI src));
-  effect(DEF cr);
-
-  ins_cost(350);
-  format %{ "IMUL   $dst, $src\t# mulExact int" %}
-  ins_encode %{
-    __ imull($dst$$Register, $src$$Address);
-  %}
-  ins_pipe(ialu_reg_mem_alu0);
-%}
-
-
 // Integer DIV with Register
 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
   match(Set rax (DivI rax div));
@@ -9124,6 +8449,91 @@
 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 */
+//----------Overflow Math Instructions-----------------------------------------
+
+instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
+%{
+  match(Set cr (OverflowAddI op1 op2));
+  effect(DEF cr, USE_KILL op1, USE op2);
+
+  format %{ "ADD    $op1, $op2\t# overflow check int" %}
+
+  ins_encode %{
+    __ addl($op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
+%{
+  match(Set cr (OverflowAddI op1 op2));
+  effect(DEF cr, USE_KILL op1, USE op2);
+
+  format %{ "ADD    $op1, $op2\t# overflow check int" %}
+
+  ins_encode %{
+    __ addl($op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
+%{
+  match(Set cr (OverflowSubI op1 op2));
+
+  format %{ "CMP    $op1, $op2\t# overflow check int" %}
+  ins_encode %{
+    __ cmpl($op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
+%{
+  match(Set cr (OverflowSubI op1 op2));
+
+  format %{ "CMP    $op1, $op2\t# overflow check int" %}
+  ins_encode %{
+    __ cmpl($op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
+%{
+  match(Set cr (OverflowSubI zero op2));
+  effect(DEF cr, USE_KILL op2);
+
+  format %{ "NEG    $op2\t# overflow check int" %}
+  ins_encode %{
+    __ negl($op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
+%{
+  match(Set cr (OverflowMulI op1 op2));
+  effect(DEF cr, USE_KILL op1, USE op2);
+
+  format %{ "IMUL    $op1, $op2\t# overflow check int" %}
+  ins_encode %{
+    __ imull($op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg_alu0);
+%}
+
+instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
+%{
+  match(Set cr (OverflowMulI op1 op2));
+  effect(DEF cr, TEMP tmp, USE op1, USE op2);
+
+  format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
+  ins_encode %{
+    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_reg_reg_alu0);
+%}
 
 //----------Long Instructions------------------------------------------------
 // Add Long Register with Register
@@ -13157,23 +12567,26 @@
 
 // inlined locking and unlocking
 
-
-instruct cmpFastLock( eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
-  match( Set cr (FastLock object box) );
-  effect( TEMP tmp, TEMP scr, USE_KILL box );
+instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
+  match(Set cr (FastLock object box));
+  effect(TEMP tmp, TEMP scr, USE_KILL box);
   ins_cost(300);
   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
-  ins_encode( Fast_Lock(object,box,tmp,scr) );
-  ins_pipe( pipe_slow );
-%}
-
-instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
-  match( Set cr (FastUnlock object box) );
-  effect( TEMP tmp, USE_KILL box );
+  ins_encode %{
+    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register, _counters);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
+  match(Set cr (FastUnlock object box));
+  effect(TEMP tmp, USE_KILL box);
   ins_cost(300);
   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
-  ins_encode( Fast_Unlock(object,box,tmp) );
-  ins_pipe( pipe_slow );
+  ins_encode %{
+    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
+  %}
+  ins_pipe(pipe_slow);
 %}
 
 
--- a/src/cpu/x86/vm/x86_64.ad	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/cpu/x86/vm/x86_64.ad	Fri Feb 28 09:30:20 2014 -0800
@@ -1657,18 +1657,6 @@
   return PTR_RBP_REG_mask();
 }
 
-const RegMask Matcher::mathExactI_result_proj_mask() {
-  return INT_RAX_REG_mask();
-}
-
-const RegMask Matcher::mathExactL_result_proj_mask() {
-  return LONG_RAX_REG_mask();
-}
-
-const RegMask Matcher::mathExactI_flags_proj_mask() {
-  return INT_FLAGS_mask();
-}
-
 %}
 
 //----------ENCODING BLOCK-----------------------------------------------------
@@ -2599,231 +2587,6 @@
   %}
 
 
-  // obj: object to lock
-  // box: box address (header location) -- killed
-  // tmp: rax -- killed
-  // scr: rbx -- killed
-  //
-  // What follows is a direct transliteration of fast_lock() and fast_unlock()
-  // from i486.ad.  See that file for comments.
-  // TODO: where possible switch from movq (r, 0) to movl(r,0) and
-  // use the shorter encoding.  (Movl clears the high-order 32-bits).
-
-
-  enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
-  %{
-    Register objReg = as_Register((int)$obj$$reg);
-    Register boxReg = as_Register((int)$box$$reg);
-    Register tmpReg = as_Register($tmp$$reg);
-    Register scrReg = as_Register($scr$$reg);
-    MacroAssembler masm(&cbuf);
-
-    // Verify uniqueness of register assignments -- necessary but not sufficient
-    assert (objReg != boxReg && objReg != tmpReg &&
-            objReg != scrReg && tmpReg != scrReg, "invariant") ;
-
-    if (_counters != NULL) {
-      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
-    }
-    if (EmitSync & 1) {
-        // Without cast to int32_t a movptr will destroy r10 which is typically obj
-        masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
-        masm.cmpptr(rsp, (int32_t)NULL_WORD) ;
-    } else
-    if (EmitSync & 2) {
-        Label DONE_LABEL;
-        if (UseBiasedLocking) {
-           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
-          masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
-        }
-        // QQQ was movl...
-        masm.movptr(tmpReg, 0x1);
-        masm.orptr(tmpReg, Address(objReg, 0));
-        masm.movptr(Address(boxReg, 0), tmpReg);
-        if (os::is_MP()) {
-          masm.lock();
-        }
-        masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
-        masm.jcc(Assembler::equal, DONE_LABEL);
-
-        // Recursive locking
-        masm.subptr(tmpReg, rsp);
-        masm.andptr(tmpReg, 7 - os::vm_page_size());
-        masm.movptr(Address(boxReg, 0), tmpReg);
-
-        masm.bind(DONE_LABEL);
-        masm.nop(); // avoid branch to branch
-    } else {
-        Label DONE_LABEL, IsInflated, Egress;
-
-        masm.movptr(tmpReg, Address(objReg, 0)) ;
-        masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
-        masm.jcc   (Assembler::notZero, IsInflated) ;
-
-        // it's stack-locked, biased or neutral
-        // TODO: optimize markword triage order to reduce the number of
-        // conditional branches in the most common cases.
-        // Beware -- there's a subtle invariant that fetch of the markword
-        // at [FETCH], below, will never observe a biased encoding (*101b).
-        // If this invariant is not held we'll suffer exclusion (safety) failure.
-
-        if (UseBiasedLocking && !UseOptoBiasInlining) {
-          masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
-          masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
-        }
-
-        // was q will it destroy high?
-        masm.orl   (tmpReg, 1) ;
-        masm.movptr(Address(boxReg, 0), tmpReg) ;
-        if (os::is_MP()) { masm.lock(); }
-        masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
-        if (_counters != NULL) {
-           masm.cond_inc32(Assembler::equal,
-                           ExternalAddress((address) _counters->fast_path_entry_count_addr()));
-        }
-        masm.jcc   (Assembler::equal, DONE_LABEL);
-
-        // Recursive locking
-        masm.subptr(tmpReg, rsp);
-        masm.andptr(tmpReg, 7 - os::vm_page_size());
-        masm.movptr(Address(boxReg, 0), tmpReg);
-        if (_counters != NULL) {
-           masm.cond_inc32(Assembler::equal,
-                           ExternalAddress((address) _counters->fast_path_entry_count_addr()));
-        }
-        masm.jmp   (DONE_LABEL) ;
-
-        masm.bind  (IsInflated) ;
-        // It's inflated
-
-        // TODO: someday avoid the ST-before-CAS penalty by
-        // relocating (deferring) the following ST.
-        // We should also think about trying a CAS without having
-        // fetched _owner.  If the CAS is successful we may
-        // avoid an RTO->RTS upgrade on the $line.
-        // Without cast to int32_t a movptr will destroy r10 which is typically obj
-        masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
-
-        masm.mov    (boxReg, tmpReg) ;
-        masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
-        masm.testptr(tmpReg, tmpReg) ;
-        masm.jcc    (Assembler::notZero, DONE_LABEL) ;
-
-        // It's inflated and appears unlocked
-        if (os::is_MP()) { masm.lock(); }
-        masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
-        // Intentional fall-through into DONE_LABEL ...
-
-        masm.bind  (DONE_LABEL) ;
-        masm.nop   () ;                 // avoid jmp to jmp
-    }
-  %}
-
-  // obj: object to unlock
-  // box: box address (displaced header location), killed
-  // RBX: killed tmp; cannot be obj nor box
-  enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
-  %{
-
-    Register objReg = as_Register($obj$$reg);
-    Register boxReg = as_Register($box$$reg);
-    Register tmpReg = as_Register($tmp$$reg);
-    MacroAssembler masm(&cbuf);
-
-    if (EmitSync & 4) {
-       masm.cmpptr(rsp, 0) ;
-    } else
-    if (EmitSync & 8) {
-       Label DONE_LABEL;
-       if (UseBiasedLocking) {
-         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
-       }
-
-       // Check whether the displaced header is 0
-       //(=> recursive unlock)
-       masm.movptr(tmpReg, Address(boxReg, 0));
-       masm.testptr(tmpReg, tmpReg);
-       masm.jcc(Assembler::zero, DONE_LABEL);
-
-       // If not recursive lock, reset the header to displaced header
-       if (os::is_MP()) {
-         masm.lock();
-       }
-       masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
-       masm.bind(DONE_LABEL);
-       masm.nop(); // avoid branch to branch
-    } else {
-       Label DONE_LABEL, Stacked, CheckSucc ;
-
-       if (UseBiasedLocking && !UseOptoBiasInlining) {
-         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
-       }
-
-       masm.movptr(tmpReg, Address(objReg, 0)) ;
-       masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ;
-       masm.jcc   (Assembler::zero, DONE_LABEL) ;
-       masm.testl (tmpReg, 0x02) ;
-       masm.jcc   (Assembler::zero, Stacked) ;
-
-       // It's inflated
-       masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
-       masm.xorptr(boxReg, r15_thread) ;
-       masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
-       masm.jcc   (Assembler::notZero, DONE_LABEL) ;
-       masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
-       masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
-       masm.jcc   (Assembler::notZero, CheckSucc) ;
-       masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
-       masm.jmp   (DONE_LABEL) ;
-
-       if ((EmitSync & 65536) == 0) {
-         Label LSuccess, LGoSlowPath ;
-         masm.bind  (CheckSucc) ;
-         masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
-         masm.jcc   (Assembler::zero, LGoSlowPath) ;
-
-         // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
-         // the explicit ST;MEMBAR combination, but masm doesn't currently support
-         // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
-         // are all faster when the write buffer is populated.
-         masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
-         if (os::is_MP()) {
-            masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
-         }
-         masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
-         masm.jcc   (Assembler::notZero, LSuccess) ;
-
-         masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
-         if (os::is_MP()) { masm.lock(); }
-         masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
-         masm.jcc   (Assembler::notEqual, LSuccess) ;
-         // Intentional fall-through into slow-path
-
-         masm.bind  (LGoSlowPath) ;
-         masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
-         masm.jmp   (DONE_LABEL) ;
-
-         masm.bind  (LSuccess) ;
-         masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
-         masm.jmp   (DONE_LABEL) ;
-       }
-
-       masm.bind  (Stacked) ;
-       masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
-       if (os::is_MP()) { masm.lock(); }
-       masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
-
-       if (EmitSync & 65536) {
-          masm.bind (CheckSucc) ;
-       }
-       masm.bind(DONE_LABEL);
-       if (EmitSync & 32768) {
-          masm.nop();                      // avoid branch to branch
-       }
-    }
-  %}
-
-
   enc_class enc_rethrow()
   %{
     cbuf.set_insts_mark();
@@ -6963,82 +6726,6 @@
 //----------Arithmetic Instructions--------------------------------------------
 //----------Addition Instructions----------------------------------------------
 
-instruct addExactI_rReg(rax_RegI dst, rRegI src, rFlagsReg cr)
-%{
-  match(AddExactI dst src);
-  effect(DEF cr);
-
-  format %{ "addl    $dst, $src\t# addExact int" %}
-  ins_encode %{
-    __ addl($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct addExactI_rReg_imm(rax_RegI dst, immI src, rFlagsReg cr)
-%{
-  match(AddExactI dst src);
-  effect(DEF cr);
-
-  format %{ "addl    $dst, $src\t# addExact int" %}
-  ins_encode %{
-    __ addl($dst$$Register, $src$$constant);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct addExactI_rReg_mem(rax_RegI dst, memory src, rFlagsReg cr)
-%{
-  match(AddExactI dst (LoadI src));
-  effect(DEF cr);
-
-  ins_cost(125); // XXX
-  format %{ "addl    $dst, $src\t# addExact int" %}
-  ins_encode %{
-    __ addl($dst$$Register, $src$$Address);
-  %}
-
-  ins_pipe(ialu_reg_mem);
-%}
-
-instruct addExactL_rReg(rax_RegL dst, rRegL src, rFlagsReg cr)
-%{
-  match(AddExactL dst src);
-  effect(DEF cr);
-
-  format %{ "addq    $dst, $src\t# addExact long" %}
-  ins_encode %{
-    __ addq($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct addExactL_rReg_imm(rax_RegL dst, immL32 src, rFlagsReg cr)
-%{
-  match(AddExactL dst src);
-  effect(DEF cr);
-
-  format %{ "addq    $dst, $src\t# addExact long" %}
-  ins_encode %{
-    __ addq($dst$$Register, $src$$constant);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct addExactL_rReg_mem(rax_RegL dst, memory src, rFlagsReg cr)
-%{
-  match(AddExactL dst (LoadL src));
-  effect(DEF cr);
-
-  ins_cost(125); // XXX
-  format %{ "addq    $dst, $src\t# addExact long" %}
-  ins_encode %{
-    __ addq($dst$$Register, $src$$Address);
-  %}
-
-  ins_pipe(ialu_reg_mem);
-%}
-
 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 %{
   match(Set dst (AddI dst src));
@@ -7651,80 +7338,6 @@
   ins_pipe(ialu_mem_imm);
 %}
 
-instruct subExactI_rReg(rax_RegI dst, rRegI src, rFlagsReg cr)
-%{
-  match(SubExactI dst src);
-  effect(DEF cr);
-
-  format %{ "subl    $dst, $src\t# subExact int" %}
-  ins_encode %{
-    __ subl($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct subExactI_rReg_imm(rax_RegI dst, immI src, rFlagsReg cr)
-%{
-  match(SubExactI dst src);
-  effect(DEF cr);
-
-  format %{ "subl    $dst, $src\t# subExact int" %}
-  ins_encode %{
-    __ subl($dst$$Register, $src$$constant);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct subExactI_rReg_mem(rax_RegI dst, memory src, rFlagsReg cr)
-%{
-  match(SubExactI dst (LoadI src));
-  effect(DEF cr);
-
-  ins_cost(125);
-  format %{ "subl    $dst, $src\t# subExact int" %}
-  ins_encode %{
-    __ subl($dst$$Register, $src$$Address);
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
-instruct subExactL_rReg(rax_RegL dst, rRegL src, rFlagsReg cr)
-%{
-  match(SubExactL dst src);
-  effect(DEF cr);
-
-  format %{ "subq    $dst, $src\t# subExact long" %}
-  ins_encode %{
-    __ subq($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct subExactL_rReg_imm(rax_RegL dst, immL32 src, rFlagsReg cr)
-%{
-  match(SubExactL dst (LoadL src));
-  effect(DEF cr);
-
-  format %{ "subq    $dst, $src\t# subExact long" %}
-  ins_encode %{
-    __ subq($dst$$Register, $src$$constant);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct subExactL_rReg_mem(rax_RegI dst, memory src, rFlagsReg cr)
-%{
-  match(SubExactI dst src);
-  effect(DEF cr);
-
-  ins_cost(125);
-  format %{ "subq    $dst, $src\t# subExact long" %}
-  ins_encode %{
-    __ subq($dst$$Register, $src$$Address);
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 %{
   match(Set dst (SubL dst src));
@@ -7841,31 +7454,6 @@
   ins_pipe(ialu_reg);
 %}
 
-instruct negExactI_rReg(rax_RegI dst, rFlagsReg cr)
-%{
-  match(NegExactI dst);
-  effect(KILL cr);
-
-  format %{ "negl    $dst\t# negExact int" %}
-  ins_encode %{
-    __ negl($dst$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct negExactL_rReg(rax_RegL dst, rFlagsReg cr)
-%{
-  match(NegExactL dst);
-  effect(KILL cr);
-
-  format %{ "negq    $dst\t# negExact long" %}
-  ins_encode %{
-    __ negq($dst$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-
 //----------Multiplication/Division Instructions-------------------------------
 // Integer Multiplication Instructions
 // Multiply Register
@@ -7982,86 +7570,6 @@
   ins_pipe(ialu_reg_reg_alu0);
 %}
 
-
-instruct mulExactI_rReg(rax_RegI dst, rRegI src, rFlagsReg cr)
-%{
-  match(MulExactI dst src);
-  effect(DEF cr);
-
-  ins_cost(300);
-  format %{ "imull   $dst, $src\t# mulExact int" %}
-  ins_encode %{
-    __ imull($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg_alu0);
-%}
-
-
-instruct mulExactI_rReg_imm(rax_RegI dst, rRegI src, immI imm, rFlagsReg cr)
-%{
-  match(MulExactI src imm);
-  effect(DEF cr);
-
-  ins_cost(300);
-  format %{ "imull   $dst, $src, $imm\t# mulExact int" %}
-  ins_encode %{
-    __ imull($dst$$Register, $src$$Register, $imm$$constant);
-  %}
-  ins_pipe(ialu_reg_reg_alu0);
-%}
-
-instruct mulExactI_rReg_mem(rax_RegI dst, memory src, rFlagsReg cr)
-%{
-  match(MulExactI dst (LoadI src));
-  effect(DEF cr);
-
-  ins_cost(350);
-  format %{ "imull   $dst, $src\t# mulExact int" %}
-  ins_encode %{
-    __ imull($dst$$Register, $src$$Address);
-  %}
-  ins_pipe(ialu_reg_mem_alu0);
-%}
-
-instruct mulExactL_rReg(rax_RegL dst, rRegL src, rFlagsReg cr)
-%{
-  match(MulExactL dst src);
-  effect(DEF cr);
-
-  ins_cost(300);
-  format %{ "imulq   $dst, $src\t# mulExact long" %}
-  ins_encode %{
-    __ imulq($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg_alu0);
-%}
-
-instruct mulExactL_rReg_imm(rax_RegL dst, rRegL src, immL32 imm, rFlagsReg cr)
-%{
-  match(MulExactL src imm);
-  effect(DEF cr);
-
-  ins_cost(300);
-  format %{ "imulq   $dst, $src, $imm\t# mulExact long" %}
-  ins_encode %{
-    __ imulq($dst$$Register, $src$$Register, $imm$$constant);
-  %}
-  ins_pipe(ialu_reg_reg_alu0);
-%}
-
-instruct mulExactL_rReg_mem(rax_RegL dst, memory src, rFlagsReg cr)
-%{
-  match(MulExactL dst (LoadL src));
-  effect(DEF cr);
-
-  ins_cost(350);
-  format %{ "imulq   $dst, $src\t# mulExact long" %}
-  ins_encode %{
-    __ imulq($dst$$Register, $src$$Address);
-  %}
-  ins_pipe(ialu_reg_mem_alu0);
-%}
-
 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
                    rFlagsReg cr)
 %{
@@ -10670,6 +10178,174 @@
   ins_pipe( pipe_slow );
 %}
 
+//----------Overflow Math Instructions-----------------------------------------
+
+instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
+%{
+  match(Set cr (OverflowAddI op1 op2));
+  effect(DEF cr, USE_KILL op1, USE op2);
+
+  format %{ "addl    $op1, $op2\t# overflow check int" %}
+
+  ins_encode %{
+    __ addl($op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
+%{
+  match(Set cr (OverflowAddI op1 op2));
+  effect(DEF cr, USE_KILL op1, USE op2);
+
+  format %{ "addl    $op1, $op2\t# overflow check int" %}
+
+  ins_encode %{
+    __ addl($op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
+%{
+  match(Set cr (OverflowAddL op1 op2));
+  effect(DEF cr, USE_KILL op1, USE op2);
+
+  format %{ "addq    $op1, $op2\t# overflow check long" %}
+  ins_encode %{
+    __ addq($op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
+%{
+  match(Set cr (OverflowAddL op1 op2));
+  effect(DEF cr, USE_KILL op1, USE op2);
+
+  format %{ "addq    $op1, $op2\t# overflow check long" %}
+  ins_encode %{
+    __ addq($op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
+%{
+  match(Set cr (OverflowSubI op1 op2));
+
+  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
+  ins_encode %{
+    __ cmpl($op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
+%{
+  match(Set cr (OverflowSubI op1 op2));
+
+  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
+  ins_encode %{
+    __ cmpl($op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
+%{
+  match(Set cr (OverflowSubL op1 op2));
+
+  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
+  ins_encode %{
+    __ cmpq($op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
+%{
+  match(Set cr (OverflowSubL op1 op2));
+
+  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
+  ins_encode %{
+    __ cmpq($op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowNegI_rReg(rFlagsReg cr, immI0 zero, rax_RegI op2)
+%{
+  match(Set cr (OverflowSubI zero op2));
+  effect(DEF cr, USE_KILL op2);
+
+  format %{ "negl    $op2\t# overflow check int" %}
+  ins_encode %{
+    __ negl($op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
+%{
+  match(Set cr (OverflowSubL zero op2));
+  effect(DEF cr, USE_KILL op2);
+
+  format %{ "negq    $op2\t# overflow check long" %}
+  ins_encode %{
+    __ negq($op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
+%{
+  match(Set cr (OverflowMulI op1 op2));
+  effect(DEF cr, USE_KILL op1, USE op2);
+
+  format %{ "imull    $op1, $op2\t# overflow check int" %}
+  ins_encode %{
+    __ imull($op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg_alu0);
+%}
+
+instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
+%{
+  match(Set cr (OverflowMulI op1 op2));
+  effect(DEF cr, TEMP tmp, USE op1, USE op2);
+
+  format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
+  ins_encode %{
+    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_reg_reg_alu0);
+%}
+
+instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
+%{
+  match(Set cr (OverflowMulL op1 op2));
+  effect(DEF cr, USE_KILL op1, USE op2);
+
+  format %{ "imulq    $op1, $op2\t# overflow check long" %}
+  ins_encode %{
+    __ imulq($op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg_alu0);
+%}
+
+instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
+%{
+  match(Set cr (OverflowMulL op1 op2));
+  effect(DEF cr, TEMP tmp, USE op1, USE op2);
+
+  format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
+  ins_encode %{
+    __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_reg_reg_alu0);
+%}
+
 
 //----------Control Flow Instructions------------------------------------------
 // Signed compare Instructions
@@ -11453,27 +11129,25 @@
 // ============================================================================
 // inlined locking and unlocking
 
-instruct cmpFastLock(rFlagsReg cr,
-                     rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr)
-%{
+instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr) %{
   match(Set cr (FastLock object box));
   effect(TEMP tmp, TEMP scr, USE_KILL box);
-
   ins_cost(300);
   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
-  ins_encode(Fast_Lock(object, box, tmp, scr));
+  ins_encode %{
+    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register, _counters);
+  %}
   ins_pipe(pipe_slow);
 %}
 
-instruct cmpFastUnlock(rFlagsReg cr,
-                       rRegP object, rax_RegP box, rRegP tmp)
-%{
+instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
   match(Set cr (FastUnlock object box));
   effect(TEMP tmp, USE_KILL box);
-
   ins_cost(300);
   format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
-  ins_encode(Fast_Unlock(object, box, tmp));
+  ins_encode %{
+    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
 
--- a/src/os/bsd/vm/os_bsd.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/os/bsd/vm/os_bsd.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -994,7 +994,7 @@
 
 
 jlong os::javaTimeNanos() {
-  if (Bsd::supports_monotonic_clock()) {
+  if (os::supports_monotonic_clock()) {
     struct timespec tp;
     int status = Bsd::clock_gettime(CLOCK_MONOTONIC, &tp);
     assert(status == 0, "gettime error");
@@ -1010,7 +1010,7 @@
 }
 
 void os::javaTimeNanos_info(jvmtiTimerInfo *info_ptr) {
-  if (Bsd::supports_monotonic_clock()) {
+  if (os::supports_monotonic_clock()) {
     info_ptr->max_value = ALL_64_BITS;
 
     // CLOCK_MONOTONIC - amount of time since some arbitrary point in the past
@@ -2513,85 +2513,6 @@
   RESTARTABLE_RETURN_INT(::read(fd, buf, nBytes));
 }
 
-// TODO-FIXME: reconcile Solaris' os::sleep with the bsd variation.
-// Solaris uses poll(), bsd uses park().
-// Poll() is likely a better choice, assuming that Thread.interrupt()
-// generates a SIGUSRx signal. Note that SIGUSR1 can interfere with
-// SIGSEGV, see 4355769.
-
-int os::sleep(Thread* thread, jlong millis, bool interruptible) {
-  assert(thread == Thread::current(),  "thread consistency check");
-
-  ParkEvent * const slp = thread->_SleepEvent ;
-  slp->reset() ;
-  OrderAccess::fence() ;
-
-  if (interruptible) {
-    jlong prevtime = javaTimeNanos();
-
-    for (;;) {
-      if (os::is_interrupted(thread, true)) {
-        return OS_INTRPT;
-      }
-
-      jlong newtime = javaTimeNanos();
-
-      if (newtime - prevtime < 0) {
-        // time moving backwards, should only happen if no monotonic clock
-        // not a guarantee() because JVM should not abort on kernel/glibc bugs
-        assert(!Bsd::supports_monotonic_clock(), "time moving backwards");
-      } else {
-        millis -= (newtime - prevtime) / NANOSECS_PER_MILLISEC;
-      }
-
-      if(millis <= 0) {
-        return OS_OK;
-      }
-
-      prevtime = newtime;
-
-      {
-        assert(thread->is_Java_thread(), "sanity check");
-        JavaThread *jt = (JavaThread *) thread;
-        ThreadBlockInVM tbivm(jt);
-        OSThreadWaitState osts(jt->osthread(), false /* not Object.wait() */);
-
-        jt->set_suspend_equivalent();
-        // cleared by handle_special_suspend_equivalent_condition() or
-        // java_suspend_self() via check_and_wait_while_suspended()
-
-        slp->park(millis);
-
-        // were we externally suspended while we were waiting?
-        jt->check_and_wait_while_suspended();
-      }
-    }
-  } else {
-    OSThreadWaitState osts(thread->osthread(), false /* not Object.wait() */);
-    jlong prevtime = javaTimeNanos();
-
-    for (;;) {
-      // It'd be nice to avoid the back-to-back javaTimeNanos() calls on
-      // the 1st iteration ...
-      jlong newtime = javaTimeNanos();
-
-      if (newtime - prevtime < 0) {
-        // time moving backwards, should only happen if no monotonic clock
-        // not a guarantee() because JVM should not abort on kernel/glibc bugs
-        assert(!Bsd::supports_monotonic_clock(), "time moving backwards");
-      } else {
-        millis -= (newtime - prevtime) / NANOSECS_PER_MILLISEC;
-      }
-
-      if(millis <= 0) break ;
-
-      prevtime = newtime;
-      slp->park(millis);
-    }
-    return OS_OK ;
-  }
-}
-
 void os::naked_short_sleep(jlong ms) {
   struct timespec req;
 
@@ -2987,50 +2908,6 @@
   guarantee(osthread->sr.is_running(), "Must be running!");
 }
 
-////////////////////////////////////////////////////////////////////////////////
-// interrupt support
-
-void os::interrupt(Thread* thread) {
-  assert(Thread::current() == thread || Threads_lock->owned_by_self(),
-    "possibility of dangling Thread pointer");
-
-  OSThread* osthread = thread->osthread();
-
-  if (!osthread->interrupted()) {
-    osthread->set_interrupted(true);
-    // More than one thread can get here with the same value of osthread,
-    // resulting in multiple notifications.  We do, however, want the store
-    // to interrupted() to be visible to other threads before we execute unpark().
-    OrderAccess::fence();
-    ParkEvent * const slp = thread->_SleepEvent ;
-    if (slp != NULL) slp->unpark() ;
-  }
-
-  // For JSR166. Unpark even if interrupt status already was set
-  if (thread->is_Java_thread())
-    ((JavaThread*)thread)->parker()->unpark();
-
-  ParkEvent * ev = thread->_ParkEvent ;
-  if (ev != NULL) ev->unpark() ;
-
-}
-
-bool os::is_interrupted(Thread* thread, bool clear_interrupted) {
-  assert(Thread::current() == thread || Threads_lock->owned_by_self(),
-    "possibility of dangling Thread pointer");
-
-  OSThread* osthread = thread->osthread();
-
-  bool interrupted = osthread->interrupted();
-
-  if (interrupted && clear_interrupted) {
-    osthread->set_interrupted(false);
-    // consider thread->_SleepEvent->reset() ... optional optimization
-  }
-
-  return interrupted;
-}
-
 ///////////////////////////////////////////////////////////////////////////////////
 // signal handling (except suspend/resume)
 
--- a/src/os/bsd/vm/os_bsd.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/os/bsd/vm/os_bsd.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -134,10 +134,6 @@
   // Real-time clock functions
   static void clock_init(void);
 
-  static inline bool supports_monotonic_clock() {
-    return _clock_gettime != NULL;
-  }
-
   static int clock_gettime(clockid_t clock_id, struct timespec *tp) {
     return _clock_gettime ? _clock_gettime(clock_id, tp) : -1;
   }
--- a/src/os/bsd/vm/os_bsd.inline.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/os/bsd/vm/os_bsd.inline.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -286,4 +286,8 @@
   return ::setsockopt(fd, level, optname, optval, optlen);
 }
 
+inline bool os::supports_monotonic_clock() {
+  return Bsd::_clock_gettime != NULL;
+}
+
 #endif // OS_BSD_VM_OS_BSD_INLINE_HPP
--- a/src/os/linux/vm/os_linux.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/os/linux/vm/os_linux.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -1456,7 +1456,7 @@
 }
 
 jlong os::javaTimeNanos() {
-  if (Linux::supports_monotonic_clock()) {
+  if (os::supports_monotonic_clock()) {
     struct timespec tp;
     int status = Linux::clock_gettime(CLOCK_MONOTONIC, &tp);
     assert(status == 0, "gettime error");
@@ -1472,7 +1472,7 @@
 }
 
 void os::javaTimeNanos_info(jvmtiTimerInfo *info_ptr) {
-  if (Linux::supports_monotonic_clock()) {
+  if (os::supports_monotonic_clock()) {
     info_ptr->max_value = ALL_64_BITS;
 
     // CLOCK_MONOTONIC - amount of time since some arbitrary point in the past
@@ -3757,85 +3757,6 @@
   return ::read(fd, buf, nBytes);
 }
 
-// TODO-FIXME: reconcile Solaris' os::sleep with the linux variation.
-// Solaris uses poll(), linux uses park().
-// Poll() is likely a better choice, assuming that Thread.interrupt()
-// generates a SIGUSRx signal. Note that SIGUSR1 can interfere with
-// SIGSEGV, see 4355769.
-
-int os::sleep(Thread* thread, jlong millis, bool interruptible) {
-  assert(thread == Thread::current(),  "thread consistency check");
-
-  ParkEvent * const slp = thread->_SleepEvent ;
-  slp->reset() ;
-  OrderAccess::fence() ;
-
-  if (interruptible) {
-    jlong prevtime = javaTimeNanos();
-
-    for (;;) {
-      if (os::is_interrupted(thread, true)) {
-        return OS_INTRPT;
-      }
-
-      jlong newtime = javaTimeNanos();
-
-      if (newtime - prevtime < 0) {
-        // time moving backwards, should only happen if no monotonic clock
-        // not a guarantee() because JVM should not abort on kernel/glibc bugs
-        assert(!Linux::supports_monotonic_clock(), "time moving backwards");
-      } else {
-        millis -= (newtime - prevtime) / NANOSECS_PER_MILLISEC;
-      }
-
-      if(millis <= 0) {
-        return OS_OK;
-      }
-
-      prevtime = newtime;
-
-      {
-        assert(thread->is_Java_thread(), "sanity check");
-        JavaThread *jt = (JavaThread *) thread;
-        ThreadBlockInVM tbivm(jt);
-        OSThreadWaitState osts(jt->osthread(), false /* not Object.wait() */);
-
-        jt->set_suspend_equivalent();
-        // cleared by handle_special_suspend_equivalent_condition() or
-        // java_suspend_self() via check_and_wait_while_suspended()
-
-        slp->park(millis);
-
-        // were we externally suspended while we were waiting?
-        jt->check_and_wait_while_suspended();
-      }
-    }
-  } else {
-    OSThreadWaitState osts(thread->osthread(), false /* not Object.wait() */);
-    jlong prevtime = javaTimeNanos();
-
-    for (;;) {
-      // It'd be nice to avoid the back-to-back javaTimeNanos() calls on
-      // the 1st iteration ...
-      jlong newtime = javaTimeNanos();
-
-      if (newtime - prevtime < 0) {
-        // time moving backwards, should only happen if no monotonic clock
-        // not a guarantee() because JVM should not abort on kernel/glibc bugs
-        assert(!Linux::supports_monotonic_clock(), "time moving backwards");
-      } else {
-        millis -= (newtime - prevtime) / NANOSECS_PER_MILLISEC;
-      }
-
-      if(millis <= 0) break ;
-
-      prevtime = newtime;
-      slp->park(millis);
-    }
-    return OS_OK ;
-  }
-}
-
 //
 // Short sleep, direct OS call.
 //
@@ -4188,50 +4109,6 @@
   guarantee(osthread->sr.is_running(), "Must be running!");
 }
 
-////////////////////////////////////////////////////////////////////////////////
-// interrupt support
-
-void os::interrupt(Thread* thread) {
-  assert(Thread::current() == thread || Threads_lock->owned_by_self(),
-    "possibility of dangling Thread pointer");
-
-  OSThread* osthread = thread->osthread();
-
-  if (!osthread->interrupted()) {
-    osthread->set_interrupted(true);
-    // More than one thread can get here with the same value of osthread,
-    // resulting in multiple notifications.  We do, however, want the store
-    // to interrupted() to be visible to other threads before we execute unpark().
-    OrderAccess::fence();
-    ParkEvent * const slp = thread->_SleepEvent ;
-    if (slp != NULL) slp->unpark() ;
-  }
-
-  // For JSR166. Unpark even if interrupt status already was set
-  if (thread->is_Java_thread())
-    ((JavaThread*)thread)->parker()->unpark();
-
-  ParkEvent * ev = thread->_ParkEvent ;
-  if (ev != NULL) ev->unpark() ;
-
-}
-
-bool os::is_interrupted(Thread* thread, bool clear_interrupted) {
-  assert(Thread::current() == thread || Threads_lock->owned_by_self(),
-    "possibility of dangling Thread pointer");
-
-  OSThread* osthread = thread->osthread();
-
-  bool interrupted = osthread->interrupted();
-
-  if (interrupted && clear_interrupted) {
-    osthread->set_interrupted(false);
-    // consider thread->_SleepEvent->reset() ... optional optimization
-  }
-
-  return interrupted;
-}
-
 ///////////////////////////////////////////////////////////////////////////////////
 // signal handling (except suspend/resume)
 
@@ -4756,7 +4633,7 @@
     fatal(err_msg("pthread_condattr_init: %s", strerror(status)));
   }
   // Only set the clock if CLOCK_MONOTONIC is available
-  if (Linux::supports_monotonic_clock()) {
+  if (os::supports_monotonic_clock()) {
     if ((status = pthread_condattr_setclock(_condattr, CLOCK_MONOTONIC)) != 0) {
       if (status == EINVAL) {
         warning("Unable to use monotonic clock with relative timed-waits" \
@@ -5586,7 +5463,7 @@
     seconds = 50000000;
   }
 
-  if (os::Linux::supports_monotonic_clock()) {
+  if (os::supports_monotonic_clock()) {
     struct timespec now;
     int status = os::Linux::clock_gettime(CLOCK_MONOTONIC, &now);
     assert_status(status == 0, status, "clock_gettime");
@@ -5803,7 +5680,7 @@
   assert (time > 0, "convertTime");
   time_t max_secs = 0;
 
-  if (!os::Linux::supports_monotonic_clock() || isAbsolute) {
+  if (!os::supports_monotonic_clock() || isAbsolute) {
     struct timeval now;
     int status = gettimeofday(&now, NULL);
     assert(status == 0, "gettimeofday");
--- a/src/os/linux/vm/os_linux.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/os/linux/vm/os_linux.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -206,10 +206,6 @@
   // fast POSIX clocks support
   static void fast_thread_clock_init(void);
 
-  static inline bool supports_monotonic_clock() {
-    return _clock_gettime != NULL;
-  }
-
   static int clock_gettime(clockid_t clock_id, struct timespec *tp) {
     return _clock_gettime ? _clock_gettime(clock_id, tp) : -1;
   }
--- a/src/os/linux/vm/os_linux.inline.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/os/linux/vm/os_linux.inline.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -288,4 +288,8 @@
   return ::setsockopt(fd, level, optname, optval, optlen);
 }
 
+inline bool os::supports_monotonic_clock() {
+  return Linux::_clock_gettime != NULL;
+}
+
 #endif // OS_LINUX_VM_OS_LINUX_INLINE_HPP
--- a/src/os/posix/vm/os_posix.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/os/posix/vm/os_posix.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -1,5 +1,5 @@
 /*
-* Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 #include "utilities/globalDefinitions.hpp"
 #include "prims/jvm.h"
 #include "runtime/frame.inline.hpp"
+#include "runtime/interfaceSupport.hpp"
 #include "runtime/os.hpp"
 #include "utilities/vmError.hpp"
 
@@ -315,6 +316,135 @@
   return agent_entry_name;
 }
 
+int os::sleep(Thread* thread, jlong millis, bool interruptible) {
+  assert(thread == Thread::current(),  "thread consistency check");
+
+  ParkEvent * const slp = thread->_SleepEvent ;
+  slp->reset() ;
+  OrderAccess::fence() ;
+
+  if (interruptible) {
+    jlong prevtime = javaTimeNanos();
+
+    for (;;) {
+      if (os::is_interrupted(thread, true)) {
+        return OS_INTRPT;
+      }
+
+      jlong newtime = javaTimeNanos();
+
+      if (newtime - prevtime < 0) {
+        // time moving backwards, should only happen if no monotonic clock
+        // not a guarantee() because JVM should not abort on kernel/glibc bugs
+        assert(!os::supports_monotonic_clock(), "unexpected time moving backwards detected in os::sleep(interruptible)");
+      } else {
+        millis -= (newtime - prevtime) / NANOSECS_PER_MILLISEC;
+      }
+
+      if (millis <= 0) {
+        return OS_OK;
+      }
+
+      prevtime = newtime;
+
+      {
+        assert(thread->is_Java_thread(), "sanity check");
+        JavaThread *jt = (JavaThread *) thread;
+        ThreadBlockInVM tbivm(jt);
+        OSThreadWaitState osts(jt->osthread(), false /* not Object.wait() */);
+
+        jt->set_suspend_equivalent();
+        // cleared by handle_special_suspend_equivalent_condition() or
+        // java_suspend_self() via check_and_wait_while_suspended()
+
+        slp->park(millis);
+
+        // were we externally suspended while we were waiting?
+        jt->check_and_wait_while_suspended();
+      }
+    }
+  } else {
+    OSThreadWaitState osts(thread->osthread(), false /* not Object.wait() */);
+    jlong prevtime = javaTimeNanos();
+
+    for (;;) {
+      // It'd be nice to avoid the back-to-back javaTimeNanos() calls on
+      // the 1st iteration ...
+      jlong newtime = javaTimeNanos();
+
+      if (newtime - prevtime < 0) {
+        // time moving backwards, should only happen if no monotonic clock
+        // not a guarantee() because JVM should not abort on kernel/glibc bugs
+        assert(!os::supports_monotonic_clock(), "unexpected time moving backwards detected on os::sleep(!interruptible)");
+      } else {
+        millis -= (newtime - prevtime) / NANOSECS_PER_MILLISEC;
+      }
+
+      if (millis <= 0) break ;
+
+      prevtime = newtime;
+      slp->park(millis);
+    }
+    return OS_OK ;
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// interrupt support
+
+void os::interrupt(Thread* thread) {
+  assert(Thread::current() == thread || Threads_lock->owned_by_self(),
+    "possibility of dangling Thread pointer");
+
+  OSThread* osthread = thread->osthread();
+
+  if (!osthread->interrupted()) {
+    osthread->set_interrupted(true);
+    // More than one thread can get here with the same value of osthread,
+    // resulting in multiple notifications.  We do, however, want the store
+    // to interrupted() to be visible to other threads before we execute unpark().
+    OrderAccess::fence();
+    ParkEvent * const slp = thread->_SleepEvent ;
+    if (slp != NULL) slp->unpark() ;
+  }
+
+  // For JSR166. Unpark even if interrupt status already was set
+  if (thread->is_Java_thread())
+    ((JavaThread*)thread)->parker()->unpark();
+
+  ParkEvent * ev = thread->_ParkEvent ;
+  if (ev != NULL) ev->unpark() ;
+
+}
+
+bool os::is_interrupted(Thread* thread, bool clear_interrupted) {
+  assert(Thread::current() == thread || Threads_lock->owned_by_self(),
+    "possibility of dangling Thread pointer");
+
+  OSThread* osthread = thread->osthread();
+
+  bool interrupted = osthread->interrupted();
+
+  // NOTE that since there is no "lock" around the interrupt and
+  // is_interrupted operations, there is the possibility that the
+  // interrupted flag (in osThread) will be "false" but that the
+  // low-level events will be in the signaled state. This is
+  // intentional. The effect of this is that Object.wait() and
+  // LockSupport.park() will appear to have a spurious wakeup, which
+  // is allowed and not harmful, and the possibility is so rare that
+  // it is not worth the added complexity to add yet another lock.
+  // For the sleep event an explicit reset is performed on entry
+  // to os::sleep, so there is no early return. It has also been
+  // recommended not to put the interrupted flag into the "event"
+  // structure because it hides the issue.
+  if (interrupted && clear_interrupted) {
+    osthread->set_interrupted(false);
+    // consider thread->_SleepEvent->reset() ... optional optimization
+  }
+
+  return interrupted;
+}
+
 // Returned string is a constant. For unknown signals "UNKNOWN" is returned.
 const char* os::Posix::get_signal_name(int sig, char* out, size_t outlen) {
 
--- a/src/os/solaris/vm/os_solaris.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/os/solaris/vm/os_solaris.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -332,12 +332,6 @@
   ThreadStateTransition::transition(thread, thread_state, _thread_blocked);
 }
 
-// Version of setup_interruptible() for threads that are already in
-// _thread_blocked. Used by os_sleep().
-void os::Solaris::setup_interruptible_already_blocked(JavaThread* thread) {
-  thread->frame_anchor()->make_walkable(thread);
-}
-
 JavaThread* os::Solaris::setup_interruptible() {
   JavaThread* thread = (JavaThread*)ThreadLocalStorage::thread();
   setup_interruptible(thread);
@@ -3370,61 +3364,6 @@
   return true;
 }
 
-static int os_sleep(jlong millis, bool interruptible) {
-  const jlong limit = INT_MAX;
-  jlong prevtime;
-  int res;
-
-  while (millis > limit) {
-    if ((res = os_sleep(limit, interruptible)) != OS_OK)
-      return res;
-    millis -= limit;
-  }
-
-  // Restart interrupted polls with new parameters until the proper delay
-  // has been completed.
-
-  prevtime = getTimeMillis();
-
-  while (millis > 0) {
-    jlong newtime;
-
-    if (!interruptible) {
-      // Following assert fails for os::yield_all:
-      // assert(!thread->is_Java_thread(), "must not be java thread");
-      res = poll(NULL, 0, millis);
-    } else {
-      JavaThread *jt = JavaThread::current();
-
-      INTERRUPTIBLE_NORESTART_VM_ALWAYS(poll(NULL, 0, millis), res, jt,
-        os::Solaris::clear_interrupted);
-    }
-
-    // INTERRUPTIBLE_NORESTART_VM_ALWAYS returns res == OS_INTRPT for
-    // thread.Interrupt.
-
-    // See c/r 6751923. Poll can return 0 before time
-    // has elapsed if time is set via clock_settime (as NTP does).
-    // res == 0 if poll timed out (see man poll RETURN VALUES)
-    // using the logic below checks that we really did
-    // sleep at least "millis" if not we'll sleep again.
-    if( ( res == 0 ) || ((res == OS_ERR) && (errno == EINTR))) {
-      newtime = getTimeMillis();
-      assert(newtime >= prevtime, "time moving backwards");
-    /* Doing prevtime and newtime in microseconds doesn't help precision,
-       and trying to round up to avoid lost milliseconds can result in a
-       too-short delay. */
-      millis -= newtime - prevtime;
-      if(millis <= 0)
-        return OS_OK;
-      prevtime = newtime;
-    } else
-      return res;
-  }
-
-  return OS_OK;
-}
-
 // Read calls from inside the vm need to perform state transitions
 size_t os::read(int fd, void *buf, unsigned int nBytes) {
   INTERRUPTIBLE_RETURN_INT_VM(::read(fd, buf, nBytes), os::Solaris::clear_interrupted);
@@ -3434,69 +3373,6 @@
   INTERRUPTIBLE_RETURN_INT(::read(fd, buf, nBytes), os::Solaris::clear_interrupted);
 }
 
-int os::sleep(Thread* thread, jlong millis, bool interruptible) {
-  assert(thread == Thread::current(),  "thread consistency check");
-
-  // TODO-FIXME: this should be removed.
-  // On Solaris machines (especially 2.5.1) we found that sometimes the VM gets into a live lock
-  // situation with a JavaThread being starved out of a lwp. The kernel doesn't seem to generate
-  // a SIGWAITING signal which would enable the threads library to create a new lwp for the starving
-  // thread. We suspect that because the Watcher thread keeps waking up at periodic intervals the kernel
-  // is fooled into believing that the system is making progress. In the code below we block the
-  // the watcher thread while safepoint is in progress so that it would not appear as though the
-  // system is making progress.
-  if (!Solaris::T2_libthread() &&
-      thread->is_Watcher_thread() && SafepointSynchronize::is_synchronizing() && !Arguments::has_profile()) {
-    // We now try to acquire the threads lock. Since this lock is held by the VM thread during
-    // the entire safepoint, the watcher thread will  line up here during the safepoint.
-    Threads_lock->lock_without_safepoint_check();
-    Threads_lock->unlock();
-  }
-
-  if (thread->is_Java_thread()) {
-    // This is a JavaThread so we honor the _thread_blocked protocol
-    // even for sleeps of 0 milliseconds. This was originally done
-    // as a workaround for bug 4338139. However, now we also do it
-    // to honor the suspend-equivalent protocol.
-
-    JavaThread *jt = (JavaThread *) thread;
-    ThreadBlockInVM tbivm(jt);
-
-    jt->set_suspend_equivalent();
-    // cleared by handle_special_suspend_equivalent_condition() or
-    // java_suspend_self() via check_and_wait_while_suspended()
-
-    int ret_code;
-    if (millis <= 0) {
-      thr_yield();
-      ret_code = 0;
-    } else {
-      // The original sleep() implementation did not create an
-      // OSThreadWaitState helper for sleeps of 0 milliseconds.
-      // I'm preserving that decision for now.
-      OSThreadWaitState osts(jt->osthread(), false /* not Object.wait() */);
-
-      ret_code = os_sleep(millis, interruptible);
-    }
-
-    // were we externally suspended while we were waiting?
-    jt->check_and_wait_while_suspended();
-
-    return ret_code;
-  }
-
-  // non-JavaThread from this point on:
-
-  if (millis <= 0) {
-    thr_yield();
-    return 0;
-  }
-
-  OSThreadWaitState osts(thread->osthread(), false /* not Object.wait() */);
-
-  return os_sleep(millis, interruptible);
-}
-
 void os::naked_short_sleep(jlong ms) {
   assert(ms < 1000, "Un-interruptable sleep, short time use only");
 
@@ -4139,68 +4015,6 @@
   errno = old_errno;
 }
 
-
-void os::interrupt(Thread* thread) {
-  assert(Thread::current() == thread || Threads_lock->owned_by_self(), "possibility of dangling Thread pointer");
-
-  OSThread* osthread = thread->osthread();
-
-  int isInterrupted = osthread->interrupted();
-  if (!isInterrupted) {
-      osthread->set_interrupted(true);
-      OrderAccess::fence();
-      // os::sleep() is implemented with either poll (NULL,0,timeout) or
-      // by parking on _SleepEvent.  If the former, thr_kill will unwedge
-      // the sleeper by SIGINTR, otherwise the unpark() will wake the sleeper.
-      ParkEvent * const slp = thread->_SleepEvent ;
-      if (slp != NULL) slp->unpark() ;
-  }
-
-  // For JSR166:  unpark after setting status but before thr_kill -dl
-  if (thread->is_Java_thread()) {
-    ((JavaThread*)thread)->parker()->unpark();
-  }
-
-  // Handle interruptible wait() ...
-  ParkEvent * const ev = thread->_ParkEvent ;
-  if (ev != NULL) ev->unpark() ;
-
-  // When events are used everywhere for os::sleep, then this thr_kill
-  // will only be needed if UseVMInterruptibleIO is true.
-
-  if (!isInterrupted) {
-    int status = thr_kill(osthread->thread_id(), os::Solaris::SIGinterrupt());
-    assert_status(status == 0, status, "thr_kill");
-
-    // Bump thread interruption counter
-    RuntimeService::record_thread_interrupt_signaled_count();
-  }
-}
-
-
-bool os::is_interrupted(Thread* thread, bool clear_interrupted) {
-  assert(Thread::current() == thread || Threads_lock->owned_by_self(), "possibility of dangling Thread pointer");
-
-  OSThread* osthread = thread->osthread();
-
-  bool res = osthread->interrupted();
-
-  // NOTE that since there is no "lock" around these two operations,
-  // there is the possibility that the interrupted flag will be
-  // "false" but that the interrupt event will be set. This is
-  // intentional. The effect of this is that Object.wait() will appear
-  // to have a spurious wakeup, which is not harmful, and the
-  // possibility is so rare that it is not worth the added complexity
-  // to add yet another lock. It has also been recommended not to put
-  // the interrupted flag into the os::Solaris::Event structure,
-  // because it hides the issue.
-  if (res && clear_interrupted) {
-    osthread->set_interrupted(false);
-  }
-  return res;
-}
-
-
 void os::print_statistics() {
 }
 
--- a/src/os/solaris/vm/os_solaris.inline.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/os/solaris/vm/os_solaris.inline.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -260,4 +260,10 @@
                             const char *optval, socklen_t optlen) {
   return ::setsockopt(fd, level, optname, optval, optlen);
 }
+
+inline bool os::supports_monotonic_clock() {
+  // javaTimeNanos() is monotonic on Solaris, see getTimeNanos() comments
+  return true;
+}
+
 #endif // OS_SOLARIS_VM_OS_SOLARIS_INLINE_HPP
--- a/src/os/windows/vm/os_windows.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/os/windows/vm/os_windows.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -628,8 +628,6 @@
   delete osthread;
 }
 
-
-static int    has_performance_count = 0;
 static jlong first_filetime;
 static jlong initial_performance_count;
 static jlong performance_frequency;
@@ -645,7 +643,7 @@
 
 jlong os::elapsed_counter() {
   LARGE_INTEGER count;
-  if (has_performance_count) {
+  if (win32::_has_performance_count) {
     QueryPerformanceCounter(&count);
     return as_long(count) - initial_performance_count;
   } else {
@@ -657,7 +655,7 @@
 
 
 jlong os::elapsed_frequency() {
-  if (has_performance_count) {
+  if (win32::_has_performance_count) {
     return performance_frequency;
   } else {
    // the FILETIME time is the number of 100-nanosecond intervals since January 1,1601.
@@ -736,15 +734,15 @@
   return false;
 }
 
-static void initialize_performance_counter() {
+void os::win32::initialize_performance_counter() {
   LARGE_INTEGER count;
   if (QueryPerformanceFrequency(&count)) {
-    has_performance_count = 1;
+    win32::_has_performance_count = 1;
     performance_frequency = as_long(count);
     QueryPerformanceCounter(&count);
     initial_performance_count = as_long(count);
   } else {
-    has_performance_count = 0;
+    win32::_has_performance_count = 0;
     FILETIME wt;
     GetSystemTimeAsFileTime(&wt);
     first_filetime = jlong_from(wt.dwHighDateTime, wt.dwLowDateTime);
@@ -839,7 +837,7 @@
 }
 
 jlong os::javaTimeNanos() {
-  if (!has_performance_count) {
+  if (!win32::_has_performance_count) {
     return javaTimeMillis() * NANOSECS_PER_MILLISEC; // the best we can do.
   } else {
     LARGE_INTEGER current_count;
@@ -852,7 +850,7 @@
 }
 
 void os::javaTimeNanos_info(jvmtiTimerInfo *info_ptr) {
-  if (!has_performance_count) {
+  if (!win32::_has_performance_count) {
     // javaTimeMillis() doesn't have much percision,
     // but it is not going to wrap -- so all 64 bits
     info_ptr->max_value = ALL_64_BITS;
@@ -3682,6 +3680,8 @@
 bool   os::win32::_is_windows_2003    = false;
 bool   os::win32::_is_windows_server  = false;
 
+bool   os::win32::_has_performance_count = 0;
+
 void os::win32::initialize_system_info() {
   SYSTEM_INFO si;
   GetSystemInfo(&si);
--- a/src/os/windows/vm/os_windows.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/os/windows/vm/os_windows.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -42,6 +42,7 @@
   static bool   _is_nt;
   static bool   _is_windows_2003;
   static bool   _is_windows_server;
+  static bool   _has_performance_count;
 
   static void print_windows_version(outputStream* st);
 
@@ -63,6 +64,9 @@
   // load dll from Windows system directory or Windows directory
   static HINSTANCE load_Windows_dll(const char* name, char *ebuf, int ebuflen);
 
+  private:
+    static void initialize_performance_counter();
+
  public:
   // Generic interface:
 
--- a/src/os/windows/vm/os_windows.inline.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/os/windows/vm/os_windows.inline.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -107,6 +107,10 @@
   return ::close(fd);
 }
 
+inline bool os::supports_monotonic_clock() {
+  return win32::_has_performance_count;
+}
+
 #ifndef PRODUCT
   #define CALL_TEST_FUNC_WITH_WRAPPER_IF_NEEDED(f) \
             os::win32::call_test_func_with_wrapper(f)
--- a/src/share/vm/adlc/archDesc.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/adlc/archDesc.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -1167,15 +1167,12 @@
          || strcmp(idealName,"CmpF") == 0
          || strcmp(idealName,"FastLock") == 0
          || strcmp(idealName,"FastUnlock") == 0
-         || strcmp(idealName,"AddExactI") == 0
-         || strcmp(idealName,"AddExactL") == 0
-         || strcmp(idealName,"SubExactI") == 0
-         || strcmp(idealName,"SubExactL") == 0
-         || strcmp(idealName,"MulExactI") == 0
-         || strcmp(idealName,"MulExactL") == 0
-         || strcmp(idealName,"NegExactI") == 0
-         || strcmp(idealName,"NegExactL") == 0
-         || strcmp(idealName,"FlagsProj") == 0
+         || strcmp(idealName,"OverflowAddI") == 0
+         || strcmp(idealName,"OverflowAddL") == 0
+         || strcmp(idealName,"OverflowSubI") == 0
+         || strcmp(idealName,"OverflowSubL") == 0
+         || strcmp(idealName,"OverflowMulI") == 0
+         || strcmp(idealName,"OverflowMulL") == 0
          || strcmp(idealName,"Bool") == 0
          || strcmp(idealName,"Binary") == 0 ) {
       // Removed ConI from the must_clone list.  CPUs that cannot use
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -2276,7 +2276,7 @@
   if (!has_handler() && (!instruction->needs_exception_state() || instruction->exception_state() != NULL)) {
     assert(instruction->exception_state() == NULL
            || instruction->exception_state()->kind() == ValueStack::EmptyExceptionState
-           || (instruction->exception_state()->kind() == ValueStack::ExceptionState && _compilation->env()->jvmti_can_access_local_variables()),
+           || (instruction->exception_state()->kind() == ValueStack::ExceptionState && _compilation->env()->should_retain_local_variables()),
            "exception_state should be of exception kind");
     return new XHandlers();
   }
@@ -2367,7 +2367,7 @@
       // This scope and all callees do not handle exceptions, so the local
       // variables of this scope are not needed. However, the scope itself is
       // required for a correct exception stack trace -> clear out the locals.
-      if (_compilation->env()->jvmti_can_access_local_variables()) {
+      if (_compilation->env()->should_retain_local_variables()) {
         cur_state = cur_state->copy(ValueStack::ExceptionState, cur_state->bci());
       } else {
         cur_state = cur_state->copy(ValueStack::EmptyExceptionState, cur_state->bci());
@@ -3251,7 +3251,7 @@
 ValueStack* GraphBuilder::copy_state_for_exception_with_bci(int bci) {
   ValueStack* s = copy_state_exhandling_with_bci(bci);
   if (s == NULL) {
-    if (_compilation->env()->jvmti_can_access_local_variables()) {
+    if (_compilation->env()->should_retain_local_variables()) {
       s = state()->copy(ValueStack::ExceptionState, bci);
     } else {
       s = state()->copy(ValueStack::EmptyExceptionState, bci);
--- a/src/share/vm/c1/c1_Instruction.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/c1/c1_Instruction.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -76,7 +76,7 @@
 
 void Instruction::update_exception_state(ValueStack* state) {
   if (state != NULL && (state->kind() == ValueStack::EmptyExceptionState || state->kind() == ValueStack::ExceptionState)) {
-    assert(state->kind() == ValueStack::EmptyExceptionState || Compilation::current()->env()->jvmti_can_access_local_variables(), "unexpected state kind");
+    assert(state->kind() == ValueStack::EmptyExceptionState || Compilation::current()->env()->should_retain_local_variables(), "unexpected state kind");
     _exception_state = state;
   } else {
     _exception_state = NULL;
--- a/src/share/vm/c1/c1_ValueStack.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/c1/c1_ValueStack.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -52,7 +52,7 @@
   , _stack()
   , _locks(copy_from->locks_size())
 {
-  assert(kind != EmptyExceptionState || !Compilation::current()->env()->jvmti_can_access_local_variables(), "need locals");
+  assert(kind != EmptyExceptionState || !Compilation::current()->env()->should_retain_local_variables(), "need locals");
   if (kind != EmptyExceptionState) {
     // only allocate space if we need to copy the locals-array
     _locals = Values(copy_from->locals_size());
--- a/src/share/vm/c1/c1_ValueStack.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/c1/c1_ValueStack.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -75,7 +75,7 @@
 
   void set_caller_state(ValueStack* s)           {
     assert(kind() == EmptyExceptionState ||
-           (Compilation::current()->env()->jvmti_can_access_local_variables() && kind() == ExceptionState),
+           (Compilation::current()->env()->should_retain_local_variables() && kind() == ExceptionState),
            "only EmptyExceptionStates can be modified");
     _caller_state = s;
   }
--- a/src/share/vm/ci/ciClassList.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/ci/ciClassList.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -103,6 +103,7 @@
 friend class ciMethodType;             \
 friend class ciReceiverTypeData;       \
 friend class ciTypeEntries;            \
+friend class ciSpeculativeTrapData;    \
 friend class ciSymbol;                 \
 friend class ciArray;                  \
 friend class ciObjArray;               \
--- a/src/share/vm/ci/ciEnv.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/ci/ciEnv.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -136,6 +136,11 @@
   _ClassCastException_instance = NULL;
   _the_null_string = NULL;
   _the_min_jint_string = NULL;
+
+  _jvmti_can_hotswap_or_post_breakpoint = false;
+  _jvmti_can_access_local_variables = false;
+  _jvmti_can_post_on_exceptions = false;
+  _jvmti_can_pop_frame = false;
 }
 
 ciEnv::ciEnv(Arena* arena) {
@@ -186,6 +191,11 @@
   _ClassCastException_instance = NULL;
   _the_null_string = NULL;
   _the_min_jint_string = NULL;
+
+  _jvmti_can_hotswap_or_post_breakpoint = false;
+  _jvmti_can_access_local_variables = false;
+  _jvmti_can_post_on_exceptions = false;
+  _jvmti_can_pop_frame = false;
 }
 
 ciEnv::~ciEnv() {
@@ -205,6 +215,31 @@
   _jvmti_can_hotswap_or_post_breakpoint = JvmtiExport::can_hotswap_or_post_breakpoint();
   _jvmti_can_access_local_variables     = JvmtiExport::can_access_local_variables();
   _jvmti_can_post_on_exceptions         = JvmtiExport::can_post_on_exceptions();
+  _jvmti_can_pop_frame                  = JvmtiExport::can_pop_frame();
+}
+
+bool ciEnv::should_retain_local_variables() const {
+  return _jvmti_can_access_local_variables || _jvmti_can_pop_frame;
+}
+
+bool ciEnv::jvmti_state_changed() const {
+  if (!_jvmti_can_access_local_variables &&
+      JvmtiExport::can_access_local_variables()) {
+    return true;
+  }
+  if (!_jvmti_can_hotswap_or_post_breakpoint &&
+      JvmtiExport::can_hotswap_or_post_breakpoint()) {
+    return true;
+  }
+  if (!_jvmti_can_post_on_exceptions &&
+      JvmtiExport::can_post_on_exceptions()) {
+    return true;
+  }
+  if (!_jvmti_can_pop_frame &&
+      JvmtiExport::can_pop_frame()) {
+    return true;
+  }
+  return false;
 }
 
 // ------------------------------------------------------------------
@@ -940,13 +975,7 @@
     No_Safepoint_Verifier nsv;
 
     // Change in Jvmti state may invalidate compilation.
-    if (!failing() &&
-        ( (!jvmti_can_hotswap_or_post_breakpoint() &&
-           JvmtiExport::can_hotswap_or_post_breakpoint()) ||
-          (!jvmti_can_access_local_variables() &&
-           JvmtiExport::can_access_local_variables()) ||
-          (!jvmti_can_post_on_exceptions() &&
-           JvmtiExport::can_post_on_exceptions()) )) {
+    if (!failing() && jvmti_state_changed()) {
       record_failure("Jvmti state change invalidated dependencies");
     }
 
--- a/src/share/vm/ci/ciEnv.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/ci/ciEnv.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -69,6 +69,7 @@
   bool  _jvmti_can_hotswap_or_post_breakpoint;
   bool  _jvmti_can_access_local_variables;
   bool  _jvmti_can_post_on_exceptions;
+  bool  _jvmti_can_pop_frame;
 
   // Cache DTrace flags
   bool  _dtrace_extended_probes;
@@ -332,8 +333,9 @@
 
   // Cache Jvmti state
   void  cache_jvmti_state();
+  bool  jvmti_state_changed() const;
+  bool  should_retain_local_variables() const;
   bool  jvmti_can_hotswap_or_post_breakpoint() const { return _jvmti_can_hotswap_or_post_breakpoint; }
-  bool  jvmti_can_access_local_variables()     const { return _jvmti_can_access_local_variables; }
   bool  jvmti_can_post_on_exceptions()         const { return _jvmti_can_post_on_exceptions; }
 
   // Cache DTrace flags
--- a/src/share/vm/ci/ciMethod.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/ci/ciMethod.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -412,7 +412,7 @@
 // information.
 MethodLivenessResult ciMethod::liveness_at_bci(int bci) {
   MethodLivenessResult result = raw_liveness_at_bci(bci);
-  if (CURRENT_ENV->jvmti_can_access_local_variables() || DeoptimizeALot || CompileTheWorld) {
+  if (CURRENT_ENV->should_retain_local_variables() || DeoptimizeALot || CompileTheWorld) {
     // Keep all locals live for the user's edification and amusement.
     result.at_put_range(0, result.size(), true);
   }
--- a/src/share/vm/ci/ciMethodData.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/ci/ciMethodData.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -78,6 +78,35 @@
   _parameters = NULL;
 }
 
+void ciMethodData::load_extra_data() {
+  MethodData* mdo = get_MethodData();
+
+  // speculative trap entries also hold a pointer to a Method so need to be translated
+  DataLayout* dp_src  = mdo->extra_data_base();
+  DataLayout* end_src = mdo->extra_data_limit();
+  DataLayout* dp_dst  = extra_data_base();
+  for (;; dp_src = MethodData::next_extra(dp_src), dp_dst = MethodData::next_extra(dp_dst)) {
+    assert(dp_src < end_src, "moved past end of extra data");
+    assert(dp_src->tag() == dp_dst->tag(), err_msg("should be same tags %d != %d", dp_src->tag(), dp_dst->tag()));
+    switch(dp_src->tag()) {
+    case DataLayout::speculative_trap_data_tag: {
+      ciSpeculativeTrapData* data_dst = new ciSpeculativeTrapData(dp_dst);
+      SpeculativeTrapData* data_src = new SpeculativeTrapData(dp_src);
+      data_dst->translate_from(data_src);
+      break;
+    }
+    case DataLayout::bit_data_tag:
+      break;
+    case DataLayout::no_tag:
+    case DataLayout::arg_info_data_tag:
+      // An empty slot or ArgInfoData entry marks the end of the trap data
+      return;
+    default:
+      fatal(err_msg("bad tag = %d", dp_src->tag()));
+    }
+  }
+}
+
 void ciMethodData::load_data() {
   MethodData* mdo = get_MethodData();
   if (mdo == NULL) {
@@ -116,6 +145,8 @@
     parameters->translate_from(mdo->parameters_type_data());
   }
 
+  load_extra_data();
+
   // Note:  Extra data are all BitData, and do not need translation.
   _current_mileage = MethodData::mileage_of(mdo->method());
   _invocation_counter = mdo->invocation_count();
@@ -156,6 +187,12 @@
   set_type(translate_klass(k));
 }
 
+void ciSpeculativeTrapData::translate_from(const ProfileData* data) {
+  Method* m = data->as_SpeculativeTrapData()->method();
+  ciMethod* ci_m = CURRENT_ENV->get_method(m);
+  set_method(ci_m);
+}
+
 // Get the data at an arbitrary (sort of) data index.
 ciProfileData* ciMethodData::data_at(int data_index) {
   if (out_of_bounds(data_index)) {
@@ -203,33 +240,65 @@
   return next;
 }
 
-// Translate a bci to its corresponding data, or NULL.
-ciProfileData* ciMethodData::bci_to_data(int bci) {
-  ciProfileData* data = data_before(bci);
-  for ( ; is_valid(data); data = next_data(data)) {
-    if (data->bci() == bci) {
-      set_hint_di(dp_to_di(data->dp()));
-      return data;
-    } else if (data->bci() > bci) {
-      break;
-    }
-  }
+ciProfileData* ciMethodData::bci_to_extra_data(int bci, ciMethod* m, bool& two_free_slots) {
   // bci_to_extra_data(bci) ...
   DataLayout* dp  = data_layout_at(data_size());
   DataLayout* end = data_layout_at(data_size() + extra_data_size());
-  for (; dp < end; dp = MethodData::next_extra(dp)) {
-    if (dp->tag() == DataLayout::no_tag) {
+  two_free_slots = false;
+  for (;dp < end; dp = MethodData::next_extra(dp)) {
+    switch(dp->tag()) {
+    case DataLayout::no_tag:
       _saw_free_extra_data = true;  // observed an empty slot (common case)
+      two_free_slots = (MethodData::next_extra(dp)->tag() == DataLayout::no_tag);
       return NULL;
+    case DataLayout::arg_info_data_tag:
+      return NULL; // ArgInfoData is at the end of extra data section.
+    case DataLayout::bit_data_tag:
+      if (m == NULL && dp->bci() == bci) {
+        return new ciBitData(dp);
+      }
+      break;
+    case DataLayout::speculative_trap_data_tag: {
+      ciSpeculativeTrapData* data = new ciSpeculativeTrapData(dp);
+      // data->method() might be null if the MDO is snapshotted
+      // concurrently with a trap
+      if (m != NULL && data->method() == m && dp->bci() == bci) {
+        return data;
+      }
+      break;
     }
-    if (dp->tag() == DataLayout::arg_info_data_tag) {
-      break; // ArgInfoData is at the end of extra data section.
+    default:
+      fatal(err_msg("bad tag = %d", dp->tag()));
     }
-    if (dp->bci() == bci) {
-      assert(dp->tag() == DataLayout::bit_data_tag, "sane");
-      return new ciBitData(dp);
+  }
+  return NULL;
+}
+
+// Translate a bci to its corresponding data, or NULL.
+ciProfileData* ciMethodData::bci_to_data(int bci, ciMethod* m) {
+  // If m is not NULL we look for a SpeculativeTrapData entry
+  if (m == NULL) {
+    ciProfileData* data = data_before(bci);
+    for ( ; is_valid(data); data = next_data(data)) {
+      if (data->bci() == bci) {
+        set_hint_di(dp_to_di(data->dp()));
+        return data;
+      } else if (data->bci() > bci) {
+        break;
+      }
     }
   }
+  bool two_free_slots = false;
+  ciProfileData* result = bci_to_extra_data(bci, m, two_free_slots);
+  if (result != NULL) {
+    return result;
+  }
+  if (m != NULL && !two_free_slots) {
+    // We were looking for a SpeculativeTrapData entry we didn't
+    // find. Room is not available for more SpeculativeTrapData
+    // entries, look in the non SpeculativeTrapData entries.
+    return bci_to_data(bci, NULL);
+  }
   return NULL;
 }
 
@@ -525,18 +594,25 @@
   st->print_cr("--- Extra data:");
   DataLayout* dp  = data_layout_at(data_size());
   DataLayout* end = data_layout_at(data_size() + extra_data_size());
-  for (; dp < end; dp = MethodData::next_extra(dp)) {
-    if (dp->tag() == DataLayout::no_tag)  continue;
-    if (dp->tag() == DataLayout::bit_data_tag) {
+  for (;; dp = MethodData::next_extra(dp)) {
+    assert(dp < end, "moved past end of extra data");
+    switch (dp->tag()) {
+    case DataLayout::no_tag:
+      continue;
+    case DataLayout::bit_data_tag:
       data = new BitData(dp);
-    } else {
-      assert(dp->tag() == DataLayout::arg_info_data_tag, "must be BitData or ArgInfo");
+      break;
+    case DataLayout::arg_info_data_tag:
       data = new ciArgInfoData(dp);
       dp = end; // ArgInfoData is at the end of extra data section.
+      break;
+    default:
+      fatal(err_msg("unexpected tag %d", dp->tag()));
     }
     st->print("%d", dp_to_di(data->dp()));
     st->fill_to(6);
     data->print_data_on(st);
+    if (dp >= end) return;
   }
 }
 
@@ -569,8 +645,8 @@
   st->cr();
 }
 
-void ciCallTypeData::print_data_on(outputStream* st) const {
-  print_shared(st, "ciCallTypeData");
+void ciCallTypeData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "ciCallTypeData", extra);
   if (has_arguments()) {
     tab(st, true);
     st->print("argument types");
@@ -599,18 +675,18 @@
   }
 }
 
-void ciReceiverTypeData::print_data_on(outputStream* st) const {
-  print_shared(st, "ciReceiverTypeData");
+void ciReceiverTypeData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "ciReceiverTypeData", extra);
   print_receiver_data_on(st);
 }
 
-void ciVirtualCallData::print_data_on(outputStream* st) const {
-  print_shared(st, "ciVirtualCallData");
+void ciVirtualCallData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "ciVirtualCallData", extra);
   rtd_super()->print_receiver_data_on(st);
 }
 
-void ciVirtualCallTypeData::print_data_on(outputStream* st) const {
-  print_shared(st, "ciVirtualCallTypeData");
+void ciVirtualCallTypeData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "ciVirtualCallTypeData", extra);
   rtd_super()->print_receiver_data_on(st);
   if (has_arguments()) {
     tab(st, true);
@@ -624,8 +700,15 @@
   }
 }
 
-void ciParametersTypeData::print_data_on(outputStream* st) const {
-  st->print_cr("Parametertypes");
+void ciParametersTypeData::print_data_on(outputStream* st, const char* extra) const {
+  st->print_cr("ciParametersTypeData");
   parameters()->print_data_on(st);
 }
+
+void ciSpeculativeTrapData::print_data_on(outputStream* st, const char* extra) const {
+  st->print_cr("ciSpeculativeTrapData");
+  tab(st);
+  method()->print_short_name(st);
+  st->cr();
+}
 #endif
--- a/src/share/vm/ci/ciMethodData.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/ci/ciMethodData.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -31,6 +31,7 @@
 #include "ci/ciUtilities.hpp"
 #include "oops/methodData.hpp"
 #include "oops/oop.inline.hpp"
+#include "runtime/deoptimization.hpp"
 
 class ciBitData;
 class ciCounterData;
@@ -44,6 +45,7 @@
 class ciCallTypeData;
 class ciVirtualCallTypeData;
 class ciParametersTypeData;
+class ciSpeculativeTrapData;;
 
 typedef ProfileData ciProfileData;
 
@@ -173,7 +175,7 @@
   }
 
 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra) const;
 #endif
 };
 
@@ -200,7 +202,7 @@
   }
   void translate_receiver_data_from(const ProfileData* data);
 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra) const;
   void print_receiver_data_on(outputStream* st) const;
 #endif
 };
@@ -225,7 +227,7 @@
     rtd_super()->translate_receiver_data_from(data);
   }
 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra) const;
 #endif
 };
 
@@ -287,7 +289,7 @@
   }
 
 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra) const;
 #endif
 };
 
@@ -336,7 +338,26 @@
   }
 
 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra) const;
+#endif
+};
+
+class ciSpeculativeTrapData : public SpeculativeTrapData {
+public:
+  ciSpeculativeTrapData(DataLayout* layout) : SpeculativeTrapData(layout) {}
+
+  virtual void translate_from(const ProfileData* data);
+
+  ciMethod* method() const {
+    return (ciMethod*)intptr_at(method_offset);
+  }
+
+  void set_method(ciMethod* m) {
+    set_intptr_at(method_offset, (intptr_t)m);
+  }
+
+#ifndef PRODUCT
+  void print_data_on(outputStream* st, const char* extra) const;
 #endif
 };
 
@@ -436,6 +457,16 @@
 
   ciArgInfoData *arg_info() const;
 
+  address data_base() const {
+    return (address) _data;
+  }
+  DataLayout* limit_data_position() const {
+    return (DataLayout*)((address)data_base() + _data_size);
+  }
+
+  void load_extra_data();
+  ciProfileData* bci_to_extra_data(int bci, ciMethod* m, bool& two_free_slots);
+
 public:
   bool is_method_data() const { return true; }
 
@@ -475,9 +506,11 @@
   ciProfileData* next_data(ciProfileData* current);
   bool is_valid(ciProfileData* current) { return current != NULL; }
 
-  // Get the data at an arbitrary bci, or NULL if there is none.
-  ciProfileData* bci_to_data(int bci);
-  ciProfileData* bci_to_extra_data(int bci, bool create_if_missing);
+  DataLayout* extra_data_base() const { return limit_data_position(); }
+
+  // Get the data at an arbitrary bci, or NULL if there is none. If m
+  // is not NULL look for a SpeculativeTrapData if any first.
+  ciProfileData* bci_to_data(int bci, ciMethod* m = NULL);
 
   uint overflow_trap_count() const {
     return _orig.overflow_trap_count();
@@ -496,12 +529,13 @@
 
   // Helpful query functions that decode trap_state.
   int has_trap_at(ciProfileData* data, int reason);
-  int has_trap_at(int bci, int reason) {
-    return has_trap_at(bci_to_data(bci), reason);
+  int has_trap_at(int bci, ciMethod* m, int reason) {
+    assert((m != NULL) == Deoptimization::reason_is_speculate(reason), "inconsistent method/reason");
+    return has_trap_at(bci_to_data(bci, m), reason);
   }
   int trap_recompiled_at(ciProfileData* data);
-  int trap_recompiled_at(int bci) {
-    return trap_recompiled_at(bci_to_data(bci));
+  int trap_recompiled_at(int bci, ciMethod* m) {
+    return trap_recompiled_at(bci_to_data(bci, m));
   }
 
   void clear_escape_info();
--- a/src/share/vm/classfile/classLoaderData.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/classfile/classLoaderData.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -73,7 +73,11 @@
 
 ClassLoaderData::ClassLoaderData(Handle h_class_loader, bool is_anonymous, Dependencies dependencies) :
   _class_loader(h_class_loader()),
-  _is_anonymous(is_anonymous), _keep_alive(is_anonymous), // initially
+  _is_anonymous(is_anonymous),
+  // An anonymous class loader data doesn't have anything to keep
+  // it from being unloaded during parsing of the anonymous class.
+  // The null-class-loader should always be kept alive.
+  _keep_alive(is_anonymous || h_class_loader.is_null()),
   _metaspace(NULL), _unloading(false), _klasses(NULL),
   _claimed(0), _jmethod_ids(NULL), _handles(NULL), _deallocate_list(NULL),
   _next(NULL), _dependencies(dependencies),
@@ -317,11 +321,15 @@
   }
 }
 
+oop ClassLoaderData::keep_alive_object() const {
+  assert(!keep_alive(), "Don't use with CLDs that are artificially kept alive");
+  return is_anonymous() ? _klasses->java_mirror() : class_loader();
+}
+
 bool ClassLoaderData::is_alive(BoolObjectClosure* is_alive_closure) const {
-  bool alive =
-    is_anonymous() ?
-       is_alive_closure->do_object_b(_klasses->java_mirror()) :
-       class_loader() == NULL || is_alive_closure->do_object_b(class_loader());
+  bool alive = keep_alive() // null class loader and incomplete anonymous klasses.
+      || is_alive_closure->do_object_b(keep_alive_object());
+
   assert(!alive || claimed(), "must be claimed");
   return alive;
 }
@@ -598,8 +606,6 @@
 
 void ClassLoaderDataGraph::always_strong_oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) {
   if (ClassUnloading) {
-    ClassLoaderData::the_null_class_loader_data()->oops_do(f, klass_closure, must_claim);
-    // keep any special CLDs alive.
     ClassLoaderDataGraph::keep_alive_oops_do(f, klass_closure, must_claim);
   } else {
     ClassLoaderDataGraph::oops_do(f, klass_closure, must_claim);
@@ -705,7 +711,7 @@
   bool has_redefined_a_class = JvmtiExport::has_redefined_a_class();
   MetadataOnStackMark md_on_stack;
   while (data != NULL) {
-    if (data->keep_alive() || data->is_alive(is_alive_closure)) {
+    if (data->is_alive(is_alive_closure)) {
       if (has_redefined_a_class) {
         data->classes_do(InstanceKlass::purge_previous_versions);
       }
--- a/src/share/vm/classfile/classLoaderData.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/classfile/classLoaderData.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -139,7 +139,7 @@
                            // classes in the class loader are allocated.
   Mutex* _metaspace_lock;  // Locks the metaspace for allocations and setup.
   bool _unloading;         // true if this class loader goes away
-  bool _keep_alive;        // if this CLD can be unloaded for anonymous loaders
+  bool _keep_alive;        // if this CLD is kept alive without a keep_alive_object().
   bool _is_anonymous;      // if this CLD is for an anonymous class
   volatile int _claimed;   // true if claimed, for example during GC traces.
                            // To avoid applying oop closure more than once.
@@ -230,13 +230,16 @@
 
   oop class_loader() const      { return _class_loader; }
 
+  // The object the GC is using to keep this ClassLoaderData alive.
+  oop keep_alive_object() const;
+
   // Returns true if this class loader data is for a loader going away.
   bool is_unloading() const     {
     assert(!(is_the_null_class_loader_data() && _unloading), "The null class loader can never be unloaded");
     return _unloading;
   }
-  // Anonymous class loader data doesn't have anything to keep them from
-  // being unloaded during parsing the anonymous class.
+
+  // Used to make sure that this CLD is not unloaded.
   void set_keep_alive(bool value) { _keep_alive = value; }
 
   unsigned int identity_hash() {
--- a/src/share/vm/classfile/javaClasses.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/classfile/javaClasses.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -461,12 +461,11 @@
   return true;
 }
 
-void java_lang_String::print(Handle java_string, outputStream* st) {
-  oop          obj    = java_string();
-  assert(obj->klass() == SystemDictionary::String_klass(), "must be java_string");
-  typeArrayOop value  = java_lang_String::value(obj);
-  int          offset = java_lang_String::offset(obj);
-  int          length = java_lang_String::length(obj);
+void java_lang_String::print(oop java_string, outputStream* st) {
+  assert(java_string->klass() == SystemDictionary::String_klass(), "must be java_string");
+  typeArrayOop value  = java_lang_String::value(java_string);
+  int          offset = java_lang_String::offset(java_string);
+  int          length = java_lang_String::length(java_string);
 
   int end = MIN2(length, 100);
   if (value == NULL) {
--- a/src/share/vm/classfile/javaClasses.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/classfile/javaClasses.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -198,7 +198,7 @@
   }
 
   // Debugging
-  static void print(Handle java_string, outputStream* st);
+  static void print(oop java_string, outputStream* st);
   friend class JavaClasses;
 };
 
--- a/src/share/vm/code/codeCache.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/code/codeCache.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -595,11 +595,8 @@
   }
 }
 
-#ifndef PRODUCT
 // Keeps track of time spent for checking dependencies
-static elapsedTimer dependentCheckTime;
-#endif
-
+NOT_PRODUCT(static elapsedTimer dependentCheckTime;)
 
 int CodeCache::mark_for_deoptimization(DepChange& changes) {
   MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
--- a/src/share/vm/code/dependencies.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/code/dependencies.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -725,56 +725,19 @@
 }
 
 // ----------------- DependencySignature --------------------------------------
-bool DependencySignature::equals(const DependencySignature& sig) const {
-  if (type() != sig.type()) {
+bool DependencySignature::equals(DependencySignature* sig) const {
+  if ((type() != sig->type()) || (args_count() != sig->args_count())) {
     return false;
   }
 
-  if (args_count() != sig.args_count()) {
-    return false;
-  }
-
-  for (int i = 0; i < sig.args_count(); i++) {
-    if (arg(i) != sig.arg(i)) {
+  for (int i = 0; i < sig->args_count(); i++) {
+    if (arg(i) != sig->arg(i)) {
       return false;
     }
   }
   return true;
 }
 
-
-// ----------------- DependencySignatureBuffer --------------------------------------
-DependencySignatureBuffer::DependencySignatureBuffer() {
-  _signatures = NEW_RESOURCE_ARRAY(GrowableArray<DependencySignature*>*, Dependencies::TYPE_LIMIT);
-  memset(_signatures, 0, sizeof(DependencySignature*) * Dependencies::TYPE_LIMIT);
-}
-
-/* Check if arguments are identical. Two dependency signatures are considered
- * identical, if the type as well as all argument identifiers are identical.
- * If the dependency has not already been checked, the dependency signature is
- * added to the checked dependencies of the same type. The function returns
- * false, which causes the dependency to be checked in the caller.
- */
-bool DependencySignatureBuffer::add_if_missing(const DependencySignature& sig) {
-  const int index = sig.type();
-  GrowableArray<DependencySignature*>* buffer = _signatures[index];
-  if (buffer == NULL) {
-    buffer = new GrowableArray<DependencySignature*>();
-    _signatures[index] = buffer;
-  }
-
-  // Check if we have already checked the dependency
-  for (int i = 0; i < buffer->length(); i++) {
-    DependencySignature* checked_signature = buffer->at(i);
-    if (checked_signature->equals(sig)) {
-      return true;
-    }
-  }
-  buffer->append((DependencySignature*)&sig);
-  return false;
-}
-
-
 /// Checking dependencies:
 
 // This hierarchy walker inspects subtypes of a given type,
--- a/src/share/vm/code/dependencies.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/code/dependencies.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -32,6 +32,7 @@
 #include "code/compressedStream.hpp"
 #include "code/nmethod.hpp"
 #include "utilities/growableArray.hpp"
+#include "utilities/hashtable.hpp"
 
 //** Dependencies represent assertions (approximate invariants) within
 // the runtime system, e.g. class hierarchy changes.  An example is an
@@ -526,13 +527,12 @@
 };
 
 
-class DependencySignature : public ResourceObj {
+class DependencySignature : public GenericHashtableEntry<DependencySignature, ResourceObj> {
  private:
   int                   _args_count;
   uintptr_t             _argument_hash[Dependencies::max_arg_count];
   Dependencies::DepType _type;
 
-
  public:
   DependencySignature(Dependencies::DepStream& dep) {
     _args_count = dep.argument_count();
@@ -542,21 +542,14 @@
     }
   }
 
-  bool equals(const DependencySignature& sig) const;
+  bool equals(DependencySignature* sig) const;
+  uintptr_t key() const { return _argument_hash[0] >> 2; }
 
   int args_count()             const { return _args_count; }
   uintptr_t arg(int idx)       const { return _argument_hash[idx]; }
   Dependencies::DepType type() const { return _type; }
 };
 
-class DependencySignatureBuffer : public StackObj {
- private:
-  GrowableArray<DependencySignature*>**  _signatures;
-
- public:
-  DependencySignatureBuffer();
-  bool add_if_missing(const DependencySignature& sig);
-};
 
 // Every particular DepChange is a sub-class of this class.
 class DepChange : public StackObj {
--- a/src/share/vm/code/nmethod.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/code/nmethod.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -2135,25 +2135,21 @@
   // Turn off dependency tracing while actually testing dependencies.
   NOT_PRODUCT( FlagSetting fs(TraceDependencies, false) );
 
-  // 'dep_signature_buffers' caches already checked dependencies.
-  DependencySignatureBuffer dep_signature_buffers;
-
+ GenericHashtable<DependencySignature, ResourceObj>* table = new GenericHashtable<DependencySignature, ResourceObj>(11027);
   // Iterate over live nmethods and check dependencies of all nmethods that are not
   // marked for deoptimization. A particular dependency is only checked once.
   for(nmethod* nm = CodeCache::alive_nmethod(CodeCache::first()); nm != NULL; nm = CodeCache::alive_nmethod(CodeCache::next(nm))) {
     if (!nm->is_marked_for_deoptimization()) {
       for (Dependencies::DepStream deps(nm); deps.next(); ) {
         // Construct abstraction of a dependency.
-        const DependencySignature* current_sig = new DependencySignature(deps);
-        // Determine if 'deps' is already checked. If it is not checked,
-        // 'add_if_missing()' adds the dependency signature and returns
-        // false.
-        if (!dep_signature_buffers.add_if_missing(*current_sig)) {
+        DependencySignature* current_sig = new DependencySignature(deps);
+        // Determine if 'deps' is already checked. table->add() returns
+        // 'true' if the dependency was added (i.e., was not in the hashtable).
+        if (table->add(current_sig)) {
           if (deps.check_dependency() != NULL) {
             // Dependency checking failed. Print out information about the failed
             // dependency and finally fail with an assert. We can fail here, since
             // dependency checking is never done in a product build.
-            ResourceMark rm;
             changes.print();
             nm->print();
             nm->print_dependencies();
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -4534,7 +4534,7 @@
 G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) :
   ParGCAllocBuffer(gclab_word_size), _retired(false) { }
 
-G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num)
+G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, ReferenceProcessor* rp)
   : _g1h(g1h),
     _refs(g1h->task_queue(queue_num)),
     _dcq(&g1h->dirty_card_queue_set()),
@@ -4544,7 +4544,7 @@
     _term_attempts(0),
     _surviving_alloc_buffer(g1h->desired_plab_sz(GCAllocForSurvived)),
     _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured)),
-    _age_table(false),
+    _age_table(false), _scanner(g1h, this, rp),
     _strong_roots_time(0), _term_time(0),
     _alloc_buffer_waste(0), _undo_waste(0) {
   // we allocate G1YoungSurvRateNumRegions plus one entries, since
@@ -4653,14 +4653,10 @@
 
 G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1,
                                      G1ParScanThreadState* par_scan_state) :
-  _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()),
-  _par_scan_state(par_scan_state),
-  _worker_id(par_scan_state->queue_num()),
-  _during_initial_mark(_g1->g1_policy()->during_initial_mark_pause()),
-  _mark_in_progress(_g1->mark_in_progress()) { }
-
-template <G1Barrier barrier, bool do_mark_object>
-void G1ParCopyClosure<barrier, do_mark_object>::mark_object(oop obj) {
+  _g1(g1), _par_scan_state(par_scan_state),
+  _worker_id(par_scan_state->queue_num()) { }
+
+void G1ParCopyHelper::mark_object(oop obj) {
 #ifdef ASSERT
   HeapRegion* hr = _g1->heap_region_containing(obj);
   assert(hr != NULL, "sanity");
@@ -4671,9 +4667,7 @@
   _cm->grayRoot(obj, (size_t) obj->size(), _worker_id);
 }
 
-template <G1Barrier barrier, bool do_mark_object>
-void G1ParCopyClosure<barrier, do_mark_object>
-  ::mark_forwarded_object(oop from_obj, oop to_obj) {
+void G1ParCopyHelper::mark_forwarded_object(oop from_obj, oop to_obj) {
 #ifdef ASSERT
   assert(from_obj->is_forwarded(), "from obj should be forwarded");
   assert(from_obj->forwardee() == to_obj, "to obj should be the forwardee");
@@ -4695,27 +4689,25 @@
   _cm->grayRoot(to_obj, (size_t) from_obj->size(), _worker_id);
 }
 
-template <G1Barrier barrier, bool do_mark_object>
-oop G1ParCopyClosure<barrier, do_mark_object>
-  ::copy_to_survivor_space(oop old) {
+oop G1ParScanThreadState::copy_to_survivor_space(oop const old) {
   size_t word_sz = old->size();
-  HeapRegion* from_region = _g1->heap_region_containing_raw(old);
+  HeapRegion* from_region = _g1h->heap_region_containing_raw(old);
   // +1 to make the -1 indexes valid...
   int       young_index = from_region->young_index_in_cset()+1;
   assert( (from_region->is_young() && young_index >  0) ||
          (!from_region->is_young() && young_index == 0), "invariant" );
-  G1CollectorPolicy* g1p = _g1->g1_policy();
+  G1CollectorPolicy* g1p = _g1h->g1_policy();
   markOop m = old->mark();
   int age = m->has_displaced_mark_helper() ? m->displaced_mark_helper()->age()
                                            : m->age();
   GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, age,
                                                              word_sz);
-  HeapWord* obj_ptr = _par_scan_state->allocate(alloc_purpose, word_sz);
+  HeapWord* obj_ptr = allocate(alloc_purpose, word_sz);
 #ifndef PRODUCT
   // Should this evacuation fail?
-  if (_g1->evacuation_should_fail()) {
+  if (_g1h->evacuation_should_fail()) {
     if (obj_ptr != NULL) {
-      _par_scan_state->undo_allocation(alloc_purpose, obj_ptr, word_sz);
+      undo_allocation(alloc_purpose, obj_ptr, word_sz);
       obj_ptr = NULL;
     }
   }
@@ -4724,7 +4716,7 @@
   if (obj_ptr == NULL) {
     // This will either forward-to-self, or detect that someone else has
     // installed a forwarding pointer.
-    return _g1->handle_evacuation_failure_par(_par_scan_state, old);
+    return _g1h->handle_evacuation_failure_par(this, old);
   }
 
   oop obj = oop(obj_ptr);
@@ -4757,12 +4749,12 @@
         m = m->incr_age();
         obj->set_mark(m);
       }
-      _par_scan_state->age_table()->add(obj, word_sz);
+      age_table()->add(obj, word_sz);
     } else {
       obj->set_mark(m);
     }
 
-    size_t* surv_young_words = _par_scan_state->surviving_young_words();
+    size_t* surv_young_words = surviving_young_words();
     surv_young_words[young_index] += word_sz;
 
     if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
@@ -4771,15 +4763,15 @@
       // length field of the from-space object.
       arrayOop(obj)->set_length(0);
       oop* old_p = set_partial_array_mask(old);
-      _par_scan_state->push_on_queue(old_p);
+      push_on_queue(old_p);
     } else {
       // No point in using the slower heap_region_containing() method,
       // given that we know obj is in the heap.
-      _scanner.set_region(_g1->heap_region_containing_raw(obj));
+      _scanner.set_region(_g1h->heap_region_containing_raw(obj));
       obj->oop_iterate_backwards(&_scanner);
     }
   } else {
-    _par_scan_state->undo_allocation(alloc_purpose, obj_ptr, word_sz);
+    undo_allocation(alloc_purpose, obj_ptr, word_sz);
     obj = forward_ptr;
   }
   return obj;
@@ -4794,19 +4786,23 @@
 
 template <G1Barrier barrier, bool do_mark_object>
 template <class T>
-void G1ParCopyClosure<barrier, do_mark_object>
-::do_oop_work(T* p) {
-  oop obj = oopDesc::load_decode_heap_oop(p);
+void G1ParCopyClosure<barrier, do_mark_object>::do_oop_work(T* p) {
+  T heap_oop = oopDesc::load_heap_oop(p);
+
+  if (oopDesc::is_null(heap_oop)) {
+    return;
+  }
+
+  oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
 
   assert(_worker_id == _par_scan_state->queue_num(), "sanity");
 
-  // here the null check is implicit in the cset_fast_test() test
   if (_g1->in_cset_fast_test(obj)) {
     oop forwardee;
     if (obj->is_forwarded()) {
       forwardee = obj->forwardee();
     } else {
-      forwardee = copy_to_survivor_space(obj);
+      forwardee = _par_scan_state->copy_to_survivor_space(obj);
     }
     assert(forwardee != NULL, "forwardee should not be NULL");
     oopDesc::encode_store_heap_oop(p, forwardee);
@@ -4823,12 +4819,12 @@
     // The object is not in collection set. If we're a root scanning
     // closure during an initial mark pause (i.e. do_mark_object will
     // be true) then attempt to mark the object.
-    if (do_mark_object && _g1->is_in_g1_reserved(obj)) {
+    if (do_mark_object) {
       mark_object(obj);
     }
   }
 
-  if (barrier == G1BarrierEvac && obj != NULL) {
+  if (barrier == G1BarrierEvac) {
     _par_scan_state->update_rs(_from, p, _worker_id);
   }
 }
@@ -5025,7 +5021,7 @@
 
       ReferenceProcessor*             rp = _g1h->ref_processor_stw();
 
-      G1ParScanThreadState            pss(_g1h, worker_id);
+      G1ParScanThreadState            pss(_g1h, worker_id, rp);
       G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, rp);
       G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, rp);
       G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss, rp);
@@ -5456,7 +5452,7 @@
 
     G1STWIsAliveClosure is_alive(_g1h);
 
-    G1ParScanThreadState pss(_g1h, worker_id);
+    G1ParScanThreadState            pss(_g1h, worker_id, NULL);
 
     G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, NULL);
     G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, NULL);
@@ -5568,7 +5564,7 @@
     ResourceMark rm;
     HandleMark   hm;
 
-    G1ParScanThreadState            pss(_g1h, worker_id);
+    G1ParScanThreadState            pss(_g1h, worker_id, NULL);
     G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, NULL);
     G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, NULL);
     G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss, NULL);
@@ -5694,7 +5690,7 @@
   // JNI refs.
 
   // Use only a single queue for this PSS.
-  G1ParScanThreadState pss(this, 0);
+  G1ParScanThreadState            pss(this, 0, NULL);
 
   // We do not embed a reference processor in the copying/scanning
   // closures while we're actually processing the discovered
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -606,6 +606,11 @@
   // may not be a humongous - it must fit into a single heap region.
   HeapWord* par_allocate_during_gc(GCAllocPurpose purpose, size_t word_size);
 
+  HeapWord* allocate_during_gc_slow(GCAllocPurpose purpose,
+                                    HeapRegion*    alloc_region,
+                                    bool           par,
+                                    size_t         word_size);
+
   // Ensure that no further allocations can happen in "r", bearing in mind
   // that parallel threads might be attempting allocations.
   void par_allocate_remaining_space(HeapRegion* r);
@@ -698,23 +703,20 @@
   }
 
   // This is a fast test on whether a reference points into the
-  // collection set or not. It does not assume that the reference
-  // points into the heap; if it doesn't, it will return false.
+  // collection set or not. Assume that the reference
+  // points into the heap.
   bool in_cset_fast_test(oop obj) {
     assert(_in_cset_fast_test != NULL, "sanity");
-    if (_g1_committed.contains((HeapWord*) obj)) {
-      // no need to subtract the bottom of the heap from obj,
-      // _in_cset_fast_test is biased
-      uintx index = cast_from_oop<uintx>(obj) >> HeapRegion::LogOfHRGrainBytes;
-      bool ret = _in_cset_fast_test[index];
-      // let's make sure the result is consistent with what the slower
-      // test returns
-      assert( ret || !obj_in_cs(obj), "sanity");
-      assert(!ret ||  obj_in_cs(obj), "sanity");
-      return ret;
-    } else {
-      return false;
-    }
+    assert(_g1_committed.contains((HeapWord*) obj), err_msg("Given reference outside of heap, is "PTR_FORMAT, (HeapWord*)obj));
+    // no need to subtract the bottom of the heap from obj,
+    // _in_cset_fast_test is biased
+    uintx index = cast_from_oop<uintx>(obj) >> HeapRegion::LogOfHRGrainBytes;
+    bool ret = _in_cset_fast_test[index];
+    // let's make sure the result is consistent with what the slower
+    // test returns
+    assert( ret || !obj_in_cs(obj), "sanity");
+    assert(!ret ||  obj_in_cs(obj), "sanity");
+    return ret;
   }
 
   void clear_cset_fast_test() {
@@ -1774,95 +1776,6 @@
     ParGCAllocBuffer::retire(end_of_gc, retain);
     _retired = true;
   }
-
-  bool is_retired() {
-    return _retired;
-  }
-};
-
-class G1ParGCAllocBufferContainer {
-protected:
-  static int const _priority_max = 2;
-  G1ParGCAllocBuffer* _priority_buffer[_priority_max];
-
-public:
-  G1ParGCAllocBufferContainer(size_t gclab_word_size) {
-    for (int pr = 0; pr < _priority_max; ++pr) {
-      _priority_buffer[pr] = new G1ParGCAllocBuffer(gclab_word_size);
-    }
-  }
-
-  ~G1ParGCAllocBufferContainer() {
-    for (int pr = 0; pr < _priority_max; ++pr) {
-      assert(_priority_buffer[pr]->is_retired(), "alloc buffers should all retire at this point.");
-      delete _priority_buffer[pr];
-    }
-  }
-
-  HeapWord* allocate(size_t word_sz) {
-    HeapWord* obj;
-    for (int pr = 0; pr < _priority_max; ++pr) {
-      obj = _priority_buffer[pr]->allocate(word_sz);
-      if (obj != NULL) return obj;
-    }
-    return obj;
-  }
-
-  bool contains(void* addr) {
-    for (int pr = 0; pr < _priority_max; ++pr) {
-      if (_priority_buffer[pr]->contains(addr)) return true;
-    }
-    return false;
-  }
-
-  void undo_allocation(HeapWord* obj, size_t word_sz) {
-    bool finish_undo;
-    for (int pr = 0; pr < _priority_max; ++pr) {
-      if (_priority_buffer[pr]->contains(obj)) {
-        _priority_buffer[pr]->undo_allocation(obj, word_sz);
-        finish_undo = true;
-      }
-    }
-    if (!finish_undo) ShouldNotReachHere();
-  }
-
-  size_t words_remaining() {
-    size_t result = 0;
-    for (int pr = 0; pr < _priority_max; ++pr) {
-      result += _priority_buffer[pr]->words_remaining();
-    }
-    return result;
-  }
-
-  size_t words_remaining_in_retired_buffer() {
-    G1ParGCAllocBuffer* retired = _priority_buffer[0];
-    return retired->words_remaining();
-  }
-
-  void flush_stats_and_retire(PLABStats* stats, bool end_of_gc, bool retain) {
-    for (int pr = 0; pr < _priority_max; ++pr) {
-      _priority_buffer[pr]->flush_stats_and_retire(stats, end_of_gc, retain);
-    }
-  }
-
-  void update(bool end_of_gc, bool retain, HeapWord* buf, size_t word_sz) {
-    G1ParGCAllocBuffer* retired_and_set = _priority_buffer[0];
-    retired_and_set->retire(end_of_gc, retain);
-    retired_and_set->set_buf(buf);
-    retired_and_set->set_word_size(word_sz);
-    adjust_priority_order();
-  }
-
-private:
-  void adjust_priority_order() {
-    G1ParGCAllocBuffer* retired_and_set = _priority_buffer[0];
-
-    int last = _priority_max - 1;
-    for (int pr = 0; pr < last; ++pr) {
-      _priority_buffer[pr] = _priority_buffer[pr + 1];
-    }
-    _priority_buffer[last] = retired_and_set;
-  }
 };
 
 class G1ParScanThreadState : public StackObj {
@@ -1873,11 +1786,13 @@
   G1SATBCardTableModRefBS* _ct_bs;
   G1RemSet* _g1_rem;
 
-  G1ParGCAllocBufferContainer  _surviving_alloc_buffer;
-  G1ParGCAllocBufferContainer  _tenured_alloc_buffer;
-  G1ParGCAllocBufferContainer* _alloc_buffers[GCAllocPurposeCount];
+  G1ParGCAllocBuffer  _surviving_alloc_buffer;
+  G1ParGCAllocBuffer  _tenured_alloc_buffer;
+  G1ParGCAllocBuffer* _alloc_buffers[GCAllocPurposeCount];
   ageTable            _age_table;
 
+  G1ParScanClosure    _scanner;
+
   size_t           _alloc_buffer_waste;
   size_t           _undo_waste;
 
@@ -1930,7 +1845,7 @@
   }
 
 public:
-  G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num);
+  G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, ReferenceProcessor* rp);
 
   ~G1ParScanThreadState() {
     FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base, mtGC);
@@ -1939,7 +1854,7 @@
   RefToScanQueue*   refs()            { return _refs;             }
   ageTable*         age_table()       { return &_age_table;       }
 
-  G1ParGCAllocBufferContainer* alloc_buffer(GCAllocPurpose purpose) {
+  G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) {
     return _alloc_buffers[purpose];
   }
 
@@ -1969,13 +1884,15 @@
     HeapWord* obj = NULL;
     size_t gclab_word_size = _g1h->desired_plab_sz(purpose);
     if (word_sz * 100 < gclab_word_size * ParallelGCBufferWastePct) {
-      G1ParGCAllocBufferContainer* alloc_buf = alloc_buffer(purpose);
+      G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose);
+      add_to_alloc_buffer_waste(alloc_buf->words_remaining());
+      alloc_buf->retire(false /* end_of_gc */, false /* retain */);
 
       HeapWord* buf = _g1h->par_allocate_during_gc(purpose, gclab_word_size);
       if (buf == NULL) return NULL; // Let caller handle allocation failure.
-
-      add_to_alloc_buffer_waste(alloc_buf->words_remaining_in_retired_buffer());
-      alloc_buf->update(false /* end_of_gc */, false /* retain */, buf, gclab_word_size);
+      // Otherwise.
+      alloc_buf->set_word_size(gclab_word_size);
+      alloc_buf->set_buf(buf);
 
       obj = alloc_buf->allocate(word_sz);
       assert(obj != NULL, "buffer was definitely big enough...");
@@ -2065,6 +1982,8 @@
     }
   }
 
+  oop copy_to_survivor_space(oop const obj);
+
   template <class T> void deal_with_reference(T* ref_to_scan) {
     if (has_partial_array_mask(ref_to_scan)) {
       _partial_scan_cl->do_oop_nv(ref_to_scan);
@@ -2087,6 +2006,7 @@
     }
   }
 
+public:
   void trim_queue();
 };
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1OopClosures.inline.hpp"
+
+G1ParCopyHelper::G1ParCopyHelper(G1CollectedHeap* g1,  G1ParScanThreadState* par_scan_state) :
+  G1ParClosureSuper(g1, par_scan_state), _scanned_klass(NULL),
+  _cm(_g1->concurrent_mark()) {}
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -48,12 +48,8 @@
 class G1ParClosureSuper : public OopsInHeapRegionClosure {
 protected:
   G1CollectedHeap* _g1;
-  G1RemSet* _g1_rem;
-  ConcurrentMark* _cm;
   G1ParScanThreadState* _par_scan_state;
   uint _worker_id;
-  bool _during_initial_mark;
-  bool _mark_in_progress;
 public:
   G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state);
   bool apply_to_weak_ref_discovered_field() { return true; }
@@ -133,23 +129,10 @@
 
 // Add back base class for metadata
 class G1ParCopyHelper : public G1ParClosureSuper {
+protected:
   Klass* _scanned_klass;
+  ConcurrentMark* _cm;
 
- public:
-  G1ParCopyHelper(G1CollectedHeap* g1,  G1ParScanThreadState* par_scan_state) :
-      _scanned_klass(NULL),
-      G1ParClosureSuper(g1, par_scan_state) {}
-
-  void set_scanned_klass(Klass* k) { _scanned_klass = k; }
-  template <class T> void do_klass_barrier(T* p, oop new_obj);
-};
-
-template <G1Barrier barrier, bool do_mark_object>
-class G1ParCopyClosure : public G1ParCopyHelper {
-  G1ParScanClosure _scanner;
-  template <class T> void do_oop_work(T* p);
-
-protected:
   // Mark the object if it's not already marked. This is used to mark
   // objects pointed to by roots that are guaranteed not to move
   // during the GC (i.e., non-CSet objects). It is MT-safe.
@@ -159,22 +142,26 @@
   // objects pointed to by roots that have been forwarded during a
   // GC. It is MT-safe.
   void mark_forwarded_object(oop from_obj, oop to_obj);
+ public:
+  G1ParCopyHelper(G1CollectedHeap* g1,  G1ParScanThreadState* par_scan_state);
 
-  oop copy_to_survivor_space(oop obj);
+  void set_scanned_klass(Klass* k) { _scanned_klass = k; }
+  template <class T> void do_klass_barrier(T* p, oop new_obj);
+};
+
+template <G1Barrier barrier, bool do_mark_object>
+class G1ParCopyClosure : public G1ParCopyHelper {
+private:
+  template <class T> void do_oop_work(T* p);
 
 public:
   G1ParCopyClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
                    ReferenceProcessor* rp) :
-      _scanner(g1, par_scan_state, rp),
       G1ParCopyHelper(g1, par_scan_state) {
     assert(_ref_processor == NULL, "sanity");
   }
 
-  G1ParScanClosure* scanner() { return &_scanner; }
-
-  template <class T> void do_oop_nv(T* p) {
-    do_oop_work(p);
-  }
+  template <class T> void do_oop_nv(T* p) { do_oop_work(p); }
   virtual void do_oop(oop* p)       { do_oop_nv(p); }
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
 };
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -83,7 +83,7 @@
 
       _par_scan_state->push_on_queue(p);
     } else {
-      _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
+      _par_scan_state->update_rs(_from, p, _worker_id);
     }
   }
 }
--- a/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -65,7 +65,7 @@
     case threads:
     {
       ResourceMark rm;
-      CLDToOopClosure* cld_closure = NULL; // Not needed. All CLDs are already visited.
+      CLDClosure* cld_closure = NULL; // Not needed. All CLDs are already visited.
       Threads::oops_do(&roots_closure, cld_closure, NULL);
     }
     break;
@@ -122,7 +122,7 @@
 
   PSPromotionManager* pm = PSPromotionManager::gc_thread_promotion_manager(which);
   PSScavengeRootsClosure roots_closure(pm);
-  CLDToOopClosure* roots_from_clds = NULL;  // Not needed. All CLDs are already visited.
+  CLDClosure* roots_from_clds = NULL;  // Not needed. All CLDs are already visited.
   CodeBlobToOopClosure roots_in_blobs(&roots_closure, /*do_marking=*/ true);
 
   if (_java_thread != NULL)
--- a/src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -158,7 +158,7 @@
   // Fills in the unallocated portion of the buffer with a garbage object.
   // If "end_of_gc" is TRUE, is after the last use in the GC.  IF "retain"
   // is true, attempt to re-use the unused portion in the next GC.
-  virtual void retire(bool end_of_gc, bool retain);
+  void retire(bool end_of_gc, bool retain);
 
   void print() PRODUCT_RETURN;
 };
--- a/src/share/vm/interpreter/bytecodeTracer.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/interpreter/bytecodeTracer.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -596,7 +596,7 @@
     if (data != NULL) {
       st->print("  %d", mdo->dp_to_di(data->dp()));
       st->fill_to(6);
-      data->print_data_on(st);
+      data->print_data_on(st, mdo);
     }
   }
 }
--- a/src/share/vm/memory/iterator.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/memory/iterator.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -128,6 +128,11 @@
   virtual void do_klass(Klass* k) = 0;
 };
 
+class CLDClosure : public Closure {
+ public:
+  virtual void do_cld(ClassLoaderData* cld) = 0;
+};
+
 class KlassToOopClosure : public KlassClosure {
   OopClosure* _oop_closure;
  public:
@@ -135,7 +140,7 @@
   virtual void do_klass(Klass* k);
 };
 
-class CLDToOopClosure {
+class CLDToOopClosure : public CLDClosure {
   OopClosure* _oop_closure;
   KlassToOopClosure _klass_closure;
   bool _must_claim_cld;
--- a/src/share/vm/oops/instanceKlass.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/oops/instanceKlass.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -49,6 +49,7 @@
 #include "prims/jvmtiExport.hpp"
 #include "prims/jvmtiRedefineClassesTrace.hpp"
 #include "prims/jvmtiRedefineClasses.hpp"
+#include "prims/jvmtiThreadState.hpp"
 #include "prims/methodComparator.hpp"
 #include "runtime/fieldDescriptor.hpp"
 #include "runtime/handles.inline.hpp"
@@ -862,10 +863,16 @@
     // Step 10 and 11
     Handle e(THREAD, PENDING_EXCEPTION);
     CLEAR_PENDING_EXCEPTION;
+    // JVMTI has already reported the pending exception
+    // JVMTI internal flag reset is needed in order to report ExceptionInInitializerError
+    JvmtiExport::clear_detected_exception((JavaThread*)THREAD);
     {
       EXCEPTION_MARK;
       this_oop->set_initialization_state_and_notify(initialization_error, THREAD);
       CLEAR_PENDING_EXCEPTION;   // ignore any exception thrown, class initialization error is thrown below
+      // JVMTI has already reported the pending exception
+      // JVMTI internal flag reset is needed in order to report ExceptionInInitializerError
+      JvmtiExport::clear_detected_exception((JavaThread*)THREAD);
     }
     DTRACE_CLASSINIT_PROBE_WAIT(error, InstanceKlass::cast(this_oop()), -1,wait);
     if (e->is_a(SystemDictionary::Error_klass())) {
@@ -2192,15 +2199,7 @@
   for (int m = 0; m < methods()->length(); m++) {
     MethodData* mdo = methods()->at(m)->method_data();
     if (mdo != NULL) {
-      for (ProfileData* data = mdo->first_data();
-           mdo->is_valid(data);
-           data = mdo->next_data(data)) {
-        data->clean_weak_klass_links(is_alive);
-      }
-      ParametersTypeData* parameters = mdo->parameters_type_data();
-      if (parameters != NULL) {
-        parameters->clean_weak_klass_links(is_alive);
-      }
+      mdo->clean_method_data(is_alive);
     }
   }
 }
@@ -2719,7 +2718,7 @@
   Method* m = n->method();
   // Search for match
   while(cur != NULL && cur != n) {
-    if (TieredCompilation) {
+    if (TieredCompilation && m == cur->method()) {
       // Find max level before n
       max_level = MAX2(max_level, cur->comp_level());
     }
@@ -2741,7 +2740,9 @@
     cur = next;
     while (cur != NULL) {
       // Find max level after n
-      max_level = MAX2(max_level, cur->comp_level());
+      if (m == cur->method()) {
+        max_level = MAX2(max_level, cur->comp_level());
+      }
       cur = cur->osr_link();
     }
     m->set_highest_osr_comp_level(max_level);
@@ -2987,8 +2988,7 @@
         offset          <= (juint) value->length() &&
         offset + length <= (juint) value->length()) {
       st->print(BULLET"string: ");
-      Handle h_obj(obj);
-      java_lang_String::print(h_obj, st);
+      java_lang_String::print(obj, st);
       st->cr();
       if (!WizardMode)  return;  // that is enough
     }
--- a/src/share/vm/oops/instanceKlass.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/oops/instanceKlass.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -306,7 +306,7 @@
   //   three cases:
   //     NULL: no implementor.
   //     A Klass* that's not itself: one implementor.
-  //     Itsef: more than one implementors.
+  //     Itself: more than one implementors.
   // embedded host klass follows here
   //   The embedded host klass only exists in an anonymous class for
   //   dynamic language support (JSR 292 enabled). The host class grants
--- a/src/share/vm/oops/klass.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/oops/klass.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -692,3 +692,21 @@
 }
 
 #endif
+
+/////////////// Unit tests ///////////////
+
+#ifndef PRODUCT
+
+class TestKlass {
+ public:
+  static void test_oop_is_instanceClassLoader() {
+    assert(SystemDictionary::ClassLoader_klass()->oop_is_instanceClassLoader(), "assert");
+    assert(!SystemDictionary::String_klass()->oop_is_instanceClassLoader(), "assert");
+  }
+};
+
+void TestKlass_test() {
+  TestKlass::test_oop_is_instanceClassLoader();
+}
+
+#endif
--- a/src/share/vm/oops/klass.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/oops/klass.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -498,6 +498,7 @@
   virtual bool oop_is_objArray_slow()       const { return false; }
   virtual bool oop_is_typeArray_slow()      const { return false; }
  public:
+  virtual bool oop_is_instanceClassLoader() const { return false; }
   virtual bool oop_is_instanceMirror()      const { return false; }
   virtual bool oop_is_instanceRef()         const { return false; }
 
--- a/src/share/vm/oops/methodData.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/oops/methodData.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -80,8 +80,42 @@
   _data = NULL;
 }
 
+char* ProfileData::print_data_on_helper(const MethodData* md) const {
+  DataLayout* dp  = md->extra_data_base();
+  DataLayout* end = md->extra_data_limit();
+  stringStream ss;
+  for (;; dp = MethodData::next_extra(dp)) {
+    assert(dp < end, "moved past end of extra data");
+    switch(dp->tag()) {
+    case DataLayout::speculative_trap_data_tag:
+      if (dp->bci() == bci()) {
+        SpeculativeTrapData* data = new SpeculativeTrapData(dp);
+        int trap = data->trap_state();
+        char buf[100];
+        ss.print("trap/");
+        data->method()->print_short_name(&ss);
+        ss.print("(%s) ", Deoptimization::format_trap_state(buf, sizeof(buf), trap));
+      }
+      break;
+    case DataLayout::bit_data_tag:
+      break;
+    case DataLayout::no_tag:
+    case DataLayout::arg_info_data_tag:
+      return ss.as_string();
+      break;
+    default:
+      fatal(err_msg("unexpected tag %d", dp->tag()));
+    }
+  }
+  return NULL;
+}
+
+void ProfileData::print_data_on(outputStream* st, const MethodData* md) const {
+  print_data_on(st, print_data_on_helper(md));
+}
+
 #ifndef PRODUCT
-void ProfileData::print_shared(outputStream* st, const char* name) const {
+void ProfileData::print_shared(outputStream* st, const char* name, const char* extra) const {
   st->print("bci: %d", bci());
   st->fill_to(tab_width_one);
   st->print("%s", name);
@@ -91,9 +125,13 @@
     char buf[100];
     st->print("trap(%s) ", Deoptimization::format_trap_state(buf, sizeof(buf), trap));
   }
+  if (extra != NULL) {
+    st->print(extra);
+  }
   int flags = data()->flags();
-  if (flags != 0)
+  if (flags != 0) {
     st->print("flags(%d) ", flags);
+  }
 }
 
 void ProfileData::tab(outputStream* st, bool first) const {
@@ -109,8 +147,8 @@
 
 
 #ifndef PRODUCT
-void BitData::print_data_on(outputStream* st) const {
-  print_shared(st, "BitData");
+void BitData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "BitData", extra);
 }
 #endif // !PRODUCT
 
@@ -120,8 +158,8 @@
 // A CounterData corresponds to a simple counter.
 
 #ifndef PRODUCT
-void CounterData::print_data_on(outputStream* st) const {
-  print_shared(st, "CounterData");
+void CounterData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "CounterData", extra);
   st->print_cr("count(%u)", count());
 }
 #endif // !PRODUCT
@@ -150,8 +188,8 @@
 }
 
 #ifndef PRODUCT
-void JumpData::print_data_on(outputStream* st) const {
-  print_shared(st, "JumpData");
+void JumpData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "JumpData", extra);
   st->print_cr("taken(%u) displacement(%d)", taken(), displacement());
 }
 #endif // !PRODUCT
@@ -332,8 +370,8 @@
   st->cr();
 }
 
-void CallTypeData::print_data_on(outputStream* st) const {
-  CounterData::print_data_on(st);
+void CallTypeData::print_data_on(outputStream* st, const char* extra) const {
+  CounterData::print_data_on(st, extra);
   if (has_arguments()) {
     tab(st, true);
     st->print("argument types");
@@ -346,8 +384,8 @@
   }
 }
 
-void VirtualCallTypeData::print_data_on(outputStream* st) const {
-  VirtualCallData::print_data_on(st);
+void VirtualCallTypeData::print_data_on(outputStream* st, const char* extra) const {
+  VirtualCallData::print_data_on(st, extra);
   if (has_arguments()) {
     tab(st, true);
     st->print("argument types");
@@ -400,12 +438,12 @@
     }
   }
 }
-void ReceiverTypeData::print_data_on(outputStream* st) const {
-  print_shared(st, "ReceiverTypeData");
+void ReceiverTypeData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "ReceiverTypeData", extra);
   print_receiver_data_on(st);
 }
-void VirtualCallData::print_data_on(outputStream* st) const {
-  print_shared(st, "VirtualCallData");
+void VirtualCallData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "VirtualCallData", extra);
   print_receiver_data_on(st);
 }
 #endif // !PRODUCT
@@ -461,8 +499,8 @@
 #endif // CC_INTERP
 
 #ifndef PRODUCT
-void RetData::print_data_on(outputStream* st) const {
-  print_shared(st, "RetData");
+void RetData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "RetData", extra);
   uint row;
   int entries = 0;
   for (row = 0; row < row_limit(); row++) {
@@ -496,8 +534,8 @@
 }
 
 #ifndef PRODUCT
-void BranchData::print_data_on(outputStream* st) const {
-  print_shared(st, "BranchData");
+void BranchData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "BranchData", extra);
   st->print_cr("taken(%u) displacement(%d)",
                taken(), displacement());
   tab(st);
@@ -570,8 +608,8 @@
 }
 
 #ifndef PRODUCT
-void MultiBranchData::print_data_on(outputStream* st) const {
-  print_shared(st, "MultiBranchData");
+void MultiBranchData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "MultiBranchData", extra);
   st->print_cr("default_count(%u) displacement(%d)",
                default_count(), default_displacement());
   int cases = number_of_cases();
@@ -584,8 +622,8 @@
 #endif
 
 #ifndef PRODUCT
-void ArgInfoData::print_data_on(outputStream* st) const {
-  print_shared(st, "ArgInfoData");
+void ArgInfoData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "ArgInfoData", extra);
   int nargs = number_of_args();
   for (int i = 0; i < nargs; i++) {
     st->print("  0x%x", arg_modified(i));
@@ -616,10 +654,17 @@
 }
 
 #ifndef PRODUCT
-void ParametersTypeData::print_data_on(outputStream* st) const {
-  st->print("parameter types");
+void ParametersTypeData::print_data_on(outputStream* st, const char* extra) const {
+  st->print("parameter types", extra);
   _parameters.print_data_on(st);
 }
+
+void SpeculativeTrapData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "SpeculativeTrapData", extra);
+  tab(st);
+  method()->print_short_name(st);
+  st->cr();
+}
 #endif
 
 // ==================================================================
@@ -745,7 +790,27 @@
   return DataLayout::compute_size_in_bytes(cell_count);
 }
 
-int MethodData::compute_extra_data_count(int data_size, int empty_bc_count) {
+bool MethodData::is_speculative_trap_bytecode(Bytecodes::Code code) {
+  // Bytecodes for which we may use speculation
+  switch (code) {
+  case Bytecodes::_checkcast:
+  case Bytecodes::_instanceof:
+  case Bytecodes::_aastore:
+  case Bytecodes::_invokevirtual:
+  case Bytecodes::_invokeinterface:
+  case Bytecodes::_if_acmpeq:
+  case Bytecodes::_if_acmpne:
+  case Bytecodes::_invokestatic:
+#ifdef COMPILER2
+    return UseTypeSpeculation;
+#endif
+  default:
+    return false;
+  }
+  return false;
+}
+
+int MethodData::compute_extra_data_count(int data_size, int empty_bc_count, bool needs_speculative_traps) {
   if (ProfileTraps) {
     // Assume that up to 3% of BCIs with no MDP will need to allocate one.
     int extra_data_count = (uint)(empty_bc_count * 3) / 128 + 1;
@@ -756,7 +821,18 @@
       extra_data_count = one_percent_of_data;
     if (extra_data_count > empty_bc_count)
       extra_data_count = empty_bc_count;  // no need for more
-    return extra_data_count;
+
+    // Make sure we have a minimum number of extra data slots to
+    // allocate SpeculativeTrapData entries. We would want to have one
+    // entry per compilation that inlines this method and for which
+    // some type speculation assumption fails. So the room we need for
+    // the SpeculativeTrapData entries doesn't directly depend on the
+    // size of the method. Because it's hard to estimate, we reserve
+    // space for an arbitrary number of entries.
+    int spec_data_count = (needs_speculative_traps ? SpecTrapLimitExtraEntries : 0) *
+      (SpeculativeTrapData::static_cell_count() + DataLayout::header_size_in_cells());
+
+    return MAX2(extra_data_count, spec_data_count);
   } else {
     return 0;
   }
@@ -769,15 +845,17 @@
   BytecodeStream stream(method);
   Bytecodes::Code c;
   int empty_bc_count = 0;  // number of bytecodes lacking data
+  bool needs_speculative_traps = false;
   while ((c = stream.next()) >= 0) {
     int size_in_bytes = compute_data_size(&stream);
     data_size += size_in_bytes;
     if (size_in_bytes == 0)  empty_bc_count += 1;
+    needs_speculative_traps = needs_speculative_traps || is_speculative_trap_bytecode(c);
   }
   int object_size = in_bytes(data_offset()) + data_size;
 
   // Add some extra DataLayout cells (at least one) to track stray traps.
-  int extra_data_count = compute_extra_data_count(data_size, empty_bc_count);
+  int extra_data_count = compute_extra_data_count(data_size, empty_bc_count, needs_speculative_traps);
   object_size += extra_data_count * DataLayout::compute_size_in_bytes(0);
 
   // Add a cell to record information about modified arguments.
@@ -1009,18 +1087,23 @@
   _data[0] = 0;  // apparently not set below.
   BytecodeStream stream(method);
   Bytecodes::Code c;
+  bool needs_speculative_traps = false;
   while ((c = stream.next()) >= 0) {
     int size_in_bytes = initialize_data(&stream, data_size);
     data_size += size_in_bytes;
     if (size_in_bytes == 0)  empty_bc_count += 1;
+    needs_speculative_traps = needs_speculative_traps || is_speculative_trap_bytecode(c);
   }
   _data_size = data_size;
   int object_size = in_bytes(data_offset()) + data_size;
 
   // Add some extra DataLayout cells (at least one) to track stray traps.
-  int extra_data_count = compute_extra_data_count(data_size, empty_bc_count);
+  int extra_data_count = compute_extra_data_count(data_size, empty_bc_count, needs_speculative_traps);
   int extra_size = extra_data_count * DataLayout::compute_size_in_bytes(0);
 
+  // Let's zero the space for the extra data
+  Copy::zero_to_bytes(((address)_data) + data_size, extra_size);
+
   // Add a cell to record information about modified arguments.
   // Set up _args_modified array after traps cells so that
   // the code for traps cells works.
@@ -1032,17 +1115,17 @@
   int arg_data_size = DataLayout::compute_size_in_bytes(arg_size+1);
   object_size += extra_size + arg_data_size;
 
-  int args_cell = ParametersTypeData::compute_cell_count(method());
+  int parms_cell = ParametersTypeData::compute_cell_count(method());
   // If we are profiling parameters, we reserver an area near the end
   // of the MDO after the slots for bytecodes (because there's no bci
   // for method entry so they don't fit with the framework for the
   // profiling of bytecodes). We store the offset within the MDO of
   // this area (or -1 if no parameter is profiled)
-  if (args_cell > 0) {
-    object_size += DataLayout::compute_size_in_bytes(args_cell);
+  if (parms_cell > 0) {
+    object_size += DataLayout::compute_size_in_bytes(parms_cell);
     _parameters_type_data_di = data_size + extra_size + arg_data_size;
     DataLayout *dp = data_layout_at(data_size + extra_size + arg_data_size);
-    dp->initialize(DataLayout::parameters_type_data_tag, 0, args_cell);
+    dp->initialize(DataLayout::parameters_type_data_tag, 0, parms_cell);
   } else {
     _parameters_type_data_di = -1;
   }
@@ -1133,39 +1216,113 @@
       break;
     }
   }
-  return bci_to_extra_data(bci, false);
+  return bci_to_extra_data(bci, NULL, false);
 }
 
-// Translate a bci to its corresponding extra data, or NULL.
-ProfileData* MethodData::bci_to_extra_data(int bci, bool create_if_missing) {
-  DataLayout* dp    = extra_data_base();
-  DataLayout* end   = extra_data_limit();
-  DataLayout* avail = NULL;
-  for (; dp < end; dp = next_extra(dp)) {
+DataLayout* MethodData::next_extra(DataLayout* dp) {
+  int nb_cells = 0;
+  switch(dp->tag()) {
+  case DataLayout::bit_data_tag:
+  case DataLayout::no_tag:
+    nb_cells = BitData::static_cell_count();
+    break;
+  case DataLayout::speculative_trap_data_tag:
+    nb_cells = SpeculativeTrapData::static_cell_count();
+    break;
+  default:
+    fatal(err_msg("unexpected tag %d", dp->tag()));
+  }
+  return (DataLayout*)((address)dp + DataLayout::compute_size_in_bytes(nb_cells));
+}
+
+ProfileData* MethodData::bci_to_extra_data_helper(int bci, Method* m, DataLayout*& dp) {
+  DataLayout* end = extra_data_limit();
+
+  for (;; dp = next_extra(dp)) {
+    assert(dp < end, "moved past end of extra data");
     // No need for "OrderAccess::load_acquire" ops,
     // since the data structure is monotonic.
-    if (dp->tag() == DataLayout::no_tag)  break;
-    if (dp->tag() == DataLayout::arg_info_data_tag) {
-      dp = end; // ArgInfoData is at the end of extra data section.
+    switch(dp->tag()) {
+    case DataLayout::no_tag:
+      return NULL;
+    case DataLayout::arg_info_data_tag:
+      dp = end;
+      return NULL; // ArgInfoData is at the end of extra data section.
+    case DataLayout::bit_data_tag:
+      if (m == NULL && dp->bci() == bci) {
+        return new BitData(dp);
+      }
       break;
-    }
-    if (dp->bci() == bci) {
-      assert(dp->tag() == DataLayout::bit_data_tag, "sane");
-      return new BitData(dp);
+    case DataLayout::speculative_trap_data_tag:
+      if (m != NULL) {
+        SpeculativeTrapData* data = new SpeculativeTrapData(dp);
+        // data->method() may be null in case of a concurrent
+        // allocation. Assume it's for the same method and use that
+        // entry in that case.
+        if (dp->bci() == bci) {
+          if (data->method() == NULL) {
+            return NULL;
+          } else if (data->method() == m) {
+            return data;
+          }
+        }
+      }
+      break;
+    default:
+      fatal(err_msg("unexpected tag %d", dp->tag()));
     }
   }
-  if (create_if_missing && dp < end) {
-    // Allocate this one.  There is no mutual exclusion,
-    // so two threads could allocate different BCIs to the
-    // same data layout.  This means these extra data
-    // records, like most other MDO contents, must not be
-    // trusted too much.
-    DataLayout temp;
-    temp.initialize(DataLayout::bit_data_tag, bci, 0);
-    dp->release_set_header(temp.header());
-    assert(dp->tag() == DataLayout::bit_data_tag, "sane");
-    //NO: assert(dp->bci() == bci, "no concurrent allocation");
-    return new BitData(dp);
+  return NULL;
+}
+
+
+// Translate a bci to its corresponding extra data, or NULL.
+ProfileData* MethodData::bci_to_extra_data(int bci, Method* m, bool create_if_missing) {
+  // This code assumes an entry for a SpeculativeTrapData is 2 cells
+  assert(2*DataLayout::compute_size_in_bytes(BitData::static_cell_count()) ==
+         DataLayout::compute_size_in_bytes(SpeculativeTrapData::static_cell_count()),
+         "code needs to be adjusted");
+
+  DataLayout* dp  = extra_data_base();
+  DataLayout* end = extra_data_limit();
+
+  // Allocation in the extra data space has to be atomic because not
+  // all entries have the same size and non atomic concurrent
+  // allocation would result in a corrupted extra data space.
+  while (true) {
+    ProfileData* result = bci_to_extra_data_helper(bci, m, dp);
+    if (result != NULL) {
+      return result;
+    }
+
+    if (create_if_missing && dp < end) {
+      assert(dp->tag() == DataLayout::no_tag || (dp->tag() == DataLayout::speculative_trap_data_tag && m != NULL), "should be free");
+      assert(next_extra(dp)->tag() == DataLayout::no_tag || next_extra(dp)->tag() == DataLayout::arg_info_data_tag, "should be free or arg info");
+      u1 tag = m == NULL ? DataLayout::bit_data_tag : DataLayout::speculative_trap_data_tag;
+      // SpeculativeTrapData is 2 slots. Make sure we have room.
+      if (m != NULL && next_extra(dp)->tag() != DataLayout::no_tag) {
+        return NULL;
+      }
+      DataLayout temp;
+      temp.initialize(tag, bci, 0);
+      // May have been set concurrently
+      if (dp->header() != temp.header() && !dp->atomic_set_header(temp.header())) {
+        // Allocation failure because of concurrent allocation. Try
+        // again.
+        continue;
+      }
+      assert(dp->tag() == tag, "sane");
+      assert(dp->bci() == bci, "no concurrent allocation");
+      if (tag == DataLayout::bit_data_tag) {
+        return new BitData(dp);
+      } else {
+        // If being allocated concurrently, one trap may be lost
+        SpeculativeTrapData* data = new SpeculativeTrapData(dp);
+        data->set_method(m);
+        return data;
+      }
+    }
+    return NULL;
   }
   return NULL;
 }
@@ -1210,25 +1367,35 @@
   for ( ; is_valid(data); data = next_data(data)) {
     st->print("%d", dp_to_di(data->dp()));
     st->fill_to(6);
-    data->print_data_on(st);
+    data->print_data_on(st, this);
   }
   st->print_cr("--- Extra data:");
   DataLayout* dp    = extra_data_base();
   DataLayout* end   = extra_data_limit();
-  for (; dp < end; dp = next_extra(dp)) {
+  for (;; dp = next_extra(dp)) {
+    assert(dp < end, "moved past end of extra data");
     // No need for "OrderAccess::load_acquire" ops,
     // since the data structure is monotonic.
-    if (dp->tag() == DataLayout::no_tag)  continue;
-    if (dp->tag() == DataLayout::bit_data_tag) {
+    switch(dp->tag()) {
+    case DataLayout::no_tag:
+      continue;
+    case DataLayout::bit_data_tag:
       data = new BitData(dp);
-    } else {
-      assert(dp->tag() == DataLayout::arg_info_data_tag, "must be BitData or ArgInfo");
+      break;
+    case DataLayout::speculative_trap_data_tag:
+      data = new SpeculativeTrapData(dp);
+      break;
+    case DataLayout::arg_info_data_tag:
       data = new ArgInfoData(dp);
       dp = end; // ArgInfoData is at the end of extra data section.
+      break;
+    default:
+      fatal(err_msg("unexpected tag %d", dp->tag()));
     }
     st->print("%d", dp_to_di(data->dp()));
     st->fill_to(6);
     data->print_data_on(st);
+    if (dp >= end) return;
   }
 }
 #endif
@@ -1351,3 +1518,110 @@
   assert(profile_parameters_jsr292_only(), "inconsistent");
   return m->is_compiled_lambda_form();
 }
+
+void MethodData::clean_extra_data_helper(DataLayout* dp, int shift, bool reset) {
+  if (shift == 0) {
+    return;
+  }
+  if (!reset) {
+    // Move all cells of trap entry at dp left by "shift" cells
+    intptr_t* start = (intptr_t*)dp;
+    intptr_t* end = (intptr_t*)next_extra(dp);
+    for (intptr_t* ptr = start; ptr < end; ptr++) {
+      *(ptr-shift) = *ptr;
+    }
+  } else {
+    // Reset "shift" cells stopping at dp
+    intptr_t* start = ((intptr_t*)dp) - shift;
+    intptr_t* end = (intptr_t*)dp;
+    for (intptr_t* ptr = start; ptr < end; ptr++) {
+      *ptr = 0;
+    }
+  }
+}
+
+// Remove SpeculativeTrapData entries that reference an unloaded
+// method
+void MethodData::clean_extra_data(BoolObjectClosure* is_alive) {
+  DataLayout* dp  = extra_data_base();
+  DataLayout* end = extra_data_limit();
+
+  int shift = 0;
+  for (; dp < end; dp = next_extra(dp)) {
+    switch(dp->tag()) {
+    case DataLayout::speculative_trap_data_tag: {
+      SpeculativeTrapData* data = new SpeculativeTrapData(dp);
+      Method* m = data->method();
+      assert(m != NULL, "should have a method");
+      if (!m->method_holder()->is_loader_alive(is_alive)) {
+        // "shift" accumulates the number of cells for dead
+        // SpeculativeTrapData entries that have been seen so
+        // far. Following entries must be shifted left by that many
+        // cells to remove the dead SpeculativeTrapData entries.
+        shift += (int)((intptr_t*)next_extra(dp) - (intptr_t*)dp);
+      } else {
+        // Shift this entry left if it follows dead
+        // SpeculativeTrapData entries
+        clean_extra_data_helper(dp, shift);
+      }
+      break;
+    }
+    case DataLayout::bit_data_tag:
+      // Shift this entry left if it follows dead SpeculativeTrapData
+      // entries
+      clean_extra_data_helper(dp, shift);
+      continue;
+    case DataLayout::no_tag:
+    case DataLayout::arg_info_data_tag:
+      // We are at end of the live trap entries. The previous "shift"
+      // cells contain entries that are either dead or were shifted
+      // left. They need to be reset to no_tag
+      clean_extra_data_helper(dp, shift, true);
+      return;
+    default:
+      fatal(err_msg("unexpected tag %d", dp->tag()));
+    }
+  }
+}
+
+// Verify there's no unloaded method referenced by a
+// SpeculativeTrapData entry
+void MethodData::verify_extra_data_clean(BoolObjectClosure* is_alive) {
+#ifdef ASSERT
+  DataLayout* dp  = extra_data_base();
+  DataLayout* end = extra_data_limit();
+
+  for (; dp < end; dp = next_extra(dp)) {
+    switch(dp->tag()) {
+    case DataLayout::speculative_trap_data_tag: {
+      SpeculativeTrapData* data = new SpeculativeTrapData(dp);
+      Method* m = data->method();
+      assert(m != NULL && m->method_holder()->is_loader_alive(is_alive), "Method should exist");
+      break;
+    }
+    case DataLayout::bit_data_tag:
+      continue;
+    case DataLayout::no_tag:
+    case DataLayout::arg_info_data_tag:
+      return;
+    default:
+      fatal(err_msg("unexpected tag %d", dp->tag()));
+    }
+  }
+#endif
+}
+
+void MethodData::clean_method_data(BoolObjectClosure* is_alive) {
+  for (ProfileData* data = first_data();
+       is_valid(data);
+       data = next_data(data)) {
+    data->clean_weak_klass_links(is_alive);
+  }
+  ParametersTypeData* parameters = parameters_type_data();
+  if (parameters != NULL) {
+    parameters->clean_weak_klass_links(is_alive);
+  }
+
+  clean_extra_data(is_alive);
+  verify_extra_data_clean(is_alive);
+}
--- a/src/share/vm/oops/methodData.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/oops/methodData.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -120,7 +120,8 @@
     arg_info_data_tag,
     call_type_data_tag,
     virtual_call_type_data_tag,
-    parameters_type_data_tag
+    parameters_type_data_tag,
+    speculative_trap_data_tag
   };
 
   enum {
@@ -189,8 +190,11 @@
   void set_header(intptr_t value) {
     _header._bits = value;
   }
-  void release_set_header(intptr_t value) {
-    OrderAccess::release_store_ptr(&_header._bits, value);
+  bool atomic_set_header(intptr_t value) {
+    if (Atomic::cmpxchg_ptr(value, (volatile intptr_t*)&_header._bits, 0) == 0) {
+      return true;
+    }
+    return false;
   }
   intptr_t header() {
     return _header._bits;
@@ -271,6 +275,7 @@
 class     MultiBranchData;
 class     ArgInfoData;
 class     ParametersTypeData;
+class   SpeculativeTrapData;
 
 // ProfileData
 //
@@ -291,6 +296,8 @@
   // This is a pointer to a section of profiling data.
   DataLayout* _data;
 
+  char* print_data_on_helper(const MethodData* md) const;
+
 protected:
   DataLayout* data() { return _data; }
   const DataLayout* data() const { return _data; }
@@ -440,6 +447,7 @@
   virtual bool is_CallTypeData()    const { return false; }
   virtual bool is_VirtualCallTypeData()const { return false; }
   virtual bool is_ParametersTypeData() const { return false; }
+  virtual bool is_SpeculativeTrapData()const { return false; }
 
 
   BitData* as_BitData() const {
@@ -494,6 +502,10 @@
     assert(is_ParametersTypeData(), "wrong type");
     return is_ParametersTypeData() ? (ParametersTypeData*)this : NULL;
   }
+  SpeculativeTrapData* as_SpeculativeTrapData() const {
+    assert(is_SpeculativeTrapData(), "wrong type");
+    return is_SpeculativeTrapData() ? (SpeculativeTrapData*)this : NULL;
+  }
 
 
   // Subclass specific initialization
@@ -509,12 +521,14 @@
   // translation here, and the required translators are in the ci subclasses.
   virtual void translate_from(const ProfileData* data) {}
 
-  virtual void print_data_on(outputStream* st) const {
+  virtual void print_data_on(outputStream* st, const char* extra = NULL) const {
     ShouldNotReachHere();
   }
 
+  void print_data_on(outputStream* st, const MethodData* md) const;
+
 #ifndef PRODUCT
-  void print_shared(outputStream* st, const char* name) const;
+  void print_shared(outputStream* st, const char* name, const char* extra) const;
   void tab(outputStream* st, bool first = false) const;
 #endif
 };
@@ -576,7 +590,7 @@
 #endif // CC_INTERP
 
 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };
 
@@ -639,7 +653,7 @@
 #endif // CC_INTERP
 
 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };
 
@@ -726,7 +740,7 @@
   void post_initialize(BytecodeStream* stream, MethodData* mdo);
 
 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };
 
@@ -1137,7 +1151,7 @@
   }
 
 #ifndef PRODUCT
-  virtual void print_data_on(outputStream* st) const;
+  virtual void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };
 
@@ -1282,7 +1296,7 @@
 
 #ifndef PRODUCT
   void print_receiver_data_on(outputStream* st) const;
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };
 
@@ -1325,7 +1339,7 @@
 #endif // CC_INTERP
 
 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };
 
@@ -1451,7 +1465,7 @@
   }
 
 #ifndef PRODUCT
-  virtual void print_data_on(outputStream* st) const;
+  virtual void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };
 
@@ -1554,7 +1568,7 @@
   void post_initialize(BytecodeStream* stream, MethodData* mdo);
 
 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };
 
@@ -1632,7 +1646,7 @@
   void post_initialize(BytecodeStream* stream, MethodData* mdo);
 
 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };
 
@@ -1825,7 +1839,7 @@
   void post_initialize(BytecodeStream* stream, MethodData* mdo);
 
 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };
 
@@ -1852,7 +1866,7 @@
   }
 
 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };
 
@@ -1913,7 +1927,7 @@
   }
 
 #ifndef PRODUCT
-  virtual void print_data_on(outputStream* st) const;
+  virtual void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 
   static ByteSize stack_slot_offset(int i) {
@@ -1925,6 +1939,54 @@
   }
 };
 
+// SpeculativeTrapData
+//
+// A SpeculativeTrapData is used to record traps due to type
+// speculation. It records the root of the compilation: that type
+// speculation is wrong in the context of one compilation (for
+// method1) doesn't mean it's wrong in the context of another one (for
+// method2). Type speculation could have more/different data in the
+// context of the compilation of method2 and it's worthwhile to try an
+// optimization that failed for compilation of method1 in the context
+// of compilation of method2.
+// Space for SpeculativeTrapData entries is allocated from the extra
+// data space in the MDO. If we run out of space, the trap data for
+// the ProfileData at that bci is updated.
+class SpeculativeTrapData : public ProfileData {
+protected:
+  enum {
+    method_offset,
+    speculative_trap_cell_count
+  };
+public:
+  SpeculativeTrapData(DataLayout* layout) : ProfileData(layout) {
+    assert(layout->tag() == DataLayout::speculative_trap_data_tag, "wrong type");
+  }
+
+  virtual bool is_SpeculativeTrapData() const { return true; }
+
+  static int static_cell_count() {
+    return speculative_trap_cell_count;
+  }
+
+  virtual int cell_count() const {
+    return static_cell_count();
+  }
+
+  // Direct accessor
+  Method* method() const {
+    return (Method*)intptr_at(method_offset);
+  }
+
+  void set_method(Method* m) {
+    set_intptr_at(method_offset, (intptr_t)m);
+  }
+
+#ifndef PRODUCT
+  virtual void print_data_on(outputStream* st, const char* extra = NULL) const;
+#endif
+};
+
 // MethodData*
 //
 // A MethodData* holds information which has been collected about
@@ -1994,7 +2056,7 @@
 
   // Whole-method sticky bits and flags
   enum {
-    _trap_hist_limit    = 17,   // decoupled from Deoptimization::Reason_LIMIT
+    _trap_hist_limit    = 18,   // decoupled from Deoptimization::Reason_LIMIT
     _trap_hist_mask     = max_jubyte,
     _extra_data_count   = 4     // extra DataLayout headers, for trap history
   }; // Public flag values
@@ -2049,6 +2111,7 @@
   // Helper for size computation
   static int compute_data_size(BytecodeStream* stream);
   static int bytecode_cell_count(Bytecodes::Code code);
+  static bool is_speculative_trap_bytecode(Bytecodes::Code code);
   enum { no_profile_data = -1, variable_cell_count = -2 };
 
   // Helper for initialization
@@ -2092,8 +2155,9 @@
   // What is the index of the first data entry?
   int first_di() const { return 0; }
 
+  ProfileData* bci_to_extra_data_helper(int bci, Method* m, DataLayout*& dp);
   // Find or create an extra ProfileData:
-  ProfileData* bci_to_extra_data(int bci, bool create_if_missing);
+  ProfileData* bci_to_extra_data(int bci, Method* m, bool create_if_missing);
 
   // return the argument info cell
   ArgInfoData *arg_info();
@@ -2116,6 +2180,10 @@
   static bool profile_parameters_jsr292_only();
   static bool profile_all_parameters();
 
+  void clean_extra_data(BoolObjectClosure* is_alive);
+  void clean_extra_data_helper(DataLayout* dp, int shift, bool reset = false);
+  void verify_extra_data_clean(BoolObjectClosure* is_alive);
+
 public:
   static int header_size() {
     return sizeof(MethodData)/wordSize;
@@ -2124,7 +2192,7 @@
   // Compute the size of a MethodData* before it is created.
   static int compute_allocation_size_in_bytes(methodHandle method);
   static int compute_allocation_size_in_words(methodHandle method);
-  static int compute_extra_data_count(int data_size, int empty_bc_count);
+  static int compute_extra_data_count(int data_size, int empty_bc_count, bool needs_speculative_traps);
 
   // Determine if a given bytecode can have profile information.
   static bool bytecode_has_profile(Bytecodes::Code code) {
@@ -2265,9 +2333,26 @@
   ProfileData* bci_to_data(int bci);
 
   // Same, but try to create an extra_data record if one is needed:
-  ProfileData* allocate_bci_to_data(int bci) {
-    ProfileData* data = bci_to_data(bci);
-    return (data != NULL) ? data : bci_to_extra_data(bci, true);
+  ProfileData* allocate_bci_to_data(int bci, Method* m) {
+    ProfileData* data = NULL;
+    // If m not NULL, try to allocate a SpeculativeTrapData entry
+    if (m == NULL) {
+      data = bci_to_data(bci);
+    }
+    if (data != NULL) {
+      return data;
+    }
+    data = bci_to_extra_data(bci, m, true);
+    if (data != NULL) {
+      return data;
+    }
+    // If SpeculativeTrapData allocation fails try to allocate a
+    // regular entry
+    data = bci_to_data(bci);
+    if (data != NULL) {
+      return data;
+    }
+    return bci_to_extra_data(bci, NULL, true);
   }
 
   // Add a handful of extra data records, for trap tracking.
@@ -2275,7 +2360,7 @@
   DataLayout* extra_data_limit() const { return (DataLayout*)((address)this + size_in_bytes()); }
   int extra_data_size() const { return (address)extra_data_limit()
                                - (address)extra_data_base(); }
-  static DataLayout* next_extra(DataLayout* dp) { return (DataLayout*)((address)dp + in_bytes(DataLayout::cell_offset(0))); }
+  static DataLayout* next_extra(DataLayout* dp);
 
   // Return (uint)-1 for overflow.
   uint trap_count(int reason) const {
@@ -2375,6 +2460,8 @@
   static bool profile_return();
   static bool profile_parameters();
   static bool profile_return_jsr292_only();
+
+  void clean_method_data(BoolObjectClosure* is_alive);
 };
 
 #endif // SHARE_VM_OOPS_METHODDATAOOP_HPP
--- a/src/share/vm/oops/oop.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/oops/oop.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -109,12 +109,13 @@
   int size_given_klass(Klass* klass);
 
   // type test operations (inlined in oop.inline.h)
-  bool is_instance()           const;
-  bool is_instanceMirror()     const;
-  bool is_instanceRef()        const;
-  bool is_array()              const;
-  bool is_objArray()           const;
-  bool is_typeArray()          const;
+  bool is_instance()            const;
+  bool is_instanceMirror()      const;
+  bool is_instanceClassLoader() const;
+  bool is_instanceRef()         const;
+  bool is_array()               const;
+  bool is_objArray()            const;
+  bool is_typeArray()           const;
 
  private:
   // field addresses in oop
--- a/src/share/vm/oops/oop.inline.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/oops/oop.inline.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -147,12 +147,13 @@
 
 inline bool oopDesc::is_a(Klass* k)        const { return klass()->is_subtype_of(k); }
 
-inline bool oopDesc::is_instance()           const { return klass()->oop_is_instance(); }
-inline bool oopDesc::is_instanceMirror()     const { return klass()->oop_is_instanceMirror(); }
-inline bool oopDesc::is_instanceRef()        const { return klass()->oop_is_instanceRef(); }
-inline bool oopDesc::is_array()              const { return klass()->oop_is_array(); }
-inline bool oopDesc::is_objArray()           const { return klass()->oop_is_objArray(); }
-inline bool oopDesc::is_typeArray()          const { return klass()->oop_is_typeArray(); }
+inline bool oopDesc::is_instance()            const { return klass()->oop_is_instance(); }
+inline bool oopDesc::is_instanceClassLoader() const { return klass()->oop_is_instanceClassLoader(); }
+inline bool oopDesc::is_instanceMirror()      const { return klass()->oop_is_instanceMirror(); }
+inline bool oopDesc::is_instanceRef()         const { return klass()->oop_is_instanceRef(); }
+inline bool oopDesc::is_array()               const { return klass()->oop_is_array(); }
+inline bool oopDesc::is_objArray()            const { return klass()->oop_is_objArray(); }
+inline bool oopDesc::is_typeArray()           const { return klass()->oop_is_typeArray(); }
 
 inline void*     oopDesc::field_base(int offset)        const { return (void*)&((char*)this)[offset]; }
 
--- a/src/share/vm/opto/block.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/block.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -90,9 +90,9 @@
 class CFGElement : public ResourceObj {
   friend class VMStructs;
  public:
-  float _freq; // Execution frequency (estimate)
+  double _freq; // Execution frequency (estimate)
 
-  CFGElement() : _freq(0.0f) {}
+  CFGElement() : _freq(0.0) {}
   virtual bool is_block() { return false; }
   virtual bool is_loop()  { return false; }
   Block*   as_Block() { assert(is_block(), "must be block"); return (Block*)this; }
@@ -202,7 +202,7 @@
   // BLOCK_FREQUENCY is a sentinel to mark uses of constant block frequencies.
   // It is currently also used to scale such frequencies relative to
   // FreqCountInvocations relative to the old value of 1500.
-#define BLOCK_FREQUENCY(f) ((f * (float) 1500) / FreqCountInvocations)
+#define BLOCK_FREQUENCY(f) ((f * (double) 1500) / FreqCountInvocations)
 
   // Register Pressure (estimate) for Splitting heuristic
   uint _reg_pressure;
@@ -393,7 +393,7 @@
   CFGLoop* _root_loop;
 
   // Outmost loop frequency
-  float _outer_loop_frequency;
+  double _outer_loop_frequency;
 
   // Per node latency estimation, valid only during GCM
   GrowableArray<uint>* _node_latency;
@@ -508,7 +508,7 @@
   }
 
   // Get the outer most frequency
-  float get_outer_loop_frequency() const {
+  double get_outer_loop_frequency() const {
     return _outer_loop_frequency;
   }
 
@@ -656,13 +656,13 @@
 class BlockProbPair VALUE_OBJ_CLASS_SPEC {
 protected:
   Block* _target;      // block target
-  float  _prob;        // probability of edge to block
+  double  _prob;        // probability of edge to block
 public:
   BlockProbPair() : _target(NULL), _prob(0.0) {}
-  BlockProbPair(Block* b, float p) : _target(b), _prob(p) {}
+  BlockProbPair(Block* b, double p) : _target(b), _prob(p) {}
 
   Block* get_target() const { return _target; }
-  float get_prob() const { return _prob; }
+  double get_prob() const { return _prob; }
 };
 
 //------------------------------CFGLoop-------------------------------------------
@@ -675,8 +675,8 @@
   CFGLoop *_child;       // first child, use child's sibling to visit all immediately nested loops
   GrowableArray<CFGElement*> _members; // list of members of loop
   GrowableArray<BlockProbPair> _exits; // list of successor blocks and their probabilities
-  float _exit_prob;       // probability any loop exit is taken on a single loop iteration
-  void update_succ_freq(Block* b, float freq);
+  double _exit_prob;       // probability any loop exit is taken on a single loop iteration
+  void update_succ_freq(Block* b, double freq);
 
  public:
   CFGLoop(int id) :
@@ -702,9 +702,9 @@
   void compute_loop_depth(int depth);
   void compute_freq(); // compute frequency with loop assuming head freq 1.0f
   void scale_freq();   // scale frequency by loop trip count (including outer loops)
-  float outer_loop_freq() const; // frequency of outer loop
+  double outer_loop_freq() const; // frequency of outer loop
   bool in_loop_nest(Block* b);
-  float trip_count() const { return 1.0f / _exit_prob; }
+  double trip_count() const { return 1.0 / _exit_prob; }
   virtual bool is_loop()  { return true; }
   int id() { return _id; }
 
@@ -723,7 +723,7 @@
  private:
   Block * _from;        // Source basic block
   Block * _to;          // Destination basic block
-  float _freq;          // Execution frequency (estimate)
+  double _freq;          // Execution frequency (estimate)
   int   _state;
   bool  _infrequent;
   int   _from_pct;
@@ -742,13 +742,13 @@
     interior            // edge is interior to trace (could be backedge)
   };
 
-  CFGEdge(Block *from, Block *to, float freq, int from_pct, int to_pct) :
+  CFGEdge(Block *from, Block *to, double freq, int from_pct, int to_pct) :
     _from(from), _to(to), _freq(freq),
     _from_pct(from_pct), _to_pct(to_pct), _state(open) {
     _infrequent = from_infrequent() || to_infrequent();
   }
 
-  float  freq() const { return _freq; }
+  double  freq() const { return _freq; }
   Block* from() const { return _from; }
   Block* to  () const { return _to;   }
   int  infrequent() const { return _infrequent; }
--- a/src/share/vm/opto/c2_globals.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/c2_globals.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -644,7 +644,7 @@
   diagnostic(bool, OptimizeExpensiveOps, true,                              \
           "Find best control for expensive operations")                     \
                                                                             \
-  experimental(bool, UseMathExactIntrinsics, false,                         \
+  product(bool, UseMathExactIntrinsics, true,                               \
           "Enables intrinsification of various java.lang.Math functions")   \
                                                                             \
   experimental(bool, ReplaceInParentMaps, false,                            \
@@ -653,6 +653,10 @@
   experimental(bool, UseTypeSpeculation, false,                             \
           "Speculatively propagate types from profiles")                    \
                                                                             \
+  diagnostic(bool, UseInlineDepthForSpeculativeTypes, true,                 \
+          "Carry inline depth of profile point with speculative type "      \
+          "and give priority to profiling from lower inline depth")         \
+                                                                            \
   product_pd(bool, TrapBasedRangeChecks,                                    \
           "Generate code for range checks that uses a cmp and trap "        \
           "instruction raising SIGTRAP. Used on PPC64.")                    \
--- a/src/share/vm/opto/c2compiler.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/c2compiler.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -114,7 +114,7 @@
   assert(is_initialized(), "Compiler thread must be initialized");
 
   bool subsume_loads = SubsumeLoads;
-  bool do_escape_analysis = DoEscapeAnalysis && !env->jvmti_can_access_local_variables();
+  bool do_escape_analysis = DoEscapeAnalysis && !env->should_retain_local_variables();
   bool eliminate_boxing = EliminateAutoBox;
   while (!env->failing()) {
     // Attempt to compile while subsuming loads into machine instructions.
--- a/src/share/vm/opto/chaitin.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/chaitin.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -210,7 +210,7 @@
 {
   NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); )
 
-  _high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg.get_outer_loop_frequency());
+  _high_frequency_lrg = MIN2(double(OPTO_LRG_HIGH_FREQ), _cfg.get_outer_loop_frequency());
 
   // Build a list of basic blocks, sorted by frequency
   _blks = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());
@@ -1799,7 +1799,7 @@
           Block *phi_block = _cfg.get_block_for_node(phi);
           if (_cfg.get_block_for_node(phi_block->pred(2)) == block) {
             const RegMask *mask = C->matcher()->idealreg2spillmask[Op_RegI];
-            Node *spill = new (C) MachSpillCopyNode( phi, *mask, *mask );
+            Node *spill = new (C) MachSpillCopyNode(MachSpillCopyNode::LoopPhiInput, phi, *mask, *mask);
             insert_proj( phi_block, 1, spill, maxlrg++ );
             n->set_req(1,spill);
             must_recompute_live = true;
--- a/src/share/vm/opto/chaitin.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/chaitin.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -34,10 +34,9 @@
 #include "opto/phase.hpp"
 #include "opto/regalloc.hpp"
 #include "opto/regmask.hpp"
+#include "opto/machnode.hpp"
 
 class LoopTree;
-class MachCallNode;
-class MachSafePointNode;
 class Matcher;
 class PhaseCFG;
 class PhaseLive;
@@ -424,8 +423,8 @@
   uint _simplified;             // Linked list head of simplified LRGs
 
   // Helper functions for Split()
-  uint split_DEF( Node *def, Block *b, int loc, uint max, Node **Reachblock, Node **debug_defs, GrowableArray<uint> splits, int slidx );
-  uint split_USE( Node *def, Block *b, Node *use, uint useidx, uint max, bool def_down, bool cisc_sp, GrowableArray<uint> splits, int slidx );
+  uint split_DEF(Node *def, Block *b, int loc, uint max, Node **Reachblock, Node **debug_defs, GrowableArray<uint> splits, int slidx );
+  uint split_USE(MachSpillCopyNode::SpillType spill_type, Node *def, Block *b, Node *use, uint useidx, uint max, bool def_down, bool cisc_sp, GrowableArray<uint> splits, int slidx );
 
   //------------------------------clone_projs------------------------------------
   // After cloning some rematerialized instruction, clone any MachProj's that
@@ -447,7 +446,7 @@
                             int slidx, uint *lrg2reach, Node **Reachblock, bool walkThru);
   // True if lidx is used before any real register is def'd in the block
   bool prompt_use( Block *b, uint lidx );
-  Node *get_spillcopy_wide( Node *def, Node *use, uint uidx );
+  Node *get_spillcopy_wide(MachSpillCopyNode::SpillType spill_type, Node *def, Node *use, uint uidx );
   // Insert the spill at chosen location.  Skip over any intervening Proj's or
   // Phis.  Skip over a CatchNode and projs, inserting in the fall-through block
   // instead.  Update high-pressure indices.  Create a new live range.
@@ -501,8 +500,9 @@
   // Used for aggressive coalescing.
   void build_ifg_virtual( );
 
+  // used when computing the register pressure for each block in the CFG. This
+  // is done during IFG creation.
   class Pressure {
-    public:
       // keeps track of the register pressure at the current
       // instruction (used when stepping backwards in the block)
       uint _current_pressure;
@@ -518,6 +518,7 @@
 
       // number of live ranges that constitute high register pressure
       const uint _high_pressure_limit;
+    public:
 
       // lower the register pressure and look for a low to high pressure
       // transition
@@ -525,9 +526,6 @@
         _current_pressure -= lrg.reg_pressure();
         if (_current_pressure == _high_pressure_limit) {
           _high_pressure_index = location;
-          if (_current_pressure > _final_pressure) {
-            _final_pressure = _current_pressure + 1;
-          }
         }
       }
 
@@ -540,6 +538,45 @@
         }
       }
 
+      uint high_pressure_index() const {
+        return _high_pressure_index;
+      }
+
+      uint final_pressure() const {
+        return _final_pressure;
+      }
+
+      uint current_pressure() const {
+        return _current_pressure;
+      }
+
+      uint high_pressure_limit() const {
+        return _high_pressure_limit;
+      }
+
+      void lower_high_pressure_index() {
+        _high_pressure_index--;
+      }
+
+      void set_high_pressure_index_to_block_start() {
+        _high_pressure_index = 0;
+      }
+
+      void check_pressure_at_fatproj(uint fatproj_location, RegMask& fatproj_mask) {
+        // this pressure is only valid at this instruction, i.e. we don't need to lower
+        // the register pressure since the fat proj was never live before (going backwards)
+        uint new_pressure = current_pressure() + fatproj_mask.Size();
+        if (new_pressure > final_pressure()) {
+          _final_pressure = new_pressure;
+        }
+
+        // if we were at a low pressure and now and the fat proj is at high pressure, record the fat proj location
+        // as coming from a low to high (to low again)
+        if (current_pressure() <= high_pressure_limit() && new_pressure > high_pressure_limit()) {
+          _high_pressure_index = fatproj_location;
+        }
+      }
+
       Pressure(uint high_pressure_index, uint high_pressure_limit)
       : _current_pressure(0)
       , _high_pressure_index(high_pressure_index)
--- a/src/share/vm/opto/classes.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/classes.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -29,8 +29,6 @@
 macro(AbsF)
 macro(AbsI)
 macro(AddD)
-macro(AddExactI)
-macro(AddExactL)
 macro(AddF)
 macro(AddI)
 macro(AddL)
@@ -135,7 +133,6 @@
 macro(ExpD)
 macro(FastLock)
 macro(FastUnlock)
-macro(FlagsProj)
 macro(Goto)
 macro(Halt)
 macro(If)
@@ -170,9 +167,6 @@
 macro(LoopLimit)
 macro(Mach)
 macro(MachProj)
-macro(MathExact)
-macro(MathExactI)
-macro(MathExactL)
 macro(MaxI)
 macro(MemBarAcquire)
 macro(LoadFence)
@@ -194,22 +188,24 @@
 macro(MoveL2D)
 macro(MoveD2L)
 macro(MulD)
-macro(MulExactI)
-macro(MulExactL)
 macro(MulF)
 macro(MulHiL)
 macro(MulI)
 macro(MulL)
 macro(Multi)
 macro(NegD)
-macro(NegExactI)
-macro(NegExactL)
 macro(NegF)
 macro(NeverBranch)
 macro(Opaque1)
 macro(Opaque2)
 macro(OrI)
 macro(OrL)
+macro(OverflowAddI)
+macro(OverflowSubI)
+macro(OverflowMulI)
+macro(OverflowAddL)
+macro(OverflowSubL)
+macro(OverflowMulL)
 macro(PCTable)
 macro(Parm)
 macro(PartialSubtypeCheck)
@@ -253,8 +249,6 @@
 macro(StrEquals)
 macro(StrIndexOf)
 macro(SubD)
-macro(SubExactI)
-macro(SubExactL)
 macro(SubF)
 macro(SubI)
 macro(SubL)
--- a/src/share/vm/opto/coalesce.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/coalesce.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -291,7 +291,7 @@
               _phc.clone_projs(pred, pred->end_idx(), m, copy, _phc._lrg_map);
             } else {
               const RegMask *rm = C->matcher()->idealreg2spillmask[m->ideal_reg()];
-              copy = new (C) MachSpillCopyNode(m, *rm, *rm);
+              copy = new (C) MachSpillCopyNode(MachSpillCopyNode::PhiInput, m, *rm, *rm);
               // Find a good place to insert.  Kinda tricky, use a subroutine
               insert_copy_with_overlap(pred,copy,phi_name,src_name);
             }
@@ -325,7 +325,7 @@
               l += _phc.clone_projs(b, l, m, copy, _phc._lrg_map);
             } else {
               const RegMask *rm = C->matcher()->idealreg2spillmask[m->ideal_reg()];
-              copy = new (C) MachSpillCopyNode(m, *rm, *rm);
+              copy = new (C) MachSpillCopyNode(MachSpillCopyNode::TwoAddress, m, *rm, *rm);
               // Insert the copy in the basic block, just before us
               b->insert_node(copy, l++);
             }
@@ -372,7 +372,7 @@
                 continue;     // Live out; do not pre-split
               // Split the lrg at this use
               const RegMask *rm = C->matcher()->idealreg2spillmask[inp->ideal_reg()];
-              Node *copy = new (C) MachSpillCopyNode( inp, *rm, *rm );
+              Node* copy = new (C) MachSpillCopyNode(MachSpillCopyNode::DebugUse, inp, *rm, *rm);
               // Insert the copy in the use-def chain
               n->set_req(inpidx, copy );
               // Insert the copy in the basic block, just before us
--- a/src/share/vm/opto/compile.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/compile.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -3028,42 +3028,6 @@
       n->set_req(MemBarNode::Precedent, top());
     }
     break;
-    // Must set a control edge on all nodes that produce a FlagsProj
-    // so they can't escape the block that consumes the flags.
-    // Must also set the non throwing branch as the control
-    // for all nodes that depends on the result. Unless the node
-    // already have a control that isn't the control of the
-    // flag producer
-  case Op_FlagsProj:
-    {
-      MathExactNode* math = (MathExactNode*)  n->in(0);
-      Node* ctrl = math->control_node();
-      Node* non_throwing = math->non_throwing_branch();
-      math->set_req(0, ctrl);
-
-      Node* result = math->result_node();
-      if (result != NULL) {
-        for (DUIterator_Fast jmax, j = result->fast_outs(jmax); j < jmax; j++) {
-          Node* out = result->fast_out(j);
-          // Phi nodes shouldn't be moved. They would only match below if they
-          // had the same control as the MathExactNode. The only time that
-          // would happen is if the Phi is also an input to the MathExact
-          //
-          // Cmp nodes shouldn't have control set at all.
-          if (out->is_Phi() ||
-              out->is_Cmp()) {
-            continue;
-          }
-
-          if (out->in(0) == NULL) {
-            out->set_req(0, non_throwing);
-          } else if (out->in(0) == ctrl) {
-            out->set_req(0, non_throwing);
-          }
-        }
-      }
-    }
-    break;
   default:
     assert( !n->is_Call(), "" );
     assert( !n->is_Mem(), "" );
@@ -3285,7 +3249,8 @@
     // because of a transient condition during start-up in the interpreter.
     return false;
   }
-  if (md->has_trap_at(bci, reason) != 0) {
+  ciMethod* m = Deoptimization::reason_is_speculate(reason) ? this->method() : NULL;
+  if (md->has_trap_at(bci, m, reason) != 0) {
     // Assume PerBytecodeTrapLimit==0, for a more conservative heuristic.
     // Also, if there are multiple reasons, or if there is no per-BCI record,
     // assume the worst.
@@ -3303,7 +3268,7 @@
 // Less-accurate variant which does not require a method and bci.
 bool Compile::too_many_traps(Deoptimization::DeoptReason reason,
                              ciMethodData* logmd) {
- if (trap_count(reason) >= (uint)PerMethodTrapLimit) {
+  if (trap_count(reason) >= Deoptimization::per_method_trap_limit(reason)) {
     // Too many traps globally.
     // Note that we use cumulative trap_count, not just md->trap_count.
     if (log()) {
@@ -3338,10 +3303,11 @@
   uint m_cutoff  = (uint) PerMethodRecompilationCutoff / 2 + 1;  // not zero
   Deoptimization::DeoptReason per_bc_reason
     = Deoptimization::reason_recorded_per_bytecode_if_any(reason);
+  ciMethod* m = Deoptimization::reason_is_speculate(reason) ? this->method() : NULL;
   if ((per_bc_reason == Deoptimization::Reason_none
-       || md->has_trap_at(bci, reason) != 0)
+       || md->has_trap_at(bci, m, reason) != 0)
       // The trap frequency measure we care about is the recompile count:
-      && md->trap_recompiled_at(bci)
+      && md->trap_recompiled_at(bci, m)
       && md->overflow_recompile_count() >= bc_cutoff) {
     // Do not emit a trap here if it has already caused recompilations.
     // Also, if there are multiple reasons, or if there is no per-BCI record,
--- a/src/share/vm/opto/doCall.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/doCall.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -250,7 +250,7 @@
           CallGenerator* miss_cg;
           Deoptimization::DeoptReason reason = morphism == 2 ?
                                     Deoptimization::Reason_bimorphic :
-                                    Deoptimization::Reason_class_check;
+                                    (speculative_receiver_type == NULL ? Deoptimization::Reason_class_check : Deoptimization::Reason_speculate_class_check);
           if ((morphism == 1 || (morphism == 2 && next_hit_cg != NULL)) &&
               !too_many_traps(jvms->method(), jvms->bci(), reason)
              ) {
--- a/src/share/vm/opto/gcm.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/gcm.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -1661,10 +1661,10 @@
   }
   assert (_members.length() > 0, "no empty loops");
   Block* hd = head();
-  hd->_freq = 1.0f;
+  hd->_freq = 1.0;
   for (int i = 0; i < _members.length(); i++) {
     CFGElement* s = _members.at(i);
-    float freq = s->_freq;
+    double freq = s->_freq;
     if (s->is_block()) {
       Block* b = s->as_Block();
       for (uint j = 0; j < b->_num_succs; j++) {
@@ -1676,7 +1676,7 @@
       assert(lp->_parent == this, "immediate child");
       for (int k = 0; k < lp->_exits.length(); k++) {
         Block* eb = lp->_exits.at(k).get_target();
-        float prob = lp->_exits.at(k).get_prob();
+        double prob = lp->_exits.at(k).get_prob();
         update_succ_freq(eb, freq * prob);
       }
     }
@@ -1688,7 +1688,7 @@
   // inner blocks do not get erroneously scaled.
   if (_depth != 0) {
     // Total the exit probabilities for this loop.
-    float exits_sum = 0.0f;
+    double exits_sum = 0.0f;
     for (int i = 0; i < _exits.length(); i++) {
       exits_sum += _exits.at(i).get_prob();
     }
@@ -1935,7 +1935,7 @@
 //------------------------------update_succ_freq-------------------------------
 // Update the appropriate frequency associated with block 'b', a successor of
 // a block in this loop.
-void CFGLoop::update_succ_freq(Block* b, float freq) {
+void CFGLoop::update_succ_freq(Block* b, double freq) {
   if (b->_loop == this) {
     if (b == head()) {
       // back branch within the loop
@@ -1976,11 +1976,11 @@
 // Scale frequency of loops and blocks by trip counts from outer loops
 // Do a top down traversal of loop tree (visit outer loops first.)
 void CFGLoop::scale_freq() {
-  float loop_freq = _freq * trip_count();
+  double loop_freq = _freq * trip_count();
   _freq = loop_freq;
   for (int i = 0; i < _members.length(); i++) {
     CFGElement* s = _members.at(i);
-    float block_freq = s->_freq * loop_freq;
+    double block_freq = s->_freq * loop_freq;
     if (g_isnan(block_freq) || block_freq < MIN_BLOCK_FREQUENCY)
       block_freq = MIN_BLOCK_FREQUENCY;
     s->_freq = block_freq;
@@ -1993,7 +1993,7 @@
 }
 
 // Frequency of outer loop
-float CFGLoop::outer_loop_freq() const {
+double CFGLoop::outer_loop_freq() const {
   if (_child != NULL) {
     return _child->_freq;
   }
@@ -2042,7 +2042,7 @@
       k = 0;
     }
     Block *blk = _exits.at(i).get_target();
-    float prob = _exits.at(i).get_prob();
+    double prob = _exits.at(i).get_prob();
     tty->print(" ->%d@%d%%", blk->_pre_order, (int)(prob*100));
   }
   tty->print("\n");
--- a/src/share/vm/opto/graphKit.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/graphKit.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -612,9 +612,10 @@
   // Usual case:  Bail to interpreter.
   // Reserve the right to recompile if we haven't seen anything yet.
 
+  assert(!Deoptimization::reason_is_speculate(reason), "unsupported");
   Deoptimization::DeoptAction action = Deoptimization::Action_maybe_recompile;
   if (treat_throw_as_hot
-      && (method()->method_data()->trap_recompiled_at(bci())
+      && (method()->method_data()->trap_recompiled_at(bci(), NULL)
           || C->too_many_traps(reason))) {
     // We cannot afford to take more traps here.  Suffer in the interpreter.
     if (C->log() != NULL)
@@ -864,7 +865,7 @@
     }
   }
 
-  if (env()->jvmti_can_access_local_variables()) {
+  if (env()->should_retain_local_variables()) {
     // At any safepoint, this method can get breakpointed, which would
     // then require an immediate deoptimization.
     can_prune_locals = false;  // do not prune locals
@@ -2112,30 +2113,33 @@
  * @return           node with improved type
  */
 Node* GraphKit::record_profile_for_speculation(Node* n, ciKlass* exact_kls) {
-  const TypeOopPtr* current_type = _gvn.type(n)->isa_oopptr();
+  const Type* current_type = _gvn.type(n);
   assert(UseTypeSpeculation, "type speculation must be on");
-  if (exact_kls != NULL &&
-      // nothing to improve if type is already exact
-      (current_type == NULL ||
-       (!current_type->klass_is_exact() &&
-        (current_type->speculative() == NULL ||
-         !current_type->speculative()->klass_is_exact())))) {
+
+  const TypeOopPtr* speculative = current_type->speculative();
+
+  if (current_type->would_improve_type(exact_kls, jvms()->depth())) {
     const TypeKlassPtr* tklass = TypeKlassPtr::make(exact_kls);
     const TypeOopPtr* xtype = tklass->as_instance_type();
     assert(xtype->klass_is_exact(), "Should be exact");
-
+    // record the new speculative type's depth
+    speculative = xtype->with_inline_depth(jvms()->depth());
+  }
+
+  if (speculative != current_type->speculative()) {
     // Build a type with a speculative type (what we think we know
     // about the type but will need a guard when we use it)
-    const TypeOopPtr* spec_type = TypeOopPtr::make(TypePtr::BotPTR, Type::OffsetBot, TypeOopPtr::InstanceBot, xtype);
-    // We're changing the type, we need a new cast node to carry the
-    // new type. The new type depends on the control: what profiling
-    // tells us is only valid from here as far as we can tell.
-    Node* cast = new(C) CastPPNode(n, spec_type);
-    cast->init_req(0, control());
+    const TypeOopPtr* spec_type = TypeOopPtr::make(TypePtr::BotPTR, Type::OffsetBot, TypeOopPtr::InstanceBot, speculative);
+    // We're changing the type, we need a new CheckCast node to carry
+    // the new type. The new type depends on the control: what
+    // profiling tells us is only valid from here as far as we can
+    // tell.
+    Node* cast = new(C) CheckCastPPNode(control(), n, current_type->remove_speculative()->join_speculative(spec_type));
     cast = _gvn.transform(cast);
     replace_in_map(n, cast);
     n = cast;
   }
+
   return n;
 }
 
@@ -2145,7 +2149,7 @@
  *
  * @param n  receiver node
  *
- * @return           node with improved type
+ * @return   node with improved type
  */
 Node* GraphKit::record_profiled_receiver_for_speculation(Node* n) {
   if (!UseTypeSpeculation) {
@@ -2739,12 +2743,14 @@
 // Subsequent type checks will always fold up.
 Node* GraphKit::maybe_cast_profiled_receiver(Node* not_null_obj,
                                              ciKlass* require_klass,
-                                            ciKlass* spec_klass,
+                                             ciKlass* spec_klass,
                                              bool safe_for_replace) {
   if (!UseTypeProfile || !TypeProfileCasts) return NULL;
 
+  Deoptimization::DeoptReason reason = spec_klass == NULL ? Deoptimization::Reason_class_check : Deoptimization::Reason_speculate_class_check;
+
   // Make sure we haven't already deoptimized from this tactic.
-  if (too_many_traps(Deoptimization::Reason_class_check))
+  if (too_many_traps(reason))
     return NULL;
 
   // (No, this isn't a call, but it's enough like a virtual call
@@ -2766,7 +2772,7 @@
                                             &exact_obj);
       { PreserveJVMState pjvms(this);
         set_control(slow_ctl);
-        uncommon_trap(Deoptimization::Reason_class_check,
+        uncommon_trap(reason,
                       Deoptimization::Action_maybe_recompile);
       }
       if (safe_for_replace) {
@@ -2793,8 +2799,10 @@
                                         bool not_null) {
   // type == NULL if profiling tells us this object is always null
   if (type != NULL) {
-    if (!too_many_traps(Deoptimization::Reason_null_check) &&
-        !too_many_traps(Deoptimization::Reason_class_check)) {
+    Deoptimization::DeoptReason class_reason = Deoptimization::Reason_speculate_class_check;
+    Deoptimization::DeoptReason null_reason = Deoptimization::Reason_null_check;
+    if (!too_many_traps(null_reason) &&
+        !too_many_traps(class_reason)) {
       Node* not_null_obj = NULL;
       // not_null is true if we know the object is not null and
       // there's no need for a null check
@@ -2813,7 +2821,7 @@
       {
         PreserveJVMState pjvms(this);
         set_control(slow_ctl);
-        uncommon_trap(Deoptimization::Reason_class_check,
+        uncommon_trap(class_reason,
                       Deoptimization::Action_maybe_recompile);
       }
       replace_in_map(not_null_obj, exact_obj);
@@ -2882,7 +2890,7 @@
   }
 
   if (known_statically && UseTypeSpeculation) {
-    // If we know the type check always succeed then we don't use the
+    // If we know the type check always succeeds then we don't use the
     // profiling data at this bytecode. Don't lose it, feed it to the
     // type system as a speculative type.
     not_null_obj = record_profiled_receiver_for_speculation(not_null_obj);
--- a/src/share/vm/opto/graphKit.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/graphKit.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -406,7 +406,7 @@
   // Use the type profile to narrow an object type.
   Node* maybe_cast_profiled_receiver(Node* not_null_obj,
                                      ciKlass* require_klass,
-                                    ciKlass* spec,
+                                     ciKlass* spec,
                                      bool safe_for_replace);
 
   // Cast obj to type and emit guard unless we had too many traps here already
--- a/src/share/vm/opto/ifg.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/ifg.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -439,8 +439,8 @@
       }
     }
   }
-  assert(int_pressure._current_pressure == count_int_pressure(liveout), "the int pressure is incorrect");
-  assert(float_pressure._current_pressure == count_float_pressure(liveout), "the float pressure is incorrect");
+  assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
+  assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
 }
 
 /* Go to the first non-phi index in a block */
@@ -513,8 +513,8 @@
     raise_pressure(b, lrg, int_pressure, float_pressure);
     lid = elements.next();
   }
-  assert(int_pressure._current_pressure == count_int_pressure(liveout), "the int pressure is incorrect");
-  assert(float_pressure._current_pressure == count_float_pressure(liveout), "the float pressure is incorrect");
+  assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
+  assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
 }
 
 /*
@@ -548,17 +548,7 @@
 void PhaseChaitin::check_for_high_pressure_transition_at_fatproj(uint& block_reg_pressure, uint location, LRG& lrg, Pressure& pressure, const int op_regtype) {
   RegMask mask_tmp = lrg.mask();
   mask_tmp.AND(*Matcher::idealreg2regmask[op_regtype]);
-  // this pressure is only valid at this instruction, i.e. we don't need to lower
-  // the register pressure since the fat proj was never live before (going backwards)
-  uint new_pressure = pressure._current_pressure + mask_tmp.Size();
-  if (new_pressure > pressure._final_pressure) {
-    pressure._final_pressure = new_pressure;
-  }
-  // if we were at a low pressure and now at the fat proj is at high pressure, record the fat proj location
-  // as coming from a low to high (to low again)
-  if (pressure._current_pressure <= pressure._high_pressure_limit && new_pressure > pressure._high_pressure_limit) {
-    pressure._high_pressure_index = location;
-  }
+  pressure.check_pressure_at_fatproj(location, mask_tmp);
 }
 
 /*
@@ -700,23 +690,23 @@
       // Newly live things assumed live from here to top of block
       lrg._area += cost;
       raise_pressure(b, lrg, int_pressure, float_pressure);
-      assert(int_pressure._current_pressure == count_int_pressure(liveout), "the int pressure is incorrect");
-      assert(float_pressure._current_pressure == count_float_pressure(liveout), "the float pressure is incorrect");
+      assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
+      assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
     }
-    assert(!(lrg._area < 0.0), "negative spill area" );
+    assert(lrg._area >= 0.0, "negative spill area" );
   }
 }
 
 /*
  * If we run off the top of the block with high pressure just record that the
  * whole block is high pressure. (Even though we might have a transition
- * lower down in the block)
+ * later down in the block)
  */
 void PhaseChaitin::check_for_high_pressure_block(Pressure& pressure) {
   // current pressure now means the pressure before the first instruction in the block
   // (since we have stepped through all instructions backwards)
-  if (pressure._current_pressure > pressure._high_pressure_limit) {
-    pressure._high_pressure_index = 0;
+  if (pressure.current_pressure() > pressure.high_pressure_limit()) {
+    pressure.set_high_pressure_index_to_block_start();
   }
 }
 
@@ -725,7 +715,7 @@
  * and set the high pressure index for the block
  */
 void PhaseChaitin::adjust_high_pressure_index(Block* b, uint& block_hrp_index, Pressure& pressure) {
-  uint i = pressure._high_pressure_index;
+  uint i = pressure.high_pressure_index();
   if (i < b->number_of_nodes() && i < b->end_idx() + 1) {
     Node* cur = b->get_node(i);
     while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) {
@@ -772,7 +762,7 @@
 
     int inst_count = last_inst - first_inst;
     double cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);
-    assert(!(cost < 0.0), "negative spill cost" );
+    assert(cost >= 0.0, "negative spill cost" );
 
     compute_initial_block_pressure(block, &liveout, int_pressure, float_pressure, cost);
 
@@ -789,8 +779,8 @@
 
         if (!liveout.member(lid) && n->Opcode() != Op_SafePoint) {
           if (remove_node_if_not_used(block, location, n, lid, &liveout)) {
-            float_pressure._high_pressure_index--;
-            int_pressure._high_pressure_index--;
+            float_pressure.lower_high_pressure_index();
+            int_pressure.lower_high_pressure_index();
             continue;
           }
           if (lrg._fat_proj) {
@@ -799,7 +789,11 @@
           }
         } else {
           // A live range ends at its definition, remove the remaining area.
-          lrg._area -= cost;
+          // If the cost is +Inf (which might happen in extreme cases), the lrg area will also be +Inf,
+          // and +Inf - +Inf = NaN. So let's not do that subtraction.
+          if (g_isfinite(cost)) {
+            lrg._area -= cost;
+          }
           assert(lrg._area >= 0.0, "negative spill area" );
 
           assign_high_score_to_immediate_copies(block, n, lrg, location + 1, last_inst);
@@ -837,13 +831,13 @@
     adjust_high_pressure_index(block, block->_ihrp_index, int_pressure);
     adjust_high_pressure_index(block, block->_fhrp_index, float_pressure);
     // set the final_pressure as the register pressure for the block
-    block->_reg_pressure = int_pressure._final_pressure;
-    block->_freg_pressure = float_pressure._final_pressure;
+    block->_reg_pressure = int_pressure.final_pressure();
+    block->_freg_pressure = float_pressure.final_pressure();
 
 #ifndef PRODUCT
     // Gather Register Pressure Statistics
     if (PrintOptoStatistics) {
-      if (block->_reg_pressure > int_pressure._high_pressure_limit || block->_freg_pressure > float_pressure._high_pressure_limit) {
+      if (block->_reg_pressure > int_pressure.high_pressure_limit() || block->_freg_pressure > float_pressure.high_pressure_limit()) {
         _high_pressure++;
       } else {
         _low_pressure++;
--- a/src/share/vm/opto/ifnode.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/ifnode.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -76,7 +76,6 @@
   if( !i1->is_Bool() ) return NULL;
   BoolNode *b = i1->as_Bool();
   Node *cmp = b->in(1);
-  if( cmp->is_FlagsProj() ) return NULL;
   if( !cmp->is_Cmp() ) return NULL;
   i1 = cmp->in(1);
   if( i1 == NULL || !i1->is_Phi() ) return NULL;
--- a/src/share/vm/opto/lcm.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/lcm.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -520,13 +520,6 @@
           break;
         }
 
-        // For nodes that produce a FlagsProj, make the node adjacent to the
-        // use of the FlagsProj
-        if (use->is_FlagsProj() && get_block_for_node(use) == block) {
-          found_machif = true;
-          break;
-        }
-
         // More than this instruction pending for successor to be ready,
         // don't choose this if other opportunities are ready
         if (ready_cnt.at(use->_idx) > 1)
--- a/src/share/vm/opto/library_call.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/library_call.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -203,7 +203,9 @@
   bool inline_math_native(vmIntrinsics::ID id);
   bool inline_trig(vmIntrinsics::ID id);
   bool inline_math(vmIntrinsics::ID id);
-  void inline_math_mathExact(Node* math);
+  template <typename OverflowOp>
+  bool inline_math_overflow(Node* arg1, Node* arg2);
+  void inline_math_mathExact(Node* math, Node* test);
   bool inline_math_addExactI(bool is_increment);
   bool inline_math_addExactL(bool is_increment);
   bool inline_math_multiplyExactI();
@@ -517,31 +519,31 @@
 
   case vmIntrinsics::_incrementExactI:
   case vmIntrinsics::_addExactI:
-    if (!Matcher::match_rule_supported(Op_AddExactI) || !UseMathExactIntrinsics) return NULL;
+    if (!Matcher::match_rule_supported(Op_OverflowAddI) || !UseMathExactIntrinsics) return NULL;
     break;
   case vmIntrinsics::_incrementExactL:
   case vmIntrinsics::_addExactL:
-    if (!Matcher::match_rule_supported(Op_AddExactL) || !UseMathExactIntrinsics) return NULL;
+    if (!Matcher::match_rule_supported(Op_OverflowAddL) || !UseMathExactIntrinsics) return NULL;
     break;
   case vmIntrinsics::_decrementExactI:
   case vmIntrinsics::_subtractExactI:
-    if (!Matcher::match_rule_supported(Op_SubExactI) || !UseMathExactIntrinsics) return NULL;
+    if (!Matcher::match_rule_supported(Op_OverflowSubI) || !UseMathExactIntrinsics) return NULL;
     break;
   case vmIntrinsics::_decrementExactL:
   case vmIntrinsics::_subtractExactL:
-    if (!Matcher::match_rule_supported(Op_SubExactL) || !UseMathExactIntrinsics) return NULL;
+    if (!Matcher::match_rule_supported(Op_OverflowSubL) || !UseMathExactIntrinsics) return NULL;
     break;
   case vmIntrinsics::_negateExactI:
-    if (!Matcher::match_rule_supported(Op_NegExactI) || !UseMathExactIntrinsics) return NULL;
+    if (!Matcher::match_rule_supported(Op_OverflowSubI) || !UseMathExactIntrinsics) return NULL;
     break;
   case vmIntrinsics::_negateExactL:
-    if (!Matcher::match_rule_supported(Op_NegExactL) || !UseMathExactIntrinsics) return NULL;
+    if (!Matcher::match_rule_supported(Op_OverflowSubL) || !UseMathExactIntrinsics) return NULL;
     break;
   case vmIntrinsics::_multiplyExactI:
-    if (!Matcher::match_rule_supported(Op_MulExactI) || !UseMathExactIntrinsics) return NULL;
+    if (!Matcher::match_rule_supported(Op_OverflowMulI) || !UseMathExactIntrinsics) return NULL;
     break;
   case vmIntrinsics::_multiplyExactL:
-    if (!Matcher::match_rule_supported(Op_MulExactL) || !UseMathExactIntrinsics) return NULL;
+    if (!Matcher::match_rule_supported(Op_OverflowMulL) || !UseMathExactIntrinsics) return NULL;
     break;
 
  default:
@@ -1970,18 +1972,8 @@
   return true;
 }
 
-void LibraryCallKit::inline_math_mathExact(Node* math) {
-  // If we didn't get the expected opcode it means we have optimized
-  // the node to something else and don't need the exception edge.
-  if (!math->is_MathExact()) {
-    set_result(math);
-    return;
-  }
-
-  Node* result = _gvn.transform( new(C) ProjNode(math, MathExactNode::result_proj_node));
-  Node* flags = _gvn.transform( new(C) FlagsProjNode(math, MathExactNode::flags_proj_node));
-
-  Node* bol = _gvn.transform( new (C) BoolNode(flags, BoolTest::overflow) );
+void LibraryCallKit::inline_math_mathExact(Node* math, Node *test) {
+  Node* bol = _gvn.transform( new (C) BoolNode(test, BoolTest::overflow) );
   IfNode* check = create_and_map_if(control(), bol, PROB_UNLIKELY_MAG(3), COUNT_UNKNOWN);
   Node* fast_path = _gvn.transform( new (C) IfFalseNode(check));
   Node* slow_path = _gvn.transform( new (C) IfTrueNode(check) );
@@ -1999,108 +1991,50 @@
   }
 
   set_control(fast_path);
-  set_result(result);
+  set_result(math);
 }
 
-bool LibraryCallKit::inline_math_addExactI(bool is_increment) {
-  Node* arg1 = argument(0);
-  Node* arg2 = NULL;
-
-  if (is_increment) {
-    arg2 = intcon(1);
-  } else {
-    arg2 = argument(1);
-  }
-
-  Node* add = _gvn.transform( new(C) AddExactINode(NULL, arg1, arg2) );
-  inline_math_mathExact(add);
+template <typename OverflowOp>
+bool LibraryCallKit::inline_math_overflow(Node* arg1, Node* arg2) {
+  typedef typename OverflowOp::MathOp MathOp;
+
+  MathOp* mathOp = new(C) MathOp(arg1, arg2);
+  Node* operation = _gvn.transform( mathOp );
+  Node* ofcheck = _gvn.transform( new(C) OverflowOp(arg1, arg2) );
+  inline_math_mathExact(operation, ofcheck);
   return true;
 }
 
+bool LibraryCallKit::inline_math_addExactI(bool is_increment) {
+  return inline_math_overflow<OverflowAddINode>(argument(0), is_increment ? intcon(1) : argument(1));
+}
+
 bool LibraryCallKit::inline_math_addExactL(bool is_increment) {
-  Node* arg1 = argument(0); // type long
-  // argument(1) == TOP
-  Node* arg2 = NULL;
-
-  if (is_increment) {
-    arg2 = longcon(1);
-  } else {
-    arg2 = argument(2); // type long
-    // argument(3) == TOP
-  }
-
-  Node* add = _gvn.transform(new(C) AddExactLNode(NULL, arg1, arg2));
-  inline_math_mathExact(add);
-  return true;
+  return inline_math_overflow<OverflowAddLNode>(argument(0), is_increment ? longcon(1) : argument(2));
 }
 
 bool LibraryCallKit::inline_math_subtractExactI(bool is_decrement) {
-  Node* arg1 = argument(0);
-  Node* arg2 = NULL;
-
-  if (is_decrement) {
-    arg2 = intcon(1);
-  } else {
-    arg2 = argument(1);
-  }
-
-  Node* sub = _gvn.transform(new(C) SubExactINode(NULL, arg1, arg2));
-  inline_math_mathExact(sub);
-  return true;
+  return inline_math_overflow<OverflowSubINode>(argument(0), is_decrement ? intcon(1) : argument(1));
 }
 
 bool LibraryCallKit::inline_math_subtractExactL(bool is_decrement) {
-  Node* arg1 = argument(0); // type long
-  // argument(1) == TOP
-  Node* arg2 = NULL;
-
-  if (is_decrement) {
-    arg2 = longcon(1);
-  } else {
-    arg2 = argument(2); // type long
-    // argument(3) == TOP
-  }
-
-  Node* sub = _gvn.transform(new(C) SubExactLNode(NULL, arg1, arg2));
-  inline_math_mathExact(sub);
-  return true;
+  return inline_math_overflow<OverflowSubLNode>(argument(0), is_decrement ? longcon(1) : argument(2));
 }
 
 bool LibraryCallKit::inline_math_negateExactI() {
-  Node* arg1 = argument(0);
-
-  Node* neg = _gvn.transform(new(C) NegExactINode(NULL, arg1));
-  inline_math_mathExact(neg);
-  return true;
+  return inline_math_overflow<OverflowSubINode>(intcon(0), argument(0));
 }
 
 bool LibraryCallKit::inline_math_negateExactL() {
-  Node* arg1 = argument(0);
-  // argument(1) == TOP
-
-  Node* neg = _gvn.transform(new(C) NegExactLNode(NULL, arg1));
-  inline_math_mathExact(neg);
-  return true;
+  return inline_math_overflow<OverflowSubLNode>(longcon(0), argument(0));
 }
 
 bool LibraryCallKit::inline_math_multiplyExactI() {
-  Node* arg1 = argument(0);
-  Node* arg2 = argument(1);
-
-  Node* mul = _gvn.transform(new(C) MulExactINode(NULL, arg1, arg2));
-  inline_math_mathExact(mul);
-  return true;
+  return inline_math_overflow<OverflowMulINode>(argument(0), argument(1));
 }
 
 bool LibraryCallKit::inline_math_multiplyExactL() {
-  Node* arg1 = argument(0);
-  // argument(1) == TOP
-  Node* arg2 = argument(2);
-  // argument(3) == TOP
-
-  Node* mul = _gvn.transform(new(C) MulExactLNode(NULL, arg1, arg2));
-  inline_math_mathExact(mul);
-  return true;
+  return inline_math_overflow<OverflowMulLNode>(argument(0), argument(2));
 }
 
 Node*
--- a/src/share/vm/opto/loopTransform.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/loopTransform.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -713,10 +713,6 @@
       case Op_ModL: body_size += 30; break;
       case Op_DivL: body_size += 30; break;
       case Op_MulL: body_size += 10; break;
-      case Op_FlagsProj:
-        // Can't handle unrolling of loops containing
-        // nodes that generate a FlagsProj at the moment
-        return false;
       case Op_StrComp:
       case Op_StrEquals:
       case Op_StrIndexOf:
@@ -780,10 +776,6 @@
         continue; // not RC
 
       Node *cmp = bol->in(1);
-      if (cmp->is_FlagsProj()) {
-        continue;
-      }
-
       Node *rc_exp = cmp->in(1);
       Node *limit = cmp->in(2);
 
--- a/src/share/vm/opto/loopopts.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/loopopts.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -43,12 +43,6 @@
     return NULL;
   }
 
-  if (n->is_MathExact()) {
-    // MathExact has projections that are not correctly handled in the code
-    // below.
-    return NULL;
-  }
-
   int wins = 0;
   assert(!n->is_CFG(), "");
   assert(region->is_Region(), "");
@@ -2362,8 +2356,7 @@
         opc == Op_Catch     ||
         opc == Op_CatchProj ||
         opc == Op_Jump      ||
-        opc == Op_JumpProj  ||
-        opc == Op_FlagsProj) {
+        opc == Op_JumpProj) {
 #if !defined(PRODUCT)
       if (TracePartialPeeling) {
         tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
--- a/src/share/vm/opto/machnode.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/machnode.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -520,12 +520,33 @@
 // Machine SpillCopy Node.  Copies 1 or 2 words from any location to any
 // location (stack or register).
 class MachSpillCopyNode : public MachIdealNode {
+public:
+  enum SpillType {
+    TwoAddress,                        // Inserted when coalescing of a two-address-instruction node and its input fails
+    PhiInput,                          // Inserted when coalescing of a phi node and its input fails
+    DebugUse,                          // Inserted as debug info spills to safepoints in non-frequent blocks
+    LoopPhiInput,                      // Pre-split compares of loop-phis
+    Definition,                        // An lrg marked as spilled will be spilled to memory right after its definition,
+                                       // if in high pressure region or the lrg is bound
+    RegToReg,                          // A register to register move
+    RegToMem,                          // A register to memory move
+    MemToReg,                          // A memory to register move
+    PhiLocationDifferToInputLocation,  // When coalescing phi nodes in PhaseChaitin::Split(), a move spill is inserted if
+                                       // the phi and its input resides at different locations (i.e. reg or mem)
+    BasePointerToMem,                  // Spill base pointer to memory at safepoint
+    InputToRematerialization,          // When rematerializing a node we stretch the inputs live ranges, and they might be
+                                       // stretched beyond a new definition point, therefore we split out new copies instead
+    CallUse,                           // Spill use at a call
+    Bound                              // An lrg marked as spill that is bound and needs to be spilled at a use
+  };
+private:
   const RegMask *_in;           // RegMask for input
   const RegMask *_out;          // RegMask for output
   const Type *_type;
+  const SpillType _spill_type;
 public:
-  MachSpillCopyNode( Node *n, const RegMask &in, const RegMask &out ) :
-    MachIdealNode(), _in(&in), _out(&out), _type(n->bottom_type()) {
+  MachSpillCopyNode(SpillType spill_type, Node *n, const RegMask &in, const RegMask &out ) :
+    MachIdealNode(), _spill_type(spill_type), _in(&in), _out(&out), _type(n->bottom_type()) {
     init_class_id(Class_MachSpillCopy);
     init_flags(Flag_is_Copy);
     add_req(NULL);
@@ -544,8 +565,42 @@
   virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
   virtual uint size(PhaseRegAlloc *ra_) const;
 
+
 #ifndef PRODUCT
-  virtual const char *Name() const { return "MachSpillCopy"; }
+  virtual const char *Name() const {
+    switch (_spill_type) {
+      case TwoAddress:
+        return "TwoAddressSpillCopy";
+      case PhiInput:
+        return "PhiInputSpillCopy";
+      case DebugUse:
+        return "DebugUseSpillCopy";
+      case LoopPhiInput:
+        return "LoopPhiInputSpillCopy";
+      case Definition:
+        return "DefinitionSpillCopy";
+      case RegToReg:
+        return "RegToRegSpillCopy";
+      case RegToMem:
+        return "RegToMemSpillCopy";
+      case MemToReg:
+        return "MemToRegSpillCopy";
+      case PhiLocationDifferToInputLocation:
+        return "PhiLocationDifferToInputLocationSpillCopy";
+      case BasePointerToMem:
+        return "BasePointerToMemSpillCopy";
+      case InputToRematerialization:
+        return "InputToRematerializationSpillCopy";
+      case CallUse:
+        return "CallUseSpillCopy";
+      case Bound:
+        return "BoundSpillCopy";
+      default:
+        assert(false, "Must have valid spill type");
+        return "MachSpillCopy";
+    }
+  }
+
   virtual void format( PhaseRegAlloc *, outputStream *st ) const;
 #endif
 };
--- a/src/share/vm/opto/matcher.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/matcher.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -1998,7 +1998,6 @@
       case Op_Catch:
       case Op_CatchProj:
       case Op_CProj:
-      case Op_FlagsProj:
       case Op_JumpProj:
       case Op_JProj:
       case Op_NeverBranch:
--- a/src/share/vm/opto/matcher.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/matcher.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -340,10 +340,6 @@
   // Register for MODL projection of divmodL
   static RegMask modL_proj_mask();
 
-  static const RegMask mathExactI_result_proj_mask();
-  static const RegMask mathExactL_result_proj_mask();
-  static const RegMask mathExactI_flags_proj_mask();
-
   // Use hardware DIV instruction when it is faster than
   // a code which use multiply for division by constant.
   static bool use_asm_for_ldiv_by_con( jlong divisor );
--- a/src/share/vm/opto/mathexactnode.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/mathexactnode.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -31,358 +31,93 @@
 #include "opto/mathexactnode.hpp"
 #include "opto/subnode.hpp"
 
-MathExactNode::MathExactNode(Node* ctrl, Node* in1) : MultiNode(2) {
-  init_class_id(Class_MathExact);
-  init_req(0, ctrl);
-  init_req(1, in1);
+template <typename OverflowOp>
+class AddHelper {
+public:
+  typedef typename OverflowOp::TypeClass TypeClass;
+  typedef typename TypeClass::NativeType NativeType;
+
+  static bool will_overflow(NativeType value1, NativeType value2) {
+    NativeType result = value1 + value2;
+    // Hacker's Delight 2-12 Overflow if both arguments have the opposite sign of the result
+    if (((value1 ^ result) & (value2 ^ result)) >= 0) {
+      return false;
+    }
+    return true;
+  }
+
+  static bool can_overflow(const Type* type1, const Type* type2) {
+    if (type1 == TypeClass::ZERO || type2 == TypeClass::ZERO) {
+      return false;
+    }
+    return true;
+  }
+};
+
+template <typename OverflowOp>
+class SubHelper {
+public:
+  typedef typename OverflowOp::TypeClass TypeClass;
+  typedef typename TypeClass::NativeType NativeType;
+
+  static bool will_overflow(NativeType value1, NativeType value2) {
+    NativeType result = value1 - value2;
+    // hacker's delight 2-12 overflow iff the arguments have different signs and
+    // the sign of the result is different than the sign of arg1
+    if (((value1 ^ value2) & (value1 ^ result)) >= 0) {
+      return false;
+    }
+    return true;
+  }
+
+  static bool can_overflow(const Type* type1, const Type* type2) {
+    if (type2 == TypeClass::ZERO) {
+      return false;
+    }
+    return true;
+  }
+};
+
+template <typename OverflowOp>
+class MulHelper {
+public:
+  typedef typename OverflowOp::TypeClass TypeClass;
+
+  static bool can_overflow(const Type* type1, const Type* type2) {
+    if (type1 == TypeClass::ZERO || type2 == TypeClass::ZERO) {
+      return false;
+    } else if (type1 == TypeClass::ONE || type2 == TypeClass::ONE) {
+      return false;
+    }
+    return true;
+  }
+};
+
+bool OverflowAddINode::will_overflow(jint v1, jint v2) const {
+  return AddHelper<OverflowAddINode>::will_overflow(v1, v2);
 }
 
-MathExactNode::MathExactNode(Node* ctrl, Node* in1, Node* in2) : MultiNode(3) {
-  init_class_id(Class_MathExact);
-  init_req(0, ctrl);
-  init_req(1, in1);
-  init_req(2, in2);
+bool OverflowSubINode::will_overflow(jint v1, jint v2) const {
+  return SubHelper<OverflowSubINode>::will_overflow(v1, v2);
 }
 
-BoolNode* MathExactNode::bool_node() const {
-  Node* flags = flags_node();
-  BoolNode* boolnode = flags->unique_out()->as_Bool();
-  assert(boolnode != NULL, "must have BoolNode");
-  return boolnode;
+bool OverflowMulINode::will_overflow(jint v1, jint v2) const {
+    jlong result = (jlong) v1 * (jlong) v2;
+    if ((jint) result == result) {
+      return false;
+    }
+    return true;
 }
 
-IfNode* MathExactNode::if_node() const {
-  BoolNode* boolnode = bool_node();
-  IfNode* ifnode = boolnode->unique_out()->as_If();
-  assert(ifnode != NULL, "must have IfNode");
-  return ifnode;
+bool OverflowAddLNode::will_overflow(jlong v1, jlong v2) const {
+  return AddHelper<OverflowAddLNode>::will_overflow(v1, v2);
 }
 
-Node* MathExactNode::control_node() const {
-  IfNode* ifnode = if_node();
-  return ifnode->in(0);
+bool OverflowSubLNode::will_overflow(jlong v1, jlong v2) const {
+  return SubHelper<OverflowSubLNode>::will_overflow(v1, v2);
 }
 
-Node* MathExactNode::non_throwing_branch() const {
-  IfNode* ifnode = if_node();
-  if (bool_node()->_test._test == BoolTest::overflow) {
-    return ifnode->proj_out(0);
-  }
-  return ifnode->proj_out(1);
-}
-
-// If the MathExactNode won't overflow we have to replace the
-// FlagsProjNode and ProjNode that is generated by the MathExactNode
-Node* MathExactNode::no_overflow(PhaseGVN* phase, Node* new_result) {
-  PhaseIterGVN* igvn = phase->is_IterGVN();
-  if (igvn) {
-    ProjNode* result = result_node();
-    ProjNode* flags = flags_node();
-
-    if (result != NULL) {
-      igvn->replace_node(result, new_result);
-    }
-
-    if (flags != NULL) {
-      BoolNode* boolnode = bool_node();
-      switch (boolnode->_test._test) {
-        case BoolTest::overflow:
-          // if the check is for overflow - never taken
-          igvn->replace_node(boolnode, phase->intcon(0));
-          break;
-        case BoolTest::no_overflow:
-          // if the check is for no overflow - always taken
-          igvn->replace_node(boolnode, phase->intcon(1));
-          break;
-        default:
-          fatal("Unexpected value of BoolTest");
-          break;
-      }
-      flags->del_req(0);
-    }
-  }
-  return new_result;
-}
-
-Node* MathExactINode::match(const ProjNode* proj, const Matcher* m) {
-  uint ideal_reg = proj->ideal_reg();
-  RegMask rm;
-  if (proj->_con == result_proj_node) {
-    rm = m->mathExactI_result_proj_mask();
-  } else {
-    assert(proj->_con == flags_proj_node, "must be result or flags");
-    assert(ideal_reg == Op_RegFlags, "sanity");
-    rm = m->mathExactI_flags_proj_mask();
-  }
-  return new (m->C) MachProjNode(this, proj->_con, rm, ideal_reg);
-}
-
-Node* MathExactLNode::match(const ProjNode* proj, const Matcher* m) {
-  uint ideal_reg = proj->ideal_reg();
-  RegMask rm;
-  if (proj->_con == result_proj_node) {
-    rm = m->mathExactL_result_proj_mask();
-  } else {
-    assert(proj->_con == flags_proj_node, "must be result or flags");
-    assert(ideal_reg == Op_RegFlags, "sanity");
-    rm = m->mathExactI_flags_proj_mask();
-  }
-  return new (m->C) MachProjNode(this, proj->_con, rm, ideal_reg);
-}
-
-Node* AddExactINode::Ideal(PhaseGVN* phase, bool can_reshape) {
-  Node* arg1 = in(1);
-  Node* arg2 = in(2);
-
-  const Type* type1 = phase->type(arg1);
-  const Type* type2 = phase->type(arg2);
-
-  if (type1 != Type::TOP && type1->singleton() &&
-      type2 != Type::TOP && type2->singleton()) {
-    jint val1 = arg1->get_int();
-    jint val2 = arg2->get_int();
-    jint result = val1 + val2;
-    // Hacker's Delight 2-12 Overflow if both arguments have the opposite sign of the result
-    if ( (((val1 ^ result) & (val2 ^ result)) >= 0)) {
-      Node* con_result = ConINode::make(phase->C, result);
-      return no_overflow(phase, con_result);
-    }
-    return NULL;
-  }
-
-  if (type1 == TypeInt::ZERO || type2 == TypeInt::ZERO) { // (Add 0 x) == x
-    Node* add_result = new (phase->C) AddINode(arg1, arg2);
-    return no_overflow(phase, add_result);
-  }
-
-  if (type2->singleton()) {
-    return NULL; // no change - keep constant on the right
-  }
-
-  if (type1->singleton()) {
-    // Make it x + Constant - move constant to the right
-    swap_edges(1, 2);
-    return this;
-  }
-
-  if (arg2->is_Load()) {
-    return NULL; // no change - keep load on the right
-  }
-
-  if (arg1->is_Load()) {
-    // Make it x + Load - move load to the right
-    swap_edges(1, 2);
-    return this;
-  }
-
-  if (arg1->_idx > arg2->_idx) {
-    // Sort the edges
-    swap_edges(1, 2);
-    return this;
-  }
-
-  return NULL;
-}
-
-Node* AddExactLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
-  Node* arg1 = in(1);
-  Node* arg2 = in(2);
-
-  const Type* type1 = phase->type(arg1);
-  const Type* type2 = phase->type(arg2);
-
-  if (type1 != Type::TOP && type1->singleton() &&
-      type2 != Type::TOP && type2->singleton()) {
-    jlong val1 = arg1->get_long();
-    jlong val2 = arg2->get_long();
-    jlong result = val1 + val2;
-    // Hacker's Delight 2-12 Overflow if both arguments have the opposite sign of the result
-    if ( (((val1 ^ result) & (val2 ^ result)) >= 0)) {
-      Node* con_result = ConLNode::make(phase->C, result);
-      return no_overflow(phase, con_result);
-    }
-    return NULL;
-  }
-
-  if (type1 == TypeLong::ZERO || type2 == TypeLong::ZERO) { // (Add 0 x) == x
-    Node* add_result = new (phase->C) AddLNode(arg1, arg2);
-    return no_overflow(phase, add_result);
-  }
-
-  if (type2->singleton()) {
-    return NULL; // no change - keep constant on the right
-  }
-
-  if (type1->singleton()) {
-    // Make it x + Constant - move constant to the right
-    swap_edges(1, 2);
-    return this;
-  }
-
-  if (arg2->is_Load()) {
-    return NULL; // no change - keep load on the right
-  }
-
-  if (arg1->is_Load()) {
-    // Make it x + Load - move load to the right
-    swap_edges(1, 2);
-    return this;
-  }
-
-  if (arg1->_idx > arg2->_idx) {
-    // Sort the edges
-    swap_edges(1, 2);
-    return this;
-  }
-
-  return NULL;
-}
-
-Node* SubExactINode::Ideal(PhaseGVN* phase, bool can_reshape) {
-  Node* arg1 = in(1);
-  Node* arg2 = in(2);
-
-  const Type* type1 = phase->type(arg1);
-  const Type* type2 = phase->type(arg2);
-
-  if (type1 != Type::TOP && type1->singleton() &&
-      type2 != Type::TOP && type2->singleton()) {
-    jint val1 = arg1->get_int();
-    jint val2 = arg2->get_int();
-    jint result = val1 - val2;
-
-    // Hacker's Delight 2-12 Overflow iff the arguments have different signs and
-    // the sign of the result is different than the sign of arg1
-    if (((val1 ^ val2) & (val1 ^ result)) >= 0) {
-      Node* con_result = ConINode::make(phase->C, result);
-      return no_overflow(phase, con_result);
-    }
-    return NULL;
-  }
-
-  if (type1 == TypeInt::ZERO || type2 == TypeInt::ZERO) {
-    // Sub with zero is the same as add with zero
-    Node* add_result = new (phase->C) AddINode(arg1, arg2);
-    return no_overflow(phase, add_result);
-  }
-
-  return NULL;
-}
-
-Node* SubExactLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
-  Node* arg1 = in(1);
-  Node* arg2 = in(2);
-
-  const Type* type1 = phase->type(arg1);
-  const Type* type2 = phase->type(arg2);
-
-  if (type1 != Type::TOP && type1->singleton() &&
-      type2 != Type::TOP && type2->singleton()) {
-    jlong val1 = arg1->get_long();
-    jlong val2 = arg2->get_long();
-    jlong result = val1 - val2;
-
-    // Hacker's Delight 2-12 Overflow iff the arguments have different signs and
-    // the sign of the result is different than the sign of arg1
-    if (((val1 ^ val2) & (val1 ^ result)) >= 0) {
-      Node* con_result = ConLNode::make(phase->C, result);
-      return no_overflow(phase, con_result);
-    }
-    return NULL;
-  }
-
-  if (type1 == TypeLong::ZERO || type2 == TypeLong::ZERO) {
-    // Sub with zero is the same as add with zero
-    Node* add_result = new (phase->C) AddLNode(arg1, arg2);
-    return no_overflow(phase, add_result);
-  }
-
-  return NULL;
-}
-
-Node* NegExactINode::Ideal(PhaseGVN* phase, bool can_reshape) {
-  Node *arg = in(1);
-
-  const Type* type = phase->type(arg);
-  if (type != Type::TOP && type->singleton()) {
-    jint value = arg->get_int();
-    if (value != min_jint) {
-      Node* neg_result = ConINode::make(phase->C, -value);
-      return no_overflow(phase, neg_result);
-    }
-  }
-  return NULL;
-}
-
-Node* NegExactLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
-  Node *arg = in(1);
-
-  const Type* type = phase->type(arg);
-  if (type != Type::TOP && type->singleton()) {
-    jlong value = arg->get_long();
-    if (value != min_jlong) {
-      Node* neg_result = ConLNode::make(phase->C, -value);
-      return no_overflow(phase, neg_result);
-    }
-  }
-  return NULL;
-}
-
-Node* MulExactINode::Ideal(PhaseGVN* phase, bool can_reshape) {
-  Node* arg1 = in(1);
-  Node* arg2 = in(2);
-
-  const Type* type1 = phase->type(arg1);
-  const Type* type2 = phase->type(arg2);
-
-  if (type1 != Type::TOP && type1->singleton() &&
-      type2 != Type::TOP && type2->singleton()) {
-    jint val1 = arg1->get_int();
-    jint val2 = arg2->get_int();
-    jlong result = (jlong) val1 * (jlong) val2;
-    if ((jint) result == result) {
-      // no overflow
-      Node* mul_result = ConINode::make(phase->C, result);
-      return no_overflow(phase, mul_result);
-    }
-  }
-
-  if (type1 == TypeInt::ZERO || type2 == TypeInt::ZERO) {
-    return no_overflow(phase, ConINode::make(phase->C, 0));
-  }
-
-  if (type1 == TypeInt::ONE) {
-    Node* mul_result = new (phase->C) AddINode(arg2, phase->intcon(0));
-    return no_overflow(phase, mul_result);
-  }
-  if (type2 == TypeInt::ONE) {
-    Node* mul_result = new (phase->C) AddINode(arg1, phase->intcon(0));
-    return no_overflow(phase, mul_result);
-  }
-
-  if (type1 == TypeInt::MINUS_1) {
-    return new (phase->C) NegExactINode(NULL, arg2);
-  }
-
-  if (type2 == TypeInt::MINUS_1) {
-    return new (phase->C) NegExactINode(NULL, arg1);
-  }
-
-  return NULL;
-}
-
-Node* MulExactLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
-  Node* arg1 = in(1);
-  Node* arg2 = in(2);
-
-  const Type* type1 = phase->type(arg1);
-  const Type* type2 = phase->type(arg2);
-
-  if (type1 != Type::TOP && type1->singleton() &&
-      type2 != Type::TOP && type2->singleton()) {
-    jlong val1 = arg1->get_long();
-    jlong val2 = arg2->get_long();
-
+bool OverflowMulLNode::will_overflow(jlong val1, jlong val2) const {
     jlong result = val1 * val2;
     jlong ax = (val1 < 0 ? -val1 : val1);
     jlong ay = (val2 < 0 ? -val2 : val2);
@@ -398,33 +133,125 @@
       }
     }
 
-    if (!overflow) {
-      Node* mul_result = ConLNode::make(phase->C, result);
-      return no_overflow(phase, mul_result);
+    return overflow;
+}
+
+bool OverflowAddINode::can_overflow(const Type* t1, const Type* t2) const {
+  return AddHelper<OverflowAddINode>::can_overflow(t1, t2);
+}
+
+bool OverflowSubINode::can_overflow(const Type* t1, const Type* t2) const {
+  if (in(1) == in(2)) {
+    return false;
+  }
+  return SubHelper<OverflowSubINode>::can_overflow(t1, t2);
+}
+
+bool OverflowMulINode::can_overflow(const Type* t1, const Type* t2) const {
+  return MulHelper<OverflowMulINode>::can_overflow(t1, t2);
+}
+
+bool OverflowAddLNode::can_overflow(const Type* t1, const Type* t2) const {
+  return AddHelper<OverflowAddLNode>::can_overflow(t1, t2);
+}
+
+bool OverflowSubLNode::can_overflow(const Type* t1, const Type* t2) const {
+  if (in(1) == in(2)) {
+    return false;
+  }
+  return SubHelper<OverflowSubLNode>::can_overflow(t1, t2);
+}
+
+bool OverflowMulLNode::can_overflow(const Type* t1, const Type* t2) const {
+  return MulHelper<OverflowMulLNode>::can_overflow(t1, t2);
+}
+
+const Type* OverflowNode::sub(const Type* t1, const Type* t2) const {
+  fatal(err_msg_res("sub() should not be called for '%s'", NodeClassNames[this->Opcode()]));
+  return TypeInt::CC;
+}
+
+template <typename OverflowOp>
+struct IdealHelper {
+  typedef typename OverflowOp::TypeClass TypeClass; // TypeInt, TypeLong
+  typedef typename TypeClass::NativeType NativeType;
+
+  static Node* Ideal(const OverflowOp* node, PhaseGVN* phase, bool can_reshape) {
+    Node* arg1 = node->in(1);
+    Node* arg2 = node->in(2);
+    const Type* type1 = phase->type(arg1);
+    const Type* type2 = phase->type(arg2);
+
+    if (type1 == NULL || type2 == NULL) {
+      return NULL;
     }
+
+    if (type1 != Type::TOP && type1->singleton() &&
+        type2 != Type::TOP && type2->singleton()) {
+      NativeType val1 = TypeClass::as_self(type1)->get_con();
+      NativeType val2 = TypeClass::as_self(type2)->get_con();
+      if (node->will_overflow(val1, val2) == false) {
+        Node* con_result = ConINode::make(phase->C, 0);
+        return con_result;
+      }
+      return NULL;
+    }
+    return NULL;
   }
 
-  if (type1 == TypeLong::ZERO || type2 == TypeLong::ZERO) {
-    return no_overflow(phase, ConLNode::make(phase->C, 0));
+  static const Type* Value(const OverflowOp* node, PhaseTransform* phase) {
+    const Type *t1 = phase->type( node->in(1) );
+    const Type *t2 = phase->type( node->in(2) );
+    if( t1 == Type::TOP ) return Type::TOP;
+    if( t2 == Type::TOP ) return Type::TOP;
+
+    const TypeClass* i1 = TypeClass::as_self(t1);
+    const TypeClass* i2 = TypeClass::as_self(t2);
+
+    if (i1 == NULL || i2 == NULL) {
+      return TypeInt::CC;
+    }
+
+    if (t1->singleton() && t2->singleton()) {
+      NativeType val1 = i1->get_con();
+      NativeType val2 = i2->get_con();
+      if (node->will_overflow(val1, val2)) {
+        return TypeInt::CC;
+      }
+      return TypeInt::ZERO;
+    } else if (i1 != TypeClass::TYPE_DOMAIN && i2 != TypeClass::TYPE_DOMAIN) {
+      if (node->will_overflow(i1->_lo, i2->_lo)) {
+        return TypeInt::CC;
+      } else if (node->will_overflow(i1->_lo, i2->_hi)) {
+        return TypeInt::CC;
+      } else if (node->will_overflow(i1->_hi, i2->_lo)) {
+        return TypeInt::CC;
+      } else if (node->will_overflow(i1->_hi, i2->_hi)) {
+        return TypeInt::CC;
+      }
+      return TypeInt::ZERO;
+    }
+
+    if (!node->can_overflow(t1, t2)) {
+      return TypeInt::ZERO;
+    }
+    return TypeInt::CC;
   }
+};
 
-  if (type1 == TypeLong::ONE) {
-    Node* mul_result = new (phase->C) AddLNode(arg2, phase->longcon(0));
-    return no_overflow(phase, mul_result);
-  }
-  if (type2 == TypeLong::ONE) {
-    Node* mul_result = new (phase->C) AddLNode(arg1, phase->longcon(0));
-    return no_overflow(phase, mul_result);
-  }
-
-  if (type1 == TypeLong::MINUS_1) {
-    return new (phase->C) NegExactLNode(NULL, arg2);
-  }
-
-  if (type2 == TypeLong::MINUS_1) {
-    return new (phase->C) NegExactLNode(NULL, arg1);
-  }
-
-  return NULL;
+Node* OverflowINode::Ideal(PhaseGVN* phase, bool can_reshape) {
+  return IdealHelper<OverflowINode>::Ideal(this, phase, can_reshape);
 }
 
+Node* OverflowLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
+  return IdealHelper<OverflowLNode>::Ideal(this, phase, can_reshape);
+}
+
+const Type* OverflowINode::Value(PhaseTransform* phase) const {
+  return IdealHelper<OverflowINode>::Value(this, phase);
+}
+
+const Type* OverflowLNode::Value(PhaseTransform* phase) const {
+  return IdealHelper<OverflowLNode>::Value(this, phase);
+}
+
--- a/src/share/vm/opto/mathexactnode.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/mathexactnode.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -27,128 +27,111 @@
 
 #include "opto/multnode.hpp"
 #include "opto/node.hpp"
+#include "opto/addnode.hpp"
 #include "opto/subnode.hpp"
 #include "opto/type.hpp"
 
-class BoolNode;
-class IfNode;
-class Node;
-
 class PhaseGVN;
 class PhaseTransform;
 
-class MathExactNode : public MultiNode {
+class OverflowNode : public CmpNode {
 public:
-  MathExactNode(Node* ctrl, Node* in1);
-  MathExactNode(Node* ctrl, Node* in1, Node* in2);
-  enum {
-    result_proj_node = 0,
-    flags_proj_node = 1
-  };
-  virtual int Opcode() const;
-  virtual Node* Identity(PhaseTransform* phase) { return this; }
-  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape) { return NULL; }
-  virtual const Type* Value(PhaseTransform* phase) const { return bottom_type(); }
-  virtual uint hash() const { return NO_HASH; }
-  virtual bool is_CFG() const { return false; }
-  virtual uint ideal_reg() const { return NotAMachineReg; }
+  OverflowNode(Node* in1, Node* in2) : CmpNode(in1, in2) {}
 
-  ProjNode* result_node() const { return proj_out(result_proj_node); }
-  ProjNode* flags_node() const { return proj_out(flags_proj_node); }
-  Node* control_node() const;
-  Node* non_throwing_branch() const;
-protected:
-  IfNode* if_node() const;
-  BoolNode* bool_node() const;
-  Node* no_overflow(PhaseGVN *phase, Node* new_result);
+  virtual uint ideal_reg() const { return Op_RegFlags; }
+  virtual const Type* sub(const Type* t1, const Type* t2) const;
 };
 
-class MathExactINode : public MathExactNode {
- public:
-  MathExactINode(Node* ctrl, Node* in1) : MathExactNode(ctrl, in1) {}
-  MathExactINode(Node* ctrl, Node* in1, Node* in2) : MathExactNode(ctrl, in1, in2) {}
-  virtual int Opcode() const;
-  virtual Node* match(const ProjNode* proj, const Matcher* m);
-  virtual const Type* bottom_type() const { return TypeTuple::INT_CC_PAIR; }
+class OverflowINode : public OverflowNode {
+public:
+  typedef TypeInt TypeClass;
+
+  OverflowINode(Node* in1, Node* in2) : OverflowNode(in1, in2) {}
+  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
+  virtual const Type* Value(PhaseTransform* phase) const;
+
+  virtual bool will_overflow(jint v1, jint v2) const = 0;
+  virtual bool can_overflow(const Type* t1, const Type* t2) const = 0;
 };
 
-class MathExactLNode : public MathExactNode {
+
+class OverflowLNode : public OverflowNode {
 public:
-  MathExactLNode(Node* ctrl, Node* in1) : MathExactNode(ctrl, in1) {}
-  MathExactLNode(Node* ctrl, Node* in1, Node* in2) : MathExactNode(ctrl, in1, in2) {}
-  virtual int Opcode() const;
-  virtual Node* match(const ProjNode* proj, const Matcher* m);
-  virtual const Type* bottom_type() const { return TypeTuple::LONG_CC_PAIR; }
+  typedef TypeLong TypeClass;
+
+  OverflowLNode(Node* in1, Node* in2) : OverflowNode(in1, in2) {}
+  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
+  virtual const Type* Value(PhaseTransform* phase) const;
+
+  virtual bool will_overflow(jlong v1, jlong v2) const = 0;
+  virtual bool can_overflow(const Type* t1, const Type* t2) const = 0;
 };
 
-class AddExactINode : public MathExactINode {
+class OverflowAddINode : public OverflowINode {
 public:
-  AddExactINode(Node* ctrl, Node* in1, Node* in2) : MathExactINode(ctrl, in1, in2) {}
+  typedef AddINode MathOp;
+
+  OverflowAddINode(Node* in1, Node* in2) : OverflowINode(in1, in2) {}
   virtual int Opcode() const;
-  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+
+  virtual bool will_overflow(jint v1, jint v2) const;
+  virtual bool can_overflow(const Type* t1, const Type* t2) const;
 };
 
-class AddExactLNode : public MathExactLNode {
+class OverflowSubINode : public OverflowINode {
 public:
-  AddExactLNode(Node* ctrl, Node* in1, Node* in2) : MathExactLNode(ctrl, in1, in2) {}
+  typedef SubINode MathOp;
+
+  OverflowSubINode(Node* in1, Node* in2) : OverflowINode(in1, in2) {}
   virtual int Opcode() const;
-  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
+
+  virtual bool will_overflow(jint v1, jint v2) const;
+  virtual bool can_overflow(const Type* t1, const Type* t2) const;
 };
 
-class SubExactINode : public MathExactINode {
+class OverflowMulINode : public OverflowINode {
 public:
-  SubExactINode(Node* ctrl, Node* in1, Node* in2) : MathExactINode(ctrl, in1, in2) {}
+  typedef MulINode MathOp;
+
+  OverflowMulINode(Node* in1, Node* in2) : OverflowINode(in1, in2) {}
   virtual int Opcode() const;
-  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
+
+  virtual bool will_overflow(jint v1, jint v2) const;
+  virtual bool can_overflow(const Type* t1, const Type* t2) const;
 };
 
-class SubExactLNode : public MathExactLNode {
+class OverflowAddLNode : public OverflowLNode {
 public:
-  SubExactLNode(Node* ctrl, Node* in1, Node* in2) : MathExactLNode(ctrl, in1, in2) {}
+  typedef AddLNode MathOp;
+
+  OverflowAddLNode(Node* in1, Node* in2) : OverflowLNode(in1, in2) {}
   virtual int Opcode() const;
-  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
+
+  virtual bool will_overflow(jlong v1, jlong v2) const;
+  virtual bool can_overflow(const Type* t1, const Type* t2) const;
 };
 
-class NegExactINode : public MathExactINode {
+class OverflowSubLNode : public OverflowLNode {
 public:
-  NegExactINode(Node* ctrl, Node* in1) : MathExactINode(ctrl, in1) {}
+  typedef SubLNode MathOp;
+
+  OverflowSubLNode(Node* in1, Node* in2) : OverflowLNode(in1, in2) {}
   virtual int Opcode() const;
-  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
+
+  virtual bool will_overflow(jlong v1, jlong v2) const;
+  virtual bool can_overflow(const Type* t1, const Type* t2) const;
 };
 
-class NegExactLNode : public MathExactLNode {
+class OverflowMulLNode : public OverflowLNode {
 public:
-  NegExactLNode(Node* ctrl, Node* in1) : MathExactLNode(ctrl, in1) {}
+  typedef MulLNode MathOp;
+
+  OverflowMulLNode(Node* in1, Node* in2) : OverflowLNode(in1, in2) {}
   virtual int Opcode() const;
-  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
+
+  virtual bool will_overflow(jlong v1, jlong v2) const;
+  virtual bool can_overflow(const Type* t1, const Type* t2) const;
 };
 
-class MulExactINode : public MathExactINode {
-public:
-  MulExactINode(Node* ctrl, Node* in1, Node* in2) : MathExactINode(ctrl, in1, in2) {}
-  virtual int Opcode() const;
-  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
-};
-
-class MulExactLNode : public MathExactLNode {
-public:
-  MulExactLNode(Node* ctrl, Node* in1, Node* in2) : MathExactLNode(ctrl, in1, in2) {}
-  virtual int Opcode() const;
-  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
-};
-
-class FlagsProjNode : public ProjNode {
-public:
-  FlagsProjNode(Node* src, uint con) : ProjNode(src, con) {
-    init_class_id(Class_FlagsProj);
-  }
-
-  virtual int Opcode() const;
-  virtual bool is_CFG() const { return false; }
-  virtual const Type* bottom_type() const { return TypeInt::CC; }
-  virtual uint ideal_reg() const { return Op_RegFlags; }
-};
-
-
 #endif
 
--- a/src/share/vm/opto/multnode.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/multnode.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -54,11 +54,6 @@
         assert(Opcode() != Op_If || proj->Opcode() == (which_proj?Op_IfTrue:Op_IfFalse), "bad if #2");
         return proj;
       }
-    } else if (p->is_FlagsProj()) {
-      FlagsProjNode *proj = p->as_FlagsProj();
-      if (proj->_con == which_proj) {
-        return proj;
-      }
     } else {
       assert(p == this && this->is_Start(), "else must be proj");
       continue;
--- a/src/share/vm/opto/node.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/node.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -69,7 +69,6 @@
 class EncodePKlassNode;
 class FastLockNode;
 class FastUnlockNode;
-class FlagsProjNode;
 class IfNode;
 class IfFalseNode;
 class IfTrueNode;
@@ -100,7 +99,6 @@
 class MachSpillCopyNode;
 class MachTempNode;
 class Matcher;
-class MathExactNode;
 class MemBarNode;
 class MemBarStoreStoreNode;
 class MemNode;
@@ -575,7 +573,6 @@
       DEFINE_CLASS_ID(MemBar,      Multi, 3)
         DEFINE_CLASS_ID(Initialize,       MemBar, 0)
         DEFINE_CLASS_ID(MemBarStoreStore, MemBar, 1)
-      DEFINE_CLASS_ID(MathExact,   Multi, 4)
 
     DEFINE_CLASS_ID(Mach,  Node, 1)
       DEFINE_CLASS_ID(MachReturn, Mach, 0)
@@ -632,7 +629,6 @@
       DEFINE_CLASS_ID(Cmp,   Sub, 0)
         DEFINE_CLASS_ID(FastLock,   Cmp, 0)
         DEFINE_CLASS_ID(FastUnlock, Cmp, 1)
-        DEFINE_CLASS_ID(FlagsProj, Cmp, 2)
 
     DEFINE_CLASS_ID(MergeMem, Node, 7)
     DEFINE_CLASS_ID(Bool,     Node, 8)
@@ -736,7 +732,6 @@
   DEFINE_CLASS_QUERY(EncodePKlass)
   DEFINE_CLASS_QUERY(FastLock)
   DEFINE_CLASS_QUERY(FastUnlock)
-  DEFINE_CLASS_QUERY(FlagsProj)
   DEFINE_CLASS_QUERY(If)
   DEFINE_CLASS_QUERY(IfFalse)
   DEFINE_CLASS_QUERY(IfTrue)
@@ -765,7 +760,6 @@
   DEFINE_CLASS_QUERY(MachSafePoint)
   DEFINE_CLASS_QUERY(MachSpillCopy)
   DEFINE_CLASS_QUERY(MachTemp)
-  DEFINE_CLASS_QUERY(MathExact)
   DEFINE_CLASS_QUERY(Mem)
   DEFINE_CLASS_QUERY(MemBar)
   DEFINE_CLASS_QUERY(MemBarStoreStore)
--- a/src/share/vm/opto/output.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/output.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -344,6 +344,11 @@
   uint*      jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks);
   uint*      jmp_size   = NEW_RESOURCE_ARRAY(uint,nblocks);
   int*       jmp_nidx   = NEW_RESOURCE_ARRAY(int ,nblocks);
+
+  // Collect worst case block paddings
+  int* block_worst_case_pad = NEW_RESOURCE_ARRAY(int, nblocks);
+  memset(block_worst_case_pad, 0, nblocks * sizeof(int));
+
   DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks); )
   DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,nblocks); )
 
@@ -460,6 +465,7 @@
           last_avoid_back_to_back_adr += max_loop_pad;
         }
         blk_size += max_loop_pad;
+        block_worst_case_pad[i + 1] = max_loop_pad;
       }
     }
 
@@ -499,9 +505,16 @@
         if (bnum > i) { // adjust following block's offset
           offset -= adjust_block_start;
         }
+
+        // This block can be a loop header, account for the padding
+        // in the previous block.
+        int block_padding = block_worst_case_pad[i];
+        assert(i == 0 || block_padding == 0 || br_offs >= block_padding, "Should have at least a padding on top");
         // In the following code a nop could be inserted before
         // the branch which will increase the backward distance.
-        bool needs_padding = ((uint)br_offs == last_may_be_short_branch_adr);
+        bool needs_padding = ((uint)(br_offs - block_padding) == last_may_be_short_branch_adr);
+        assert(!needs_padding || jmp_offset[i] == 0, "padding only branches at the beginning of block");
+
         if (needs_padding && offset <= 0)
           offset -= nop_size;
 
--- a/src/share/vm/opto/reg_split.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/reg_split.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -55,7 +55,7 @@
 // Get a SpillCopy node with wide-enough masks.  Use the 'wide-mask', the
 // wide ideal-register spill-mask if possible.  If the 'wide-mask' does
 // not cover the input (or output), use the input (or output) mask instead.
-Node *PhaseChaitin::get_spillcopy_wide( Node *def, Node *use, uint uidx ) {
+Node *PhaseChaitin::get_spillcopy_wide(MachSpillCopyNode::SpillType spill_type, Node *def, Node *use, uint uidx ) {
   // If ideal reg doesn't exist we've got a bad schedule happening
   // that is forcing us to spill something that isn't spillable.
   // Bail rather than abort
@@ -93,7 +93,7 @@
       // Here we assume a trip through memory is required.
       w_i_mask = &C->FIRST_STACK_mask();
   }
-  return new (C) MachSpillCopyNode( def, *w_i_mask, *w_o_mask );
+  return new (C) MachSpillCopyNode(spill_type, def, *w_i_mask, *w_o_mask );
 }
 
 //------------------------------insert_proj------------------------------------
@@ -159,7 +159,7 @@
   assert( loc >= 0, "must insert past block head" );
 
   // Get a def-side SpillCopy
-  Node *spill = get_spillcopy_wide(def,NULL,0);
+  Node *spill = get_spillcopy_wide(MachSpillCopyNode::Definition, def, NULL, 0);
   // Did we fail to split?, then bail
   if (!spill) {
     return 0;
@@ -180,7 +180,7 @@
 //------------------------------split_USE--------------------------------------
 // Splits at uses can involve redeffing the LRG, so no CISC Spilling there.
 // Debug uses want to know if def is already stack enabled.
-uint PhaseChaitin::split_USE( Node *def, Block *b, Node *use, uint useidx, uint maxlrg, bool def_down, bool cisc_sp, GrowableArray<uint> splits, int slidx ) {
+uint PhaseChaitin::split_USE(MachSpillCopyNode::SpillType spill_type, Node *def, Block *b, Node *use, uint useidx, uint maxlrg, bool def_down, bool cisc_sp, GrowableArray<uint> splits, int slidx ) {
 #ifdef ASSERT
   // Increment the counter for this lrg
   splits.at_put(slidx, splits.at(slidx)+1);
@@ -216,7 +216,7 @@
         // DEF is UP, so must copy it DOWN and hook in USE
         // Insert SpillCopy before the USE, which uses DEF as its input,
         // and defs a new live range, which is used by this node.
-        Node *spill = get_spillcopy_wide(def,use,useidx);
+        Node *spill = get_spillcopy_wide(spill_type, def,use,useidx);
         // did we fail to split?
         if (!spill) {
           // Bail
@@ -268,7 +268,7 @@
     bindex = b->find_node(use);
   }
 
-  Node *spill = get_spillcopy_wide( def, use, useidx );
+  Node *spill = get_spillcopy_wide(spill_type, def, use, useidx );
   if( !spill ) return 0;        // Bailed out
   // Insert SpillCopy before the USE, which uses the reaching DEF as
   // its input, and defs a new live range, which is used by this node.
@@ -327,7 +327,7 @@
 
       Block *b_def = _cfg.get_block_for_node(def);
       int idx_def = b_def->find_node(def);
-      Node *in_spill = get_spillcopy_wide( in, def, i );
+      Node *in_spill = get_spillcopy_wide(MachSpillCopyNode::InputToRematerialization, in, def, i );
       if( !in_spill ) return 0; // Bailed out
       insert_proj(b_def,idx_def,in_spill,maxlrg++);
       if( b_def == b )
@@ -935,7 +935,7 @@
                 // This def has been rematerialized a couple of times without
                 // progress. It doesn't care if it lives UP or DOWN, so
                 // spill it down now.
-                maxlrg = split_USE(def,b,n,inpidx,maxlrg,false,false,splits,slidx);
+                maxlrg = split_USE(MachSpillCopyNode::BasePointerToMem, def,b,n,inpidx,maxlrg,false,false,splits,slidx);
                 // If it wasn't split bail
                 if (!maxlrg) {
                   return 0;
@@ -1015,7 +1015,7 @@
                  !is_vect && umask.is_misaligned_pair())) {
               // These need a Split regardless of overlap or pressure
               // SPLIT - NO DEF - NO CISC SPILL
-              maxlrg = split_USE(def,b,n,inpidx,maxlrg,dup,false, splits,slidx);
+              maxlrg = split_USE(MachSpillCopyNode::Bound, def,b,n,inpidx,maxlrg,dup,false, splits,slidx);
               // If it wasn't split bail
               if (!maxlrg) {
                 return 0;
@@ -1027,7 +1027,7 @@
             if (UseFPUForSpilling && n->is_MachCall() && !uup && !dup ) {
               // The use at the call can force the def down so insert
               // a split before the use to allow the def more freedom.
-              maxlrg = split_USE(def,b,n,inpidx,maxlrg,dup,false, splits,slidx);
+              maxlrg = split_USE(MachSpillCopyNode::CallUse, def,b,n,inpidx,maxlrg,dup,false, splits,slidx);
               // If it wasn't split bail
               if (!maxlrg) {
                 return 0;
@@ -1063,7 +1063,7 @@
               else {  // Both are either up or down, and there is no overlap
                 if( dup ) {  // If UP, reg->reg copy
                   // COPY ACROSS HERE - NO DEF - NO CISC SPILL
-                  maxlrg = split_USE(def,b,n,inpidx,maxlrg,false,false, splits,slidx);
+                  maxlrg = split_USE(MachSpillCopyNode::RegToReg, def,b,n,inpidx,maxlrg,false,false, splits,slidx);
                   // If it wasn't split bail
                   if (!maxlrg) {
                     return 0;
@@ -1075,10 +1075,10 @@
                   // First Split-UP to move value into Register
                   uint def_ideal = def->ideal_reg();
                   const RegMask* tmp_rm = Matcher::idealreg2regmask[def_ideal];
-                  Node *spill = new (C) MachSpillCopyNode(def, dmask, *tmp_rm);
+                  Node *spill = new (C) MachSpillCopyNode(MachSpillCopyNode::MemToReg, def, dmask, *tmp_rm);
                   insert_proj( b, insidx, spill, maxlrg );
                   // Then Split-DOWN as if previous Split was DEF
-                  maxlrg = split_USE(spill,b,n,inpidx,maxlrg,false,false, splits,slidx);
+                  maxlrg = split_USE(MachSpillCopyNode::RegToMem, spill,b,n,inpidx,maxlrg,false,false, splits,slidx);
                   // If it wasn't split bail
                   if (!maxlrg) {
                     return 0;
@@ -1103,7 +1103,7 @@
                   }
                 }
                 // COPY DOWN HERE - NO DEF - NO CISC SPILL
-                maxlrg = split_USE(def,b,n,inpidx,maxlrg,false,false, splits,slidx);
+                maxlrg = split_USE(MachSpillCopyNode::RegToMem, def,b,n,inpidx,maxlrg,false,false, splits,slidx);
                 // If it wasn't split bail
                 if (!maxlrg) {
                   return 0;
@@ -1118,7 +1118,7 @@
               else {       // DOWN, Split-UP and check register pressure
                 if( is_high_pressure( b, &lrgs(useidx), insidx ) ) {
                   // COPY UP HERE - NO DEF - CISC SPILL
-                  maxlrg = split_USE(def,b,n,inpidx,maxlrg,true,true, splits,slidx);
+                  maxlrg = split_USE(MachSpillCopyNode::MemToReg, def,b,n,inpidx,maxlrg,true,true, splits,slidx);
                   // If it wasn't split bail
                   if (!maxlrg) {
                     return 0;
@@ -1126,7 +1126,7 @@
                   insidx++;  // Reset iterator to skip USE side split
                 } else {                          // LRP
                   // COPY UP HERE - WITH DEF - NO CISC SPILL
-                  maxlrg = split_USE(def,b,n,inpidx,maxlrg,true,false, splits,slidx);
+                  maxlrg = split_USE(MachSpillCopyNode::MemToReg, def,b,n,inpidx,maxlrg,true,false, splits,slidx);
                   // If it wasn't split bail
                   if (!maxlrg) {
                     return 0;
@@ -1229,7 +1229,7 @@
               if (C->check_node_count(NodeLimitFudgeFactor, out_of_nodes)) {  // Check when generating nodes
                 return 0;
               }
-              Node *spill = new (C) MachSpillCopyNode(use,use_rm,def_rm);
+              Node *spill = new (C) MachSpillCopyNode(MachSpillCopyNode::MemToReg, use,use_rm,def_rm);
               n->set_req(copyidx,spill);
               n->as_MachSpillCopy()->set_in_RegMask(def_rm);
               // Put the spill just before the copy
@@ -1336,7 +1336,7 @@
       // Grab the UP/DOWN sense for the input
       u1 = UP[pidx][slidx];
       if( u1 != (phi_up != 0)) {
-        maxlrg = split_USE(def, b, phi, i, maxlrg, !u1, false, splits,slidx);
+        maxlrg = split_USE(MachSpillCopyNode::PhiLocationDifferToInputLocation, def, b, phi, i, maxlrg, !u1, false, splits,slidx);
         // If it wasn't split bail
         if (!maxlrg) {
           return 0;
--- a/src/share/vm/opto/subnode.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/subnode.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -1126,11 +1126,15 @@
   Node *cmp = in(1);
   if( !cmp->is_Sub() ) return NULL;
   int cop = cmp->Opcode();
-  if( cop == Op_FastLock || cop == Op_FastUnlock || cop == Op_FlagsProj) return NULL;
+  if( cop == Op_FastLock || cop == Op_FastUnlock) return NULL;
   Node *cmp1 = cmp->in(1);
   Node *cmp2 = cmp->in(2);
   if( !cmp1 ) return NULL;
 
+  if (_test._test == BoolTest::overflow || _test._test == BoolTest::no_overflow) {
+    return NULL;
+  }
+
   // Constant on left?
   Node *con = cmp1;
   uint op2 = cmp2->Opcode();
--- a/src/share/vm/opto/superword.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/superword.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -441,6 +441,7 @@
     return true;   // no induction variable
   }
   CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop());
+  assert(pre_end != NULL, "we must have a correct pre-loop");
   assert(pre_end->stride_is_con(), "pre loop stride is constant");
   int preloop_stride = pre_end->stride_con();
 
@@ -1981,7 +1982,7 @@
   CountedLoopNode *main_head = lp()->as_CountedLoop();
   assert(main_head->is_main_loop(), "");
   CountedLoopEndNode* pre_end = get_pre_loop_end(main_head);
-  assert(pre_end != NULL, "");
+  assert(pre_end != NULL, "we must have a correct pre-loop");
   Node *pre_opaq1 = pre_end->limit();
   assert(pre_opaq1->Opcode() == Op_Opaque1, "");
   Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
@@ -2145,7 +2146,8 @@
   if (!p_f->is_IfFalse()) return NULL;
   if (!p_f->in(0)->is_CountedLoopEnd()) return NULL;
   CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
-  if (!pre_end->loopnode()->is_pre_loop()) return NULL;
+  CountedLoopNode* loop_node = pre_end->loopnode();
+  if (loop_node == NULL || !loop_node->is_pre_loop()) return NULL;
   return pre_end;
 }
 
--- a/src/share/vm/opto/type.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/type.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -306,6 +306,7 @@
   TypeInt::POS1    = TypeInt::make(1,max_jint,   WidenMin); // Positive values
   TypeInt::INT     = TypeInt::make(min_jint,max_jint, WidenMax); // 32-bit integers
   TypeInt::SYMINT  = TypeInt::make(-max_jint,max_jint,WidenMin); // symmetric range
+  TypeInt::TYPE_DOMAIN  = TypeInt::INT;
   // CmpL is overloaded both as the bytecode computation returning
   // a trinary (-1,0,+1) integer result AND as an efficient long
   // compare returning optimizer ideal-type flags.
@@ -322,6 +323,7 @@
   TypeLong::LONG    = TypeLong::make(min_jlong,max_jlong,WidenMax); // 64-bit integers
   TypeLong::INT     = TypeLong::make((jlong)min_jint,(jlong)max_jint,WidenMin);
   TypeLong::UINT    = TypeLong::make(0,(jlong)max_juint,WidenMin);
+  TypeLong::TYPE_DOMAIN  = TypeLong::LONG;
 
   const Type **fboth =(const Type**)shared_type_arena->Amalloc_4(2*sizeof(Type*));
   fboth[0] = Type::CONTROL;
@@ -1161,6 +1163,7 @@
 const TypeInt *TypeInt::POS1;   // Positive 32-bit integers
 const TypeInt *TypeInt::INT;    // 32-bit integers
 const TypeInt *TypeInt::SYMINT; // symmetric range [-max_jint..max_jint]
+const TypeInt *TypeInt::TYPE_DOMAIN; // alias for TypeInt::INT
 
 //------------------------------TypeInt----------------------------------------
 TypeInt::TypeInt( jint lo, jint hi, int w ) : Type(Int), _lo(lo), _hi(hi), _widen(w) {
@@ -1418,6 +1421,7 @@
 const TypeLong *TypeLong::LONG; // 64-bit integers
 const TypeLong *TypeLong::INT;  // 32-bit subrange
 const TypeLong *TypeLong::UINT; // 32-bit unsigned subrange
+const TypeLong *TypeLong::TYPE_DOMAIN; // alias for TypeLong::LONG
 
 //------------------------------TypeLong---------------------------------------
 TypeLong::TypeLong( jlong lo, jlong hi, int w ) : Type(Long), _lo(lo), _hi(hi), _widen(w) {
@@ -2459,7 +2463,7 @@
 const TypeOopPtr *TypeOopPtr::BOTTOM;
 
 //------------------------------TypeOopPtr-------------------------------------
-TypeOopPtr::TypeOopPtr(TYPES t, PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id, const TypeOopPtr* speculative)
+TypeOopPtr::TypeOopPtr(TYPES t, PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id, const TypeOopPtr* speculative, int inline_depth)
   : TypePtr(t, ptr, offset),
     _const_oop(o), _klass(k),
     _klass_is_exact(xk),
@@ -2467,7 +2471,8 @@
     _is_ptr_to_narrowklass(false),
     _is_ptr_to_boxed_value(false),
     _instance_id(instance_id),
-    _speculative(speculative) {
+    _speculative(speculative),
+    _inline_depth(inline_depth){
   if (Compile::current()->eliminate_boxing() && (t == InstPtr) &&
       (offset > 0) && xk && (k != 0) && k->is_instance_klass()) {
     _is_ptr_to_boxed_value = k->as_instance_klass()->is_boxed_value_offset(offset);
@@ -2534,12 +2539,12 @@
 
 //------------------------------make-------------------------------------------
 const TypeOopPtr *TypeOopPtr::make(PTR ptr,
-                                   int offset, int instance_id, const TypeOopPtr* speculative) {
+                                   int offset, int instance_id, const TypeOopPtr* speculative, int inline_depth) {
   assert(ptr != Constant, "no constant generic pointers");
   ciKlass*  k = Compile::current()->env()->Object_klass();
   bool      xk = false;
   ciObject* o = NULL;
-  return (TypeOopPtr*)(new TypeOopPtr(OopPtr, ptr, k, xk, o, offset, instance_id, speculative))->hashcons();
+  return (TypeOopPtr*)(new TypeOopPtr(OopPtr, ptr, k, xk, o, offset, instance_id, speculative, inline_depth))->hashcons();
 }
 
 
@@ -2547,7 +2552,7 @@
 const Type *TypeOopPtr::cast_to_ptr_type(PTR ptr) const {
   assert(_base == OopPtr, "subclass must override cast_to_ptr_type");
   if( ptr == _ptr ) return this;
-  return make(ptr, _offset, _instance_id, _speculative);
+  return make(ptr, _offset, _instance_id, _speculative, _inline_depth);
 }
 
 //-----------------------------cast_to_instance_id----------------------------
@@ -2644,7 +2649,7 @@
     case AnyNull: {
       int instance_id = meet_instance_id(InstanceTop);
       const TypeOopPtr* speculative = _speculative;
-      return make(ptr, offset, instance_id, speculative);
+      return make(ptr, offset, instance_id, speculative, _inline_depth);
     }
     case BotPTR:
     case NotNull:
@@ -2657,7 +2662,8 @@
     const TypeOopPtr *tp = t->is_oopptr();
     int instance_id = meet_instance_id(tp->instance_id());
     const TypeOopPtr* speculative = xmeet_speculative(tp);
-    return make(meet_ptr(tp->ptr()), meet_offset(tp->offset()), instance_id, speculative);
+    int depth = meet_inline_depth(tp->inline_depth());
+    return make(meet_ptr(tp->ptr()), meet_offset(tp->offset()), instance_id, speculative, depth);
   }
 
   case InstPtr:                  // For these, flip the call around to cut down
@@ -2674,7 +2680,7 @@
 const Type *TypeOopPtr::xdual() const {
   assert(klass() == Compile::current()->env()->Object_klass(), "no klasses here");
   assert(const_oop() == NULL,             "no constants here");
-  return new TypeOopPtr(_base, dual_ptr(), klass(), klass_is_exact(), const_oop(), dual_offset(), dual_instance_id(), dual_speculative());
+  return new TypeOopPtr(_base, dual_ptr(), klass(), klass_is_exact(), const_oop(), dual_offset(), dual_instance_id(), dual_speculative(), dual_inline_depth());
 }
 
 //--------------------------make_from_klass_common-----------------------------
@@ -2765,7 +2771,7 @@
     } else if (!o->should_be_constant()) {
       return TypeAryPtr::make(TypePtr::NotNull, arr0, klass, true, 0);
     }
-    const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0, InstanceBot, NULL, is_autobox_cache);
+    const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0, InstanceBot, NULL, InlineDepthBottom, is_autobox_cache);
     return arr;
   } else if (klass->is_type_array_klass()) {
     // Element is an typeArray
@@ -2854,7 +2860,8 @@
   const TypeOopPtr *a = (const TypeOopPtr*)t;
   if (_klass_is_exact != a->_klass_is_exact ||
       _instance_id != a->_instance_id ||
-      !eq_speculative(a))  return false;
+      !eq_speculative(a) ||
+      _inline_depth != a->_inline_depth)  return false;
   ciObject* one = const_oop();
   ciObject* two = a->const_oop();
   if (one == NULL || two == NULL) {
@@ -2872,6 +2879,7 @@
     _klass_is_exact +
     _instance_id +
     hash_speculative() +
+    _inline_depth +
     TypePtr::hash();
 }
 
@@ -2892,6 +2900,7 @@
   else if (_instance_id != InstanceBot)
     st->print(",iid=%d",_instance_id);
 
+  dump_inline_depth(st);
   dump_speculative(st);
 }
 
@@ -2905,6 +2914,16 @@
     st->print(")");
   }
 }
+
+void TypeOopPtr::dump_inline_depth(outputStream *st) const {
+  if (_inline_depth != InlineDepthBottom) {
+    if (_inline_depth == InlineDepthTop) {
+      st->print(" (inline_depth=InlineDepthTop)");
+    } else {
+      st->print(" (inline_depth=%d)", _inline_depth);
+    }
+  }
+}
 #endif
 
 //------------------------------singleton--------------------------------------
@@ -2918,7 +2937,7 @@
 
 //------------------------------add_offset-------------------------------------
 const TypePtr *TypeOopPtr::add_offset(intptr_t offset) const {
-  return make(_ptr, xadd_offset(offset), _instance_id, add_offset_speculative(offset));
+  return make(_ptr, xadd_offset(offset), _instance_id, add_offset_speculative(offset), _inline_depth);
 }
 
 /**
@@ -2928,7 +2947,52 @@
   if (_speculative == NULL) {
     return this;
   }
-  return make(_ptr, _offset, _instance_id, NULL);
+  assert(_inline_depth == InlineDepthTop || _inline_depth == InlineDepthBottom, "non speculative type shouldn't have inline depth");
+  return make(_ptr, _offset, _instance_id, NULL, _inline_depth);
+}
+
+/**
+ * Return same type but with a different inline depth (used for speculation)
+ *
+ * @param depth  depth to meet with
+ */
+const TypeOopPtr* TypeOopPtr::with_inline_depth(int depth) const {
+  if (!UseInlineDepthForSpeculativeTypes) {
+    return this;
+  }
+  return make(_ptr, _offset, _instance_id, _speculative, depth);
+}
+
+/**
+ * Check whether new profiling would improve speculative type
+ *
+ * @param   exact_kls    class from profiling
+ * @param   inline_depth inlining depth of profile point
+ *
+ * @return  true if type profile is valuable
+ */
+bool TypeOopPtr::would_improve_type(ciKlass* exact_kls, int inline_depth) const {
+  // no way to improve an already exact type
+  if (klass_is_exact()) {
+    return false;
+  }
+  // no profiling?
+  if (exact_kls == NULL) {
+    return false;
+  }
+  // no speculative type or non exact speculative type?
+  if (speculative_type() == NULL) {
+    return true;
+  }
+  // If the node already has an exact speculative type keep it,
+  // unless it was provided by profiling that is at a deeper
+  // inlining level. Profiling at a higher inlining depth is
+  // expected to be less accurate.
+  if (_speculative->inline_depth() == InlineDepthBottom) {
+    return false;
+  }
+  assert(_speculative->inline_depth() != InlineDepthTop, "can't do the comparison");
+  return inline_depth < _speculative->inline_depth();
 }
 
 //------------------------------meet_instance_id--------------------------------
@@ -3031,6 +3095,21 @@
   return _speculative->hash();
 }
 
+/**
+ * dual of the inline depth for this type (used for speculation)
+ */
+int TypeOopPtr::dual_inline_depth() const {
+  return -inline_depth();
+}
+
+/**
+ * meet of 2 inline depth (used for speculation)
+ *
+ * @param depth  depth to meet with
+ */
+int TypeOopPtr::meet_inline_depth(int depth) const {
+  return MAX2(inline_depth(), depth);
+}
 
 //=============================================================================
 // Convenience common pre-built types.
@@ -3041,8 +3120,8 @@
 const TypeInstPtr *TypeInstPtr::KLASS;
 
 //------------------------------TypeInstPtr-------------------------------------
-TypeInstPtr::TypeInstPtr(PTR ptr, ciKlass* k, bool xk, ciObject* o, int off, int instance_id, const TypeOopPtr* speculative)
-  : TypeOopPtr(InstPtr, ptr, k, xk, o, off, instance_id, speculative), _name(k->name()) {
+TypeInstPtr::TypeInstPtr(PTR ptr, ciKlass* k, bool xk, ciObject* o, int off, int instance_id, const TypeOopPtr* speculative, int inline_depth)
+  : TypeOopPtr(InstPtr, ptr, k, xk, o, off, instance_id, speculative, inline_depth), _name(k->name()) {
    assert(k != NULL &&
           (k->is_loaded() || o == NULL),
           "cannot have constants with non-loaded klass");
@@ -3055,7 +3134,8 @@
                                      ciObject* o,
                                      int offset,
                                      int instance_id,
-                                     const TypeOopPtr* speculative) {
+                                     const TypeOopPtr* speculative,
+                                     int inline_depth) {
   assert( !k->is_loaded() || k->is_instance_klass(), "Must be for instance");
   // Either const_oop() is NULL or else ptr is Constant
   assert( (!o && ptr != Constant) || (o && ptr == Constant),
@@ -3076,7 +3156,7 @@
 
   // Now hash this baby
   TypeInstPtr *result =
-    (TypeInstPtr*)(new TypeInstPtr(ptr, k, xk, o ,offset, instance_id, speculative))->hashcons();
+    (TypeInstPtr*)(new TypeInstPtr(ptr, k, xk, o ,offset, instance_id, speculative, inline_depth))->hashcons();
 
   return result;
 }
@@ -3109,7 +3189,7 @@
   if( ptr == _ptr ) return this;
   // Reconstruct _sig info here since not a problem with later lazy
   // construction, _sig will show up on demand.
-  return make(ptr, klass(), klass_is_exact(), const_oop(), _offset, _instance_id, _speculative);
+  return make(ptr, klass(), klass_is_exact(), const_oop(), _offset, _instance_id, _speculative, _inline_depth);
 }
 
 
@@ -3121,13 +3201,13 @@
   ciInstanceKlass* ik = _klass->as_instance_klass();
   if( (ik->is_final() || _const_oop) )  return this;  // cannot clear xk
   if( ik->is_interface() )              return this;  // cannot set xk
-  return make(ptr(), klass(), klass_is_exact, const_oop(), _offset, _instance_id, _speculative);
+  return make(ptr(), klass(), klass_is_exact, const_oop(), _offset, _instance_id, _speculative, _inline_depth);
 }
 
 //-----------------------------cast_to_instance_id----------------------------
 const TypeOopPtr *TypeInstPtr::cast_to_instance_id(int instance_id) const {
   if( instance_id == _instance_id ) return this;
-  return make(_ptr, klass(), _klass_is_exact, const_oop(), _offset, instance_id, _speculative);
+  return make(_ptr, klass(), _klass_is_exact, const_oop(), _offset, instance_id, _speculative, _inline_depth);
 }
 
 //------------------------------xmeet_unloaded---------------------------------
@@ -3138,6 +3218,7 @@
     PTR ptr = meet_ptr(tinst->ptr());
     int instance_id = meet_instance_id(tinst->instance_id());
     const TypeOopPtr* speculative = xmeet_speculative(tinst);
+    int depth = meet_inline_depth(tinst->inline_depth());
 
     const TypeInstPtr *loaded    = is_loaded() ? this  : tinst;
     const TypeInstPtr *unloaded  = is_loaded() ? tinst : this;
@@ -3158,7 +3239,7 @@
       assert(loaded->ptr() != TypePtr::Null, "insanity check");
       //
       if(      loaded->ptr() == TypePtr::TopPTR ) { return unloaded; }
-      else if (loaded->ptr() == TypePtr::AnyNull) { return TypeInstPtr::make(ptr, unloaded->klass(), false, NULL, off, instance_id, speculative); }
+      else if (loaded->ptr() == TypePtr::AnyNull) { return TypeInstPtr::make(ptr, unloaded->klass(), false, NULL, off, instance_id, speculative, depth); }
       else if (loaded->ptr() == TypePtr::BotPTR ) { return TypeInstPtr::BOTTOM; }
       else if (loaded->ptr() == TypePtr::Constant || loaded->ptr() == TypePtr::NotNull) {
         if (unloaded->ptr() == TypePtr::BotPTR  ) { return TypeInstPtr::BOTTOM;  }
@@ -3215,6 +3296,7 @@
     PTR ptr = meet_ptr(tp->ptr());
     int instance_id = meet_instance_id(tp->instance_id());
     const TypeOopPtr* speculative = xmeet_speculative(tp);
+    int depth = meet_inline_depth(tp->inline_depth());
     switch (ptr) {
     case TopPTR:
     case AnyNull:                // Fall 'down' to dual of object klass
@@ -3222,12 +3304,12 @@
       // below the centerline when the superclass is exact. We need to
       // do the same here.
       if (klass()->equals(ciEnv::current()->Object_klass()) && !klass_is_exact()) {
-        return TypeAryPtr::make(ptr, tp->ary(), tp->klass(), tp->klass_is_exact(), offset, instance_id, speculative);
+        return TypeAryPtr::make(ptr, tp->ary(), tp->klass(), tp->klass_is_exact(), offset, instance_id, speculative, depth);
       } else {
         // cannot subclass, so the meet has to fall badly below the centerline
         ptr = NotNull;
         instance_id = InstanceBot;
-        return TypeInstPtr::make( ptr, ciEnv::current()->Object_klass(), false, NULL, offset, instance_id, speculative);
+        return TypeInstPtr::make( ptr, ciEnv::current()->Object_klass(), false, NULL, offset, instance_id, speculative, depth);
       }
     case Constant:
     case NotNull:
@@ -3242,7 +3324,7 @@
         if (klass()->equals(ciEnv::current()->Object_klass()) && !klass_is_exact()) {
           // that is, tp's array type is a subtype of my klass
           return TypeAryPtr::make(ptr, (ptr == Constant ? tp->const_oop() : NULL),
-                                  tp->ary(), tp->klass(), tp->klass_is_exact(), offset, instance_id, speculative);
+                                  tp->ary(), tp->klass(), tp->klass_is_exact(), offset, instance_id, speculative, depth);
         }
       }
       // The other case cannot happen, since I cannot be a subtype of an array.
@@ -3250,7 +3332,7 @@
       if( ptr == Constant )
          ptr = NotNull;
       instance_id = InstanceBot;
-      return make(ptr, ciEnv::current()->Object_klass(), false, NULL, offset, instance_id, speculative);
+      return make(ptr, ciEnv::current()->Object_klass(), false, NULL, offset, instance_id, speculative, depth);
     default: typerr(t);
     }
   }
@@ -3265,14 +3347,16 @@
     case AnyNull: {
       int instance_id = meet_instance_id(InstanceTop);
       const TypeOopPtr* speculative = xmeet_speculative(tp);
+      int depth = meet_inline_depth(tp->inline_depth());
       return make(ptr, klass(), klass_is_exact(),
-                  (ptr == Constant ? const_oop() : NULL), offset, instance_id, speculative);
+                  (ptr == Constant ? const_oop() : NULL), offset, instance_id, speculative, depth);
     }
     case NotNull:
     case BotPTR: {
       int instance_id = meet_instance_id(tp->instance_id());
       const TypeOopPtr* speculative = xmeet_speculative(tp);
-      return TypeOopPtr::make(ptr, offset, instance_id, speculative);
+      int depth = meet_inline_depth(tp->inline_depth());
+      return TypeOopPtr::make(ptr, offset, instance_id, speculative, depth);
     }
     default: typerr(t);
     }
@@ -3292,7 +3376,7 @@
       int instance_id = meet_instance_id(InstanceTop);
       const TypeOopPtr* speculative = _speculative;
       return make(ptr, klass(), klass_is_exact(),
-                  (ptr == Constant ? const_oop() : NULL), offset, instance_id, speculative);
+                  (ptr == Constant ? const_oop() : NULL), offset, instance_id, speculative, _inline_depth);
     }
     case NotNull:
     case BotPTR:
@@ -3324,13 +3408,14 @@
     PTR ptr = meet_ptr( tinst->ptr() );
     int instance_id = meet_instance_id(tinst->instance_id());
     const TypeOopPtr* speculative = xmeet_speculative(tinst);
+    int depth = meet_inline_depth(tinst->inline_depth());
 
     // Check for easy case; klasses are equal (and perhaps not loaded!)
     // If we have constants, then we created oops so classes are loaded
     // and we can handle the constants further down.  This case handles
     // both-not-loaded or both-loaded classes
     if (ptr != Constant && klass()->equals(tinst->klass()) && klass_is_exact() == tinst->klass_is_exact()) {
-      return make(ptr, klass(), klass_is_exact(), NULL, off, instance_id, speculative);
+      return make(ptr, klass(), klass_is_exact(), NULL, off, instance_id, speculative, depth);
     }
 
     // Classes require inspection in the Java klass hierarchy.  Must be loaded.
@@ -3394,7 +3479,7 @@
         // Find out which constant.
         o = (this_klass == klass()) ? const_oop() : tinst->const_oop();
       }
-      return make(ptr, k, xk, o, off, instance_id, speculative);
+      return make(ptr, k, xk, o, off, instance_id, speculative, depth);
     }
 
     // Either oop vs oop or interface vs interface or interface vs Object
@@ -3471,7 +3556,7 @@
         else
           ptr = NotNull;
       }
-      return make(ptr, this_klass, this_xk, o, off, instance_id, speculative);
+      return make(ptr, this_klass, this_xk, o, off, instance_id, speculative, depth);
     } // Else classes are not equal
 
     // Since klasses are different, we require a LCA in the Java
@@ -3482,7 +3567,7 @@
 
     // Now we find the LCA of Java classes
     ciKlass* k = this_klass->least_common_ancestor(tinst_klass);
-    return make(ptr, k, false, NULL, off, instance_id, speculative);
+    return make(ptr, k, false, NULL, off, instance_id, speculative, depth);
   } // End of case InstPtr
 
   } // End of switch
@@ -3506,7 +3591,7 @@
 // Dual: do NOT dual on klasses.  This means I do NOT understand the Java
 // inheritance mechanism.
 const Type *TypeInstPtr::xdual() const {
-  return new TypeInstPtr(dual_ptr(), klass(), klass_is_exact(), const_oop(), dual_offset(), dual_instance_id(), dual_speculative());
+  return new TypeInstPtr(dual_ptr(), klass(), klass_is_exact(), const_oop(), dual_offset(), dual_instance_id(), dual_speculative(), dual_inline_depth());
 }
 
 //------------------------------eq---------------------------------------------
@@ -3563,6 +3648,7 @@
   else if (_instance_id != InstanceBot)
     st->print(",iid=%d",_instance_id);
 
+  dump_inline_depth(st);
   dump_speculative(st);
 }
 #endif
@@ -3576,7 +3662,15 @@
   if (_speculative == NULL) {
     return this;
   }
-  return make(_ptr, klass(), klass_is_exact(), const_oop(), _offset, _instance_id, NULL);
+  assert(_inline_depth == InlineDepthTop || _inline_depth == InlineDepthBottom, "non speculative type shouldn't have inline depth");
+  return make(_ptr, klass(), klass_is_exact(), const_oop(), _offset, _instance_id, NULL, _inline_depth);
+}
+
+const TypeOopPtr *TypeInstPtr::with_inline_depth(int depth) const {
+  if (!UseInlineDepthForSpeculativeTypes) {
+    return this;
+  }
+  return make(_ptr, klass(), klass_is_exact(), const_oop(), _offset, _instance_id, _speculative, depth);
 }
 
 //=============================================================================
@@ -3593,30 +3687,30 @@
 const TypeAryPtr *TypeAryPtr::DOUBLES;
 
 //------------------------------make-------------------------------------------
-const TypeAryPtr *TypeAryPtr::make(PTR ptr, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id, const TypeOopPtr* speculative) {
+const TypeAryPtr *TypeAryPtr::make(PTR ptr, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id, const TypeOopPtr* speculative, int inline_depth) {
   assert(!(k == NULL && ary->_elem->isa_int()),
          "integral arrays must be pre-equipped with a class");
   if (!xk)  xk = ary->ary_must_be_exact();
   assert(instance_id <= 0 || xk || !UseExactTypes, "instances are always exactly typed");
   if (!UseExactTypes)  xk = (ptr == Constant);
-  return (TypeAryPtr*)(new TypeAryPtr(ptr, NULL, ary, k, xk, offset, instance_id, false, speculative))->hashcons();
+  return (TypeAryPtr*)(new TypeAryPtr(ptr, NULL, ary, k, xk, offset, instance_id, false, speculative, inline_depth))->hashcons();
 }
 
 //------------------------------make-------------------------------------------
-const TypeAryPtr *TypeAryPtr::make(PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id, const TypeOopPtr* speculative, bool is_autobox_cache) {
+const TypeAryPtr *TypeAryPtr::make(PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id, const TypeOopPtr* speculative, int inline_depth, bool is_autobox_cache) {
   assert(!(k == NULL && ary->_elem->isa_int()),
          "integral arrays must be pre-equipped with a class");
   assert( (ptr==Constant && o) || (ptr!=Constant && !o), "" );
   if (!xk)  xk = (o != NULL) || ary->ary_must_be_exact();
   assert(instance_id <= 0 || xk || !UseExactTypes, "instances are always exactly typed");
   if (!UseExactTypes)  xk = (ptr == Constant);
-  return (TypeAryPtr*)(new TypeAryPtr(ptr, o, ary, k, xk, offset, instance_id, is_autobox_cache, speculative))->hashcons();
+  return (TypeAryPtr*)(new TypeAryPtr(ptr, o, ary, k, xk, offset, instance_id, is_autobox_cache, speculative, inline_depth))->hashcons();
 }
 
 //------------------------------cast_to_ptr_type-------------------------------
 const Type *TypeAryPtr::cast_to_ptr_type(PTR ptr) const {
   if( ptr == _ptr ) return this;
-  return make(ptr, const_oop(), _ary, klass(), klass_is_exact(), _offset, _instance_id, _speculative);
+  return make(ptr, const_oop(), _ary, klass(), klass_is_exact(), _offset, _instance_id, _speculative, _inline_depth);
 }
 
 
@@ -3625,13 +3719,13 @@
   if( klass_is_exact == _klass_is_exact ) return this;
   if (!UseExactTypes)  return this;
   if (_ary->ary_must_be_exact())  return this;  // cannot clear xk
-  return make(ptr(), const_oop(), _ary, klass(), klass_is_exact, _offset, _instance_id, _speculative);
+  return make(ptr(), const_oop(), _ary, klass(), klass_is_exact, _offset, _instance_id, _speculative, _inline_depth);
 }
 
 //-----------------------------cast_to_instance_id----------------------------
 const TypeOopPtr *TypeAryPtr::cast_to_instance_id(int instance_id) const {
   if( instance_id == _instance_id ) return this;
-  return make(_ptr, const_oop(), _ary, klass(), _klass_is_exact, _offset, instance_id, _speculative);
+  return make(_ptr, const_oop(), _ary, klass(), _klass_is_exact, _offset, instance_id, _speculative, _inline_depth);
 }
 
 //-----------------------------narrow_size_type-------------------------------
@@ -3694,7 +3788,7 @@
   new_size = narrow_size_type(new_size);
   if (new_size == size())  return this;
   const TypeAry* new_ary = TypeAry::make(elem(), new_size, is_stable());
-  return make(ptr(), const_oop(), new_ary, klass(), klass_is_exact(), _offset, _instance_id, _speculative);
+  return make(ptr(), const_oop(), new_ary, klass(), klass_is_exact(), _offset, _instance_id, _speculative, _inline_depth);
 }
 
 
@@ -3773,19 +3867,20 @@
     const TypeOopPtr *tp = t->is_oopptr();
     int offset = meet_offset(tp->offset());
     PTR ptr = meet_ptr(tp->ptr());
+    int depth = meet_inline_depth(tp->inline_depth());
     switch (tp->ptr()) {
     case TopPTR:
     case AnyNull: {
       int instance_id = meet_instance_id(InstanceTop);
       const TypeOopPtr* speculative = xmeet_speculative(tp);
       return make(ptr, (ptr == Constant ? const_oop() : NULL),
-                  _ary, _klass, _klass_is_exact, offset, instance_id, speculative);
+                  _ary, _klass, _klass_is_exact, offset, instance_id, speculative, depth);
     }
     case BotPTR:
     case NotNull: {
       int instance_id = meet_instance_id(tp->instance_id());
       const TypeOopPtr* speculative = xmeet_speculative(tp);
-      return TypeOopPtr::make(ptr, offset, instance_id, speculative);
+      return TypeOopPtr::make(ptr, offset, instance_id, speculative, depth);
     }
     default: ShouldNotReachHere();
     }
@@ -3809,7 +3904,7 @@
       int instance_id = meet_instance_id(InstanceTop);
       const TypeOopPtr* speculative = _speculative;
       return make(ptr, (ptr == Constant ? const_oop() : NULL),
-                  _ary, _klass, _klass_is_exact, offset, instance_id, speculative);
+                  _ary, _klass, _klass_is_exact, offset, instance_id, speculative, _inline_depth);
     }
     default: ShouldNotReachHere();
     }
@@ -3826,6 +3921,7 @@
     PTR ptr = meet_ptr(tap->ptr());
     int instance_id = meet_instance_id(tap->instance_id());
     const TypeOopPtr* speculative = xmeet_speculative(tap);
+    int depth = meet_inline_depth(tap->inline_depth());
     ciKlass* lazy_klass = NULL;
     if (tary->_elem->isa_int()) {
       // Integral array element types have irrelevant lattice relations.
@@ -3866,7 +3962,7 @@
       } else {
         xk = (tap->_klass_is_exact | this->_klass_is_exact);
       }
-      return make(ptr, const_oop(), tary, lazy_klass, xk, off, instance_id, speculative);
+      return make(ptr, const_oop(), tary, lazy_klass, xk, off, instance_id, speculative, depth);
     case Constant: {
       ciObject* o = const_oop();
       if( _ptr == Constant ) {
@@ -3885,7 +3981,7 @@
         // Only precise for identical arrays
         xk = this->_klass_is_exact && (klass() == tap->klass());
       }
-      return TypeAryPtr::make(ptr, o, tary, lazy_klass, xk, off, instance_id, speculative);
+      return TypeAryPtr::make(ptr, o, tary, lazy_klass, xk, off, instance_id, speculative, depth);
     }
     case NotNull:
     case BotPTR:
@@ -3894,7 +3990,7 @@
             xk = tap->_klass_is_exact;
       else  xk = (tap->_klass_is_exact & this->_klass_is_exact) &&
               (klass() == tap->klass()); // Only precise for identical arrays
-      return TypeAryPtr::make(ptr, NULL, tary, lazy_klass, xk, off, instance_id, speculative);
+      return TypeAryPtr::make(ptr, NULL, tary, lazy_klass, xk, off, instance_id, speculative, depth);
     default: ShouldNotReachHere();
     }
   }
@@ -3906,6 +4002,7 @@
     PTR ptr = meet_ptr(tp->ptr());
     int instance_id = meet_instance_id(tp->instance_id());
     const TypeOopPtr* speculative = xmeet_speculative(tp);
+    int depth = meet_inline_depth(tp->inline_depth());
     switch (ptr) {
     case TopPTR:
     case AnyNull:                // Fall 'down' to dual of object klass
@@ -3913,12 +4010,12 @@
       // below the centerline when the superclass is exact. We need to
       // do the same here.
       if (tp->klass()->equals(ciEnv::current()->Object_klass()) && !tp->klass_is_exact()) {
-        return TypeAryPtr::make(ptr, _ary, _klass, _klass_is_exact, offset, instance_id, speculative);
+        return TypeAryPtr::make(ptr, _ary, _klass, _klass_is_exact, offset, instance_id, speculative, depth);
       } else {
         // cannot subclass, so the meet has to fall badly below the centerline
         ptr = NotNull;
         instance_id = InstanceBot;
-        return TypeInstPtr::make(ptr, ciEnv::current()->Object_klass(), false, NULL,offset, instance_id, speculative);
+        return TypeInstPtr::make(ptr, ciEnv::current()->Object_klass(), false, NULL,offset, instance_id, speculative, depth);
       }
     case Constant:
     case NotNull:
@@ -3933,7 +4030,7 @@
         if (tp->klass()->equals(ciEnv::current()->Object_klass()) && !tp->klass_is_exact()) {
           // that is, my array type is a subtype of 'tp' klass
           return make(ptr, (ptr == Constant ? const_oop() : NULL),
-                      _ary, _klass, _klass_is_exact, offset, instance_id, speculative);
+                      _ary, _klass, _klass_is_exact, offset, instance_id, speculative, depth);
         }
       }
       // The other case cannot happen, since t cannot be a subtype of an array.
@@ -3941,7 +4038,7 @@
       if( ptr == Constant )
          ptr = NotNull;
       instance_id = InstanceBot;
-      return TypeInstPtr::make(ptr, ciEnv::current()->Object_klass(), false, NULL,offset, instance_id, speculative);
+      return TypeInstPtr::make(ptr, ciEnv::current()->Object_klass(), false, NULL,offset, instance_id, speculative, depth);
     default: typerr(t);
     }
   }
@@ -3952,7 +4049,7 @@
 //------------------------------xdual------------------------------------------
 // Dual: compute field-by-field dual
 const Type *TypeAryPtr::xdual() const {
-  return new TypeAryPtr(dual_ptr(), _const_oop, _ary->dual()->is_ary(),_klass, _klass_is_exact, dual_offset(), dual_instance_id(), is_autobox_cache(), dual_speculative());
+  return new TypeAryPtr(dual_ptr(), _const_oop, _ary->dual()->is_ary(),_klass, _klass_is_exact, dual_offset(), dual_instance_id(), is_autobox_cache(), dual_speculative(), dual_inline_depth());
 }
 
 //----------------------interface_vs_oop---------------------------------------
@@ -4005,6 +4102,7 @@
   else if (_instance_id != InstanceBot)
     st->print(",iid=%d",_instance_id);
 
+  dump_inline_depth(st);
   dump_speculative(st);
 }
 #endif
@@ -4016,11 +4114,22 @@
 
 //------------------------------add_offset-------------------------------------
 const TypePtr *TypeAryPtr::add_offset(intptr_t offset) const {
-  return make(_ptr, _const_oop, _ary, _klass, _klass_is_exact, xadd_offset(offset), _instance_id, add_offset_speculative(offset));
+  return make(_ptr, _const_oop, _ary, _klass, _klass_is_exact, xadd_offset(offset), _instance_id, add_offset_speculative(offset), _inline_depth);
 }
 
 const Type *TypeAryPtr::remove_speculative() const {
-  return make(_ptr, _const_oop, _ary->remove_speculative()->is_ary(), _klass, _klass_is_exact, _offset, _instance_id, NULL);
+  if (_speculative == NULL) {
+    return this;
+  }
+  assert(_inline_depth == InlineDepthTop || _inline_depth == InlineDepthBottom, "non speculative type shouldn't have inline depth");
+  return make(_ptr, _const_oop, _ary->remove_speculative()->is_ary(), _klass, _klass_is_exact, _offset, _instance_id, NULL, _inline_depth);
+}
+
+const TypeOopPtr *TypeAryPtr::with_inline_depth(int depth) const {
+  if (!UseInlineDepthForSpeculativeTypes) {
+    return this;
+  }
+  return make(_ptr, _const_oop, _ary->remove_speculative()->is_ary(), _klass, _klass_is_exact, _offset, _instance_id, _speculative, depth);
 }
 
 //=============================================================================
--- a/src/share/vm/opto/type.hpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/opto/type.hpp	Fri Feb 28 09:30:20 2014 -0800
@@ -415,10 +415,15 @@
                                         bool is_autobox_cache = false);
 
   // Speculative type. See TypeInstPtr
+  virtual const TypeOopPtr* speculative() const { return NULL; }
   virtual ciKlass* speculative_type() const { return NULL; }
   const Type* maybe_remove_speculative(bool include_speculative) const;
   virtual const Type* remove_speculative() const { return this; }
 
+  virtual bool would_improve_type(ciKlass* exact_kls, int inline_depth) const {
+    return exact_kls != NULL;
+  }
+
 private:
   // support arrays
   static const BasicType _basic_type[];
@@ -489,6 +494,7 @@
   virtual const Type *filter_helper(const Type *kills, bool include_speculative) const;
 
 public:
+  typedef jint NativeType;
   virtual bool eq( const Type *t ) const;
   virtual int  hash() const;             // Type specific hashing
   virtual bool singleton(void) const;    // TRUE if type is a singleton
@@ -531,6 +537,9 @@
   static const TypeInt *POS1;
   static const TypeInt *INT;
   static const TypeInt *SYMINT; // symmetric range [-max_jint..max_jint]
+  static const TypeInt *TYPE_DOMAIN; // alias for TypeInt::INT
+
+  static const TypeInt *as_self(const Type *t) { return t->is_int(); }
 #ifndef PRODUCT
   virtual void dump2( Dict &d, uint depth, outputStream *st ) const;
 #endif
@@ -546,6 +555,7 @@
   // Do not kill _widen bits.
   virtual const Type *filter_helper(const Type *kills, bool include_speculative) const;
 public:
+  typedef jlong NativeType;
   virtual bool eq( const Type *t ) const;
   virtual int  hash() const;             // Type specific hashing
   virtual bool singleton(void) const;    // TRUE if type is a singleton
@@ -568,6 +578,7 @@
 
   virtual bool        is_finite() const;  // Has a finite value
 
+
   virtual const Type *xmeet( const Type *t ) const;
   virtual const Type *xdual() const;    // Compute dual right now.
   virtual const Type *widen( const Type *t, const Type* limit_type ) const;
@@ -580,6 +591,11 @@
   static const TypeLong *LONG;
   static const TypeLong *INT;    // 32-bit subrange [min_jint..max_jint]
   static const TypeLong *UINT;   // 32-bit unsigned [0..max_juint]
+  static const TypeLong *TYPE_DOMAIN; // alias for TypeLong::LONG
+
+  // static convenience methods.
+  static const TypeLong *as_self(const Type *t) { return t->is_long(); }
+
 #ifndef PRODUCT
   virtual void dump2( Dict &d, uint, outputStream *st  ) const;// Specialized per-Type dumping
 #endif
@@ -834,7 +850,7 @@
 // Some kind of oop (Java pointer), either klass or instance or array.
 class TypeOopPtr : public TypePtr {
 protected:
-  TypeOopPtr(TYPES t, PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id, const TypeOopPtr* speculative);
+  TypeOopPtr(TYPES t, PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id, const TypeOopPtr* speculative, int inline_depth);
 public:
   virtual bool eq( const Type *t ) const;
   virtual int  hash() const;             // Type specific hashing
@@ -845,6 +861,10 @@
   };
 protected:
 
+  enum {
+    InlineDepthBottom = INT_MAX,
+    InlineDepthTop = -InlineDepthBottom
+  };
   // Oop is NULL, unless this is a constant oop.
   ciObject*     _const_oop;   // Constant oop
   // If _klass is NULL, then so is _sig.  This is an unloaded klass.
@@ -865,6 +885,11 @@
   // use it, then we have to emit a guard: this part of the type is
   // not something we know but something we speculate about the type.
   const TypeOopPtr*   _speculative;
+  // For speculative types, we record at what inlining depth the
+  // profiling point that provided the data is. We want to favor
+  // profile data coming from outer scopes which are likely better for
+  // the current compilation.
+  int _inline_depth;
 
   static const TypeOopPtr* make_from_klass_common(ciKlass* klass, bool klass_change, bool try_for_exact);
 
@@ -880,6 +905,12 @@
 #ifndef PRODUCT
   void dump_speculative(outputStream *st) const;
 #endif
+  // utility methods to work on the inline depth of the type
+  int dual_inline_depth() const;
+  int meet_inline_depth(int depth) const;
+#ifndef PRODUCT
+  void dump_inline_depth(outputStream *st) const;
+#endif
 
   // Do not allow interface-vs.-noninterface joins to collapse to top.
   virtual const Type *filter_helper(const Type *kills, bool include_speculative) const;
@@ -910,7 +941,7 @@
                                               bool not_null_elements = false);
 
   // Make a generic (unclassed) pointer to an oop.
-  static const TypeOopPtr* make(PTR ptr, int offset, int instance_id, const TypeOopPtr* speculative);
+  static const TypeOopPtr* make(PTR ptr, int offset, int instance_id, const TypeOopPtr* speculative = NULL, int inline_depth = InlineDepthBottom);
 
   ciObject* const_oop()    const { return _const_oop; }
   virtual ciKlass* klass() const { return _klass;     }
@@ -924,7 +955,7 @@
   bool is_known_instance()       const { return _instance_id > 0; }
   int  instance_id()             const { return _instance_id; }
   bool is_known_instance_field() const { return is_known_instance() && _offset >= 0; }
-  const TypeOopPtr* speculative() const { return _speculative; }
+  virtual const TypeOopPtr* speculative() const { return _speculative; }
 
   virtual intptr_t get_con() const;
 
@@ -957,18 +988,23 @@
     if (_speculative != NULL) {
       const TypeOopPtr* speculative = _speculative->join(this)->is_oopptr();
       if (speculative->klass_is_exact()) {
-       return speculative->klass();
+        return speculative->klass();
       }
     }
     return NULL;
   }
+  int inline_depth() const {
+    return _inline_depth;
+  }
+  virtual const TypeOopPtr* with_inline_depth(int depth) const;
+  virtual bool would_improve_type(ciKlass* exact_kls, int inline_depth) const;
 };
 
 //------------------------------TypeInstPtr------------------------------------
 // Class of Java object pointers, pointing either to non-array Java instances
 // or to a Klass* (including array klasses).
 class TypeInstPtr : public TypeOopPtr {
-  TypeInstPtr(PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id, const TypeOopPtr* speculative);
+  TypeInstPtr(PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id, const TypeOopPtr* speculative, int inline_depth);
   virtual bool eq( const Type *t ) const;
   virtual int  hash() const;             // Type specific hashing
 
@@ -1004,7 +1040,7 @@
   }
 
   // Make a pointer to an oop.
-  static const TypeInstPtr *make(PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id = InstanceBot, const TypeOopPtr* speculative = NULL);
+  static const TypeInstPtr *make(PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id = InstanceBot, const TypeOopPtr* speculative = NULL, int inline_depth = InlineDepthBottom);
 
   /** Create constant type for a constant boxed value */
   const Type* get_const_boxed_value() const;
@@ -1023,6 +1059,7 @@
   virtual const TypePtr *add_offset( intptr_t offset ) const;
   // Return same type without a speculative part
   virtual const Type* remove_speculative() const;
+  virtual const TypeOopPtr* with_inline_depth(int depth) const;
 
   // the core of the computation of the meet of 2 types
   virtual const Type *xmeet_helper(const Type *t) const;
@@ -1044,8 +1081,8 @@
 // Class of Java array pointers
 class TypeAryPtr : public TypeOopPtr {
   TypeAryPtr( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk,
-              int offset, int instance_id, bool is_autobox_cache, const TypeOopPtr* speculative)
-    : TypeOopPtr(AryPtr,ptr,k,xk,o,offset, instance_id, speculative),
+              int offset, int instance_id, bool is_autobox_cache, const TypeOopPtr* speculative, int inline_depth)
+    : TypeOopPtr(AryPtr,ptr,k,xk,o,offset, instance_id, speculative, inline_depth),
     _ary(ary),
     _is_autobox_cache(is_autobox_cache)
  {
@@ -1083,9 +1120,9 @@
 
   bool is_autobox_cache() const { return _is_autobox_cache; }
 
-  static const TypeAryPtr *make( PTR ptr, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = InstanceBot, const TypeOopPtr* speculative = NULL);
+  static const TypeAryPtr *make( PTR ptr, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = InstanceBot, const TypeOopPtr* speculative = NULL, int inline_depth = InlineDepthBottom);
   // Constant pointer to array
-  static const TypeAryPtr *make( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = InstanceBot, const TypeOopPtr* speculative = NULL, bool is_autobox_cache = false);
+  static const TypeAryPtr *make( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = InstanceBot, const TypeOopPtr* speculative = NULL, int inline_depth = InlineDepthBottom, bool is_autobox_cache= false);
 
   // Return a 'ptr' version of this type
   virtual const Type *cast_to_ptr_type(PTR ptr) const;
@@ -1101,6 +1138,7 @@
   virtual const TypePtr *add_offset( intptr_t offset ) const;
   // Return same type without a speculative part
   virtual const Type* remove_speculative() const;
+  virtual const TypeOopPtr* with_inline_depth(int depth) const;
 
   // the core of the computation of the meet of 2 types
   virtual const Type *xmeet_helper(const Type *t) const;
--- a/src/share/vm/prims/jni.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/prims/jni.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -3876,9 +3876,10 @@
 void TestMetaspaceAux_test();
 void TestMetachunk_test();
 void TestVirtualSpaceNode_test();
+void TestNewSize_test();
+void TestKlass_test();
+#if INCLUDE_ALL_GCS
 void TestOldFreeSpaceCalculation_test();
-void TestNewSize_test();
-#if INCLUDE_ALL_GCS
 void TestG1BiasedArray_test();
 void TestBufferingOopClosure_test();
 #endif
@@ -3899,12 +3900,13 @@
     run_unit_test(QuickSort::test_quick_sort());
     run_unit_test(AltHashing::test_alt_hash());
     run_unit_test(test_loggc_filename());
-    run_unit_test(TestOldFreeSpaceCalculation_test());
     run_unit_test(TestNewSize_test());
+    run_unit_test(TestKlass_test());
 #if INCLUDE_VM_STRUCTS
     run_unit_test(VMStructs::test());
 #endif
 #if INCLUDE_ALL_GCS
+    run_unit_test(TestOldFreeSpaceCalculation_test());
     run_unit_test(TestG1BiasedArray_test());
     run_unit_test(HeapRegionRemSet::test_prt());
     run_unit_test(TestBufferingOopClosure_test());
--- a/src/share/vm/prims/jvm.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/prims/jvm.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -1241,7 +1241,11 @@
   if (HAS_PENDING_EXCEPTION) {
     pending_exception = Handle(THREAD, PENDING_EXCEPTION);
     CLEAR_PENDING_EXCEPTION;
-
+    // JVMTI has already reported the pending exception
+    // JVMTI internal flag reset is needed in order to report PrivilegedActionException
+    if (THREAD->is_Java_thread()) {
+      JvmtiExport::clear_detected_exception((JavaThread*) THREAD);
+    }
     if ( pending_exception->is_a(SystemDictionary::Exception_klass()) &&
         !pending_exception->is_a(SystemDictionary::RuntimeException_klass())) {
       // Throw a java.security.PrivilegedActionException(Exception e) exception
--- a/src/share/vm/prims/jvmtiExport.cpp	Tue Feb 25 23:59:04 2014 -0800
+++ b/src/share/vm/prims/jvmtiExport.cpp	Fri Feb 28 09:30:20 2014 -0800
@@ -2161,6 +2161,15 @@
   }
 }
 
+void JvmtiExport::clear_detected_exception(JavaThread* thread) {
+  assert(JavaThread::current() == thread, "thread is not current");
+
+  JvmtiThreadState* state = thread->jvmti_thread_state();
+  if (state != NULL) {
+    state->clear_exception_detected();
+  }
+}
+