changeset 107:93b6525e3b82

6603919: Stackwalking crash on x86 -server with Sun Studio's collect -j on Summary: Rewrite frame::safe_for_sender and friends to be safe for collector/analyzer Reviewed-by: dcubed, kvn
author sgoldman
date Tue, 08 Apr 2008 12:23:15 -0400
parents c9314fa4f757
children a761c2d3b76a
files src/cpu/sparc/vm/frame_sparc.cpp src/cpu/x86/vm/frame_x86.cpp src/cpu/x86/vm/frame_x86.inline.hpp src/cpu/x86/vm/templateTable_x86_32.cpp src/os_cpu/solaris_sparc/vm/thread_solaris_sparc.cpp src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp src/os_cpu/solaris_x86/vm/thread_solaris_x86.cpp src/share/vm/code/codeCache.hpp src/share/vm/prims/forte.cpp src/share/vm/runtime/fprofiler.cpp src/share/vm/runtime/fprofiler.hpp src/share/vm/runtime/frame.hpp src/share/vm/runtime/vframe.hpp
diffstat 13 files changed, 859 insertions(+), 671 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/sparc/vm/frame_sparc.cpp	Mon Apr 07 15:15:16 2008 -0700
+++ b/src/cpu/sparc/vm/frame_sparc.cpp	Tue Apr 08 12:23:15 2008 -0400
@@ -157,22 +157,158 @@
   check_location_valid();
 }
 
+bool frame::safe_for_sender(JavaThread *thread) {
 
-bool frame::safe_for_sender(JavaThread *thread) {
-  address   sp = (address)_sp;
-  if (sp != NULL &&
-      (sp <= thread->stack_base() && sp >= thread->stack_base() - thread->stack_size())) {
-      // Unfortunately we can only check frame complete for runtime stubs and nmethod
-      // other generic buffer blobs are more problematic so we just assume they are
-      // ok. adapter blobs never have a frame complete and are never ok.
-      if (_cb != NULL && !_cb->is_frame_complete_at(_pc)) {
-        if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) {
-          return false;
-        }
+  address _SP = (address) sp();
+  address _FP = (address) fp();
+  address _UNEXTENDED_SP = (address) unextended_sp();
+  // sp must be within the stack
+  bool sp_safe = (_SP <= thread->stack_base()) &&
+                 (_SP >= thread->stack_base() - thread->stack_size());
+
+  if (!sp_safe) {
+    return false;
+  }
+
+  // unextended sp must be within the stack and above or equal sp
+  bool unextended_sp_safe = (_UNEXTENDED_SP <= thread->stack_base()) &&
+                            (_UNEXTENDED_SP >= _SP);
+
+  if (!unextended_sp_safe) return false;
+
+  // an fp must be within the stack and above (but not equal) sp
+  bool fp_safe = (_FP <= thread->stack_base()) &&
+                 (_FP > _SP);
+
+  // We know sp/unextended_sp are safe only fp is questionable here
+
+  // If the current frame is known to the code cache then we can attempt to
+  // to construct the sender and do some validation of it. This goes a long way
+  // toward eliminating issues when we get in frame construction code
+
+  if (_cb != NULL ) {
+
+    // First check if frame is complete and tester is reliable
+    // Unfortunately we can only check frame complete for runtime stubs and nmethod
+    // other generic buffer blobs are more problematic so we just assume they are
+    // ok. adapter blobs never have a frame complete and are never ok.
+
+    if (!_cb->is_frame_complete_at(_pc)) {
+      if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) {
+        return false;
       }
-      return true;
+    }
+
+    // Entry frame checks
+    if (is_entry_frame()) {
+      // an entry frame must have a valid fp.
+
+      if (!fp_safe) {
+        return false;
+      }
+
+      // Validate the JavaCallWrapper an entry frame must have
+
+      address jcw = (address)entry_frame_call_wrapper();
+
+      bool jcw_safe = (jcw <= thread->stack_base()) && ( jcw > _FP);
+
+      return jcw_safe;
+
+    }
+
+    intptr_t* younger_sp = sp();
+    intptr_t* _SENDER_SP = sender_sp(); // sender is actually just _FP
+    bool adjusted_stack = is_interpreted_frame();
+
+    address   sender_pc = (address)younger_sp[I7->sp_offset_in_saved_window()] + pc_return_offset;
+
+
+    // We must always be able to find a recognizable pc
+    CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc);
+    if (sender_pc == NULL ||  sender_blob == NULL) {
+      return false;
+    }
+
+    // It should be safe to construct the sender though it might not be valid
+
+    frame sender(_SENDER_SP, younger_sp, adjusted_stack);
+
+    // Do we have a valid fp?
+    address sender_fp = (address) sender.fp();
+
+    // an fp must be within the stack and above (but not equal) current frame's _FP
+
+    bool sender_fp_safe = (sender_fp <= thread->stack_base()) &&
+                   (sender_fp > _FP);
+
+    if (!sender_fp_safe) {
+      return false;
+    }
+
+
+    // If the potential sender is the interpreter then we can do some more checking
+    if (Interpreter::contains(sender_pc)) {
+      return sender.is_interpreted_frame_valid(thread);
+    }
+
+    // Could just be some random pointer within the codeBlob
+    if (!sender.cb()->instructions_contains(sender_pc)) return false;
+
+    // We should never be able to see an adapter if the current frame is something from code cache
+
+    if ( sender_blob->is_adapter_blob()) {
+      return false;
+    }
+
+    if( sender.is_entry_frame()) {
+      // Validate the JavaCallWrapper an entry frame must have
+
+      address jcw = (address)sender.entry_frame_call_wrapper();
+
+      bool jcw_safe = (jcw <= thread->stack_base()) && ( jcw > sender_fp);
+
+      return jcw_safe;
+    }
+
+    // If the frame size is 0 something is bad because every nmethod has a non-zero frame size
+    // because you must allocate window space
+
+    if (sender_blob->frame_size() == 0) {
+      assert(!sender_blob->is_nmethod(), "should count return address at least");
+      return false;
+    }
+
+    // The sender should positively be an nmethod or call_stub. On sparc we might in fact see something else.
+    // The cause of this is because at a save instruction the O7 we get is a leftover from an earlier
+    // window use. So if a runtime stub creates two frames (common in fastdebug/jvmg) then we see the
+    // stale pc. So if the sender blob is not something we'd expect we have little choice but to declare
+    // the stack unwalkable. pd_get_top_frame_for_signal_handler tries to recover from this by unwinding
+    // that initial frame and retrying.
+
+    if (!sender_blob->is_nmethod()) {
+      return false;
+    }
+
+    // Could put some more validation for the potential non-interpreted sender
+    // frame we'd create by calling sender if I could think of any. Wait for next crash in forte...
+
+    // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb
+
+    // We've validated the potential sender that would be created
+
+    return true;
+
   }
-  return false;
+
+  // Must be native-compiled frame. Since sender will try and use fp to find
+  // linkages it must be safe
+
+  if (!fp_safe) return false;
+
+  // could try and do some more potential verification of native frame if we could think of some...
+
+  return true;
 }
 
 // constructors
@@ -450,7 +586,7 @@
 }
 
 
-bool frame::is_interpreted_frame_valid() const {
+bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
 #ifdef CC_INTERP
   // Is there anything to do?
 #else
@@ -462,6 +598,7 @@
   if (sp() == 0 || (intptr_t(sp()) & (2*wordSize-1)) != 0) {
     return false;
   }
+
   const intptr_t interpreter_frame_initial_sp_offset = interpreter_frame_vm_local_words;
   if (fp() + interpreter_frame_initial_sp_offset < sp()) {
     return false;
@@ -471,9 +608,43 @@
   if (fp() <= sp()) {        // this attempts to deal with unsigned comparison above
     return false;
   }
-  if (fp() - sp() > 4096) {  // stack frames shouldn't be large.
+  // do some validation of frame elements
+
+  // first the method
+
+  methodOop m = *interpreter_frame_method_addr();
+
+  // validate the method we'd find in this potential sender
+  if (!Universe::heap()->is_valid_method(m)) return false;
+
+  // stack frames shouldn't be much larger than max_stack elements
+
+  if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) {
     return false;
   }
+
+  // validate bci/bcx
+
+  intptr_t  bcx    = interpreter_frame_bcx();
+  if (m->validate_bci_from_bcx(bcx) < 0) {
+    return false;
+  }
+
+  // validate constantPoolCacheOop
+
+  constantPoolCacheOop cp = *interpreter_frame_cache_addr();
+
+  if (cp == NULL ||
+      !Space::is_aligned(cp) ||
+      !Universe::heap()->is_permanent((void*)cp)) return false;
+
+  // validate locals
+
+  address locals =  (address) *interpreter_frame_locals_addr();
+
+  if (locals > thread->stack_base() || locals < (address) fp()) return false;
+
+  // We'd have to be pretty unlucky to be mislead at this point
 #endif /* CC_INTERP */
   return true;
 }
--- a/src/cpu/x86/vm/frame_x86.cpp	Mon Apr 07 15:15:16 2008 -0700
+++ b/src/cpu/x86/vm/frame_x86.cpp	Tue Apr 08 12:23:15 2008 -0400
@@ -37,39 +37,181 @@
   address   sp = (address)_sp;
   address   fp = (address)_fp;
   address   unextended_sp = (address)_unextended_sp;
-  bool sp_safe = (sp != NULL &&
-                 (sp <= thread->stack_base()) &&
-                 (sp >= thread->stack_base() - thread->stack_size()));
-  bool unextended_sp_safe = (unextended_sp != NULL &&
-                 (unextended_sp <= thread->stack_base()) &&
-                 (unextended_sp >= thread->stack_base() - thread->stack_size()));
-  bool fp_safe = (fp != NULL &&
-                 (fp <= thread->stack_base()) &&
-                 (fp >= thread->stack_base() - thread->stack_size()));
-  if (sp_safe && unextended_sp_safe && fp_safe) {
+  // sp must be within the stack
+  bool sp_safe = (sp <= thread->stack_base()) &&
+                 (sp >= thread->stack_base() - thread->stack_size());
+
+  if (!sp_safe) {
+    return false;
+  }
+
+  // unextended sp must be within the stack and above or equal sp
+  bool unextended_sp_safe = (unextended_sp <= thread->stack_base()) &&
+                            (unextended_sp >= sp);
+
+  if (!unextended_sp_safe) {
+    return false;
+  }
+
+  // an fp must be within the stack and above (but not equal) sp
+  bool fp_safe = (fp <= thread->stack_base()) && (fp > sp);
+
+  // We know sp/unextended_sp are safe only fp is questionable here
+
+  // If the current frame is known to the code cache then we can attempt to
+  // to construct the sender and do some validation of it. This goes a long way
+  // toward eliminating issues when we get in frame construction code
+
+  if (_cb != NULL ) {
+
+    // First check if frame is complete and tester is reliable
     // Unfortunately we can only check frame complete for runtime stubs and nmethod
     // other generic buffer blobs are more problematic so we just assume they are
     // ok. adapter blobs never have a frame complete and are never ok.
-    if (_cb != NULL && !_cb->is_frame_complete_at(_pc)) {
+
+    if (!_cb->is_frame_complete_at(_pc)) {
       if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) {
         return false;
       }
     }
+    // Entry frame checks
+    if (is_entry_frame()) {
+      // an entry frame must have a valid fp.
+
+      if (!fp_safe) return false;
+
+      // Validate the JavaCallWrapper an entry frame must have
+
+      address jcw = (address)entry_frame_call_wrapper();
+
+      bool jcw_safe = (jcw <= thread->stack_base()) && ( jcw > fp);
+
+      return jcw_safe;
+
+    }
+
+    intptr_t* sender_sp = NULL;
+    address   sender_pc = NULL;
+
+    if (is_interpreted_frame()) {
+      // fp must be safe
+      if (!fp_safe) {
+        return false;
+      }
+
+      sender_pc = (address) this->fp()[return_addr_offset];
+      sender_sp = (intptr_t*) addr_at(sender_sp_offset);
+
+    } else {
+      // must be some sort of compiled/runtime frame
+      // fp does not have to be safe (although it could be check for c1?)
+
+      sender_sp = _unextended_sp + _cb->frame_size();
+      // On Intel the return_address is always the word on the stack
+      sender_pc = (address) *(sender_sp-1);
+    }
+
+    // We must always be able to find a recognizable pc
+    CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc);
+    if (sender_pc == NULL ||  sender_blob == NULL) {
+      return false;
+    }
+
+
+    // If the potential sender is the interpreter then we can do some more checking
+    if (Interpreter::contains(sender_pc)) {
+
+      // ebp is always saved in a recognizable place in any code we generate. However
+      // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved ebp
+      // is really a frame pointer.
+
+      intptr_t *saved_fp = (intptr_t*)*(sender_sp - frame::sender_sp_offset);
+      bool saved_fp_safe = ((address)saved_fp <= thread->stack_base()) && (saved_fp > sender_sp);
+
+      if (!saved_fp_safe) {
+        return false;
+      }
+
+      // construct the potential sender
+
+      frame sender(sender_sp, saved_fp, sender_pc);
+
+      return sender.is_interpreted_frame_valid(thread);
+
+    }
+
+    // Could just be some random pointer within the codeBlob
+
+    if (!sender_blob->instructions_contains(sender_pc)) return false;
+
+    // We should never be able to see an adapter if the current frame is something from code cache
+
+    if ( sender_blob->is_adapter_blob()) {
+      return false;
+    }
+
+    // Could be the call_stub
+
+    if (StubRoutines::returns_to_call_stub(sender_pc)) {
+      intptr_t *saved_fp = (intptr_t*)*(sender_sp - frame::sender_sp_offset);
+      bool saved_fp_safe = ((address)saved_fp <= thread->stack_base()) && (saved_fp > sender_sp);
+
+      if (!saved_fp_safe) {
+        return false;
+      }
+
+      // construct the potential sender
+
+      frame sender(sender_sp, saved_fp, sender_pc);
+
+      // Validate the JavaCallWrapper an entry frame must have
+      address jcw = (address)sender.entry_frame_call_wrapper();
+
+      bool jcw_safe = (jcw <= thread->stack_base()) && ( jcw > (address)sender.fp());
+
+      return jcw_safe;
+    }
+
+    // If the frame size is 0 something is bad because every nmethod has a non-zero frame size
+    // because the return address counts against the callee's frame.
+
+    if (sender_blob->frame_size() == 0) {
+      assert(!sender_blob->is_nmethod(), "should count return address at least");
+      return false;
+    }
+
+    // We should never be able to see anything here except an nmethod. If something in the
+    // code cache (current frame) is called by an entity within the code cache that entity
+    // should not be anything but the call stub (already covered), the interpreter (already covered)
+    // or an nmethod.
+
+    assert(sender_blob->is_nmethod(), "Impossible call chain");
+
+    // Could put some more validation for the potential non-interpreted sender
+    // frame we'd create by calling sender if I could think of any. Wait for next crash in forte...
+
+    // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb
+
+    // We've validated the potential sender that would be created
     return true;
   }
-  // Note: fp == NULL is not really a prerequisite for this to be safe to
-  // walk for c2. However we've modified the code such that if we get
-  // a failure with fp != NULL that we then try with FP == NULL.
-  // This is basically to mimic what a last_frame would look like if
-  // c2 had generated it.
-  if (sp_safe && unextended_sp_safe && fp == NULL) {
-    // frame must be complete if fp == NULL as fp == NULL is only sensible
-    // if we are looking at a nmethod and frame complete assures us of that.
-    if (_cb != NULL && _cb->is_frame_complete_at(_pc) && _cb->is_compiled_by_c2()) {
-        return true;
-    }
+
+  // Must be native-compiled frame. Since sender will try and use fp to find
+  // linkages it must be safe
+
+  if (!fp_safe) {
+    return false;
   }
-  return false;
+
+  // Will the pc we fetch be non-zero (which we'll find at the oldest frame)
+
+  if ( (address) this->fp()[return_addr_offset] == NULL) return false;
+
+
+  // could try and do some more potential verification of native frame if we could think of some...
+
+  return true;
+
 }
 
 
@@ -292,7 +434,7 @@
   // nothing done here now
 }
 
-bool frame::is_interpreted_frame_valid() const {
+bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
 // QQQ
 #ifdef CC_INTERP
 #else
@@ -312,9 +454,45 @@
   if (fp() <= sp()) {        // this attempts to deal with unsigned comparison above
     return false;
   }
-  if (fp() - sp() > 4096) {  // stack frames shouldn't be large.
+
+  // do some validation of frame elements
+
+  // first the method
+
+  methodOop m = *interpreter_frame_method_addr();
+
+  // validate the method we'd find in this potential sender
+  if (!Universe::heap()->is_valid_method(m)) return false;
+
+  // stack frames shouldn't be much larger than max_stack elements
+
+  if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) {
     return false;
   }
+
+  // validate bci/bcx
+
+  intptr_t  bcx    = interpreter_frame_bcx();
+  if (m->validate_bci_from_bcx(bcx) < 0) {
+    return false;
+  }
+
+  // validate constantPoolCacheOop
+
+  constantPoolCacheOop cp = *interpreter_frame_cache_addr();
+
+  if (cp == NULL ||
+      !Space::is_aligned(cp) ||
+      !Universe::heap()->is_permanent((void*)cp)) return false;
+
+  // validate locals
+
+  address locals =  (address) *interpreter_frame_locals_addr();
+
+  if (locals > thread->stack_base() || locals < (address) fp()) return false;
+
+  // We'd have to be pretty unlucky to be mislead at this point
+
 #endif // CC_INTERP
   return true;
 }
--- a/src/cpu/x86/vm/frame_x86.inline.hpp	Mon Apr 07 15:15:16 2008 -0700
+++ b/src/cpu/x86/vm/frame_x86.inline.hpp	Tue Apr 08 12:23:15 2008 -0400
@@ -72,15 +72,20 @@
   _unextended_sp = sp;
   _fp = fp;
   _pc = (address)(sp[-1]);
-  assert(_pc != NULL, "no pc?");
+
+  // Here's a sticky one. This constructor can be called via AsyncGetCallTrace
+  // when last_Java_sp is non-null but the pc fetched is junk. If we are truly
+  // unlucky the junk value could be to a zombied method and we'll die on the
+  // find_blob call. This is also why we can have no asserts on the validity
+  // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler
+  // -> pd_last_frame should use a specialized version of pd_last_frame which could
+  // call a specilaized frame constructor instead of this one.
+  // Then we could use the assert below. However this assert is of somewhat dubious
+  // value.
+  // assert(_pc != NULL, "no pc?");
+
   _cb = CodeCache::find_blob(_pc);
-  // In case of native stubs, the pc retreived here might be
-  // wrong. (the _last_native_pc will have the right value)
-  // So do not put add any asserts on the _pc here.
 
-  // QQQ The above comment is wrong and has been wrong for years. This constructor
-  // should (and MUST) not be called in that situation. In the native situation
-  // the pc should be supplied to the constructor.
   _deopt_state = not_deoptimized;
   if (_cb != NULL && _cb->is_nmethod() && ((nmethod*)_cb)->is_deopt_pc(_pc)) {
     _pc = (((nmethod*)_cb)->get_original_pc(this));
--- a/src/cpu/x86/vm/templateTable_x86_32.cpp	Mon Apr 07 15:15:16 2008 -0700
+++ b/src/cpu/x86/vm/templateTable_x86_32.cpp	Tue Apr 08 12:23:15 2008 -0400
@@ -1632,7 +1632,7 @@
       // We need to prepare to execute the OSR method. First we must
       // migrate the locals and monitors off of the stack.
 
-      __ movl(rsi, rax);                             // save the nmethod
+      __ movl(rbx, rax);                             // save the nmethod
 
       const Register thread = rcx;
       __ get_thread(thread);
@@ -1688,7 +1688,7 @@
       __ pushl(rdi);
 
       // and begin the OSR nmethod
-      __ jmp(Address(rsi, nmethod::osr_entry_point_offset()));
+      __ jmp(Address(rbx, nmethod::osr_entry_point_offset()));
     }
   }
 }
--- a/src/os_cpu/solaris_sparc/vm/thread_solaris_sparc.cpp	Mon Apr 07 15:15:16 2008 -0700
+++ b/src/os_cpu/solaris_sparc/vm/thread_solaris_sparc.cpp	Tue Apr 08 12:23:15 2008 -0400
@@ -50,17 +50,6 @@
   // even if isInJava == true. It should be more reliable than
   // ucontext info.
   if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) {
-#if 0
-    // This sanity check may not be needed with the new frame
-    // walking code. Remove it for now.
-    if (!jt->frame_anchor()->post_Java_state_is_pc()
-    && frame::next_younger_sp_or_null(last_Java_sp(),
-    jt->frame_anchor()->post_Java_sp()) == NULL) {
-      // the anchor contains an SP, but the frame is not walkable
-      // because post_Java_sp isn't valid relative to last_Java_sp
-      return false;
-    }
-#endif
     *fr_addr = jt->pd_last_frame();
     return true;
   }
@@ -77,23 +66,59 @@
     return false;
   }
 
+  frame ret_frame(ret_sp, frame::unpatchable, addr.pc());
+
   // we were running Java code when SIGPROF came in
   if (isInJava) {
+
+
+    // If the frame we got is safe then it is most certainly valid
+    if (ret_frame.safe_for_sender(jt)) {
+      *fr_addr = ret_frame;
+      return true;
+    }
+
+    // If it isn't safe then we can try several things to try and get
+    // a good starting point.
+    //
+    // On sparc the frames are almost certainly walkable in the sense
+    // of sp/fp linkages. However because of recycling of windows if
+    // a piece of code does multiple save's where the initial save creates
+    // a real frame with a return pc and the succeeding save's are used to
+    // simply get free registers and have no real pc then the pc linkage on these
+    // "inner" temporary frames will be bogus.
+    // Since there is in general only a nesting level like
+    // this one deep in general we'll try and unwind such an "inner" frame
+    // here ourselves and see if it makes sense
+
+    frame unwind_frame(ret_frame.fp(), frame::unpatchable, addr.pc());
+
+    if (unwind_frame.safe_for_sender(jt)) {
+      *fr_addr = unwind_frame;
+      return true;
+    }
+
+    // Well that didn't work. Most likely we're toast on this tick
+    // The previous code would try this. I think it is dubious in light
+    // of changes to safe_for_sender and the unwind trick above but
+    // if it gets us a safe frame who wants to argue.
+
     // If we have a last_Java_sp, then the SIGPROF signal caught us
     // right when we were transitioning from _thread_in_Java to a new
     // JavaThreadState. We use last_Java_sp instead of the sp from
     // the ucontext since it should be more reliable.
+
     if (jt->has_last_Java_frame()) {
       ret_sp = jt->last_Java_sp();
+      frame ret_frame2(ret_sp, frame::unpatchable, addr.pc());
+      if (ret_frame2.safe_for_sender(jt)) {
+        *fr_addr = ret_frame2;
+        return true;
+      }
     }
-    // Implied else: we don't have a last_Java_sp so we use what we
-    // got from the ucontext.
 
-    frame ret_frame(ret_sp, frame::unpatchable, addr.pc());
-    if (!ret_frame.safe_for_sender(jt)) {
-      // nothing else to try if the frame isn't good
-      return false;
-    }
+    // This is the best we can do. We will only be able to decode the top frame
+
     *fr_addr = ret_frame;
     return true;
   }
@@ -105,17 +130,13 @@
   if (jt->has_last_Java_frame()) {
     assert(!jt->frame_anchor()->walkable(), "case covered above");
 
-    if (jt->thread_state() == _thread_in_native) {
-      frame ret_frame(jt->last_Java_sp(), frame::unpatchable, addr.pc());
-      if (!ret_frame.safe_for_sender(jt)) {
-        // nothing else to try if the frame isn't good
-        return false;
-      }
-      *fr_addr = ret_frame;
-      return true;
-    }
+    frame ret_frame(jt->last_Java_sp(), frame::unpatchable, addr.pc());
+    *fr_addr = ret_frame;
+    return true;
   }
 
-  // nothing else to try
-  return false;
+  // nothing else to try but what we found initially
+
+  *fr_addr = ret_frame;
+  return true;
 }
--- a/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp	Mon Apr 07 15:15:16 2008 -0700
+++ b/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp	Tue Apr 08 12:23:15 2008 -0400
@@ -212,7 +212,8 @@
                 CAST_FROM_FN_PTR(address, os::current_frame));
   if (os::is_first_C_frame(&myframe)) {
     // stack is not walkable
-    return frame(NULL, NULL, NULL);
+    frame ret; // This will be a null useless frame
+    return ret;
   } else {
     return os::get_sender_for_C_frame(&myframe);
   }
--- a/src/os_cpu/solaris_x86/vm/thread_solaris_x86.cpp	Mon Apr 07 15:15:16 2008 -0700
+++ b/src/os_cpu/solaris_x86/vm/thread_solaris_x86.cpp	Tue Apr 08 12:23:15 2008 -0400
@@ -32,49 +32,53 @@
 
   assert(Thread::current() == this, "caller must be current thread");
   assert(this->is_Java_thread(), "must be JavaThread");
-
   JavaThread* jt = (JavaThread *)this;
 
-  // If we have a last_Java_frame, then we should use it even if
-  // isInJava == true.  It should be more reliable than ucontext info.
+  // last_Java_frame is always walkable and safe use it if we have it
+
   if (jt->has_last_Java_frame()) {
     *fr_addr = jt->pd_last_frame();
     return true;
   }
 
-  // At this point, we don't have a last_Java_frame, so
-  // we try to glean some information out of the ucontext
-  // if we were running Java code when SIGPROF came in.
-  if (isInJava) {
-    ucontext_t* uc = (ucontext_t*) ucontext;
+  ucontext_t* uc = (ucontext_t*) ucontext;
 
-    intptr_t* ret_fp;
-    intptr_t* ret_sp;
-    ExtendedPC addr = os::Solaris::fetch_frame_from_ucontext(this, uc,
-      &ret_sp, &ret_fp);
-    if (addr.pc() == NULL || ret_sp == NULL ) {
-      // ucontext wasn't useful
-      return false;
-    }
+  // We always want to use the initial frame we create from the ucontext as
+  // it certainly signals where we currently are. However that frame may not
+  // be safe for calling sender. In that case if we have a last_Java_frame
+  // then the forte walker will switch to that frame as the virtual sender
+  // for the frame we create here which is not sender safe.
 
-    frame ret_frame(ret_sp, ret_fp, addr.pc());
-    if (!ret_frame.safe_for_sender(jt)) {
-#ifdef COMPILER2
-      frame ret_frame2(ret_sp, NULL, addr.pc());
-      if (!ret_frame2.safe_for_sender(jt)) {
-        // nothing else to try if the frame isn't good
-        return false;
-      }
-      ret_frame = ret_frame2;
-#else
-      // nothing else to try if the frame isn't good
-      return false;
-#endif /* COMPILER2 */
-    }
-    *fr_addr = ret_frame;
-    return true;
+  intptr_t* ret_fp;
+  intptr_t* ret_sp;
+  ExtendedPC addr = os::Solaris::fetch_frame_from_ucontext(this, uc, &ret_sp, &ret_fp);
+
+  // Something would really have to be screwed up to get a NULL pc
+
+  if (addr.pc() == NULL ) {
+    assert(false, "NULL pc from signal handler!");
+    return false;
+
   }
 
-  // nothing else to try
-  return false;
+  // If sp and fp are nonsense just leave them out
+
+  if ((address)ret_sp >= jt->stack_base() ||
+      (address)ret_sp < jt->stack_base() - jt->stack_size() ) {
+
+      ret_sp = NULL;
+      ret_fp = NULL;
+  } else {
+
+    // sp is reasonable is fp reasonable?
+    if ( (address)ret_fp >= jt->stack_base() || ret_fp < ret_sp) {
+      ret_fp = NULL;
+    }
+  }
+
+  frame ret_frame(ret_sp, ret_fp, addr.pc());
+
+  *fr_addr = ret_frame;
+  return true;
+
 }
--- a/src/share/vm/code/codeCache.hpp	Mon Apr 07 15:15:16 2008 -0700
+++ b/src/share/vm/code/codeCache.hpp	Tue Apr 08 12:23:15 2008 -0400
@@ -71,7 +71,22 @@
   // what you are doing)
   static CodeBlob* find_blob_unsafe(void* start) {
     CodeBlob* result = (CodeBlob*)_heap->find_start(start);
-    assert(result == NULL || result->blob_contains((address)start), "found wrong CodeBlob");
+    // this assert is too strong because the heap code will return the
+    // heapblock containing start. That block can often be larger than
+    // the codeBlob itself. If you look up an address that is within
+    // the heapblock but not in the codeBlob you will assert.
+    //
+    // Most things will not lookup such bad addresses. However
+    // AsyncGetCallTrace can see intermediate frames and get that kind
+    // of invalid address and so can a developer using hsfind.
+    //
+    // The more correct answer is to return NULL if blob_contains() returns
+    // false.
+    // assert(result == NULL || result->blob_contains((address)start), "found wrong CodeBlob");
+
+    if (result != NULL && !result->blob_contains((address)start)) {
+      result = NULL;
+    }
     return result;
   }
 
--- a/src/share/vm/prims/forte.cpp	Mon Apr 07 15:15:16 2008 -0700
+++ b/src/share/vm/prims/forte.cpp	Tue Apr 08 12:23:15 2008 -0400
@@ -25,6 +25,20 @@
 # include "incls/_precompiled.incl"
 # include "incls/_forte.cpp.incl"
 
+// These name match the names reported by the forte quality kit
+enum {
+  ticks_no_Java_frame         =  0,
+  ticks_no_class_load         = -1,
+  ticks_GC_active             = -2,
+  ticks_unknown_not_Java      = -3,
+  ticks_not_walkable_not_Java = -4,
+  ticks_unknown_Java          = -5,
+  ticks_not_walkable_Java     = -6,
+  ticks_unknown_state         = -7,
+  ticks_thread_exit           = -8,
+  ticks_deopt                 = -9,
+  ticks_safepoint             = -10
+};
 
 //-------------------------------------------------------
 
@@ -41,297 +55,29 @@
 };
 
 
-static void forte_is_walkable_compiled_frame(frame* fr, RegisterMap* map,
+static void is_decipherable_compiled_frame(frame* fr, RegisterMap* map,
   bool* is_compiled_p, bool* is_walkable_p);
-static bool forte_is_walkable_interpreted_frame(frame* fr,
-  methodOop* method_p, int* bci_p);
+static bool is_decipherable_interpreted_frame(JavaThread* thread,
+                                                frame* fr,
+                                                methodOop* method_p,
+                                                int* bci_p);
 
 
-// A Forte specific version of frame:safe_for_sender().
-static bool forte_safe_for_sender(frame* fr, JavaThread *thread) {
-  bool ret_value = false;  // be pessimistic
 
-#ifdef COMPILER2
-#if defined(IA32) || defined(AMD64)
-  {
-    // This check is the same as the standard safe_for_sender()
-    // on IA32 or AMD64 except that NULL FP values are tolerated
-    // for C2.
-    address   sp = (address)fr->sp();
-    address   fp = (address)fr->fp();
-    ret_value = sp != NULL && sp <= thread->stack_base() &&
-      sp >= thread->stack_base() - thread->stack_size() &&
-      (fp == NULL || (fp <= thread->stack_base() &&
-      fp >= thread->stack_base() - thread->stack_size()));
 
-    // We used to use standard safe_for_sender() when we are supposed
-    // to be executing Java code. However, that prevents us from
-    // walking some intrinsic stacks so now we have to be more refined.
-    // If we passed the above check and we have a NULL frame pointer
-    // and we are supposed to be executing Java code, then we have a
-    // couple of more checks to make.
-    if (ret_value && fp == NULL && (thread->thread_state() == _thread_in_Java
-        || thread->thread_state() == _thread_in_Java_trans)) {
+vframeStreamForte::vframeStreamForte(JavaThread *jt,
+                                     frame fr,
+                                     bool stop_at_java_call_stub) : vframeStreamCommon(jt) {
 
-      if (fr->is_interpreted_frame()) {
-        // interpreted frames don't really have a NULL frame pointer
-        return false;
-      } else if (CodeCache::find_blob(fr->pc()) == NULL) {
-        // the NULL frame pointer should be associated with generated code
-        return false;
-      }
-    }
-  }
+  _stop_at_java_call_stub = stop_at_java_call_stub;
+  _frame = fr;
 
-#else // !(IA32 || AMD64)
-  ret_value = fr->safe_for_sender(thread);
-#endif // IA32 || AMD64
+  // We must always have a valid frame to start filling
 
-#else // !COMPILER2
-  ret_value = fr->safe_for_sender(thread);
-#endif // COMPILER2
+  bool filled_in = fill_from_frame();
 
-  if (!ret_value) {
-    return ret_value;  // not safe, nothing more to do
-  }
+  assert(filled_in, "invariant");
 
-  address sp1;
-
-#ifdef SPARC
-  // On Solaris SPARC, when a compiler frame has an interpreted callee
-  // the _interpreter_sp_adjustment field contains the adjustment to
-  // this frame's SP made by that interpreted callee.
-  // For AsyncGetCallTrace(), we need to verify that the resulting SP
-  // is valid for the specified thread's stack.
-  sp1 = (address)fr->sp();
-  address sp2 = (address)fr->unextended_sp();
-
-  // If the second SP is NULL, then the _interpreter_sp_adjustment
-  // field simply adjusts this frame's SP to NULL and the frame is
-  // not safe. This strange value can be set in the frame constructor
-  // when our peek into the interpreted callee's adjusted value for
-  // this frame's SP finds a NULL. This can happen when SIGPROF
-  // catches us while we are creating the interpreter frame.
-  //
-  if (sp2 == NULL ||
-
-      // If the two SPs are different, then _interpreter_sp_adjustment
-      // is non-zero and we need to validate the second SP. We invert
-      // the range check from frame::safe_for_sender() and bail out
-      // if the second SP is not safe.
-      (sp1 != sp2 && !(sp2 <= thread->stack_base()
-      && sp2 >= (thread->stack_base() - thread->stack_size())))) {
-    return false;
-  }
-#endif // SPARC
-
-  if (fr->is_entry_frame()) {
-    // This frame thinks it is an entry frame; we need to validate
-    // the JavaCallWrapper pointer.
-    // Note: frame::entry_frame_is_first() assumes that the
-    // JavaCallWrapper has a non-NULL _anchor field. We don't
-    // check that here (yet) since we've never seen a failure
-    // due to a NULL _anchor field.
-    // Update: Originally this check was done only for SPARC. However,
-    // this failure has now been seen on C2 C86. I have no reason to
-    // believe that this is not a general issue so I'm enabling the
-    // check for all compilers on all supported platforms.
-#ifdef COMPILER2
-#if defined(IA32) || defined(AMD64)
-    if (fr->fp() == NULL) {
-      // C2 X86 allows NULL frame pointers, but if we have one then
-      // we cannot call entry_frame_call_wrapper().
-      return false;
-    }
-#endif // IA32 || AMD64
-#endif // COMPILER2
-
-    sp1 = (address)fr->entry_frame_call_wrapper();
-    // We invert the range check from frame::safe_for_sender() and
-    // bail out if the JavaCallWrapper * is not safe.
-    if (!(sp1 <= thread->stack_base()
-        && sp1 >= (thread->stack_base() - thread->stack_size()))) {
-      return false;
-    }
-  }
-
-  return ret_value;
-}
-
-
-// Unknown compiled frames have caused assertion failures on Solaris
-// X86. This code also detects unknown compiled frames on Solaris
-// SPARC, but no assertion failures have been observed. However, I'm
-// paranoid so I'm enabling this code whenever we have a compiler.
-//
-// Returns true if the specified frame is an unknown compiled frame
-// and false otherwise.
-static bool is_unknown_compiled_frame(frame* fr, JavaThread *thread) {
-  bool ret_value = false;  // be optimistic
-
-  // This failure mode only occurs when the thread is in state
-  // _thread_in_Java so we are okay for this check for any other
-  // thread state.
-  //
-  // Note: _thread_in_Java does not always mean that the thread
-  // is executing Java code. AsyncGetCallTrace() has caught
-  // threads executing in JRT_LEAF() routines when the state
-  // will also be _thread_in_Java.
-  if (thread->thread_state() != _thread_in_Java) {
-    return ret_value;
-  }
-
-  // This failure mode only occurs with compiled frames so we are
-  // okay for this check for both entry and interpreted frames.
-  if (fr->is_entry_frame() || fr->is_interpreted_frame()) {
-    return ret_value;
-  }
-
-  // This failure mode only occurs when the compiled frame's PC
-  // is in the code cache so we are okay for this check if the
-  // PC is not in the code cache.
-  CodeBlob* cb = CodeCache::find_blob(fr->pc());
-  if (cb == NULL) {
-    return ret_value;
-  }
-
-  // We have compiled code in the code cache so it is time for
-  // the final check: let's see if any frame type is set
-  ret_value = !(
-    // is_entry_frame() is checked above
-    // testers that are a subset of is_entry_frame():
-    //   is_first_frame()
-    fr->is_java_frame()
-    // testers that are a subset of is_java_frame():
-    //   is_interpreted_frame()
-    //   is_compiled_frame()
-    || fr->is_native_frame()
-    || fr->is_runtime_frame()
-    || fr->is_safepoint_blob_frame()
-    );
-
-  // If there is no frame type set, then we have an unknown compiled
-  // frame and sender() should not be called on it.
-
-  return ret_value;
-}
-
-#define DebugNonSafepoints_IS_CLEARED \
-  (!FLAG_IS_DEFAULT(DebugNonSafepoints) && !DebugNonSafepoints)
-
-// if -XX:-DebugNonSafepoints, then top-frame will be skipped
-vframeStreamForte::vframeStreamForte(JavaThread *jt, frame fr,
-  bool stop_at_java_call_stub) : vframeStreamCommon(jt) {
-  _stop_at_java_call_stub = stop_at_java_call_stub;
-
-  if (!DebugNonSafepoints_IS_CLEARED) {
-    // decode the top frame fully
-    // (usual case, if JVMTI is enabled)
-    _frame = fr;
-  } else {
-    // skip top frame, as it may not be at safepoint
-    // For AsyncGetCallTrace(), we extracted as much info from the top
-    // frame as we could in forte_is_walkable_frame(). We also verified
-    // forte_safe_for_sender() so this sender() call is safe.
-    _frame  = fr.sender(&_reg_map);
-  }
-
-  if (jt->thread_state() == _thread_in_Java && !fr.is_first_frame()) {
-    bool sender_check = false;  // assume sender is not safe
-
-    if (forte_safe_for_sender(&_frame, jt)) {
-      // If the initial sender frame is safe, then continue on with other
-      // checks. The unsafe sender frame has been seen on Solaris X86
-      // with both Compiler1 and Compiler2. It has not been seen on
-      // Solaris SPARC, but seems like a good sanity check to have
-      // anyway.
-
-      // SIGPROF caught us in Java code and the current frame is not the
-      // first frame so we should sanity check the sender frame. It is
-      // possible for SIGPROF to catch us in the middle of making a call.
-      // When that happens the current frame is actually a combination of
-      // the real sender and some of the new call's info. We can't find
-      // the real sender with such a current frame and things can get
-      // confused.
-      //
-      // This sanity check has caught problems with the sender frame on
-      // Solaris SPARC. So far Solaris X86 has not had a failure here.
-      sender_check = _frame.is_entry_frame()
-        // testers that are a subset of is_entry_frame():
-        //   is_first_frame()
-        || _frame.is_java_frame()
-        // testers that are a subset of is_java_frame():
-        //   is_interpreted_frame()
-        //   is_compiled_frame()
-        || _frame.is_native_frame()
-        || _frame.is_runtime_frame()
-        || _frame.is_safepoint_blob_frame()
-        ;
-
-      // We need an additional sanity check on an initial interpreted
-      // sender frame. This interpreted frame needs to be both walkable
-      // and have a valid BCI. This is yet another variant of SIGPROF
-      // catching us in the middle of making a call.
-      if (sender_check && _frame.is_interpreted_frame()) {
-        methodOop method = NULL;
-        int bci = -1;
-
-        if (!forte_is_walkable_interpreted_frame(&_frame, &method, &bci)
-            || bci == -1) {
-          sender_check = false;
-        }
-      }
-
-      // We need an additional sanity check on an initial compiled
-      // sender frame. This compiled frame also needs to be walkable.
-      // This is yet another variant of SIGPROF catching us in the
-      // middle of making a call.
-      if (sender_check && !_frame.is_interpreted_frame()) {
-        bool is_compiled, is_walkable;
-
-        forte_is_walkable_compiled_frame(&_frame, &_reg_map,
-          &is_compiled, &is_walkable);
-        if (is_compiled && !is_walkable) {
-          sender_check = false;
-        }
-      }
-    }
-
-    if (!sender_check) {
-      // nothing else to try if we can't recognize the sender
-      _mode = at_end_mode;
-      return;
-    }
-  }
-
-  int loop_count = 0;
-  int loop_max = MaxJavaStackTraceDepth * 2;
-
-  while (!fill_from_frame()) {
-    _frame = _frame.sender(&_reg_map);
-
-#ifdef COMPILER2
-#if defined(IA32) || defined(AMD64)
-    // Stress testing on C2 X86 has shown a periodic problem with
-    // the sender() call below. The initial _frame that we have on
-    // entry to the loop has already passed forte_safe_for_sender()
-    // so we only check frames after it.
-    if (!forte_safe_for_sender(&_frame, _thread)) {
-      _mode = at_end_mode;
-      return;
-    }
-#endif // IA32 || AMD64
-#endif // COMPILER2
-
-    if (++loop_count >= loop_max) {
-      // We have looped more than twice the number of possible
-      // Java frames. This indicates that we are trying to walk
-      // a stack that is in the middle of being constructed and
-      // it is self referential.
-      _mode = at_end_mode;
-      return;
-    }
-  }
 }
 
 
@@ -358,95 +104,57 @@
 
   do {
 
-#if defined(COMPILER1) && defined(SPARC)
-  bool prevIsInterpreted =  _frame.is_interpreted_frame();
-#endif // COMPILER1 && SPARC
+    loop_count++;
 
-    _frame = _frame.sender(&_reg_map);
+    // By the time we get here we should never see unsafe but better
+    // safe then segv'd
 
-    if (!forte_safe_for_sender(&_frame, _thread)) {
+    if (loop_count > loop_max || !_frame.safe_for_sender(_thread)) {
       _mode = at_end_mode;
       return;
     }
 
-#if defined(COMPILER1) && defined(SPARC)
-    if (prevIsInterpreted) {
-      // previous callee was interpreted and may require a special check
-      if (_frame.is_compiled_frame() && _frame.cb()->is_compiled_by_c1()) {
-        // compiled sender called interpreted callee so need one more check
-        bool is_compiled, is_walkable;
+    _frame = _frame.sender(&_reg_map);
 
-        // sanity check the compiled sender frame
-        forte_is_walkable_compiled_frame(&_frame, &_reg_map,
-          &is_compiled, &is_walkable);
-        assert(is_compiled, "sanity check");
-        if (!is_walkable) {
-          // compiled sender frame is not walkable so bail out
-          _mode = at_end_mode;
-          return;
-        }
-      }
-    }
-#endif // COMPILER1 && SPARC
-
-    if (++loop_count >= loop_max) {
-      // We have looped more than twice the number of possible
-      // Java frames. This indicates that we are trying to walk
-      // a stack that is in the middle of being constructed and
-      // it is self referential.
-      _mode = at_end_mode;
-      return;
-    }
   } while (!fill_from_frame());
 }
 
-// Determine if 'fr' is a walkable, compiled frame.
-// *is_compiled_p is set to true if the frame is compiled and if it
-// is, then *is_walkable_p is set to true if it is also walkable.
-static void forte_is_walkable_compiled_frame(frame* fr, RegisterMap* map,
-  bool* is_compiled_p, bool* is_walkable_p) {
+// Determine if 'fr' is a decipherable compiled frame. We are already
+// assured that fr is for a java nmethod.
 
-  *is_compiled_p = false;
-  *is_walkable_p = false;
+static bool is_decipherable_compiled_frame(frame* fr) {
 
-  CodeBlob* cb = CodeCache::find_blob(fr->pc());
-  if (cb != NULL &&
-      cb->is_nmethod() &&
-      ((nmethod*)cb)->is_java_method()) {
-    // frame is compiled and executing a Java method
-    *is_compiled_p = true;
+  assert(fr->cb() != NULL && fr->cb()->is_nmethod(), "invariant");
+  nmethod* nm = (nmethod*) fr->cb();
+  assert(nm->is_java_method(), "invariant");
 
-    // Increment PC because the PcDesc we want is associated with
-    // the *end* of the instruction, and pc_desc_near searches
-    // forward to the first matching PC after the probe PC.
-    PcDesc* pc_desc = NULL;
-    if (!DebugNonSafepoints_IS_CLEARED) {
-      // usual case:  look for any safepoint near the sampled PC
-      address probe_pc = fr->pc() + 1;
-      pc_desc = ((nmethod*) cb)->pc_desc_near(probe_pc);
-    } else {
-      // reduced functionality:  only recognize PCs immediately after calls
-      pc_desc = ((nmethod*) cb)->pc_desc_at(fr->pc());
+  // First try and find an exact PcDesc
+
+  PcDesc* pc_desc = nm->pc_desc_at(fr->pc());
+
+  // Did we find a useful PcDesc?
+  if (pc_desc != NULL &&
+      pc_desc->scope_decode_offset() == DebugInformationRecorder::serialized_null) {
+
+    address probe_pc = fr->pc() + 1;
+    pc_desc = nm->pc_desc_near(probe_pc);
+
+    // Now do we have a useful PcDesc?
+
+    if (pc_desc != NULL &&
+        pc_desc->scope_decode_offset() == DebugInformationRecorder::serialized_null) {
+      // No debug information available for this pc
+      // vframeStream would explode if we try and walk the frames.
+      return false;
     }
-    if (pc_desc != NULL && (pc_desc->scope_decode_offset()
-                            == DebugInformationRecorder::serialized_null)) {
-      pc_desc = NULL;
-    }
-    if (pc_desc != NULL) {
-      // it has a PcDesc so the frame is also walkable
-      *is_walkable_p = true;
-      if (!DebugNonSafepoints_IS_CLEARED) {
-        // Normalize the PC to the one associated exactly with
-        // this PcDesc, so that subsequent stack-walking queries
-        // need not be approximate:
-        fr->set_pc(pc_desc->real_pc((nmethod*) cb));
-      }
-    }
-    // Implied else: this compiled frame has no PcDesc, i.e., contains
-    // a frameless stub such as C1 method exit, so it is not walkable.
+
+    // This PcDesc is useful however we must adjust the frame's pc
+    // so that the vframeStream lookups will use this same pc
+
+    fr->set_pc(pc_desc->real_pc(nm));
   }
-  // Implied else: this isn't a compiled frame so it isn't a
-  // walkable, compiled frame.
+
+  return true;
 }
 
 // Determine if 'fr' is a walkable interpreted frame. Returns false
@@ -457,159 +165,189 @@
 // Note: this method returns true when a valid Java method is found
 // even if a valid BCI cannot be found.
 
-static bool forte_is_walkable_interpreted_frame(frame* fr,
-  methodOop* method_p, int* bci_p) {
+static bool is_decipherable_interpreted_frame(JavaThread* thread,
+                                                frame* fr,
+                                                methodOop* method_p,
+                                                int* bci_p) {
   assert(fr->is_interpreted_frame(), "just checking");
 
   // top frame is an interpreted frame
   // check if it is walkable (i.e. valid methodOop and valid bci)
-  if (fr->is_interpreted_frame_valid()) {
-    if (fr->fp() != NULL) {
-      // access address in order not to trigger asserts that
-      // are built in interpreter_frame_method function
-      methodOop method = *fr->interpreter_frame_method_addr();
-      if (Universe::heap()->is_valid_method(method)) {
-        intptr_t bcx = fr->interpreter_frame_bcx();
-        int      bci = method->validate_bci_from_bcx(bcx);
-        // note: bci is set to -1 if not a valid bci
-        *method_p = method;
-        *bci_p = bci;
-        return true;
-      }
-    }
+
+  // Because we may be racing a gc thread the method and/or bci
+  // of a valid interpreter frame may look bad causing us to
+  // fail the is_interpreted_frame_valid test. If the thread
+  // is in any of the following states we are assured that the
+  // frame is in fact valid and we must have hit the race.
+
+  JavaThreadState state = thread->thread_state();
+  bool known_valid = (state == _thread_in_native ||
+                      state == _thread_in_vm ||
+                      state == _thread_blocked );
+
+  if (known_valid || fr->is_interpreted_frame_valid(thread)) {
+
+    // The frame code should completely validate the frame so that
+    // references to methodOop and bci are completely safe to access
+    // If they aren't the frame code should be fixed not this
+    // code. However since gc isn't locked out the values could be
+    // stale. This is a race we can never completely win since we can't
+    // lock out gc so do one last check after retrieving their values
+    // from the frame for additional safety
+
+    methodOop method = fr->interpreter_frame_method();
+
+    // We've at least found a method.
+    // NOTE: there is something to be said for the approach that
+    // if we don't find a valid bci then the method is not likely
+    // a valid method. Then again we may have caught an interpreter
+    // frame in the middle of construction and the bci field is
+    // not yet valid.
+
+    *method_p = method;
+
+    // See if gc may have invalidated method since we validated frame
+
+    if (!Universe::heap()->is_valid_method(method)) return false;
+
+    intptr_t bcx = fr->interpreter_frame_bcx();
+
+    int      bci = method->validate_bci_from_bcx(bcx);
+
+    // note: bci is set to -1 if not a valid bci
+    *bci_p = bci;
+    return true;
   }
+
   return false;
 }
 
 
-// Determine if 'fr' can be used to find a walkable frame. Returns
-// false if a walkable frame cannot be found. *walkframe_p, *method_p,
-// and *bci_p are not set when false is returned. Returns true if a
-// walkable frame is returned via *walkframe_p. *method_p is non-NULL
-// if the returned frame was executing a Java method. *bci_p is != -1
-// if a valid BCI in the Java method could be found.
+// Determine if 'fr' can be used to find an initial Java frame.
+// Return false if it can not find a fully decipherable Java frame
+// (in other words a frame that isn't safe to use in a vframe stream).
+// Obviously if it can't even find a Java frame false will also be returned.
 //
-// *walkframe_p will be used by vframeStreamForte as the initial
-// frame for walking the stack. Currently the initial frame is
-// skipped by vframeStreamForte because we inherited the logic from
-// the vframeStream class. This needs to be revisited in the future.
-static bool forte_is_walkable_frame(JavaThread* thread, frame* fr,
-  frame* walkframe_p, methodOop* method_p, int* bci_p) {
+// If we find a Java frame decipherable or not then by definition we have
+// identified a method and that will be returned to the caller via method_p.
+// If we can determine a bci that is returned also. (Hmm is it possible
+// to return a method and bci and still return false? )
+//
+// The initial Java frame we find (if any) is return via initial_frame_p.
+//
 
-  if (!forte_safe_for_sender(fr, thread)
-      || is_unknown_compiled_frame(fr, thread)
-     ) {
-    // If the initial frame is not safe, then bail out. So far this
-    // has only been seen on Solaris X86 with Compiler2, but it seems
-    // like a great initial sanity check.
-    return false;
+static bool find_initial_Java_frame(JavaThread* thread,
+                                    frame* fr,
+                                    frame* initial_frame_p,
+                                    methodOop* method_p,
+                                    int* bci_p) {
+
+  // It is possible that for a frame containing an nmethod
+  // we can capture the method but no bci. If we get no
+  // bci the frame isn't walkable but the method is usable.
+  // Therefore we init the returned methodOop to NULL so the
+  // caller can make the distinction.
+
+  *method_p = NULL;
+
+  // On the initial call to this method the frame we get may not be
+  // recognizable to us. This should only happen if we are in a JRT_LEAF
+  // or something called by a JRT_LEAF method.
+
+
+
+  frame candidate = *fr;
+
+  // If the starting frame we were given has no codeBlob associated with
+  // it see if we can find such a frame because only frames with codeBlobs
+  // are possible Java frames.
+
+  if (fr->cb() == NULL) {
+
+    // See if we can find a useful frame
+    int loop_count;
+    int loop_max = MaxJavaStackTraceDepth * 2;
+    RegisterMap map(thread, false);
+
+    for (loop_count = 0; loop_count < loop_max; loop_count++) {
+      if (!candidate.safe_for_sender(thread)) return false;
+      candidate = candidate.sender(&map);
+      if (candidate.cb() != NULL) break;
+    }
+    if (candidate.cb() == NULL) return false;
   }
 
-  if (fr->is_first_frame()) {
-    // If initial frame is frame from StubGenerator and there is no
-    // previous anchor, there are no java frames yet
-    return false;
+  // We have a frame known to be in the codeCache
+  // We will hopefully be able to figure out something to do with it.
+  int loop_count;
+  int loop_max = MaxJavaStackTraceDepth * 2;
+  RegisterMap map(thread, false);
+
+  for (loop_count = 0; loop_count < loop_max; loop_count++) {
+
+    if (candidate.is_first_frame()) {
+      // If initial frame is frame from StubGenerator and there is no
+      // previous anchor, there are no java frames associated with a method
+      return false;
+    }
+
+    if (candidate.is_interpreted_frame()) {
+      if (is_decipherable_interpreted_frame(thread, &candidate, method_p, bci_p)) {
+        *initial_frame_p = candidate;
+        return true;
+      }
+
+      // Hopefully we got some data
+      return false;
+    }
+
+    if (candidate.cb()->is_nmethod()) {
+
+      nmethod* nm = (nmethod*) candidate.cb();
+      *method_p = nm->method();
+
+      // If the frame isn't fully decipherable then the default
+      // value for the bci is a signal that we don't have a bci.
+      // If we have a decipherable frame this bci value will
+      // not be used.
+
+      *bci_p = -1;
+
+      *initial_frame_p = candidate;
+
+      // Native wrapper code is trivial to decode by vframeStream
+
+      if (nm->is_native_method()) return true;
+
+      // If it isn't decipherable then we have found a pc that doesn't
+      // have a PCDesc that can get us a bci however we did find
+      // a method
+
+      if (!is_decipherable_compiled_frame(&candidate)) {
+        return false;
+      }
+
+      // is_decipherable_compiled_frame may modify candidate's pc
+      *initial_frame_p = candidate;
+
+      return true;
+    }
+
+    // Must be some stub frame that we don't care about
+
+    if (!candidate.safe_for_sender(thread)) return false;
+    candidate = candidate.sender(&map);
+
+    // If it isn't in the code cache something is wrong
+    // since once we find a frame in the code cache they
+    // all should be there.
+
+    if (candidate.cb() == NULL) return false;
+
   }
 
-  if (fr->is_interpreted_frame()) {
-    if (forte_is_walkable_interpreted_frame(fr, method_p, bci_p)) {
-      *walkframe_p = *fr;
-      return true;
-    }
-    return false;
-  }
+  return false;
 
-  // At this point we have something other than a first frame or an
-  // interpreted frame.
-
-  methodOop method = NULL;
-  frame candidate = *fr;
-
-  // If we loop more than twice the number of possible Java
-  // frames, then this indicates that we are trying to walk
-  // a stack that is in the middle of being constructed and
-  // it is self referential. So far this problem has only
-  // been seen on Solaris X86 Compiler2, but it seems like
-  // a good robustness fix for all platforms.
-
-  int loop_count;
-  int loop_max = MaxJavaStackTraceDepth * 2;
-
-  for (loop_count = 0; loop_count < loop_max; loop_count++) {
-    // determine if the candidate frame is executing a Java method
-    if (CodeCache::contains(candidate.pc())) {
-      // candidate is a compiled frame or stub routine
-      CodeBlob* cb = CodeCache::find_blob(candidate.pc());
-
-      if (cb->is_nmethod()) {
-        method = ((nmethod *)cb)->method();
-      }
-    } // end if CodeCache has our PC
-
-    RegisterMap map(thread, false);
-
-    // we have a Java frame that seems reasonable
-    if (method != NULL && candidate.is_java_frame()
-        && candidate.sp() != NULL && candidate.pc() != NULL) {
-      // we need to sanity check the candidate further
-      bool is_compiled, is_walkable;
-
-      forte_is_walkable_compiled_frame(&candidate, &map, &is_compiled,
-        &is_walkable);
-      if (is_compiled) {
-        // At this point, we know we have a compiled Java frame with
-        // method information that we want to return. We don't check
-        // the is_walkable flag here because that flag pertains to
-        // vframeStreamForte work that is done after we are done here.
-        break;
-      }
-    }
-
-    // At this point, the candidate doesn't work so try the sender.
-
-    // For AsyncGetCallTrace() we cannot assume there is a sender
-    // for the initial frame. The initial forte_safe_for_sender() call
-    // and check for is_first_frame() is done on entry to this method.
-    candidate = candidate.sender(&map);
-    if (!forte_safe_for_sender(&candidate, thread)) {
-
-#ifdef COMPILER2
-#if defined(IA32) || defined(AMD64)
-      // C2 on X86 can use the ebp register as a general purpose register
-      // which can cause the candidate to fail theforte_safe_for_sender()
-      // above. We try one more time using a NULL frame pointer (fp).
-
-      candidate = frame(candidate.sp(), NULL, candidate.pc());
-      if (!forte_safe_for_sender(&candidate, thread)) {
-#endif // IA32 || AMD64
-#endif // COMPILER2
-
-        return false;
-
-#ifdef COMPILER2
-#if defined(IA32) || defined(AMD64)
-      } // end forte_safe_for_sender retry with NULL fp
-#endif // IA32 || AMD64
-#endif // COMPILER2
-
-    } // end first forte_safe_for_sender check
-
-    if (candidate.is_first_frame()
-        || is_unknown_compiled_frame(&candidate, thread)) {
-      return false;
-    }
-  } // end for loop_count
-
-  if (method == NULL) {
-    // If we didn't get any method info from the candidate, then
-    // we have nothing to return so bail out.
-    return false;
-  }
-
-  *walkframe_p = candidate;
-  *method_p = method;
-  *bci_p = -1;
-  return true;
 }
 
 
@@ -627,10 +365,12 @@
 } ASGCT_CallTrace;
 
 static void forte_fill_call_trace_given_top(JavaThread* thd,
-  ASGCT_CallTrace* trace, int depth, frame top_frame) {
+                                            ASGCT_CallTrace* trace,
+                                            int depth,
+                                            frame top_frame) {
   NoHandleMark nhm;
 
-  frame walkframe;
+  frame initial_Java_frame;
   methodOop method;
   int bci;
   int count;
@@ -638,48 +378,51 @@
   count = 0;
   assert(trace->frames != NULL, "trace->frames must be non-NULL");
 
-  if (!forte_is_walkable_frame(thd, &top_frame, &walkframe, &method, &bci)) {
-    // return if no walkable frame is found
+  bool fully_decipherable = find_initial_Java_frame(thd, &top_frame, &initial_Java_frame, &method, &bci);
+
+  // The frame might not be walkable but still recovered a method
+  // (e.g. an nmethod with no scope info for the pc
+
+  if (method == NULL) return;
+
+  CollectedHeap* ch = Universe::heap();
+
+  // The method is not stored GC safe so see if GC became active
+  // after we entered AsyncGetCallTrace() and before we try to
+  // use the methodOop.
+  // Yes, there is still a window after this check and before
+  // we use methodOop below, but we can't lock out GC so that
+  // has to be an acceptable risk.
+  if (!ch->is_valid_method(method)) {
+    trace->num_frames = ticks_GC_active; // -2
     return;
   }
 
-  CollectedHeap* ch = Universe::heap();
+  // We got a Java frame however it isn't fully decipherable
+  // so it won't necessarily be safe to use it for the
+  // initial frame in the vframe stream.
 
-  if (method != NULL) {
-    // The method is not stored GC safe so see if GC became active
-    // after we entered AsyncGetCallTrace() and before we try to
-    // use the methodOop.
-    // Yes, there is still a window after this check and before
-    // we use methodOop below, but we can't lock out GC so that
-    // has to be an acceptable risk.
-    if (!ch->is_valid_method(method)) {
-      trace->num_frames = -2;
-      return;
+  if (!fully_decipherable) {
+    // Take whatever method the top-frame decoder managed to scrape up.
+    // We look further at the top frame only if non-safepoint
+    // debugging information is available.
+    count++;
+    trace->num_frames = count;
+    trace->frames[0].method_id = method->find_jmethod_id_or_null();
+    if (!method->is_native()) {
+      trace->frames[0].lineno = bci;
+    } else {
+      trace->frames[0].lineno = -3;
     }
 
-    if (DebugNonSafepoints_IS_CLEARED) {
-      // Take whatever method the top-frame decoder managed to scrape up.
-      // We look further at the top frame only if non-safepoint
-      // debugging information is available.
-      count++;
-      trace->num_frames = count;
-      trace->frames[0].method_id = method->find_jmethod_id_or_null();
-      if (!method->is_native()) {
-        trace->frames[0].lineno = bci;
-      } else {
-        trace->frames[0].lineno = -3;
-      }
-    }
+    if (!initial_Java_frame.safe_for_sender(thd)) return;
+
+    RegisterMap map(thd, false);
+    initial_Java_frame = initial_Java_frame.sender(&map);
   }
 
-  // check has_last_Java_frame() after looking at the top frame
-  // which may be an interpreted Java frame.
-  if (!thd->has_last_Java_frame() && method == NULL) {
-    trace->num_frames = 0;
-    return;
-  }
+  vframeStreamForte st(thd, initial_Java_frame, false);
 
-  vframeStreamForte st(thd, walkframe, false);
   for (; !st.at_end() && count < depth; st.forte_next(), count++) {
     bci = st.bci();
     method = st.method();
@@ -693,7 +436,7 @@
     if (!ch->is_valid_method(method)) {
       // we throw away everything we've gathered in this sample since
       // none of it is safe
-      trace->num_frames = -2;
+      trace->num_frames = ticks_GC_active; // -2
       return;
     }
 
@@ -765,6 +508,11 @@
 
 extern "C" {
 void AsyncGetCallTrace(ASGCT_CallTrace *trace, jint depth, void* ucontext) {
+
+// This is if'd out because we no longer use thread suspension.
+// However if someone wanted to backport this to a 5.0 jvm then this
+// code would be important.
+#if 0
   if (SafepointSynchronize::is_synchronizing()) {
     // The safepoint mechanism is trying to synchronize all the threads.
     // Since this can involve thread suspension, it is not safe for us
@@ -774,9 +522,10 @@
     // are suspended while holding a resource and another thread blocks
     // on that resource in the SIGPROF handler, then we will have a
     // three-thread deadlock (VMThread, this thread, the other thread).
-    trace->num_frames = -10;
+    trace->num_frames = ticks_safepoint; // -10
     return;
   }
+#endif
 
   JavaThread* thread;
 
@@ -785,13 +534,13 @@
     thread->is_exiting()) {
 
     // bad env_id, thread has exited or thread is exiting
-    trace->num_frames = -8;
+    trace->num_frames = ticks_thread_exit; // -8
     return;
   }
 
   if (thread->in_deopt_handler()) {
     // thread is in the deoptimization handler so return no frames
-    trace->num_frames = -9;
+    trace->num_frames = ticks_deopt; // -9
     return;
   }
 
@@ -799,12 +548,12 @@
          "AsyncGetCallTrace must be called by the current interrupted thread");
 
   if (!JvmtiExport::should_post_class_load()) {
-    trace->num_frames = -1;
+    trace->num_frames = ticks_no_class_load; // -1
     return;
   }
 
   if (Universe::heap()->is_gc_active()) {
-    trace->num_frames = -2;
+    trace->num_frames = ticks_GC_active; // -2
     return;
   }
 
@@ -827,14 +576,22 @@
 
       // param isInJava == false - indicate we aren't in Java code
       if (!thread->pd_get_top_frame_for_signal_handler(&fr, ucontext, false)) {
+        trace->num_frames = ticks_unknown_not_Java;  // -3 unknown frame
+      } else {
         if (!thread->has_last_Java_frame()) {
-          trace->num_frames = 0;   // no Java frames
+          trace->num_frames = 0; // No Java frames
         } else {
-          trace->num_frames = -3;  // unknown frame
+          trace->num_frames = ticks_not_walkable_not_Java;    // -4 non walkable frame by default
+          forte_fill_call_trace_given_top(thread, trace, depth, fr);
+
+          // This assert would seem to be valid but it is not.
+          // It would be valid if we weren't possibly racing a gc
+          // thread. A gc thread can make a valid interpreted frame
+          // look invalid. It's a small window but it does happen.
+          // The assert is left here commented out as a reminder.
+          // assert(trace->num_frames != ticks_not_walkable_not_Java, "should always be walkable");
+
         }
-      } else {
-        trace->num_frames = -4;    // non walkable frame by default
-        forte_fill_call_trace_given_top(thread, trace, depth, fr);
       }
     }
     break;
@@ -845,16 +602,16 @@
 
       // param isInJava == true - indicate we are in Java code
       if (!thread->pd_get_top_frame_for_signal_handler(&fr, ucontext, true)) {
-        trace->num_frames = -5;  // unknown frame
+        trace->num_frames = ticks_unknown_Java;  // -5 unknown frame
       } else {
-        trace->num_frames = -6;  // non walkable frame by default
+        trace->num_frames = ticks_not_walkable_Java;  // -6, non walkable frame by default
         forte_fill_call_trace_given_top(thread, trace, depth, fr);
       }
     }
     break;
   default:
     // Unknown thread state
-    trace->num_frames = -7;
+    trace->num_frames = ticks_unknown_state; // -7
     break;
   }
 }
--- a/src/share/vm/runtime/fprofiler.cpp	Mon Apr 07 15:15:16 2008 -0700
+++ b/src/share/vm/runtime/fprofiler.cpp	Tue Apr 08 12:23:15 2008 -0400
@@ -924,29 +924,23 @@
   FlatProfiler::record_thread_ticks();
 }
 
-void ThreadProfiler::record_interpreted_tick(frame fr, TickPosition where, int* ticks) {
+void ThreadProfiler::record_interpreted_tick(JavaThread* thread, frame fr, TickPosition where, int* ticks) {
   FlatProfiler::all_int_ticks++;
   if (!FlatProfiler::full_profile()) {
     return;
   }
 
-  if (!fr.is_interpreted_frame_valid()) {
+  if (!fr.is_interpreted_frame_valid(thread)) {
     // tick came at a bad time
     interpreter_ticks += 1;
     FlatProfiler::interpreter_ticks += 1;
     return;
   }
 
-  methodOop method = NULL;
-  if (fr.fp() != NULL) {
-    method = *fr.interpreter_frame_method_addr();
-  }
-  if (!Universe::heap()->is_valid_method(method)) {
-    // tick came at a bad time, stack frame not initialized correctly
-    interpreter_ticks += 1;
-    FlatProfiler::interpreter_ticks += 1;
-    return;
-  }
+  // The frame has been fully validated so we can trust the method and bci
+
+  methodOop method = *fr.interpreter_frame_method_addr();
+
   interpreted_update(method, where);
 
   // update byte code table
@@ -997,7 +991,7 @@
   // The tick happend in real code -> non VM code
   if (fr.is_interpreted_frame()) {
     interval_data_ref()->inc_interpreted();
-    record_interpreted_tick(fr, tp_code, FlatProfiler::bytecode_ticks);
+    record_interpreted_tick(thread, fr, tp_code, FlatProfiler::bytecode_ticks);
     return;
   }
 
@@ -1028,7 +1022,7 @@
   // The tick happend in VM code
   interval_data_ref()->inc_native();
   if (fr.is_interpreted_frame()) {
-    record_interpreted_tick(fr, tp_native, FlatProfiler::bytecode_ticks_stub);
+    record_interpreted_tick(thread, fr, tp_native, FlatProfiler::bytecode_ticks_stub);
     return;
   }
   if (CodeCache::contains(fr.pc())) {
--- a/src/share/vm/runtime/fprofiler.hpp	Mon Apr 07 15:15:16 2008 -0700
+++ b/src/share/vm/runtime/fprofiler.hpp	Tue Apr 08 12:23:15 2008 -0400
@@ -135,7 +135,7 @@
   ProfilerNode** table;
 
 private:
-  void record_interpreted_tick(frame fr, TickPosition where, int* ticks);
+  void record_interpreted_tick(JavaThread* thread, frame fr, TickPosition where, int* ticks);
   void record_compiled_tick   (JavaThread* thread, frame fr, TickPosition where);
   void interpreted_update(methodOop method, TickPosition where);
   void compiled_update   (methodOop method, TickPosition where);
--- a/src/share/vm/runtime/frame.hpp	Mon Apr 07 15:15:16 2008 -0700
+++ b/src/share/vm/runtime/frame.hpp	Tue Apr 08 12:23:15 2008 -0400
@@ -108,7 +108,7 @@
   bool is_first_frame() const; // oldest frame? (has no sender)
   bool is_first_java_frame() const;              // same for Java frame
 
-  bool is_interpreted_frame_valid() const;       // performs sanity checks on interpreted frames.
+  bool is_interpreted_frame_valid(JavaThread* thread) const;       // performs sanity checks on interpreted frames.
 
   // tells whether this frame is marked for deoptimization
   bool should_be_deoptimized() const;
--- a/src/share/vm/runtime/vframe.hpp	Mon Apr 07 15:15:16 2008 -0700
+++ b/src/share/vm/runtime/vframe.hpp	Tue Apr 08 12:23:15 2008 -0400
@@ -416,6 +416,48 @@
       int decode_offset;
       if (pc_desc == NULL) {
         // Should not happen, but let fill_from_compiled_frame handle it.
+
+        // If we are trying to walk the stack of a thread that is not
+        // at a safepoint (like AsyncGetCallTrace would do) then this is an
+        // acceptable result. [ This is assuming that safe_for_sender
+        // is so bullet proof that we can trust the frames it produced. ]
+        //
+        // So if we see that the thread is not safepoint safe
+        // then simply produce the method and a bci of zero
+        // and skip the possibility of decoding any inlining that
+        // may be present. That is far better than simply stopping (or
+        // asserting. If however the thread is safepoint safe this
+        // is the sign of a compiler bug  and we'll let
+        // fill_from_compiled_frame handle it.
+
+
+        JavaThreadState state = _thread->thread_state();
+
+        // in_Java should be good enough to test safepoint safety
+        // if state were say in_Java_trans then we'd expect that
+        // the pc would have already been slightly adjusted to
+        // one that would produce a pcDesc since the trans state
+        // would be one that might in fact anticipate a safepoint
+
+        if (state == _thread_in_Java ) {
+          // This will get a method a zero bci and no inlining.
+          // Might be nice to have a unique bci to signify this
+          // particular case but for now zero will do.
+
+          fill_from_compiled_native_frame();
+
+          // There is something to be said for setting the mode to
+          // at_end_mode to prevent trying to walk further up the
+          // stack. There is evidence that if we walk any further
+          // that we could produce a bad stack chain. However until
+          // we see evidence that allowing this causes us to find
+          // frames bad enough to cause segv's or assertion failures
+          // we don't do it as while we may get a bad call chain the
+          // probability is much higher (several magnitudes) that we
+          // get good data.
+
+          return true;
+        }
         decode_offset = DebugInformationRecorder::serialized_null;
       } else {
         decode_offset = pc_desc->scope_decode_offset();