changeset 2688:79f9a3ed607a

Merge
author jcoomes
date Fri, 09 Sep 2011 16:17:16 -0700
parents dce7d24674f4 e984655be425
children 513a84dd0f8b
files .hgtags agent/src/share/classes/sun/jvm/hotspot/runtime/amd64/AMD64Frame.java agent/src/share/classes/sun/jvm/hotspot/runtime/amd64/AMD64RegisterMap.java make/solaris/makefiles/mapfile-vers-nonproduct src/share/vm/runtime/reflectionCompat.hpp
diffstat 166 files changed, 3988 insertions(+), 3329 deletions(-)
--- a/.hgtags	Thu Sep 08 16:59:27 2011 -0700
+++ b/.hgtags	Fri Sep 09 16:17:16 2011 -0700
@@ -178,3 +178,7 @@
 31e253c1da429124bb87570ab095d9bc89850d0a jdk8-b02
 3a2fb61165dfc72e398179a2796d740c8da5b8c0 jdk8-b03
 0fa3ace511fe98fe948e751531f3e2b7c60c8376 jdk8-b04
+0cc8a70952c368e06de2adab1f2649a408f5e577 hs22-b01
+7c29742c41b44fb0cd5a13c7ac8834f3f2ca649e hs22-b02
+3a2fb61165dfc72e398179a2796d740c8da5b8c0 hs22-b03
+ce9bde819dcba4a5d2822229d9183e69c74326ca hs22-b04
--- a/agent/src/share/classes/sun/jvm/hotspot/HSDB.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/HSDB.java	Fri Sep 09 16:17:16 2011 -0700
@@ -1740,7 +1740,7 @@
       else if (f.isCompiledFrame())    { tty.print("compiled"); }
       else if (f.isEntryFrame())       { tty.print("entry"); }
       else if (f.isNativeFrame())      { tty.print("native"); }
-      else if (f.isGlueFrame())        { tty.print("glue"); }
+      else if (f.isRuntimeFrame())     { tty.print("runtime"); }
       else { tty.print("external"); }
       tty.print(" frame with PC = " + f.getPC() + ", SP = " + f.getSP() + ", FP = " + f.getFP());
       if (f.isSignalHandlerFrameDbg()) {
--- a/agent/src/share/classes/sun/jvm/hotspot/code/CodeBlob.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/CodeBlob.java	Fri Sep 09 16:17:16 2011 -0700
@@ -102,6 +102,11 @@
   /** On-Stack Replacement method */
   public boolean isOSRMethod()          { return false; }
 
+  public NMethod asNMethodOrNull() {
+    if (isNMethod()) return (NMethod)this;
+    return null;
+  }
+
   // Boundaries
   public Address headerBegin() {
     return addr;
@@ -195,7 +200,7 @@
   }
 
   // Returns true, if the next frame is responsible for GC'ing oops passed as arguments
-  public boolean callerMustGCArguments(JavaThread thread) { return false; }
+  public boolean callerMustGCArguments() { return false; }
 
   public String getName() {
     return CStringUtilities.getString(nameField.getValue(addr));
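The new asNMethodOrNull() accessor and the thread-free callerMustGCArguments() tighten the CodeBlob API: call sites no longer need an isNMethod() check followed by a cast, and (as the RuntimeStub change further down shows) GC-argument information now comes from the blob itself rather than from a JavaThread. A minimal sketch of the intended call-site idiom, assuming an attached SA session (the helper class is hypothetical):

    import sun.jvm.hotspot.code.CodeBlob;
    import sun.jvm.hotspot.code.NMethod;
    import sun.jvm.hotspot.debugger.Address;
    import sun.jvm.hotspot.runtime.VM;

    class NMethodLookup {  // hypothetical helper
      // Resolve a PC to its nmethod, or null for stubs and other blobs.
      static NMethod nmethodAt(Address pc) {
        CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc);
        return (cb == null) ? null : cb.asNMethodOrNull();
      }
    }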
--- a/agent/src/share/classes/sun/jvm/hotspot/code/CodeCache.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/CodeCache.java	Fri Sep 09 16:17:16 2011 -0700
@@ -59,6 +59,7 @@
     virtualConstructor.addMapping("RuntimeStub", RuntimeStub.class);
     virtualConstructor.addMapping("RicochetBlob", RicochetBlob.class);
     virtualConstructor.addMapping("AdapterBlob", AdapterBlob.class);
+    virtualConstructor.addMapping("MethodHandlesAdapterBlob", MethodHandlesAdapterBlob.class);
     virtualConstructor.addMapping("SafepointBlob", SafepointBlob.class);
     virtualConstructor.addMapping("DeoptimizationBlob", DeoptimizationBlob.class);
     if (VM.getVM().isServerCompiler()) {
@@ -126,6 +127,10 @@
       Assert.that(result.blobContains(start) || result.blobContains(start.addOffsetTo(8)),
                                                                     "found wrong CodeBlob");
     }
+    if (result.isRicochetBlob()) {
+      // This should probably be done for other SingletonBlobs
+      return VM.getVM().ricochetBlob();
+    }
     return result;
   }
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/MethodHandlesAdapterBlob.java	Fri Sep 09 16:17:16 2011 -0700
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.code;
+
+import java.util.*;
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.runtime.*;
+import sun.jvm.hotspot.types.*;
+
+public class MethodHandlesAdapterBlob extends AdapterBlob {
+  static {
+    VM.registerVMInitializedObserver(new Observer() {
+        public void update(Observable o, Object data) {
+          initialize(VM.getVM().getTypeDataBase());
+        }
+      });
+  }
+
+  private static void initialize(TypeDataBase db) {
+    Type type = db.lookupType("MethodHandlesAdapterBlob");
+
+    // FIXME: add any needed fields
+  }
+
+  public MethodHandlesAdapterBlob(Address addr) {
+    super(addr);
+  }
+
+  public boolean isMethodHandlesAdapterBlob() {
+    return true;
+  }
+
+  public String getName() {
+    return "MethodHandlesAdapterBlob: " + super.getName();
+  }
+}
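The new wrapper follows the usual SA mirror pattern: a static observer defers type-database lookups until the target VM is initialized, and the Address-taking constructor wraps an object already located in the target address space. A condensed template of that pattern (type and field names hypothetical):

    public class MyBlob extends CodeBlob {               // hypothetical mirror
      private static CIntegerField someField;            // hypothetical field
      static {
        VM.registerVMInitializedObserver(new Observer() {
            public void update(Observable o, Object data) {
              Type t = VM.getVM().getTypeDataBase().lookupType("MyBlob");
              someField = t.getCIntegerField("_some_field");
            }
          });
      }
      public MyBlob(Address addr) { super(addr); }
    }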
--- a/agent/src/share/classes/sun/jvm/hotspot/code/NMethod.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/NMethod.java	Fri Sep 09 16:17:16 2011 -0700
@@ -46,6 +46,7 @@
   /** Offsets for different nmethod parts */
   private static CIntegerField exceptionOffsetField;
   private static CIntegerField deoptOffsetField;
+  private static CIntegerField deoptMhOffsetField;
   private static CIntegerField origPCOffsetField;
   private static CIntegerField stubOffsetField;
   private static CIntegerField oopsOffsetField;
@@ -95,6 +96,7 @@
 
     exceptionOffsetField        = type.getCIntegerField("_exception_offset");
     deoptOffsetField            = type.getCIntegerField("_deoptimize_offset");
+    deoptMhOffsetField          = type.getCIntegerField("_deoptimize_mh_offset");
     origPCOffsetField           = type.getCIntegerField("_orig_pc_offset");
     stubOffsetField             = type.getCIntegerField("_stub_offset");
     oopsOffsetField             = type.getCIntegerField("_oops_offset");
@@ -136,10 +138,11 @@
   /** Boundaries for different parts */
   public Address constantsBegin()       { return contentBegin();                                     }
   public Address constantsEnd()         { return getEntryPoint();                                    }
-  public Address instsBegin()           { return codeBegin();                                       }
+  public Address instsBegin()           { return codeBegin();                                        }
   public Address instsEnd()             { return headerBegin().addOffsetTo(getStubOffset());         }
   public Address exceptionBegin()       { return headerBegin().addOffsetTo(getExceptionOffset());    }
-  public Address deoptBegin()           { return headerBegin().addOffsetTo(getDeoptOffset());        }
+  public Address deoptHandlerBegin()    { return headerBegin().addOffsetTo(getDeoptOffset());        }
+  public Address deoptMhHandlerBegin()  { return headerBegin().addOffsetTo(getDeoptMhOffset());      }
   public Address stubBegin()            { return headerBegin().addOffsetTo(getStubOffset());         }
   public Address stubEnd()              { return headerBegin().addOffsetTo(getOopsOffset());         }
   public Address oopsBegin()            { return headerBegin().addOffsetTo(getOopsOffset());         }
@@ -250,6 +253,22 @@
     return (int) scavengeRootStateField.getValue(addr);
   }
 
+  // MethodHandle
+  public boolean isMethodHandleReturn(Address returnPc) {
+    // Bit fields are hard to read from Java, and the flag is only there for
+    // performance, so just go directly to the PCDesc.
+    // if (!hasMethodHandleInvokes())  return false;
+    PCDesc pd = getPCDescAt(returnPc);
+    if (pd == null)
+      return false;
+    return pd.isMethodHandleInvoke();
+  }
+
+  // Deopt
+  // Return true if the PC is one we would expect if the frame is being deopted.
+  public boolean isDeoptPc      (Address pc) { return isDeoptEntry(pc) || isDeoptMhEntry(pc); }
+  public boolean isDeoptEntry   (Address pc) { return pc == deoptHandlerBegin(); }
+  public boolean isDeoptMhEntry (Address pc) { return pc == deoptMhHandlerBegin(); }
 
   /** Tells whether frames described by this nmethod can be
       deoptimized. Note: native wrappers cannot be deoptimized. */
@@ -388,6 +407,7 @@
   private int getEntryBCI()           { return (int) entryBCIField          .getValue(addr); }
   private int getExceptionOffset()    { return (int) exceptionOffsetField   .getValue(addr); }
   private int getDeoptOffset()        { return (int) deoptOffsetField       .getValue(addr); }
+  private int getDeoptMhOffset()      { return (int) deoptMhOffsetField     .getValue(addr); }
   private int getStubOffset()         { return (int) stubOffsetField        .getValue(addr); }
   private int getOopsOffset()         { return (int) oopsOffsetField        .getValue(addr); }
   private int getScopesDataOffset()   { return (int) scopesDataOffsetField  .getValue(addr); }
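Exposing the method-handle deopt handler lets a debugger classify a PC against either deopt entry and detect method-handle returns without decoding the nmethod's cached flags word. A short usage sketch (cb, pc, and retPc assumed in scope):

    NMethod nm = cb.asNMethodOrNull();
    if (nm != null) {
      boolean atDeopt  = nm.isDeoptPc(pc);                 // either handler entry
      boolean mhReturn = nm.isMethodHandleReturn(retPc);   // consults the PCDesc
    }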
--- a/agent/src/share/classes/sun/jvm/hotspot/code/PCDesc.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/PCDesc.java	Fri Sep 09 16:17:16 2011 -0700
@@ -38,6 +38,9 @@
   private static CIntegerField scopeDecodeOffsetField;
   private static CIntegerField objDecodeOffsetField;
   private static CIntegerField pcFlagsField;
+  private static int reexecuteMask;
+  private static int isMethodHandleInvokeMask;
+  private static int returnOopMask;
 
   static {
     VM.registerVMInitializedObserver(new Observer() {
@@ -54,6 +57,10 @@
     scopeDecodeOffsetField = type.getCIntegerField("_scope_decode_offset");
     objDecodeOffsetField   = type.getCIntegerField("_obj_decode_offset");
     pcFlagsField           = type.getCIntegerField("_flags");
+
+    reexecuteMask            = db.lookupIntConstant("PcDesc::PCDESC_reexecute");
+    isMethodHandleInvokeMask = db.lookupIntConstant("PcDesc::PCDESC_is_method_handle_invoke");
+    returnOopMask            = db.lookupIntConstant("PcDesc::PCDESC_return_oop");
   }
 
   public PCDesc(Address addr) {
@@ -81,7 +88,12 @@
 
   public boolean getReexecute() {
     int flags = (int)pcFlagsField.getValue(addr);
-    return ((flags & 0x1)== 1); //first is the reexecute bit
+    return (flags & reexecuteMask) != 0;
+  }
+
+  public boolean isMethodHandleInvoke() {
+    int flags = (int)pcFlagsField.getValue(addr);
+    return (flags & isMethodHandleInvokeMask) != 0;
   }
 
   public void print(NMethod code) {
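The flag tests now use masks looked up from the VM's exported PcDesc constants rather than the old hard-coded 0x1, so the SA stays correct if the bit layout ever changes; note that returnOopMask is fetched here but not yet exposed through an accessor. The decode pattern, for reference:

    int flags = (int) pcFlagsField.getValue(addr);
    boolean reexecute = (flags & reexecuteMask) != 0;
    boolean mhInvoke  = (flags & isMethodHandleInvokeMask) != 0;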
--- a/agent/src/share/classes/sun/jvm/hotspot/code/RicochetBlob.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/RicochetBlob.java	Fri Sep 09 16:17:16 2011 -0700
@@ -41,11 +41,15 @@
   }
 
   private static void initialize(TypeDataBase db) {
-    // Type type = db.lookupType("RicochetBlob");
+    Type type = db.lookupType("RicochetBlob");
 
-    // FIXME: add any needed fields
+    bounceOffsetField                = type.getCIntegerField("_bounce_offset");
+    exceptionOffsetField             = type.getCIntegerField("_exception_offset");
   }
 
+  private static CIntegerField bounceOffsetField;
+  private static CIntegerField exceptionOffsetField;
+
   public RicochetBlob(Address addr) {
     super(addr);
   }
@@ -53,4 +57,14 @@
   public boolean isRicochetBlob() {
     return true;
   }
+
+  public Address bounceAddr() {
+    return codeBegin().addOffsetTo(bounceOffsetField.getValue(addr));
+  }
+
+  public boolean returnsToBounceAddr(Address pc) {
+    Address bouncePc = bounceAddr();
+    return (pc.equals(bouncePc) || pc.addOffsetTo(Frame.pcReturnOffset()).equals(bouncePc));
+  }
+
 }
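bounceAddr() is the blob's code start plus the exported _bounce_offset, and returnsToBounceAddr() accepts either the raw PC or the PC adjusted by frame::pc_return_offset, covering platforms where the saved return address points before the actual return site. Typical use, mirroring Frame.isRicochetFrame() below (frame assumed in scope):

    RicochetBlob rcb = VM.getVM().ricochetBlob();
    if (rcb != null && rcb.returnsToBounceAddr(frame.getPC())) {
      // treat this frame as a method-handle ricochet frame
    }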
--- a/agent/src/share/classes/sun/jvm/hotspot/code/RuntimeStub.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/RuntimeStub.java	Fri Sep 09 16:17:16 2011 -0700
@@ -30,6 +30,8 @@
 import sun.jvm.hotspot.types.*;
 
 public class RuntimeStub extends CodeBlob {
+  private static CIntegerField callerMustGCArgumentsField;
+
   static {
     VM.registerVMInitializedObserver(new Observer() {
         public void update(Observable o, Object data) {
@@ -40,6 +42,7 @@
 
   private static void initialize(TypeDataBase db) {
     Type type = db.lookupType("RuntimeStub");
+    callerMustGCArgumentsField                = type.getCIntegerField("_caller_must_gc_arguments");
 
     // FIXME: add any needed fields
   }
@@ -52,6 +55,11 @@
     return true;
   }
 
+  public boolean callerMustGCArguments() {
+    return callerMustGCArgumentsField.getValue(addr) != 0;
+  }
+
+
   public String getName() {
     return "RuntimeStub: " + super.getName();
   }
--- a/agent/src/share/classes/sun/jvm/hotspot/compiler/OopMapSet.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/compiler/OopMapSet.java	Fri Sep 09 16:17:16 2011 -0700
@@ -246,7 +246,7 @@
     }
 
     // Check if caller must update oop argument
-    regMap.setIncludeArgumentOops(cb.callerMustGCArguments(regMap.getThread()));
+    regMap.setIncludeArgumentOops(cb.callerMustGCArguments());
 
     int nofCallee = 0;
     Address[] locs = new Address[2 * REG_COUNT + 1];
--- a/agent/src/share/classes/sun/jvm/hotspot/interpreter/BytecodeLoadConstant.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/interpreter/BytecodeLoadConstant.java	Fri Sep 09 16:17:16 2011 -0700
@@ -90,7 +90,7 @@
            jcode == Bytecodes._ldc2_w;
     if (! codeOk) return false;
 
-    ConstantTag ctag = method().getConstants().getTagAt(rawIndex());
+    ConstantTag ctag = method().getConstants().getTagAt(poolIndex());
     if (jcode == Bytecodes._ldc2_w) {
        // has to be double or long
        return (ctag.isDouble() || ctag.isLong()) ? true: false;
--- a/agent/src/share/classes/sun/jvm/hotspot/jdi/ReferenceTypeImpl.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/jdi/ReferenceTypeImpl.java	Fri Sep 09 16:17:16 2011 -0700
@@ -28,11 +28,13 @@
 
 import com.sun.jdi.*;
 
+import sun.jvm.hotspot.memory.SystemDictionary;
 import sun.jvm.hotspot.oops.Instance;
 import sun.jvm.hotspot.oops.InstanceKlass;
 import sun.jvm.hotspot.oops.ArrayKlass;
 import sun.jvm.hotspot.oops.JVMDIClassStatus;
 import sun.jvm.hotspot.oops.Klass;
+import sun.jvm.hotspot.oops.ObjArray;
 import sun.jvm.hotspot.oops.Oop;
 import sun.jvm.hotspot.oops.Symbol;
 import sun.jvm.hotspot.oops.DefaultHeapVisitor;
@@ -53,6 +55,7 @@
     private SoftReference methodsCache;
     private SoftReference allMethodsCache;
     private SoftReference nestedTypesCache;
+    private SoftReference methodInvokesCache;
 
     /* to mark when no info available */
     static final SDE NO_SDE_INFO_MARK = new SDE();
@@ -82,6 +85,27 @@
                 return method;
             }
         }
+        if (ref.getMethodHolder().equals(SystemDictionary.getMethodHandleKlass())) {
+          // invoke methods are generated as needed, so make mirrors as needed
+          List mis = null;
+          if (methodInvokesCache == null) {
+            mis = new ArrayList();
+            methodInvokesCache = new SoftReference(mis);
+          } else {
+            mis = (List)methodInvokesCache.get();
+          }
+          it = mis.iterator();
+          while (it.hasNext()) {
+            MethodImpl method = (MethodImpl)it.next();
+            if (ref.equals(method.ref())) {
+              return method;
+            }
+          }
+
+          MethodImpl method = MethodImpl.createMethodImpl(vm, this, ref);
+          mis.add(method);
+          return method;
+        }
         throw new IllegalArgumentException("Invalid method id: " + ref);
     }
 
--- a/agent/src/share/classes/sun/jvm/hotspot/jdi/StackFrameImpl.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/jdi/StackFrameImpl.java	Fri Sep 09 16:17:16 2011 -0700
@@ -123,6 +123,9 @@
                 Assert.that(values.size() > 0, "this is missing");
             }
             // 'this' at index 0.
+            if (values.get(0).getType() == BasicType.getTConflict()) {
+              return null;
+            }
             OopHandle handle = values.oopHandleAt(0);
             ObjectHeap heap = vm.saObjectHeap();
             thisObject = vm.objectMirror(heap.newOop(handle));
@@ -210,6 +213,8 @@
         validateStackFrame();
         StackValueCollection values = saFrame.getLocals();
         MethodImpl mmm = (MethodImpl)location.method();
+        if (mmm.isNative())
+            return null;
         List argSigs = mmm.argumentSignatures();
         int count = argSigs.size();
         List res = new ArrayList(0);
@@ -231,34 +236,67 @@
         ValueImpl valueImpl = null;
         OopHandle handle = null;
         ObjectHeap heap = vm.saObjectHeap();
-        if (variableType == BasicType.T_BOOLEAN) {
+        if (values.get(ss).getType() == BasicType.getTConflict()) {
+          // Dead locals, so just represent them as a zero of the appropriate type
+          if (variableType == BasicType.T_BOOLEAN) {
+            valueImpl = (BooleanValueImpl) vm.mirrorOf(false);
+          } else if (variableType == BasicType.T_CHAR) {
+            valueImpl = (CharValueImpl) vm.mirrorOf((char)0);
+          } else if (variableType == BasicType.T_FLOAT) {
+            valueImpl = (FloatValueImpl) vm.mirrorOf((float)0);
+          } else if (variableType == BasicType.T_DOUBLE) {
+            valueImpl = (DoubleValueImpl) vm.mirrorOf((double)0);
+          } else if (variableType == BasicType.T_BYTE) {
+            valueImpl = (ByteValueImpl) vm.mirrorOf((byte)0);
+          } else if (variableType == BasicType.T_SHORT) {
+            valueImpl = (ShortValueImpl) vm.mirrorOf((short)0);
+          } else if (variableType == BasicType.T_INT) {
+            valueImpl = (IntegerValueImpl) vm.mirrorOf((int)0);
+          } else if (variableType == BasicType.T_LONG) {
+            valueImpl = (LongValueImpl) vm.mirrorOf((long)0);
+          } else if (variableType == BasicType.T_OBJECT) {
+            // we may have an [Ljava/lang/Object; i.e., an Object[] whose
+            // elements may themselves be arrays, because every array is an Object.
+            handle = null;
+            valueImpl = (ObjectReferenceImpl) vm.objectMirror(heap.newOop(handle));
+          } else if (variableType == BasicType.T_ARRAY) {
+            handle = null;
+            valueImpl = vm.arrayMirror((Array)heap.newOop(handle));
+          } else if (variableType == BasicType.T_VOID) {
+            valueImpl = new VoidValueImpl(vm);
+          } else {
+            throw new RuntimeException("Should not read here");
+          }
+        } else {
+          if (variableType == BasicType.T_BOOLEAN) {
             valueImpl = (BooleanValueImpl) vm.mirrorOf(values.booleanAt(ss));
-        } else if (variableType == BasicType.T_CHAR) {
+          } else if (variableType == BasicType.T_CHAR) {
             valueImpl = (CharValueImpl) vm.mirrorOf(values.charAt(ss));
-        } else if (variableType == BasicType.T_FLOAT) {
+          } else if (variableType == BasicType.T_FLOAT) {
             valueImpl = (FloatValueImpl) vm.mirrorOf(values.floatAt(ss));
-        } else if (variableType == BasicType.T_DOUBLE) {
+          } else if (variableType == BasicType.T_DOUBLE) {
             valueImpl = (DoubleValueImpl) vm.mirrorOf(values.doubleAt(ss));
-        } else if (variableType == BasicType.T_BYTE) {
+          } else if (variableType == BasicType.T_BYTE) {
             valueImpl = (ByteValueImpl) vm.mirrorOf(values.byteAt(ss));
-        } else if (variableType == BasicType.T_SHORT) {
+          } else if (variableType == BasicType.T_SHORT) {
             valueImpl = (ShortValueImpl) vm.mirrorOf(values.shortAt(ss));
-        } else if (variableType == BasicType.T_INT) {
+          } else if (variableType == BasicType.T_INT) {
             valueImpl = (IntegerValueImpl) vm.mirrorOf(values.intAt(ss));
-        } else if (variableType == BasicType.T_LONG) {
+          } else if (variableType == BasicType.T_LONG) {
             valueImpl = (LongValueImpl) vm.mirrorOf(values.longAt(ss));
-        } else if (variableType == BasicType.T_OBJECT) {
+          } else if (variableType == BasicType.T_OBJECT) {
             // we may have an [Ljava/lang/Object; - i.e., Object[] with the
             // elements themselves may be arrays because every array is an Object.
             handle = values.oopHandleAt(ss);
             valueImpl = (ObjectReferenceImpl) vm.objectMirror(heap.newOop(handle));
-        } else if (variableType == BasicType.T_ARRAY) {
+          } else if (variableType == BasicType.T_ARRAY) {
             handle = values.oopHandleAt(ss);
             valueImpl = vm.arrayMirror((Array)heap.newOop(handle));
-        } else if (variableType == BasicType.T_VOID) {
+          } else if (variableType == BasicType.T_VOID) {
             valueImpl = new VoidValueImpl(vm);
-        } else {
+          } else {
             throw new RuntimeException("Should not read here");
+          }
         }
 
         return valueImpl;
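Locals that are dead at the current PC come back from the SA as BasicType.T_CONFLICT; rather than fail, the JDI layer now substitutes a zero (or null) mirror of the declared type. A caller can still detect the case up front, as in this sketch (saFrame and slot assumed in scope):

    StackValueCollection values = saFrame.getLocals();
    if (values.get(slot).getType() == BasicType.getTConflict()) {
      // slot is dead here; any mirrored value is a synthesized zero/null
    }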
--- a/agent/src/share/classes/sun/jvm/hotspot/memory/SystemDictionary.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/memory/SystemDictionary.java	Fri Sep 09 16:17:16 2011 -0700
@@ -44,6 +44,7 @@
   private static sun.jvm.hotspot.types.OopField systemKlassField;
   private static sun.jvm.hotspot.types.OopField threadKlassField;
   private static sun.jvm.hotspot.types.OopField threadGroupKlassField;
+  private static sun.jvm.hotspot.types.OopField methodHandleKlassField;
 
   static {
     VM.registerVMInitializedObserver(new Observer() {
@@ -69,6 +70,7 @@
     systemKlassField = type.getOopField(WK_KLASS("System_klass"));
     threadKlassField = type.getOopField(WK_KLASS("Thread_klass"));
     threadGroupKlassField = type.getOopField(WK_KLASS("ThreadGroup_klass"));
+    methodHandleKlassField = type.getOopField(WK_KLASS("MethodHandle_klass"));
   }
 
   // This WK functions must follow the definitions in systemDictionary.hpp:
@@ -127,6 +129,10 @@
     return (InstanceKlass) newOop(systemKlassField.getValue());
   }
 
+  public static InstanceKlass getMethodHandleKlass() {
+    return (InstanceKlass) newOop(methodHandleKlassField.getValue());
+  }
+
   public InstanceKlass getAbstractOwnableSynchronizerKlass() {
     return (InstanceKlass) find("java/util/concurrent/locks/AbstractOwnableSynchronizer",
                                 null, null);
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/CompiledVFrame.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/CompiledVFrame.java	Fri Sep 09 16:17:16 2011 -0700
@@ -93,6 +93,8 @@
   }
 
   public StackValueCollection getLocals() {
+    if (getScope() == null)
+      return new StackValueCollection();
     List scvList = getScope().getLocals();
     if (scvList == null)
       return new StackValueCollection();
@@ -108,6 +110,8 @@
   }
 
   public StackValueCollection getExpressions() {
+    if (getScope() == null)
+      return new StackValueCollection();
     List scvList = getScope().getExpressions();
     if (scvList == null)
       return new StackValueCollection();
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/Frame.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/Frame.java	Fri Sep 09 16:17:16 2011 -0700
@@ -33,6 +33,7 @@
 import sun.jvm.hotspot.debugger.*;
 import sun.jvm.hotspot.interpreter.*;
 import sun.jvm.hotspot.oops.*;
+import sun.jvm.hotspot.runtime.sparc.SPARCFrame;
 import sun.jvm.hotspot.types.*;
 import sun.jvm.hotspot.utilities.*;
 
@@ -74,11 +75,19 @@
   /** Size of constMethodOopDesc for computing BCI from BCP (FIXME: hack) */
   private static long    constMethodOopDescSize;
 
+  private static int pcReturnOffset;
+
+  public static int pcReturnOffset() {
+    return pcReturnOffset;
+  }
+
   private static synchronized void initialize(TypeDataBase db) {
     Type constMethodOopType = db.lookupType("constMethodOopDesc");
     // FIXME: not sure whether alignment here is correct or how to
     // force it (round up to address size?)
     constMethodOopDescSize = constMethodOopType.getSize();
+
+    pcReturnOffset = db.lookupIntConstant("frame::pc_return_offset").intValue();
   }
 
   protected int bcpToBci(Address bcp, ConstMethod cm) {
@@ -106,6 +115,10 @@
   public void    setPC(Address newpc) { pc = newpc; }
   public boolean isDeoptimized()      { return deoptimized; }
 
+  public CodeBlob cb() {
+    return VM.getVM().getCodeCache().findBlob(getPC());
+  }
+
   public abstract Address getSP();
   public abstract Address getID();
   public abstract Address getFP();
@@ -134,6 +147,12 @@
     }
   }
 
+  public boolean isRicochetFrame() {
+    CodeBlob cb = VM.getVM().getCodeCache().findBlob(getPC());
+    RicochetBlob rcb = VM.getVM().ricochetBlob();
+    return (cb == rcb && rcb != null && rcb.returnsToBounceAddr(getPC()));
+  }
+
   public boolean isCompiledFrame() {
     if (Assert.ASSERTS_ENABLED) {
       Assert.that(!VM.getVM().isCore(), "noncore builds only");
@@ -142,7 +161,7 @@
     return (cb != null && cb.isJavaMethod());
   }
 
-  public boolean isGlueFrame() {
+  public boolean isRuntimeFrame() {
     if (Assert.ASSERTS_ENABLED) {
       Assert.that(!VM.getVM().isCore(), "noncore builds only");
     }
@@ -197,7 +216,8 @@
   public Frame realSender(RegisterMap map) {
     if (!VM.getVM().isCore()) {
       Frame result = sender(map);
-      while (result.isGlueFrame()) {
+      while (result.isRuntimeFrame() ||
+             result.isRicochetFrame()) {
         result = result.sender(map);
       }
       return result;
@@ -611,6 +631,9 @@
     if (Assert.ASSERTS_ENABLED) {
       Assert.that(cb != null, "sanity check");
     }
+    if (cb == VM.getVM().ricochetBlob()) {
+      oopsRicochetDo(oopVisitor, regMap);
+    }
     if (cb.getOopMaps() != null) {
       OopMapSet.oopsDo(this, cb, regMap, oopVisitor, VM.getVM().isDebugging());
 
@@ -627,6 +650,10 @@
     //    }
   }
 
+  private void oopsRicochetDo      (AddressVisitor oopVisitor, RegisterMap regMap) {
+    // XXX Empty for now
+  }
+
   // FIXME: implement the above routines, plus add
   // oops_interpreted_arguments_do and oops_compiled_arguments_do
 }
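Frame now caches frame::pc_return_offset, can locate its own CodeBlob via cb(), and recognizes ricochet frames; realSender() skips both runtime and ricochet frames so callers land on a real Java frame. A walking sketch (thread and frame assumed in scope):

    RegisterMap map = thread.newRegisterMap(false);
    boolean mh = frame.isRicochetFrame();      // true inside the ricochet bounce
    Frame javaSender = frame.realSender(map);  // skips runtime and ricochet frames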
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/JavaVFrame.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/JavaVFrame.java	Fri Sep 09 16:17:16 2011 -0700
@@ -128,14 +128,14 @@
       }
 
       // dynamic part - we just compare the frame pointer
-      if (! getFrame().getFP().equals(other.getFrame().getFP())) {
+      if (! getFrame().equals(other.getFrame())) {
           return false;
       }
       return true;
   }
 
   public int hashCode() {
-      return getMethod().hashCode() ^ getBCI() ^ getFrame().getFP().hashCode();
+      return getMethod().hashCode() ^ getBCI() ^ getFrame().hashCode();
   }
 
   /** Structural compare */
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/StackValue.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/StackValue.java	Fri Sep 09 16:17:16 2011 -0700
@@ -100,7 +100,7 @@
 
   public int hashCode() {
     if (type == BasicType.getTObject()) {
-      return handleValue.hashCode();
+      return handleValue != null ? handleValue.hashCode() : 5;
     } else {
       // Returns 0 for conflict type
       return (int) integerValue;
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/VFrame.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/VFrame.java	Fri Sep 09 16:17:16 2011 -0700
@@ -77,7 +77,7 @@
           return new CompiledVFrame(f, regMap, thread, scope, mayBeImprecise);
         }
 
-        if (f.isGlueFrame()) {
+        if (f.isRuntimeFrame()) {
           // This is a conversion frame. Skip this frame and try again.
           RegisterMap tempMap = regMap.copy();
           Frame s = f.sender(tempMap);
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Fri Sep 09 16:17:16 2011 -0700
@@ -30,6 +30,7 @@
 import java.util.regex.*;
 import sun.jvm.hotspot.code.*;
 import sun.jvm.hotspot.c1.*;
+import sun.jvm.hotspot.code.*;
 import sun.jvm.hotspot.debugger.*;
 import sun.jvm.hotspot.interpreter.*;
 import sun.jvm.hotspot.memory.*;
@@ -85,6 +86,9 @@
   private Interpreter  interpreter;
   private StubRoutines stubRoutines;
   private Bytes        bytes;
+
+  private RicochetBlob ricochetBlob;
+
   /** Flags indicating whether we are attached to a core, C1, or C2 build */
   private boolean      usingClientCompiler;
   private boolean      usingServerCompiler;
@@ -618,6 +622,18 @@
     return stubRoutines;
   }
 
+  public RicochetBlob ricochetBlob() {
+    if (ricochetBlob == null) {
+      Type ricochetType  = db.lookupType("SharedRuntime");
+      AddressField ricochetBlobAddress = ricochetType.getAddressField("_ricochet_blob");
+      Address addr = ricochetBlobAddress.getValue();
+      if (addr != null) {
+        ricochetBlob = new RicochetBlob(addr);
+      }
+    }
+    return ricochetBlob;
+  }
+
   public VMRegImpl getVMRegImplInfo() {
     if (vmregImpl == null) {
       vmregImpl = new VMRegImpl();
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/amd64/AMD64CurrentFrameGuess.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/amd64/AMD64CurrentFrameGuess.java	Fri Sep 09 16:17:16 2011 -0700
@@ -29,6 +29,7 @@
 import sun.jvm.hotspot.code.*;
 import sun.jvm.hotspot.interpreter.*;
 import sun.jvm.hotspot.runtime.*;
+import sun.jvm.hotspot.runtime.x86.*;
 
 /** <P> Should be able to be used on all amd64 platforms we support
     (Linux/amd64) to implement JavaThread's
@@ -123,7 +124,7 @@
              offset += vm.getAddressSize()) {
           try {
             Address curSP = sp.addOffsetTo(offset);
-            Frame frame = new AMD64Frame(curSP, null, pc);
+            Frame frame = new X86Frame(curSP, null, pc);
             RegisterMap map = thread.newRegisterMap(false);
             while (frame != null) {
               if (frame.isEntryFrame() && frame.entryFrameIsFirst()) {
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/amd64/AMD64Frame.java	Thu Sep 08 16:59:27 2011 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,528 +0,0 @@
-/*
- * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-package sun.jvm.hotspot.runtime.amd64;
-
-import java.util.*;
-import sun.jvm.hotspot.code.*;
-import sun.jvm.hotspot.compiler.*;
-import sun.jvm.hotspot.debugger.*;
-import sun.jvm.hotspot.oops.*;
-import sun.jvm.hotspot.runtime.*;
-import sun.jvm.hotspot.types.*;
-import sun.jvm.hotspot.utilities.*;
-
-/** Specialization of and implementation of abstract methods of the
-    Frame class for the amd64 CPU. */
-
-public class AMD64Frame extends Frame {
-  private static final boolean DEBUG;
-  static {
-    DEBUG = System.getProperty("sun.jvm.hotspot.runtime.amd64.AMD64Frame.DEBUG") != null;
-  }
-
-  // refer to frame_amd64.hpp
-  private static final int PC_RETURN_OFFSET           =  0;
-  // All frames
-  private static final int LINK_OFFSET                =  0;
-  private static final int RETURN_ADDR_OFFSET         =  1;
-  private static final int SENDER_SP_OFFSET           =  2;
-
-  // Interpreter frames
-  private static final int INTERPRETER_FRAME_MIRROR_OFFSET    =  2; // for native calls only
-  private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1;
-  private static final int INTERPRETER_FRAME_LAST_SP_OFFSET   = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1;
-  private static final int INTERPRETER_FRAME_METHOD_OFFSET    = INTERPRETER_FRAME_LAST_SP_OFFSET - 1;
-  private static       int INTERPRETER_FRAME_MDX_OFFSET;         // Non-core builds only
-  private static       int INTERPRETER_FRAME_CACHE_OFFSET;
-  private static       int INTERPRETER_FRAME_LOCALS_OFFSET;
-  private static       int INTERPRETER_FRAME_BCX_OFFSET;
-  private static       int INTERPRETER_FRAME_INITIAL_SP_OFFSET;
-  private static       int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET;
-  private static       int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET;
-
-  // Entry frames
-  private static final int ENTRY_FRAME_CALL_WRAPPER_OFFSET   =  -6;
-
-  // Native frames
-  private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET =  2;
-
-  static {
-    VM.registerVMInitializedObserver(new Observer() {
-        public void update(Observable o, Object data) {
-          initialize(VM.getVM().getTypeDataBase());
-        }
-      });
-  }
-
-  private static synchronized void initialize(TypeDataBase db) {
-    if (VM.getVM().isCore()) {
-      INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1;
-    } else {
-      INTERPRETER_FRAME_MDX_OFFSET   = INTERPRETER_FRAME_METHOD_OFFSET - 1;
-      INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1;
-    }
-    INTERPRETER_FRAME_LOCALS_OFFSET               = INTERPRETER_FRAME_CACHE_OFFSET - 1;
-    INTERPRETER_FRAME_BCX_OFFSET                  = INTERPRETER_FRAME_LOCALS_OFFSET - 1;
-    INTERPRETER_FRAME_INITIAL_SP_OFFSET           = INTERPRETER_FRAME_BCX_OFFSET - 1;
-    INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET    = INTERPRETER_FRAME_INITIAL_SP_OFFSET;
-    INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET;
-  }
-
-  // an additional field beyond sp and pc:
-  Address raw_fp; // frame pointer
-  private Address raw_unextendedSP;
-
-  private AMD64Frame() {
-  }
-
-  private void adjustForDeopt() {
-    if ( pc != null) {
-      // Look for a deopt pc and if it is deopted convert to original pc
-      CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc);
-      if (cb != null && cb.isJavaMethod()) {
-        NMethod nm = (NMethod) cb;
-        if (pc.equals(nm.deoptBegin())) {
-          // adjust pc if frame is deoptimized.
-          if (Assert.ASSERTS_ENABLED) {
-            Assert.that(this.getUnextendedSP() != null, "null SP in Java frame");
-          }
-          pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset());
-          deoptimized = true;
-        }
-      }
-    }
-  }
-
-  public AMD64Frame(Address raw_sp, Address raw_fp, Address pc) {
-    this.raw_sp = raw_sp;
-    this.raw_unextendedSP = raw_sp;
-    this.raw_fp = raw_fp;
-    this.pc = pc;
-
-    // Frame must be fully constructed before this call
-    adjustForDeopt();
-
-    if (DEBUG) {
-      System.out.println("AMD64Frame(sp, fp, pc): " + this);
-      dumpStack();
-    }
-  }
-
-  public AMD64Frame(Address raw_sp, Address raw_fp) {
-    this.raw_sp = raw_sp;
-    this.raw_unextendedSP = raw_sp;
-    this.raw_fp = raw_fp;
-    this.pc = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize());
-
-    // Frame must be fully constructed before this call
-    adjustForDeopt();
-
-    if (DEBUG) {
-      System.out.println("AMD64Frame(sp, fp): " + this);
-      dumpStack();
-    }
-  }
-
-  // This constructor should really take the unextended SP as an arg
-  // but then the constructor is ambiguous with constructor that takes
-  // a PC so take an int and convert it.
-  public AMD64Frame(Address raw_sp, Address raw_fp, long extension) {
-    this.raw_sp = raw_sp;
-    if ( raw_sp == null) {
-      this.raw_unextendedSP = null;
-    } else {
-      this.raw_unextendedSP = raw_sp.addOffsetTo(extension);
-    }
-    this.raw_fp = raw_fp;
-    this.pc = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize());
-
-    // Frame must be fully constructed before this call
-    adjustForDeopt();
-
-    if (DEBUG) {
-      System.out.println("AMD64Frame(sp, fp, extension): " + this);
-      dumpStack();
-    }
-
-  }
-
-  public Object clone() {
-    AMD64Frame frame = new AMD64Frame();
-    frame.raw_sp = raw_sp;
-    frame.raw_unextendedSP = raw_unextendedSP;
-    frame.raw_fp = raw_fp;
-    frame.pc = pc;
-    frame.deoptimized = deoptimized;
-    return frame;
-  }
-
-  public boolean equals(Object arg) {
-    if (arg == null) {
-      return false;
-    }
-
-    if (!(arg instanceof AMD64Frame)) {
-      return false;
-    }
-
-    AMD64Frame other = (AMD64Frame) arg;
-
-    return (AddressOps.equal(getSP(), other.getSP()) &&
-            AddressOps.equal(getFP(), other.getFP()) &&
-            AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) &&
-            AddressOps.equal(getPC(), other.getPC()));
-  }
-
-  public int hashCode() {
-    if (raw_sp == null) {
-      return 0;
-    }
-
-    return raw_sp.hashCode();
-  }
-
-  public String toString() {
-    return "sp: " + (getSP() == null? "null" : getSP().toString()) +
-         ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) +
-         ", fp: " + (getFP() == null? "null" : getFP().toString()) +
-         ", pc: " + (pc == null? "null" : pc.toString());
-  }
-
-  // accessors for the instance variables
-  public Address getFP() { return raw_fp; }
-  public Address getSP() { return raw_sp; }
-  public Address getID() { return raw_sp; }
-
-  // FIXME: not implemented yet (should be done for Solaris/AMD64)
-  public boolean isSignalHandlerFrameDbg() { return false; }
-  public int     getSignalNumberDbg()      { return 0;     }
-  public String  getSignalNameDbg()        { return null;  }
-
-  public boolean isInterpretedFrameValid() {
-    if (Assert.ASSERTS_ENABLED) {
-      Assert.that(isInterpretedFrame(), "Not an interpreted frame");
-    }
-
-    // These are reasonable sanity checks
-    if (getFP() == null || getFP().andWithMask(0x3) != null) {
-      return false;
-    }
-
-    if (getSP() == null || getSP().andWithMask(0x3) != null) {
-      return false;
-    }
-
-    if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) {
-      return false;
-    }
-
-    // These are hacks to keep us out of trouble.
-    // The problem with these is that they mask other problems
-    if (getFP().lessThanOrEqual(getSP())) {
-      // this attempts to deal with unsigned comparison above
-      return false;
-    }
-
-    if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) {
-      // stack frames shouldn't be large.
-      return false;
-    }
-
-    return true;
-  }
-
-  // FIXME: not applicable in current system
-  //  void    patch_pc(Thread* thread, address pc);
-
-  public Frame sender(RegisterMap regMap, CodeBlob cb) {
-    AMD64RegisterMap map = (AMD64RegisterMap) regMap;
-
-    if (Assert.ASSERTS_ENABLED) {
-      Assert.that(map != null, "map must be set");
-    }
-
-    // Default is we don't have to follow them. The sender_for_xxx will
-    // update it accordingly
-    map.setIncludeArgumentOops(false);
-
-    if (isEntryFrame())       return senderForEntryFrame(map);
-    if (isInterpretedFrame()) return senderForInterpreterFrame(map);
-
-
-    if (!VM.getVM().isCore()) {
-      if(cb == null) {
-        cb = VM.getVM().getCodeCache().findBlob(getPC());
-      } else {
-        if (Assert.ASSERTS_ENABLED) {
-          Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same");
-        }
-      }
-
-      if (cb != null) {
-         return senderForCompiledFrame(map, cb);
-      }
-    }
-
-    // Must be native-compiled frame, i.e. the marshaling code for native
-    // methods that exists in the core system.
-    return new AMD64Frame(getSenderSP(), getLink(), getSenderPC());
-  }
-
-  private Frame senderForEntryFrame(AMD64RegisterMap map) {
-    if (Assert.ASSERTS_ENABLED) {
-      Assert.that(map != null, "map must be set");
-    }
-    // Java frame called from C; skip all C frames and return top C
-    // frame of that chunk as the sender
-    AMD64JavaCallWrapper jcw = (AMD64JavaCallWrapper) getEntryFrameCallWrapper();
-    if (Assert.ASSERTS_ENABLED) {
-      Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero");
-      Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack");
-    }
-    AMD64Frame fr;
-    if (jcw.getLastJavaPC() != null) {
-      fr = new AMD64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC());
-    } else {
-      fr = new AMD64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP());
-    }
-    map.clear();
-    if (Assert.ASSERTS_ENABLED) {
-      Assert.that(map.getIncludeArgumentOops(), "should be set by clear");
-    }
-    return fr;
-  }
-
-  private Frame senderForInterpreterFrame(AMD64RegisterMap map) {
-    Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0);
-    Address sp = addressOfStackSlot(SENDER_SP_OFFSET);
-    // We do not need to update the callee-save register mapping because above
-    // us is either another interpreter frame or a converter-frame, but never
-    // directly a compiled frame.
-    // 11/24/04 SFG. This is no longer true after adapter were removed. However at the moment
-    // C2 no longer uses callee save register for java calls so there are no callee register
-    // to find.
-    return new AMD64Frame(sp, getLink(), unextendedSP.minus(sp));
-  }
-
-  private Frame senderForCompiledFrame(AMD64RegisterMap map, CodeBlob cb) {
-    //
-    // NOTE: some of this code is (unfortunately) duplicated in AMD64CurrentFrameGuess
-    //
-
-    if (Assert.ASSERTS_ENABLED) {
-      Assert.that(map != null, "map must be set");
-    }
-
-    // frame owned by optimizing compiler
-    Address        sender_sp = null;
-
-
-    if (VM.getVM().isClientCompiler()) {
-      sender_sp        = addressOfStackSlot(SENDER_SP_OFFSET);
-    } else {
-      if (Assert.ASSERTS_ENABLED) {
-        Assert.that(cb.getFrameSize() >= 0, "Compiled by Compiler1: do not use");
-      }
-      sender_sp = getUnextendedSP().addOffsetTo(cb.getFrameSize());
-    }
-
-    // On Intel the return_address is always the word on the stack
-    Address sender_pc = sender_sp.getAddressAt(-1 * VM.getVM().getAddressSize());
-
-    if (map.getUpdateMap() && cb.getOopMaps() != null) {
-      OopMapSet.updateRegisterMap(this, cb, map, true);
-    }
-
-    if (VM.getVM().isClientCompiler()) {
-      // Move this here for C1 and collecting oops in arguments (According to Rene)
-      map.setIncludeArgumentOops(cb.callerMustGCArguments(map.getThread()));
-    }
-
-    Address saved_fp = null;
-    if (VM.getVM().isClientCompiler()) {
-      saved_fp = getFP().getAddressAt(0);
-    } else if (VM.getVM().isServerCompiler() &&
-               (VM.getVM().getInterpreter().contains(sender_pc) ||
-               VM.getVM().getStubRoutines().returnsToCallStub(sender_pc))) {
-      // C2 prologue saves EBP in the usual place.
-      // however only use it if the sender had link information in it.
-      saved_fp = sender_sp.getAddressAt(-2 * VM.getVM().getAddressSize());
-    }
-
-    return new AMD64Frame(sender_sp, saved_fp, sender_pc);
-  }
-
-  protected boolean hasSenderPD() {
-    // FIXME
-    // Check for null ebp? Need to do some tests.
-    return true;
-  }
-
-  public long frameSize() {
-    return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize());
-  }
-
-  public Address getLink() {
-    return addressOfStackSlot(LINK_OFFSET).getAddressAt(0);
-  }
-
-  // FIXME: not implementable yet
-  //inline void      frame::set_link(intptr_t* addr)  { *(intptr_t **)addr_at(link_offset) = addr; }
-
-  public Address getUnextendedSP() { return raw_unextendedSP; }
-
-  // Return address:
-  public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); }
-  public Address getSenderPC()     { return getSenderPCAddr().getAddressAt(0);      }
-
-  // return address of param, zero origin index.
-  public Address getNativeParamAddr(int idx) {
-    return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx);
-  }
-
-  public Address getSenderSP()     { return addressOfStackSlot(SENDER_SP_OFFSET); }
-
-  public Address compiledArgumentToLocationPD(VMReg reg, RegisterMap regMap, int argSize) {
-    if (VM.getVM().isCore() || VM.getVM().isClientCompiler()) {
-      throw new RuntimeException("Should not reach here");
-    }
-
-    return oopMapRegToLocation(reg, regMap);
-  }
-
-  public Address addressOfInterpreterFrameLocals() {
-    return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET);
-  }
-
-  private Address addressOfInterpreterFrameBCX() {
-    return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET);
-  }
-
-  public int getInterpreterFrameBCI() {
-    // FIXME: this is not atomic with respect to GC and is unsuitable
-    // for use in a non-debugging, or reflective, system. Need to
-    // figure out how to express this.
-    Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0);
-    OopHandle methodHandle = addressOfInterpreterFrameMethod().getOopHandleAt(0);
-    Method method = (Method) VM.getVM().getObjectHeap().newOop(methodHandle);
-    return (int) bcpToBci(bcp, method);
-  }
-
-  public Address addressOfInterpreterFrameMDX() {
-    return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET);
-  }
-
-  // FIXME
-  //inline int frame::interpreter_frame_monitor_size() {
-  //  return BasicObjectLock::size();
-  //}
-
-  // expression stack
-  // (the max_stack arguments are used by the GC; see class FrameClosure)
-
-  public Address addressOfInterpreterFrameExpressionStack() {
-    Address monitorEnd = interpreterFrameMonitorEnd().address();
-    return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize());
-  }
-
-  public int getInterpreterFrameExpressionStackDirection() { return -1; }
-
-  // top of expression stack
-  public Address addressOfInterpreterFrameTOS() {
-    return getSP();
-  }
-
-  /** Expression stack from top down */
-  public Address addressOfInterpreterFrameTOSAt(int slot) {
-    return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize());
-  }
-
-  public Address getInterpreterFrameSenderSP() {
-    if (Assert.ASSERTS_ENABLED) {
-      Assert.that(isInterpretedFrame(), "interpreted frame expected");
-    }
-    return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0);
-  }
-
-  // Monitors
-  public BasicObjectLock interpreterFrameMonitorBegin() {
-    return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET));
-  }
-
-  public BasicObjectLock interpreterFrameMonitorEnd() {
-    Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0);
-    if (Assert.ASSERTS_ENABLED) {
-      // make sure the pointer points inside the frame
-      Assert.that(AddressOps.gt(getFP(), result), "result must <  than frame pointer");
-      Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer");
-    }
-    return new BasicObjectLock(result);
-  }
-
-  public int interpreterFrameMonitorSize() {
-    return BasicObjectLock.size();
-  }
-
-  // Method
-  public Address addressOfInterpreterFrameMethod() {
-    return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET);
-  }
-
-  // Constant pool cache
-  public Address addressOfInterpreterFrameCPCache() {
-    return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET);
-  }
-
-  // Entry frames
-  public JavaCallWrapper getEntryFrameCallWrapper() {
-    return new AMD64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0));
-  }
-
-  protected Address addressOfSavedOopResult() {
-    // offset is 2 for compiler2 and 3 for compiler1
-    return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 2 : 3) *
-                               VM.getVM().getAddressSize());
-  }
-
-  protected Address addressOfSavedReceiver() {
-    return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize());
-  }
-
-  private void dumpStack() {
-    if (getFP() != null) {
-      for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize());
-           AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize()));
-           addr = addr.addOffsetTo(VM.getVM().getAddressSize())) {
-        System.out.println(addr + ": " + addr.getAddressAt(0));
-      }
-    } else {
-      for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize());
-           AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize()));
-           addr = addr.addOffsetTo(VM.getVM().getAddressSize())) {
-        System.out.println(addr + ": " + addr.getAddressAt(0));
-      }
-    }
-  }
-}
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/amd64/AMD64RegisterMap.java	Thu Sep 08 16:59:27 2011 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2003, 2005, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-package sun.jvm.hotspot.runtime.amd64;
-
-import sun.jvm.hotspot.asm.amd64.*;
-import sun.jvm.hotspot.debugger.*;
-import sun.jvm.hotspot.runtime.*;
-
-public class AMD64RegisterMap extends RegisterMap {
-
-  /** This is the only public constructor */
-  public AMD64RegisterMap(JavaThread thread, boolean updateMap) {
-    super(thread, updateMap);
-  }
-
-  protected AMD64RegisterMap(RegisterMap map) {
-    super(map);
-  }
-
-  public Object clone() {
-    AMD64RegisterMap retval = new AMD64RegisterMap(this);
-    return retval;
-  }
-
-  // no PD state to clear or copy:
-  protected void clearPD() {}
-  protected void initializePD() {}
-  protected void initializeFromPD(RegisterMap map) {}
-  protected Address getLocationPD(VMReg reg) { return null; }
-}
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_amd64/LinuxAMD64JavaThreadPDAccess.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_amd64/LinuxAMD64JavaThreadPDAccess.java	Fri Sep 09 16:17:16 2011 -0700
@@ -30,6 +30,7 @@
 import sun.jvm.hotspot.debugger.amd64.*;
 import sun.jvm.hotspot.runtime.*;
 import sun.jvm.hotspot.runtime.amd64.*;
+import sun.jvm.hotspot.runtime.x86.*;
 import sun.jvm.hotspot.types.*;
 import sun.jvm.hotspot.utilities.*;
 
@@ -80,11 +81,11 @@
     if (fp == null) {
       return null; // no information
     }
-    return new AMD64Frame(thread.getLastJavaSP(), fp);
+    return new X86Frame(thread.getLastJavaSP(), fp);
   }
 
   public    RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) {
-    return new AMD64RegisterMap(thread, updateMap);
+    return new X86RegisterMap(thread, updateMap);
   }
 
   public    Frame getCurrentFrameGuess(JavaThread thread, Address addr) {
@@ -95,9 +96,9 @@
       return null;
     }
     if (guesser.getPC() == null) {
-      return new AMD64Frame(guesser.getSP(), guesser.getFP());
+      return new X86Frame(guesser.getSP(), guesser.getFP());
     } else {
-      return new AMD64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC());
+      return new X86Frame(guesser.getSP(), guesser.getFP(), guesser.getPC());
     }
   }
 
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/solaris_amd64/SolarisAMD64JavaThreadPDAccess.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/solaris_amd64/SolarisAMD64JavaThreadPDAccess.java	Fri Sep 09 16:17:16 2011 -0700
@@ -30,6 +30,7 @@
 import sun.jvm.hotspot.debugger.amd64.*;
 import sun.jvm.hotspot.runtime.*;
 import sun.jvm.hotspot.runtime.amd64.*;
+import sun.jvm.hotspot.runtime.x86.*;
 import sun.jvm.hotspot.types.*;
 import sun.jvm.hotspot.utilities.*;
 
@@ -84,14 +85,14 @@
         }
         Address pc =  thread.getLastJavaPC();
         if ( pc != null ) {
-            return new AMD64Frame(thread.getLastJavaSP(), fp, pc);
+            return new X86Frame(thread.getLastJavaSP(), fp, pc);
         } else {
-            return new AMD64Frame(thread.getLastJavaSP(), fp);
+            return new X86Frame(thread.getLastJavaSP(), fp);
         }
     }
 
     public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) {
-        return new AMD64RegisterMap(thread, updateMap);
+        return new X86RegisterMap(thread, updateMap);
     }
 
     public Frame getCurrentFrameGuess(JavaThread thread, Address addr) {
@@ -102,9 +103,9 @@
             return null;
         }
         if (guesser.getPC() == null) {
-            return new AMD64Frame(guesser.getSP(), guesser.getFP());
+            return new X86Frame(guesser.getSP(), guesser.getFP());
         } else {
-            return new AMD64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC());
+            return new X86Frame(guesser.getSP(), guesser.getFP(), guesser.getPC());
         }
     }
 
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/sparc/SPARCFrame.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/sparc/SPARCFrame.java	Fri Sep 09 16:17:16 2011 -0700
@@ -236,7 +236,7 @@
       CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc);
       if (cb != null && cb.isJavaMethod()) {
         NMethod nm = (NMethod) cb;
-        if (pc.equals(nm.deoptBegin())) {
+        if (pc.equals(nm.deoptHandlerBegin())) {
           // adjust pc if frame is deoptimized.
           pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset());
           deoptimized = true;
@@ -559,49 +559,46 @@
       }
     }
 
-    if (!VM.getVM().isCore()) {
-      // Note:  The version of this operation on any platform with callee-save
-      //        registers must update the register map (if not null).
-      //        In order to do this correctly, the various subtypes of
-      //        of frame (interpreted, compiled, glue, native),
-      //        must be distinguished.  There is no need on SPARC for
-      //        such distinctions, because all callee-save registers are
-      //        preserved for all frames via SPARC-specific mechanisms.
-      //
-      //        *** HOWEVER, *** if and when we make any floating-point
-      //        registers callee-saved, then we will have to copy over
-      //        the RegisterMap update logic from the Intel code.
+    // Note:  The version of this operation on any platform with callee-save
+    //        registers must update the register map (if not null).
+    //        In order to do this correctly, the various subtypes
+    //        of frame (interpreted, compiled, glue, native)
+    //        must be distinguished.  There is no need on SPARC for
+    //        such distinctions, because all callee-save registers are
+    //        preserved for all frames via SPARC-specific mechanisms.
+    //
+    //        *** HOWEVER, *** if and when we make any floating-point
+    //        registers callee-saved, then we will have to copy over
+    //        the RegisterMap update logic from the Intel code.
 
+    if (isRicochetFrame()) return senderForRicochetFrame(map);
 
-      // The constructor of the sender must know whether this frame is interpreted so it can set the
-      // sender's _interpreter_sp_adjustment field.
-      if (VM.getVM().getInterpreter().contains(pc)) {
-        isInterpreted = true;
-        map.makeIntegerRegsUnsaved();
+    // The constructor of the sender must know whether this frame is interpreted so it can set the
+    // sender's _interpreter_sp_adjustment field.
+    if (VM.getVM().getInterpreter().contains(pc)) {
+      isInterpreted = true;
+      map.makeIntegerRegsUnsaved();
+      map.shiftWindow(sp, youngerSP);
+    } else {
+      // Find a CodeBlob containing this frame's pc or elide the lookup and use the
+      // supplied blob which is already known to be associated with this frame.
+      cb = VM.getVM().getCodeCache().findBlob(pc);
+      if (cb != null) {
+        // Update the location of all implicitly saved registers
+        // as the address of these registers in the register save
+        // area (for %o registers we use the address of the %i
+        // register in the next younger frame)
         map.shiftWindow(sp, youngerSP);
-      } else {
-        // Find a CodeBlob containing this frame's pc or elide the lookup and use the
-        // supplied blob which is already known to be associated with this frame.
-        cb = VM.getVM().getCodeCache().findBlob(pc);
-        if (cb != null) {
-
-          if (cb.callerMustGCArguments(map.getThread())) {
+        if (map.getUpdateMap()) {
+          if (cb.callerMustGCArguments()) {
             map.setIncludeArgumentOops(true);
           }
-
-          // Update the location of all implicitly saved registers
-          // as the address of these registers in the register save
-          // area (for %o registers we use the address of the %i
-          // register in the next younger frame)
-          map.shiftWindow(sp, youngerSP);
-          if (map.getUpdateMap()) {
-            if (cb.getOopMaps() != null) {
-              OopMapSet.updateRegisterMap(this, cb, map, VM.getVM().isDebugging());
-            }
+          if (cb.getOopMaps() != null) {
+            OopMapSet.updateRegisterMap(this, cb, map, VM.getVM().isDebugging());
           }
         }
       }
-    } // #ifndef CORE
+    }
 
     return new SPARCFrame(biasSP(sp), biasSP(youngerSP), isInterpreted);
   }
@@ -948,6 +945,20 @@
   }
 
 
+  private Frame senderForRicochetFrame(SPARCRegisterMap map) {
+    if (DEBUG) {
+      System.out.println("senderForRicochetFrame");
+    }
+    //RicochetFrame* f = RicochetFrame::from_frame(fr);
+    // Cf. is_interpreted_frame path of frame::sender
+    Address youngerSP = getSP();
+    Address sp        = getSenderSP();
+    map.makeIntegerRegsUnsaved();
+    map.shiftWindow(sp, youngerSP);
+    boolean thisFrameAdjustedStack = true;  // I5_savedSP is live in this ricochet frame
+    return new SPARCFrame(sp, youngerSP, thisFrameAdjustedStack);
+  }
+
   private Frame senderForEntryFrame(RegisterMap regMap) {
     SPARCRegisterMap map = (SPARCRegisterMap) regMap;
 
@@ -965,10 +976,8 @@
     Address lastJavaPC = jcw.getLastJavaPC();
     map.clear();
 
-    if (!VM.getVM().isCore()) {
-      map.makeIntegerRegsUnsaved();
-      map.shiftWindow(lastJavaSP, null);
-    }
+    map.makeIntegerRegsUnsaved();
+    map.shiftWindow(lastJavaSP, null);
 
     if (Assert.ASSERTS_ENABLED) {
       Assert.that(map.getIncludeArgumentOops(), "should be set by clear");
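
The two register-map calls that recur in this file encode the SPARC-specific rule that the comment block above describes. A short sketch of what they amount to (both methods are existing SA API; the comments summarize the register-window behavior):

    map.makeIntegerRegsUnsaved();    // integer regs live in register windows, not in per-frame save slots
    map.shiftWindow(sp, youngerSP);  // sender's %o<n> is found at the %i<n> save slot
                                     // in the next younger frame's window
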
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/sparc/SPARCRicochetFrame.java	Fri Sep 09 16:17:16 2011 -0700
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.runtime.sparc;
+
+import java.util.*;
+import sun.jvm.hotspot.asm.sparc.SPARCRegister;
+import sun.jvm.hotspot.asm.sparc.SPARCRegisters;
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.runtime.*;
+import sun.jvm.hotspot.types.*;
+
+public class SPARCRicochetFrame {
+  static {
+    VM.registerVMInitializedObserver(new Observer() {
+        public void update(Observable o, Object data) {
+          initialize(VM.getVM().getTypeDataBase());
+        }
+      });
+  }
+
+  private SPARCFrame frame;
+
+  private static void initialize(TypeDataBase db) {
+    // Type type = db.lookupType("MethodHandles::RicochetFrame");
+
+  }
+
+  static SPARCRicochetFrame fromFrame(SPARCFrame f) {
+    return new SPARCRicochetFrame(f);
+  }
+
+  private SPARCRicochetFrame(SPARCFrame f) {
+    frame = f;
+  }
+
+  private Address registerValue(SPARCRegister reg) {
+    return frame.getSP().addOffsetTo(reg.spOffsetInSavedWindow()).getAddressAt(0);
+  }
+
+  public Address savedArgsBase() {
+    return registerValue(SPARCRegisters.L4);
+  }
+  public Address exactSenderSP() {
+    return registerValue(SPARCRegisters.I5);
+  }
+  public Address senderLink() {
+    return frame.getSenderSP();
+  }
+  public Address senderPC() {
+    return frame.getSenderPC();
+  }
+  public Address extendedSenderSP() {
+    return savedArgsBase();
+  }
+}
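
This class only packages reads of the ricochet frame's saved window registers; unlike the x86 side, SPARCFrame.senderForRicochetFrame above does not yet use it. A sketch of how the accessors relate (wiring is illustrative, not part of the changeset):

    SPARCRicochetFrame rf = SPARCRicochetFrame.fromFrame(sparcFrame);
    Address senderSP = rf.senderLink();      // frame.getSenderSP()
    Address argsBase = rf.savedArgsBase();   // %l4 in the saved window; also the extended sender SP
    Address exactSP  = rf.exactSenderSP();   // %i5 in the saved window
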
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/win32_amd64/Win32AMD64JavaThreadPDAccess.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/win32_amd64/Win32AMD64JavaThreadPDAccess.java	Fri Sep 09 16:17:16 2011 -0700
@@ -31,6 +31,7 @@
 import sun.jvm.hotspot.debugger.amd64.*;
 import sun.jvm.hotspot.runtime.*;
 import sun.jvm.hotspot.runtime.amd64.*;
+import sun.jvm.hotspot.runtime.x86.*;
 import sun.jvm.hotspot.types.*;
 import sun.jvm.hotspot.utilities.*;
 
@@ -86,14 +87,14 @@
     }
     Address pc =  thread.getLastJavaPC();
     if ( pc != null ) {
-      return new AMD64Frame(thread.getLastJavaSP(), fp, pc);
+      return new X86Frame(thread.getLastJavaSP(), fp, pc);
     } else {
-      return new AMD64Frame(thread.getLastJavaSP(), fp);
+      return new X86Frame(thread.getLastJavaSP(), fp);
     }
   }
 
   public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) {
-    return new AMD64RegisterMap(thread, updateMap);
+    return new X86RegisterMap(thread, updateMap);
   }
 
   public Frame getCurrentFrameGuess(JavaThread thread, Address addr) {
@@ -104,9 +105,9 @@
       return null;
     }
     if (guesser.getPC() == null) {
-      return new AMD64Frame(guesser.getSP(), guesser.getFP());
+      return new X86Frame(guesser.getSP(), guesser.getFP());
     } else {
-      return new AMD64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC());
+      return new X86Frame(guesser.getSP(), guesser.getFP(), guesser.getPC());
     }
   }
 
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java	Thu Sep 08 16:59:27 2011 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java	Fri Sep 09 16:17:16 2011 -0700
@@ -25,7 +25,6 @@
 package sun.jvm.hotspot.runtime.x86;
 
 import java.util.*;
-import sun.jvm.hotspot.asm.x86.*;
 import sun.jvm.hotspot.code.*;
 import sun.jvm.hotspot.compiler.*;
 import sun.jvm.hotspot.debugger.*;
@@ -62,11 +61,13 @@
   private static       int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET;
 
   // Entry frames
-  private static final int ENTRY_FRAME_CALL_WRAPPER_OFFSET   =  2;
+  private static       int ENTRY_FRAME_CALL_WRAPPER_OFFSET;
 
   // Native frames
   private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET =  2;
 
+  private static VMReg rbp;
+
   static {
     VM.registerVMInitializedObserver(new Observer() {
         public void update(Observable o, Object data) {
@@ -76,19 +77,23 @@
   }
 
   private static synchronized void initialize(TypeDataBase db) {
-    if (VM.getVM().isCore()) {
-      INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1;
-    } else {
-      INTERPRETER_FRAME_MDX_OFFSET   = INTERPRETER_FRAME_METHOD_OFFSET - 1;
-      INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1;
-    }
+    INTERPRETER_FRAME_MDX_OFFSET                  = INTERPRETER_FRAME_METHOD_OFFSET - 1;
+    INTERPRETER_FRAME_CACHE_OFFSET                = INTERPRETER_FRAME_MDX_OFFSET - 1;
     INTERPRETER_FRAME_LOCALS_OFFSET               = INTERPRETER_FRAME_CACHE_OFFSET - 1;
     INTERPRETER_FRAME_BCX_OFFSET                  = INTERPRETER_FRAME_LOCALS_OFFSET - 1;
     INTERPRETER_FRAME_INITIAL_SP_OFFSET           = INTERPRETER_FRAME_BCX_OFFSET - 1;
     INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET    = INTERPRETER_FRAME_INITIAL_SP_OFFSET;
     INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET;
+
+    ENTRY_FRAME_CALL_WRAPPER_OFFSET = db.lookupIntConstant("frame::entry_frame_call_wrapper_offset");
+    if (VM.getVM().getAddressSize() == 4) {
+      rbp = new VMReg(5);
+    } else {
+      rbp = new VMReg(5 << 1);
+    }
   }
 
+
   // an additional field beyond sp and pc:
   Address raw_fp; // frame pointer
   private Address raw_unextendedSP;
@@ -102,7 +107,7 @@
       CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc);
       if (cb != null && cb.isJavaMethod()) {
         NMethod nm = (NMethod) cb;
-        if (pc.equals(nm.deoptBegin())) {
+        if (pc.equals(nm.deoptHandlerBegin())) {
           if (Assert.ASSERTS_ENABLED) {
             Assert.that(this.getUnextendedSP() != null, "null SP in Java frame");
           }
@@ -119,6 +124,7 @@
     this.raw_unextendedSP = raw_sp;
     this.raw_fp = raw_fp;
     this.pc = pc;
+    adjustUnextendedSP();
 
     // Frame must be fully constructed before this call
     adjustForDeopt();
@@ -134,6 +140,7 @@
     this.raw_unextendedSP = raw_sp;
     this.raw_fp = raw_fp;
     this.pc = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize());
+    adjustUnextendedSP();
 
     // Frame must be fully constructed before this call
     adjustForDeopt();
@@ -144,24 +151,18 @@
     }
   }
 
-  // This constructor should really take the unextended SP as an arg
-  // but then the constructor is ambiguous with constructor that takes
-  // a PC so take an int and convert it.
-  public X86Frame(Address raw_sp, Address raw_fp, long extension) {
+  public X86Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) {
     this.raw_sp = raw_sp;
-    if (raw_sp == null) {
-      this.raw_unextendedSP = null;
-    } else {
-      this.raw_unextendedSP = raw_sp.addOffsetTo(extension);
-    }
+    this.raw_unextendedSP = raw_unextendedSp;
     this.raw_fp = raw_fp;
-    this.pc = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize());
+    this.pc = pc;
+    adjustUnextendedSP();
 
     // Frame must be fully constructed before this call
     adjustForDeopt();
 
     if (DEBUG) {
-      System.out.println("X86Frame(sp, fp): " + this);
+      System.out.println("X86Frame(sp, unextendedSP, fp, pc): " + this);
       dumpStack();
     }
 
@@ -172,7 +173,6 @@
     frame.raw_sp = raw_sp;
     frame.raw_unextendedSP = raw_unextendedSP;
     frame.raw_fp = raw_fp;
-    frame.raw_fp = raw_fp;
     frame.pc = pc;
     frame.deoptimized = deoptimized;
     return frame;
@@ -269,19 +269,18 @@
 
     if (isEntryFrame())       return senderForEntryFrame(map);
     if (isInterpretedFrame()) return senderForInterpreterFrame(map);
+    if (isRicochetFrame())    return senderForRicochetFrame(map);
 
-    if (!VM.getVM().isCore()) {
-      if(cb == null) {
-        cb = VM.getVM().getCodeCache().findBlob(getPC());
-      } else {
-        if (Assert.ASSERTS_ENABLED) {
-          Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same");
-        }
+    if(cb == null) {
+      cb = VM.getVM().getCodeCache().findBlob(getPC());
+    } else {
+      if (Assert.ASSERTS_ENABLED) {
+        Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same");
       }
+    }
 
-      if (cb != null) {
-         return senderForCompiledFrame(map, cb);
-      }
+    if (cb != null) {
+      return senderForCompiledFrame(map, cb);
     }
 
     // Must be native-compiled frame, i.e. the marshaling code for native
@@ -289,7 +288,20 @@
     return new X86Frame(getSenderSP(), getLink(), getSenderPC());
   }
 
+  private Frame senderForRicochetFrame(X86RegisterMap map) {
+    if (DEBUG) {
+      System.out.println("senderForRicochetFrame");
+    }
+    X86RicochetFrame f = X86RicochetFrame.fromFrame(this);
+    if (map.getUpdateMap())
+      updateMapWithSavedLink(map, f.senderLinkAddress());
+    return new X86Frame(f.extendedSenderSP(), f.exactSenderSP(), f.senderLink(), f.senderPC());
+  }
+
   private Frame senderForEntryFrame(X86RegisterMap map) {
+    if (DEBUG) {
+      System.out.println("senderForEntryFrame");
+    }
     if (Assert.ASSERTS_ENABLED) {
       Assert.that(map != null, "map must be set");
     }
@@ -313,7 +325,37 @@
     return fr;
   }
 
+  //------------------------------------------------------------------------------
+  // frame::adjust_unextended_sp
+  private void adjustUnextendedSP() {
+    // If we are returning to a compiled MethodHandle call site, the
+    // saved_fp will in fact be a saved value of the unextended SP.  The
+    // simplest way to tell whether we are returning to such a call site
+    // is as follows:
+
+    CodeBlob cb = cb();
+    NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull();
+    if (senderNm != null) {
+      // If the sender PC is a deoptimization point, get the original
+      // PC.  For MethodHandle call site the unextended_sp is stored in
+      // saved_fp.
+      if (senderNm.isDeoptMhEntry(getPC())) {
+        // DEBUG_ONLY(verifyDeoptMhOriginalPc(senderNm, getFP()));
+        raw_unextendedSP = getFP();
+      }
+      else if (senderNm.isDeoptEntry(getPC())) {
+        // DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp));
+      }
+      else if (senderNm.isMethodHandleReturn(getPC())) {
+        raw_unextendedSP = getFP();
+      }
+    }
+  }
+
   private Frame senderForInterpreterFrame(X86RegisterMap map) {
+    if (DEBUG) {
+      System.out.println("senderForInterpreterFrame");
+    }
     Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0);
     Address sp = addressOfStackSlot(SENDER_SP_OFFSET);
     // We do not need to update the callee-save register mapping because above
@@ -323,10 +365,21 @@
     // However c2 no longer uses callee save register for java calls so there
     // are no callee register to find.
 
-    return new X86Frame(sp, getLink(), unextendedSP.minus(sp));
+    if (map.getUpdateMap())
+      updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET));
+
+    return new X86Frame(sp, unextendedSP, getLink(), getSenderPC());
+  }
+
+  private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) {
+    map.setLocation(rbp, savedFPAddr);
   }
 
   private Frame senderForCompiledFrame(X86RegisterMap map, CodeBlob cb) {
+    if (DEBUG) {
+      System.out.println("senderForCompiledFrame");
+    }
+
     //
     // NOTE: some of this code is (unfortunately) duplicated in X86CurrentFrameGuess
     //
@@ -336,41 +389,35 @@
     }
 
     // frame owned by optimizing compiler
-    Address        sender_sp = null;
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(cb.getFrameSize() >= 0, "must have non-negative frame size");
+    }
+    Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize());
 
-    if (VM.getVM().isClientCompiler()) {
-      sender_sp        = addressOfStackSlot(SENDER_SP_OFFSET);
-    } else {
-      if (Assert.ASSERTS_ENABLED) {
-        Assert.that(cb.getFrameSize() >= 0, "Compiled by Compiler1: do not use");
+    // On Intel the return_address is always the word on the stack
+    Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize());
+
+    // This is the saved value of EBP which may or may not really be an FP.
+    // It is only an FP if the sender is an interpreter frame (or C1?).
+    Address savedFPAddr = senderSP.addOffsetTo(- SENDER_SP_OFFSET * VM.getVM().getAddressSize());
+
+    if (map.getUpdateMap()) {
+      // Tell GC to use argument oopmaps for some runtime stubs that need it.
+      // For C1, the runtime stub might not have oop maps, so set this flag
+      // outside of update_register_map.
+      map.setIncludeArgumentOops(cb.callerMustGCArguments());
+
+      if (cb.getOopMaps() != null) {
+        OopMapSet.updateRegisterMap(this, cb, map, true);
       }
-      sender_sp = getUnextendedSP().addOffsetTo(cb.getFrameSize());
+
+      // Since the prolog does the save and restore of EBP there is no oopmap
+      // for it so we must fill in its location as if there was an oopmap entry
+      // since if our caller was compiled code there could be live jvm state in it.
+      updateMapWithSavedLink(map, savedFPAddr);
     }
 
-    // On Intel the return_address is always the word on the stack
-    Address sender_pc = sender_sp.getAddressAt(-1 * VM.getVM().getAddressSize());
-
-    if (map.getUpdateMap() && cb.getOopMaps() != null) {
-      OopMapSet.updateRegisterMap(this, cb, map, true);
-    }
-
-    if (VM.getVM().isClientCompiler()) {
-      // Move this here for C1 and collecting oops in arguments (According to Rene)
-      map.setIncludeArgumentOops(cb.callerMustGCArguments(map.getThread()));
-    }
-
-    Address saved_fp = null;
-    if (VM.getVM().isClientCompiler()) {
-      saved_fp = getFP().getAddressAt(0);
-    } else if (VM.getVM().isServerCompiler() &&
-               (VM.getVM().getInterpreter().contains(sender_pc) ||
-               VM.getVM().getStubRoutines().returnsToCallStub(sender_pc))) {
-      // C2 prologue saves EBP in the usual place.
-      // however only use it if the sender had link infomration in it.
-      saved_fp = sender_sp.getAddressAt(-2 * VM.getVM().getAddressSize());
-    }
-
-    return new X86Frame(sender_sp, saved_fp, sender_pc);
+    return new X86Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC);
   }
 
   protected boolean hasSenderPD() {
@@ -403,14 +450,6 @@
 
   public Address getSenderSP()     { return addressOfStackSlot(SENDER_SP_OFFSET); }
 
-  public Address compiledArgumentToLocationPD(VMReg reg, RegisterMap regMap, int argSize) {
-    if (VM.getVM().isCore() || VM.getVM().isClientCompiler()) {
-      throw new RuntimeException("Should not reach here");
-    }
-
-    return oopMapRegToLocation(reg, regMap);
-  }
-
   public Address addressOfInterpreterFrameLocals() {
     return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET);
   }
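
The rewritten senderForCompiledFrame drops the per-compiler special cases in favor of one layout rule. A restatement of that math in Java (wordSize stands in for VM.getVM().getAddressSize(); the -2 assumes SENDER_SP_OFFSET == 2, as on x86):

    Address senderSP    = getUnextendedSP().addOffsetTo(cb.getFrameSize());
    Address senderPC    = senderSP.getAddressAt(-1 * wordSize);   // return-address slot
    Address savedFPAddr = senderSP.addOffsetTo(-2 * wordSize);    // slot holding caller's EBP
    // The saved-FP *slot* goes into the register map (no oopmap entry
    // covers EBP), while its *contents* seed the sender frame:
    Frame sender = new X86Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC);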
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86RicochetFrame.java	Fri Sep 09 16:17:16 2011 -0700
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.runtime.x86;
+
+import java.util.*;
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.runtime.*;
+import sun.jvm.hotspot.types.*;
+
+public class X86RicochetFrame extends VMObject {
+  static {
+    VM.registerVMInitializedObserver(new Observer() {
+        public void update(Observable o, Object data) {
+          initialize(VM.getVM().getTypeDataBase());
+        }
+      });
+  }
+
+  private static void initialize(TypeDataBase db) {
+    Type type = db.lookupType("MethodHandles::RicochetFrame");
+
+    senderLinkField    = type.getAddressField("_sender_link");
+    savedArgsBaseField = type.getAddressField("_saved_args_base");
+    exactSenderSPField = type.getAddressField("_exact_sender_sp");
+    senderPCField      = type.getAddressField("_sender_pc");
+  }
+
+  private static AddressField senderLinkField;
+  private static AddressField savedArgsBaseField;
+  private static AddressField exactSenderSPField;
+  private static AddressField senderPCField;
+
+  static X86RicochetFrame fromFrame(X86Frame f) {
+    return new X86RicochetFrame(f.getFP().addOffsetTo(- senderLinkField.getOffset()));
+  }
+
+  private X86RicochetFrame(Address addr) {
+    super(addr);
+  }
+
+  public Address senderLink() {
+    return senderLinkField.getValue(addr);
+  }
+  public Address senderLinkAddress() {
+    return addr.addOffsetTo(senderLinkField.getOffset());
+  }
+  public Address savedArgsBase() {
+    return savedArgsBaseField.getValue(addr);
+  }
+  public Address extendedSenderSP() {
+    return savedArgsBase();
+  }
+  public Address exactSenderSP() {
+    return exactSenderSPField.getValue(addr);
+  }
+  public Address senderPC() {
+    return senderPCField.getValue(addr);
+  }
+}
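
fromFrame anchors the VMObject at the RicochetFrame struct base rather than at the frame pointer. A sketch of the address math (field offsets come from the lookupType call in initialize):

    // _sender_link is the field the frame pointer points at, so
    // subtracting its offset from FP yields the struct base:
    Address base = f.getFP().addOffsetTo(-senderLinkField.getOffset());
    // every accessor is then a plain field load relative to that base:
    Address pc = base.getAddressAt(senderPCField.getOffset());
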
--- a/make/hotspot_version	Thu Sep 08 16:59:27 2011 -0700
+++ b/make/hotspot_version	Fri Sep 09 16:17:16 2011 -0700
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=22
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=03
+HS_BUILD_NUMBER=04
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/make/linux/makefiles/mapfile-vers-debug	Thu Sep 08 16:59:27 2011 -0700
+++ b/make/linux/makefiles/mapfile-vers-debug	Fri Sep 09 16:17:16 2011 -0700
@@ -244,24 +244,6 @@
                 JVM_Yield;
                 JVM_handle_linux_signal;
 
-                # Old reflection routines
-                # These do not need to be present in the product build in JDK 1.4
-                # but their code has not been removed yet because there will not
-                # be a substantial code savings until JVM_InvokeMethod and
-                # JVM_NewInstanceFromConstructor can also be removed; see
-                # reflectionCompat.hpp.
-                JVM_GetClassConstructor;
-                JVM_GetClassConstructors;
-                JVM_GetClassField;
-                JVM_GetClassFields;
-                JVM_GetClassMethod;
-                JVM_GetClassMethods;
-                JVM_GetField;
-                JVM_GetPrimitiveField;
-                JVM_NewInstance;
-                JVM_SetField;
-                JVM_SetPrimitiveField;
-
                 # debug JVM
                 JVM_AccessVMBooleanFlag;
                 JVM_AccessVMIntFlag;
--- a/make/linux/makefiles/mapfile-vers-product	Thu Sep 08 16:59:27 2011 -0700
+++ b/make/linux/makefiles/mapfile-vers-product	Fri Sep 09 16:17:16 2011 -0700
@@ -244,24 +244,6 @@
                 JVM_Yield;
                 JVM_handle_linux_signal;
 
-                # Old reflection routines
-                # These do not need to be present in the product build in JDK 1.4
-                # but their code has not been removed yet because there will not
-                # be a substantial code savings until JVM_InvokeMethod and
-                # JVM_NewInstanceFromConstructor can also be removed; see
-                # reflectionCompat.hpp.
-                JVM_GetClassConstructor;
-                JVM_GetClassConstructors;
-                JVM_GetClassField;
-                JVM_GetClassFields;
-                JVM_GetClassMethod;
-                JVM_GetClassMethods;
-                JVM_GetField;
-                JVM_GetPrimitiveField;
-                JVM_NewInstance;
-                JVM_SetField;
-                JVM_SetPrimitiveField;
-
                 # miscellaneous functions
                 jio_fprintf;
                 jio_printf;
--- a/make/solaris/makefiles/debug.make	Thu Sep 08 16:59:27 2011 -0700
+++ b/make/solaris/makefiles/debug.make	Fri Sep 09 16:17:16 2011 -0700
@@ -41,8 +41,7 @@
 
 # Linker mapfiles
 MAPFILE = $(GAMMADIR)/make/solaris/makefiles/mapfile-vers \
-          $(GAMMADIR)/make/solaris/makefiles/mapfile-vers-debug \
-          $(GAMMADIR)/make/solaris/makefiles/mapfile-vers-nonproduct
+          $(GAMMADIR)/make/solaris/makefiles/mapfile-vers-debug
 
 # This mapfile is only needed when compiling with dtrace support,
 # and must not be used otherwise.
--- a/make/solaris/makefiles/fastdebug.make	Thu Sep 08 16:59:27 2011 -0700
+++ b/make/solaris/makefiles/fastdebug.make	Fri Sep 09 16:17:16 2011 -0700
@@ -107,8 +107,7 @@
 
 # Linker mapfiles
 MAPFILE = $(GAMMADIR)/make/solaris/makefiles/mapfile-vers \
-	  $(GAMMADIR)/make/solaris/makefiles/mapfile-vers-debug \
-	  $(GAMMADIR)/make/solaris/makefiles/mapfile-vers-nonproduct
+	  $(GAMMADIR)/make/solaris/makefiles/mapfile-vers-debug
 
 # This mapfile is only needed when compiling with dtrace support,
 # and must not be used otherwise.
--- a/make/solaris/makefiles/jvmg.make	Thu Sep 08 16:59:27 2011 -0700
+++ b/make/solaris/makefiles/jvmg.make	Fri Sep 09 16:17:16 2011 -0700
@@ -44,8 +44,7 @@
 
 # Linker mapfiles
 MAPFILE = $(GAMMADIR)/make/solaris/makefiles/mapfile-vers \
-          $(GAMMADIR)/make/solaris/makefiles/mapfile-vers-debug \
-          $(GAMMADIR)/make/solaris/makefiles/mapfile-vers-nonproduct
+          $(GAMMADIR)/make/solaris/makefiles/mapfile-vers-debug
 
 # This mapfile is only needed when compiling with dtrace support,
 # and must not be used otherwise.
--- a/make/solaris/makefiles/mapfile-vers-nonproduct	Thu Sep 08 16:59:27 2011 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-#
-
-#
-# Copyright (c) 2001, 2008, Oracle and/or its affiliates. All rights reserved.
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This code is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License version 2 only, as
-# published by the Free Software Foundation.
-#
-# This code is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-# version 2 for more details (a copy is included in the LICENSE file that
-# accompanied this code).
-#
-# You should have received a copy of the GNU General Public License version
-# 2 along with this work; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-# or visit www.oracle.com if you need additional information or have any
-# questions.
-#  
-#
-
-# Define public interface.
-
-SUNWprivate_1.1 {
-        global:
-		# Old reflection routines
-		# These do not need to be present in the product build in JDK 1.4
-		# but their code has not been removed yet because there will not
-		# be a substantial code savings until JVM_InvokeMethod and
-		# JVM_NewInstanceFromConstructor can also be removed; see
-		# reflectionCompat.hpp.
-		JVM_GetClassConstructor;
-		JVM_GetClassConstructors;
-		JVM_GetClassField;
-		JVM_GetClassFields;
-		JVM_GetClassMethod;
-		JVM_GetClassMethods;
-		JVM_GetField;
-		JVM_GetPrimitiveField;
-		JVM_NewInstance;
-		JVM_SetField;
-		JVM_SetPrimitiveField;
-};
--- a/make/solaris/makefiles/optimized.make	Thu Sep 08 16:59:27 2011 -0700
+++ b/make/solaris/makefiles/optimized.make	Fri Sep 09 16:17:16 2011 -0700
@@ -48,9 +48,7 @@
 CFLAGS$(HOTSPARC_GENERIC) += $(OPT_CFLAGS/BYFILE)
 
 # Linker mapfiles
-# NOTE: inclusion of nonproduct mapfile not necessary; read it for details
-MAPFILE = $(GAMMADIR)/make/solaris/makefiles/mapfile-vers \
-          $(GAMMADIR)/make/solaris/makefiles/mapfile-vers-nonproduct
+MAPFILE = $(GAMMADIR)/make/solaris/makefiles/mapfile-vers
 
 # This mapfile is only needed when compiling with dtrace support,
 # and must not be used otherwise.
--- a/make/solaris/makefiles/product.make	Thu Sep 08 16:59:27 2011 -0700
+++ b/make/solaris/makefiles/product.make	Fri Sep 09 16:17:16 2011 -0700
@@ -58,13 +58,9 @@
 # to inhibit the effect of the previous line on CFLAGS.
 
 # Linker mapfiles
-# NOTE: inclusion of nonproduct mapfile not necessary; read it for details
-ifdef USE_GCC
 MAPFILE = $(GAMMADIR)/make/solaris/makefiles/mapfile-vers
-else
-MAPFILE = $(GAMMADIR)/make/solaris/makefiles/mapfile-vers \
-          $(GAMMADIR)/make/solaris/makefiles/mapfile-vers-nonproduct
 
+ifndef USE_GCC
 # This mapfile is only needed when compiling with dtrace support,
 # and must not be used otherwise.
 MAPFILE_DTRACE = $(GAMMADIR)/make/solaris/makefiles/mapfile-vers-$(TYPE)
--- a/src/cpu/sparc/vm/assembler_sparc.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -1794,7 +1794,8 @@
   mov(reg,O0); // Move arg into O0; arg might be in O7 which is about to be crushed
   stx(O7,SP,frame::register_save_words*wordSize+STACK_BIAS+7*8);
 
-  set((intptr_t)real_msg, O1);
+  // Use patchable_set() so the emitted code size stays the same regardless of the address
+  patchable_set((intptr_t)real_msg, O1);
   // Load address to call to into O7
   load_ptr_contents(a, O7);
   // Register call to verify_oop_subroutine
@@ -1831,7 +1832,8 @@
   ld_ptr(addr.base(), addr.disp() + 8*8, O0); // Load arg into O0; arg might be in O7 which is about to be crushed
   stx(O7,SP,frame::register_save_words*wordSize+STACK_BIAS+7*8);
 
-  set((intptr_t)real_msg, O1);
+  // Use patchable_set() so the emitted code size stays the same regardless of the address
+  patchable_set((intptr_t)real_msg, O1);
   // Load address to call to into O7
   load_ptr_contents(a, O7);
   // Register call to verify_oop_subroutine
@@ -1976,7 +1978,8 @@
     save_frame(::round_to(sizeof(RegistersForDebugging) / BytesPerWord, 2));
 
     // stop_subroutine expects message pointer in I1.
-    set((intptr_t)msg, O1);
+    // Use patchable_set() so the emitted code size stays the same regardless of the address
+    patchable_set((intptr_t)msg, O1);
 
     // factor long stop-sequence into subroutine to save space
     assert(StubRoutines::Sparc::stop_subroutine_entry_address(), "hasn't been generated yet");
@@ -1998,7 +2001,8 @@
   save_frame(::round_to(sizeof(RegistersForDebugging) / BytesPerWord, 2));
   RegistersForDebugging::save_registers(this);
   mov(O0, L0);
-  set((intptr_t)msg, O0);
+  // Use patchable_set() so the emitted code size stays the same regardless of the address
+  patchable_set((intptr_t)msg, O0);
   call( CAST_FROM_FN_PTR(address, warning) );
   delayed()->nop();
 //  ret();
@@ -2161,29 +2165,6 @@
 #endif
 }
 
-void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
-                                     Register s1, address d,
-                                     relocInfo::relocType rt ) {
-  assert_not_delayed();
-  if (VM_Version::v9_instructions_work()) {
-    bpr(rc, a, p, s1, d, rt);
-  } else {
-    tst(s1);
-    br(reg_cond_to_cc_cond(rc), a, p, d, rt);
-  }
-}
-
-void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
-                                     Register s1, Label& L ) {
-  assert_not_delayed();
-  if (VM_Version::v9_instructions_work()) {
-    bpr(rc, a, p, s1, L);
-  } else {
-    tst(s1);
-    br(reg_cond_to_cc_cond(rc), a, p, L);
-  }
-}
-
 // Compare registers and branch with nop in delay slot or cbcond without delay slot.
 
 // Compare integer (32 bit) values (icc only).
@@ -4340,22 +4321,29 @@
   } else {
     pre_val = O0;
   }
+
   int satb_q_index_byte_offset =
     in_bytes(JavaThread::satb_mark_queue_offset() +
              PtrQueue::byte_offset_of_index());
+
   int satb_q_buf_byte_offset =
     in_bytes(JavaThread::satb_mark_queue_offset() +
              PtrQueue::byte_offset_of_buf());
+
   assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) &&
          in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t),
          "check sizes in assembly below");
 
   __ bind(restart);
+
+  // Load the index into the SATB buffer. PtrQueue::_index is a size_t
+  // so ld_ptr is appropriate.
   __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
 
-  __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill);
-  // If the branch is taken, no harm in executing this in the delay slot.
-  __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
+  // index == 0?
+  __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
+
+  __ ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
   __ sub(L0, oopSize, L0);
 
   __ st_ptr(pre_val, L1, L0);  // [_buf + index] := I0
@@ -4466,9 +4454,8 @@
          tmp);
   }
 
-  // Check on whether to annul.
-  br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
-  delayed()->nop();
+  // Is marking active?
+  cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
 
   // Do we need to load the previous value?
   if (obj != noreg) {
@@ -4490,9 +4477,7 @@
   assert(pre_val != noreg, "must have a real register");
 
   // Is the previous value null?
-  // Check on whether to annul.
-  br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);
-  delayed()->nop();
+  cmp_and_brx_short(pre_val, G0, Assembler::equal, Assembler::pt, filtered);
 
   // OK, it's not filtered, so we'll need to call enqueue.  In the normal
   // case, pre_val will be a scratch G-reg, but there are some cases in
@@ -4519,39 +4504,6 @@
   bind(filtered);
 }
 
-static jint num_ct_writes = 0;
-static jint num_ct_writes_filtered_in_hr = 0;
-static jint num_ct_writes_filtered_null = 0;
-static G1CollectedHeap* g1 = NULL;
-
-static Thread* count_ct_writes(void* filter_val, void* new_val) {
-  Atomic::inc(&num_ct_writes);
-  if (filter_val == NULL) {
-    Atomic::inc(&num_ct_writes_filtered_in_hr);
-  } else if (new_val == NULL) {
-    Atomic::inc(&num_ct_writes_filtered_null);
-  } else {
-    if (g1 == NULL) {
-      g1 = G1CollectedHeap::heap();
-    }
-  }
-  if ((num_ct_writes % 1000000) == 0) {
-    jint num_ct_writes_filtered =
-      num_ct_writes_filtered_in_hr +
-      num_ct_writes_filtered_null;
-
-    tty->print_cr("%d potential CT writes: %5.2f%% filtered\n"
-                  "   (%5.2f%% intra-HR, %5.2f%% null).",
-                  num_ct_writes,
-                  100.0*(float)num_ct_writes_filtered/(float)num_ct_writes,
-                  100.0*(float)num_ct_writes_filtered_in_hr/
-                  (float)num_ct_writes,
-                  100.0*(float)num_ct_writes_filtered_null/
-                  (float)num_ct_writes);
-  }
-  return Thread::current();
-}
-
 static address dirty_card_log_enqueue = 0;
 static u_char* dirty_card_log_enqueue_end = 0;
 
@@ -4574,11 +4526,8 @@
   __ set(addrlit, O1); // O1 := <card table base>
   __ ldub(O0, O1, O2); // O2 := [O0 + O1]
 
-  __ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt,
-                      O2, not_already_dirty);
-  // Get O1 + O2 into a reg by itself -- useful in the take-the-branch
-  // case, harmless if not.
-  __ delayed()->add(O0, O1, O3);
+  assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code");
+  __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
 
   // We didn't take the branch, so we're already dirty: return.
   // Use return-from-leaf
@@ -4587,8 +4536,13 @@
 
   // Not dirty.
   __ bind(not_already_dirty);
+
+  // Get O0 + O1 into a reg by itself
+  __ add(O0, O1, O3);
+
   // First, dirty it.
   __ stb(G0, O3, G0);  // [cardPtr] := 0  (i.e., dirty).
+
   int dirty_card_q_index_byte_offset =
     in_bytes(JavaThread::dirty_card_queue_offset() +
              PtrQueue::byte_offset_of_index());
@@ -4596,12 +4550,15 @@
     in_bytes(JavaThread::dirty_card_queue_offset() +
              PtrQueue::byte_offset_of_buf());
   __ bind(restart);
+
+  // Load the index into the update buffer. PtrQueue::_index is
+  // a size_t so ld_ptr is appropriate here.
   __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);
 
-  __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn,
-                      L0, refill);
-  // If the branch is taken, no harm in executing this in the delay slot.
-  __ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
+  // index == 0?
+  __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
+
+  __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
   __ sub(L0, oopSize, L0);
 
   __ st_ptr(O3, L1, L0);  // [_buf + index] := I0
@@ -4664,6 +4621,7 @@
   G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set();
   assert(bs->kind() == BarrierSet::G1SATBCT ||
          bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier");
+
   if (G1RSBarrierRegionFilter) {
     xor3(store_addr, new_val, tmp);
 #ifdef _LP64
@@ -4672,33 +4630,8 @@
     srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
 #endif
 
-    if (G1PrintCTFilterStats) {
-      guarantee(tmp->is_global(), "Or stats won't work...");
-      // This is a sleazy hack: I'm temporarily hijacking G2, which I
-      // promise to restore.
-      mov(new_val, G2);
-      save_frame(0);
-      mov(tmp, O0);
-      mov(G2, O1);
-      // Save G-regs that target may use.
-      mov(G1, L1);
-      mov(G2, L2);
-      mov(G3, L3);
-      mov(G4, L4);
-      mov(G5, L5);
-      call(CAST_FROM_FN_PTR(address, &count_ct_writes));
-      delayed()->nop();
-      mov(O0, G2);
-      // Restore G-regs that target may have used.
-      mov(L1, G1);
-      mov(L3, G3);
-      mov(L4, G4);
-      mov(L5, G5);
-      restore(G0, G0, G0);
-    }
-    // XXX Should I predict this taken or not?  Does it mattern?
-    br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
-    delayed()->nop();
+    // XXX Should I predict this taken or not?  Does it matter?
+    cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
   }
 
   // If the "store_addr" register is an "in" or "local" register, move it to
@@ -4723,7 +4656,6 @@
   restore();
 
   bind(filtered);
-
 }
 
 #endif  // SERIALGC
@@ -4973,3 +4905,65 @@
   // Caller should set it:
   // add(G0, 1, result); // equals
 }
+
+// Use BIS for zeroing (count is in bytes).
+void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) {
+  assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing");
+  Register end = count;
+  int cache_line_size = VM_Version::prefetch_data_size();
+  // Minimum count when BIS zeroing can be used since
+  // it needs membar which is expensive.
+  int block_zero_size  = MAX2(cache_line_size*3, (int)BlockZeroingLowLimit);
+
+  Label small_loop;
+  // Check if count is negative (dead code) or zero.
+  // Note, count uses 64bit in 64 bit VM.
+  cmp_and_brx_short(count, 0, Assembler::lessEqual, Assembler::pn, Ldone);
+
+  // Use BIS zeroing only for big arrays since it requires membar.
+  if (Assembler::is_simm13(block_zero_size)) { // < 4096
+    cmp(count, block_zero_size);
+  } else {
+    set(block_zero_size, temp);
+    cmp(count, temp);
+  }
+  br(Assembler::lessUnsigned, false, Assembler::pt, small_loop);
+  delayed()->add(to, count, end);
+
+  // Note: the remaining size is at least three cache lines (32 bytes each).
+
+  // Clean the beginning of space up to next cache line.
+  for (int offs = 0; offs < cache_line_size; offs += 8) {
+    stx(G0, to, offs);
+  }
+
+  // align to next cache line
+  add(to, cache_line_size, to);
+  and3(to, -cache_line_size, to);
+
+  // Note: the size left is at least two cache lines (32 bytes each).
+
+  // BIS should not be used to zero tail (64 bytes)
+  // to avoid zeroing a header of the following object.
+  sub(end, (cache_line_size*2)-8, end);
+
+  Label bis_loop;
+  bind(bis_loop);
+  stxa(G0, to, G0, Assembler::ASI_ST_BLKINIT_PRIMARY);
+  add(to, cache_line_size, to);
+  cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, bis_loop);
+
+  // BIS needs membar.
+  membar(Assembler::StoreLoad);
+
+  add(end, (cache_line_size*2)-8, end); // restore end
+  cmp_and_brx_short(to, end, Assembler::greaterEqualUnsigned, Assembler::pn, Ldone);
+
+  // Clean the tail.
+  bind(small_loop);
+  stx(G0, to, 0);
+  add(to, 8, to);
+  cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, small_loop);
+  nop(); // Separate short branches
+}
+
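
A worked example of the address arithmetic in bis_zeroing. The concrete line size is an assumption for illustration (cache_line_size comes from VM_Version::prefetch_data_size() at runtime; 32 is consistent with the "(64 bytes)" two-line tail comment above):

    int cacheLineSize = 32;                          // assumed prefetch_data_size()
    long to = 0x1008, count = 4096, end = to + count;
    // 1. plain stx stores cover the first cacheLineSize bytes, so the
    //    alignment step below cannot skip over unzeroed memory
    to = (to + cacheLineSize) & -cacheLineSize;      // add(); and3(): next line boundary
    // 2. keep the last two cache lines (minus one word) away from BIS so
    //    the following object's header is never block-initialized
    end -= (cacheLineSize * 2) - 8;
    // 3. the BIS loop then zeroes [to, end) one line per stxa, and a
    //    membar(StoreLoad) publishes the block-initialized lines
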
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -885,8 +885,9 @@
   }
 
   enum ASIs { // page 72, v9
-    ASI_PRIMARY        = 0x80,
-    ASI_PRIMARY_LITTLE = 0x88,
+    ASI_PRIMARY            = 0x80,
+    ASI_PRIMARY_NOFAULT    = 0x82,
+    ASI_PRIMARY_LITTLE     = 0x88,
     // Block initializing store
     ASI_ST_BLKINIT_PRIMARY = 0xE2,
     // Most-Recently-Used (MRU) BIS variant
@@ -1786,9 +1787,12 @@
                                                                            rs1(s) |
                                                                            op3(wrreg_op3) |
                                                                            u_field(2, 29, 25) |
-                                                                           u_field(1, 13, 13) |
+                                                                           immed(true) |
                                                                            simm(simm13a, 13)); }
-  inline void wrasi(  Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(3, 29, 25)); }
+  inline void wrasi(Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(3, 29, 25)); }
+  // wrasi(d, imm) stores (d xor imm) to asi
+  inline void wrasi(Register d, int simm13a) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) |
+                                               u_field(3, 29, 25) | immed(true) | simm(simm13a, 13)); }
   inline void wrfprs( Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); }
 
 
@@ -1940,12 +1944,6 @@
   void br_null   ( Register s1, bool a, Predict p, Label& L );
   void br_notnull( Register s1, bool a, Predict p, Label& L );
 
-  // These versions will do the most efficient thing on v8 and v9.  Perhaps
-  // this is what the routine above was meant to do, but it didn't (and
-  // didn't cover both target address kinds.)
-  void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none );
-  void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, Label& L);
-
   //
   // Compare registers and branch with nop in delay slot or cbcond without delay slot.
   //
@@ -2631,6 +2629,8 @@
   void char_arrays_equals(Register ary1, Register ary2,
                           Register limit, Register result,
                           Register chr1, Register chr2, Label& Ldone);
+  // Use BIS for zeroing
+  void bis_zeroing(Register to, Register count, Register temp, Label& Ldone);
 
 #undef VIRTUAL
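
The new immediate form of wrasi packs a standard SPARC format-3 word; an illustrative Java computation of the emitted bits, with field positions taken from the emit_long call above and example operand values assumed. Note that WRASR-family instructions write rs1 XOR operand2, which is why the comment says "(d xor imm)":

    int rs1 = 8;                         // assumed register encoding, e.g. %o0
    int simm13 = 0x80;                   // ASI_PRIMARY
    int insn = (2 << 30)                 // op(arith_op)
             | (3 << 25)                 // u_field(3, 29, 25): rd field selects %asi
             | (0x30 << 19)              // op3(wrreg_op3)
             | (rs1 << 14)               // rs1(d)
             | (1 << 13)                 // immed(true)
             | (simm13 & 0x1fff);        // simm(simm13a, 13)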
 
--- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -421,8 +421,7 @@
   }
 
   if (__ is_in_wdisp16_range(_continuation)) {
-    __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
-                      pre_val_reg, _continuation);
+    __ br_null(pre_val_reg, /*annul*/false, Assembler::pt, _continuation);
   } else {
     __ cmp(pre_val_reg, G0);
     __ brx(Assembler::equal, false, Assembler::pn, _continuation);
@@ -458,8 +457,7 @@
     // The original src operand was not a constant.
     // Generate src == null?
     if (__ is_in_wdisp16_range(_continuation)) {
-      __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
-                        src_reg, _continuation);
+      __ br_null(src_reg, /*annul*/false, Assembler::pt, _continuation);
     } else {
       __ cmp(src_reg, G0);
       __ brx(Assembler::equal, false, Assembler::pt, _continuation);
@@ -476,13 +474,9 @@
   Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset_in_bytes() + sizeof(oopDesc));
   __ ld(ref_type_adr, tmp_reg);
 
-  if (__ is_in_wdisp16_range(_continuation)) {
-    __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
-                      tmp_reg, _continuation);
-  } else {
-    __ cmp(tmp_reg, G0);
-    __ brx(Assembler::equal, false, Assembler::pt, _continuation);
-  }
+  // _reference_type field is of type ReferenceType (enum)
+  assert(REF_NONE == 0, "check this code");
+  __ cmp_zero_and_br(Assembler::equal, tmp_reg, _continuation, /*annul*/false, Assembler::pt);
   __ delayed()->nop();
 
   // Is marking active?
@@ -498,13 +492,8 @@
     assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
     __ ldsb(in_progress, tmp_reg);
   }
-  if (__ is_in_wdisp16_range(_continuation)) {
-    __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
-                      tmp_reg, _continuation);
-  } else {
-    __ cmp(tmp_reg, G0);
-    __ brx(Assembler::equal, false, Assembler::pt, _continuation);
-  }
+
+  __ cmp_zero_and_br(Assembler::equal, tmp_reg, _continuation, /*annul*/false, Assembler::pt);
   __ delayed()->nop();
 
   // val == null?
@@ -512,8 +501,7 @@
   Register val_reg = val()->as_register();
 
   if (__ is_in_wdisp16_range(_continuation)) {
-    __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
-                      val_reg, _continuation);
+    __ br_null(val_reg, /*annul*/false, Assembler::pt, _continuation);
   } else {
     __ cmp(val_reg, G0);
     __ brx(Assembler::equal, false, Assembler::pt, _continuation);
@@ -542,9 +530,9 @@
   assert(new_val()->is_register(), "Precondition.");
   Register addr_reg = addr()->as_pointer_register();
   Register new_val_reg = new_val()->as_register();
+
   if (__ is_in_wdisp16_range(_continuation)) {
-    __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
-                      new_val_reg, _continuation);
+    __ br_null(new_val_reg, /*annul*/false, Assembler::pt, _continuation);
   } else {
     __ cmp(new_val_reg, G0);
     __ brx(Assembler::equal, false, Assembler::pn, _continuation);
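
All of these stubs now choose between the same two branch shapes, so the pattern is worth stating once. A schematic Java rendering (the masm methods are stand-ins for the MacroAssembler calls in the hunks; br_null is a single branch-on-register instruction and only reaches targets within a 16-bit word displacement):

    if (masm.isInWdisp16Range(continuation)) {
        masm.brNull(reg, false /*annul*/, PT, continuation);  // short form: branch on register == 0
    } else {
        masm.cmp(reg, G0);                                    // long form: set condition codes,
        masm.brx(EQUAL, false, PT, continuation);             // then branch on them
    }
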
--- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -142,11 +142,6 @@
 }
 
 
-LIR_Opr LIR_Assembler::incomingReceiverOpr() {
-  return FrameMap::I0_oop_opr;
-}
-
-
 LIR_Opr LIR_Assembler::osrBufferPointer() {
   return FrameMap::I0_opr;
 }
--- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -782,13 +782,6 @@
       }
       break;
 
-    case jvmti_exception_throw_id:
-      { // Oexception : exception
-        __ set_info("jvmti_exception_throw", dont_gc_arguments);
-        oop_maps = generate_stub_call(sasm, noreg, CAST_FROM_FN_PTR(address, Runtime1::post_jvmti_exception_throw), I0);
-      }
-      break;
-
     case dtrace_object_alloc_id:
       { // O0: object
         __ set_info("dtrace_object_alloc", dont_gc_arguments);
@@ -834,14 +827,16 @@
         int satb_q_buf_byte_offset =
           in_bytes(JavaThread::satb_mark_queue_offset() +
                    PtrQueue::byte_offset_of_buf());
+
         __ bind(restart);
+        // Load the index into the SATB buffer. PtrQueue::_index is a
+        // size_t so ld_ptr is appropriate
         __ ld_ptr(G2_thread, satb_q_index_byte_offset, tmp);
 
-        __ br_on_reg_cond(Assembler::rc_z, /*annul*/false,
-                          Assembler::pn, tmp, refill);
+        // index == 0?
+        __ cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pn, refill);
 
-        // If the branch is taken, no harm in executing this in the delay slot.
-        __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2);
+        __ ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2);
         __ sub(tmp, oopSize, tmp);
 
         __ st_ptr(pre_val, tmp2, tmp);  // [_buf + index] := <address_of_card>
@@ -901,11 +896,8 @@
         __ set(rs, cardtable);         // cardtable := <card table base>
         __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]
 
-        __ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt,
-                          tmp, not_already_dirty);
-        // Get cardtable + tmp into a reg by itself -- useful in the take-the-branch
-        // case, harmless if not.
-        __ delayed()->add(addr, cardtable, tmp2);
+        assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code");
+        __ cmp_and_br_short(tmp, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
 
         // We didn't take the branch, so we're already dirty: return.
         // Use return-from-leaf
@@ -914,6 +906,10 @@
 
         // Not dirty.
         __ bind(not_already_dirty);
+
+        // Get cardtable + tmp into a reg by itself
+        __ add(addr, cardtable, tmp2);
+
         // First, dirty it.
         __ stb(G0, tmp2, 0);  // [cardPtr] := 0  (i.e., dirty).
 
@@ -929,13 +925,17 @@
         int dirty_card_q_buf_byte_offset =
           in_bytes(JavaThread::dirty_card_queue_offset() +
                    PtrQueue::byte_offset_of_buf());
+
         __ bind(restart);
+
+        // Get the index into the update buffer. PtrQueue::_index is
+        // a size_t so ld_ptr is appropriate here.
         __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, tmp3);
 
-        __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn,
-                          tmp3, refill);
-        // If the branch is taken, no harm in executing this in the delay slot.
-        __ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4);
+        // index == 0?
+        __ cmp_and_brx_short(tmp3, G0, Assembler::equal,  Assembler::pn, refill);
+
+        __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4);
         __ sub(tmp3, oopSize, tmp3);
 
         __ st_ptr(tmp2, tmp4, tmp3);  // [_buf + index] := <address_of_card>
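
Both barrier stubs here implement the same pointer-queue fast path that the MacroAssembler versions use. A hedged Java rendering of the logic (the field accessors and refill helper are made up for illustration; the byte arithmetic mirrors the assembly):

    long index = satbQueueIndex(thread);        // PtrQueue::_index, counted in bytes
    if (index == 0) {
        runtimeRefill(thread);                  // buffer full: call into the VM to swap buffers
    } else {
        index -= oopSize;                       // queue fills downward
        storeAddress(satbQueueBuf(thread) + index, preVal);  // [_buf + index] := pre_val
        setSatbQueueIndex(thread, index);
    }
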
--- a/src/cpu/sparc/vm/copy_sparc.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/cpu/sparc/vm/copy_sparc.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -156,9 +156,16 @@
 #endif // _LP64
 }
 
+typedef void (*_zero_Fn)(HeapWord* to, size_t count);
+
 static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
   assert(MinObjAlignmentInBytes >= BytesPerLong, "need alternate implementation");
 
+  if (value == 0 && UseBlockZeroing &&
+      (count > (BlockZeroingLowLimit >> LogHeapWordSize))) {
+   // Call it only when block zeroing is used
+   ((_zero_Fn)StubRoutines::zero_aligned_words())(tohw, count);
+  } else {
    julong* to = (julong*)tohw;
    julong  v  = ((julong)value << 32) | value;
    // If count is odd, odd will be equal to 1 on 32-bit platform
@@ -176,6 +183,7 @@
      *((juint*)to) = value;
 
    }
+  }
 }
 
 static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
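
The new branch routes only large zero-fills through the BIS stub; everything else keeps the julong word loop. The decision restated in Java (flag values are assumed defaults; the shift converts the byte limit into a heap-word count, matching count's units):

    static boolean useZeroStub(long count, int value) {
        final boolean useBlockZeroing = true;      // -XX:+UseBlockZeroing (assumed)
        final long blockZeroingLowLimit = 2048;    // assumed default, in bytes
        final int logHeapWordSize = 3;             // 64-bit heap words
        return value == 0 && useBlockZeroing
            && count > (blockZeroingLowLimit >> logHeapWordSize);
    }
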
--- a/src/cpu/sparc/vm/frame_sparc.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/cpu/sparc/vm/frame_sparc.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -259,13 +259,8 @@
   };
 #endif /* CC_INTERP */
 
-  // the compiler frame has many of the same fields as the interpreter frame
-  // %%%%% factor out declarations of the shared fields
   enum compiler_frame_fixed_locals {
-       compiler_frame_d_scratch_fp_offset          = -2,
-       compiler_frame_vm_locals_fp_offset          = -2, // should be same as above
-
-       compiler_frame_vm_local_words = -compiler_frame_vm_locals_fp_offset
+       compiler_frame_vm_locals_fp_offset          = -2
   };
 
  private:
@@ -283,9 +278,6 @@
 
   inline void interpreter_frame_set_tos_address(intptr_t* x);
 
-
-  // %%%%% Another idea: instead of defining 3 fns per item, just define one returning a ref
-
   // monitors:
 
   // next two fns read and write Lmonitors value,
@@ -298,22 +290,8 @@
     return ((interpreterState)sp_at(interpreter_state_ptr_offset));
   }
 
-
 #endif /* CC_INTERP */
 
-
-
- // Compiled frames
-
  public:
-  // Tells if this register can hold 64 bits on V9 (really, V8+).
-  static bool holds_a_doubleword(Register reg) {
-#ifdef _LP64
-    //    return true;
-    return reg->is_out() || reg->is_global();
-#else
-    return reg->is_out() || reg->is_global();
-#endif
-  }
 
 #endif // CPU_SPARC_VM_FRAME_SPARC_HPP
--- a/src/cpu/sparc/vm/methodHandles_sparc.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -1262,6 +1262,15 @@
     }
     break;
 
+  case _adapter_opt_profiling:
+    if (java_lang_invoke_CountingMethodHandle::vmcount_offset_in_bytes() != 0) {
+      Address G3_mh_vmcount(G3_method_handle, java_lang_invoke_CountingMethodHandle::vmcount_offset_in_bytes());
+      __ ld(G3_mh_vmcount, O1_scratch);
+      __ add(O1_scratch, 1, O1_scratch);
+      __ st(O1_scratch, G3_mh_vmcount);
+    }
+    // fall through
+
   case _adapter_retype_only:
   case _adapter_retype_raw:
     // Immediately jump to the next MH layer:
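
The new adapter case is just a load/increment/store on the CountingMethodHandle's vmcount slot before falling through to the retype path. In Java terms (the accessors are hypothetical; the offset check mirrors the guard above, which skips the bump when the field was not resolved):

    int offs = vmcountOffsetInBytes();   // java_lang_invoke_CountingMethodHandle::vmcount_offset_in_bytes()
    if (offs != 0) {
        int n = unsafe.getInt(mh, offs); // __ ld
        unsafe.putInt(mh, offs, n + 1);  // __ add; __ st
    }
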
--- a/src/cpu/sparc/vm/sparc.ad	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Fri Sep 09 16:17:16 2011 -0700
@@ -460,6 +460,8 @@
 // Must be visible to the DFA in dfa_sparc.cpp
 extern bool can_branch_register( Node *bol, Node *cmp );
 
+extern bool use_block_zeroing(Node* count);
+
 // Macros to extract hi & lo halves from a long pair.
 // G0 is not part of any long pair, so assert on that.
 // Prevents accidentally using G1 instead of G0.
@@ -521,6 +523,12 @@
   return false;
 }
 
+bool use_block_zeroing(Node* count) {
+  // Use BIS for zeroing if count is not constant
+  // or it is >= BlockZeroingLowLimit.
+  return UseBlockZeroing && (count->find_intptr_t_con(BlockZeroingLowLimit) >= BlockZeroingLowLimit);
+}
+
 // ****************************************************************************
 
 // REQUIRED FUNCTIONALITY
@@ -832,6 +840,7 @@
           !(n->ideal_Opcode()==Op_ConvI2D   && ld_op==Op_LoadF) &&
           !(n->ideal_Opcode()==Op_PrefetchRead  && ld_op==Op_LoadI) &&
           !(n->ideal_Opcode()==Op_PrefetchWrite && ld_op==Op_LoadI) &&
+          !(n->ideal_Opcode()==Op_PrefetchAllocation && ld_op==Op_LoadI) &&
           !(n->ideal_Opcode()==Op_Load2I    && ld_op==Op_LoadD) &&
           !(n->ideal_Opcode()==Op_Load4C    && ld_op==Op_LoadD) &&
           !(n->ideal_Opcode()==Op_Load4S    && ld_op==Op_LoadD) &&
@@ -2810,25 +2819,6 @@
     __ float_cmp( $primary, -1, Fsrc1, Fsrc2, Rdst);
   %}
 
-  // Compiler ensures base is doubleword aligned and cnt is count of doublewords
-  enc_class enc_Clear_Array(iRegX cnt, iRegP base, iRegX temp) %{
-    MacroAssembler _masm(&cbuf);
-    Register    nof_bytes_arg   = reg_to_register_object($cnt$$reg);
-    Register    nof_bytes_tmp    = reg_to_register_object($temp$$reg);
-    Register    base_pointer_arg = reg_to_register_object($base$$reg);
-  
-    Label loop;
-    __ mov(nof_bytes_arg, nof_bytes_tmp);
-  
-    // Loop and clear, walking backwards through the array.
-    // nof_bytes_tmp (if >0) is always the number of bytes to zero
-    __ bind(loop);
-    __ deccc(nof_bytes_tmp, 8);
-    __ br(Assembler::greaterEqual, true, Assembler::pt, loop);
-    __ delayed()-> stx(G0, base_pointer_arg, nof_bytes_tmp);
-    // %%%% this mini-loop must not cross a cache boundary!
-  %}
-
 
   enc_class enc_String_Compare(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result) %{
     Label Ldone, Lloop;
@@ -10257,9 +10247,9 @@
   ins_pipe(long_memory_op);
 %}
 
-// Count and Base registers are fixed because the allocator cannot
-// kill unknown registers.  The encodings are generic.
+// The encodings are generic.
 instruct clear_array(iRegX cnt, iRegP base, iRegX temp, Universe dummy, flagsReg ccr) %{
+  predicate(!use_block_zeroing(n->in(2)));
   match(Set dummy (ClearArray cnt base));
   effect(TEMP temp, KILL ccr);
   ins_cost(300);
@@ -10267,7 +10257,71 @@
     "loop:   SUBcc  $temp,8,$temp\t! Count down a dword of bytes\n"
     "        BRge   loop\t\t! Clearing loop\n"
     "        STX    G0,[$base+$temp]\t! delay slot" %}
-  ins_encode( enc_Clear_Array(cnt, base, temp) );
+
+  ins_encode %{
+    // Compiler ensures base is doubleword aligned and cnt is count of doublewords
+    Register nof_bytes_arg    = $cnt$$Register;
+    Register nof_bytes_tmp    = $temp$$Register;
+    Register base_pointer_arg = $base$$Register;
+
+    Label loop;
+    __ mov(nof_bytes_arg, nof_bytes_tmp);
+
+    // Loop and clear, walking backwards through the array.
+    // nof_bytes_tmp (if >0) is always the number of bytes to zero
+    __ bind(loop);
+    __ deccc(nof_bytes_tmp, 8);
+    __ br(Assembler::greaterEqual, true, Assembler::pt, loop);
+    __ delayed()-> stx(G0, base_pointer_arg, nof_bytes_tmp);
+    // %%%% this mini-loop must not cross a cache boundary!
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+instruct clear_array_bis(g1RegX cnt, o0RegP base, Universe dummy, flagsReg ccr) %{
+  predicate(use_block_zeroing(n->in(2)));
+  match(Set dummy (ClearArray cnt base));
+  effect(USE_KILL cnt, USE_KILL base, KILL ccr);
+  ins_cost(300);
+  format %{ "CLEAR  [$base, $cnt]\t! ClearArray" %}
+
+  ins_encode %{
+
+    assert(MinObjAlignmentInBytes >= BytesPerLong, "need alternate implementation");
+    Register to    = $base$$Register;
+    Register count = $cnt$$Register;
+
+    Label Ldone;
+    __ nop(); // Separate short branches
+    // Use BIS for zeroing (temp is not used).
+    __ bis_zeroing(to, count, G0, Ldone);
+    __ bind(Ldone);
+
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+instruct clear_array_bis_2(g1RegX cnt, o0RegP base, iRegX tmp, Universe dummy, flagsReg ccr) %{
+  predicate(use_block_zeroing(n->in(2)) && !Assembler::is_simm13((int)BlockZeroingLowLimit));
+  match(Set dummy (ClearArray cnt base));
+  effect(TEMP tmp, USE_KILL cnt, USE_KILL base, KILL ccr);
+  ins_cost(300);
+  format %{ "CLEAR  [$base, $cnt]\t! ClearArray" %}
+
+  ins_encode %{
+
+    assert(MinObjAlignmentInBytes >= BytesPerLong, "need alternate implementation");
+    Register to    = $base$$Register;
+    Register count = $cnt$$Register;
+    Register temp  = $tmp$$Register;
+
+    Label Ldone;
+    __ nop(); // Separate short branches
+    // Use BIS for zeroing
+    __ bis_zeroing(to, count, temp, Ldone);
+    __ bind(Ldone);
+
+  %}
   ins_pipe(long_memory_op);
 %}
 
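The three instruct variants above are selected by the use_block_zeroing() predicate defined earlier in this file: a non-constant length is treated as potentially large, since find_intptr_t_con() returns the supplied default for an unknown node. A standalone sketch of that decision (names hypothetical):

    #include <cstdint>

    // Use block (BIS) zeroing if the length is not a compile-time constant,
    // or if the constant length reaches the low limit; below the limit a
    // simple store loop is cheaper.
    static bool should_use_block_zeroing(bool use_bis, bool count_is_constant,
                                         int64_t count, int64_t low_limit) {
      if (!use_bis) return false;
      // A non-constant count defaults to low_limit, mirroring
      // find_intptr_t_con(BlockZeroingLowLimit) for an unknown node.
      int64_t effective = count_is_constant ? count : low_limit;
      return effective >= low_limit;
    }
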
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -1124,6 +1124,126 @@
     }
   }
 
+  //
+  // Generate main code for disjoint arraycopy
+  //
+  typedef void (StubGenerator::*CopyLoopFunc)(Register from, Register to, Register count, int count_dec,
+                                              Label& L_loop, bool use_prefetch, bool use_bis);
+
+  void disjoint_copy_core(Register from, Register to, Register count, int log2_elem_size,
+                          int iter_size, CopyLoopFunc copy_loop_func) {
+    Label L_copy;
+
+    assert(log2_elem_size <= 3, "the following code should be changed");
+    int count_dec = 16>>log2_elem_size;
+
+    int prefetch_dist = MAX2(ArraycopySrcPrefetchDistance, ArraycopyDstPrefetchDistance);
+    assert(prefetch_dist < 4096, "invalid value");
+    prefetch_dist = (prefetch_dist + (iter_size-1)) & (-iter_size); // round up to one iteration copy size
+    int prefetch_count = (prefetch_dist >> log2_elem_size); // elements count
+
+    if (UseBlockCopy) {
+      Label L_block_copy, L_block_copy_prefetch, L_skip_block_copy;
+
+      // 64-byte tail + bytes copied in one loop iteration
+      int tail_size = 64 + iter_size;
+      int block_copy_count = (MAX2(tail_size, (int)BlockCopyLowLimit)) >> log2_elem_size;
+      // Use BIS copy only for big arrays since it requires a membar.
+      __ set(block_copy_count, O4);
+      __ cmp_and_br_short(count, O4, Assembler::lessUnsigned, Assembler::pt, L_skip_block_copy);
+      // This code is for disjoint source and destination:
+      //   to <= from || to >= from+count
+      // but BIS will stomp over 'from' if (to > from-tail_size && to <= from)
+      __ sub(from, to, O4);
+      __ srax(O4, 4, O4); // divide by 16 since the following short branch has only 5 bits for imm.
+      __ cmp_and_br_short(O4, (tail_size>>4), Assembler::lessEqualUnsigned, Assembler::pn, L_skip_block_copy);
+
+      __ wrasi(G0, Assembler::ASI_ST_BLKINIT_PRIMARY);
+      // BIS should not be used to copy tail (64 bytes+iter_size)
+      // to avoid zeroing of following values.
+      __ sub(count, (tail_size>>log2_elem_size), count); // count remains >= 0
+
+      if (prefetch_count > 0) { // rounded up to one iteration count
+        // Do prefetching only if copy size is bigger
+        // than prefetch distance.
+        __ set(prefetch_count, O4);
+        __ cmp_and_brx_short(count, O4, Assembler::less, Assembler::pt, L_block_copy);
+        __ sub(count, prefetch_count, count);
+
+        (this->*copy_loop_func)(from, to, count, count_dec, L_block_copy_prefetch, true, true);
+        __ add(count, prefetch_count, count); // restore count
+
+      } // prefetch_count > 0
+
+      (this->*copy_loop_func)(from, to, count, count_dec, L_block_copy, false, true);
+      __ add(count, (tail_size>>log2_elem_size), count); // restore count
+
+      __ wrasi(G0, Assembler::ASI_PRIMARY_NOFAULT);
+      // BIS needs membar.
+      __ membar(Assembler::StoreLoad);
+      // Copy tail
+      __ ba_short(L_copy);
+
+      __ BIND(L_skip_block_copy);
+    } // UseBlockCopy
+
+    if (prefetch_count > 0) { // rounded up to one iteration count
+      // Do prefetching only if copy size is bigger
+      // than prefetch distance.
+      __ set(prefetch_count, O4);
+      __ cmp_and_brx_short(count, O4, Assembler::lessUnsigned, Assembler::pt, L_copy);
+      __ sub(count, prefetch_count, count);
+
+      Label L_copy_prefetch;
+      (this->*copy_loop_func)(from, to, count, count_dec, L_copy_prefetch, true, false);
+      __ add(count, prefetch_count, count); // restore count
+
+    } // prefetch_count > 0
+
+    (this->*copy_loop_func)(from, to, count, count_dec, L_copy, false, false);
+  }
+
+
+
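
The prefetch setup in disjoint_copy_core() takes the larger of the two byte distances, rounds it up to a whole loop iteration, and converts it to an element count. A standalone sketch of that arithmetic (the flag values here are hypothetical):

    #include <algorithm>
    #include <cassert>
    #include <cstdio>

    // Sketch of the prefetch-distance setup in disjoint_copy_core().
    int main() {
      int src_dist = 512, dst_dist = 256;  // ArraycopySrc/DstPrefetchDistance
      int log2_elem_size = 2;              // 4-byte elements
      int iter_size = 16;                  // bytes copied per loop iteration

      int prefetch_dist = std::max(src_dist, dst_dist);
      assert(prefetch_dist < 4096);
      prefetch_dist = (prefetch_dist + (iter_size - 1)) & -iter_size; // round up
      int prefetch_count = prefetch_dist >> log2_elem_size;           // in elements
      std::printf("dist=%d bytes, count=%d elements\n", prefetch_dist, prefetch_count);
      return 0;
    }
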
+  //
+  // Helper methods for copy_16_bytes_forward_with_shift()
+  //
+  void copy_16_bytes_shift_loop(Register from, Register to, Register count, int count_dec,
+                                Label& L_loop, bool use_prefetch, bool use_bis) {
+
+    const Register left_shift  = G1; // left  shift bit counter
+    const Register right_shift = G5; // right shift bit counter
+
+    __ align(OptoLoopAlignment);
+    __ BIND(L_loop);
+    if (use_prefetch) {
+      if (ArraycopySrcPrefetchDistance > 0) {
+        __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads);
+      }
+      if (ArraycopyDstPrefetchDistance > 0) {
+        __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads);
+      }
+    }
+    __ ldx(from, 0, O4);
+    __ ldx(from, 8, G4);
+    __ inc(to, 16);
+    __ inc(from, 16);
+    __ deccc(count, count_dec); // Can we do next iteration after this one?
+    __ srlx(O4, right_shift, G3);
+    __ bset(G3, O3);
+    __ sllx(O4, left_shift,  O4);
+    __ srlx(G4, right_shift, G3);
+    __ bset(G3, O4);
+    if (use_bis) {
+      __ stxa(O3, to, -16);
+      __ stxa(O4, to, -8);
+    } else {
+      __ stx(O3, to, -16);
+      __ stx(O4, to, -8);
+    }
+    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
+    __ delayed()->sllx(G4, left_shift,  O3);
+  }
 
   // Copy big chunks forward with shift
   //
@@ -1135,64 +1255,51 @@
   //   L_copy_bytes - copy exit label
   //
   void copy_16_bytes_forward_with_shift(Register from, Register to,
-                     Register count, int count_dec, Label& L_copy_bytes) {
-    Label L_loop, L_aligned_copy, L_copy_last_bytes;
+                     Register count, int log2_elem_size, Label& L_copy_bytes) {
+    Label L_aligned_copy, L_copy_last_bytes;
+    assert(log2_elem_size <= 3, "the following code should be changed");
+    int count_dec = 16>>log2_elem_size;
 
     // if both arrays have the same alignment mod 8, do 8 bytes aligned copy
-      __ andcc(from, 7, G1); // misaligned bytes
-      __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
-      __ delayed()->nop();
+    __ andcc(from, 7, G1); // misaligned bytes
+    __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
+    __ delayed()->nop();
 
     const Register left_shift  = G1; // left  shift bit counter
     const Register right_shift = G5; // right shift bit counter
 
-      __ sll(G1, LogBitsPerByte, left_shift);
-      __ mov(64, right_shift);
-      __ sub(right_shift, left_shift, right_shift);
+    __ sll(G1, LogBitsPerByte, left_shift);
+    __ mov(64, right_shift);
+    __ sub(right_shift, left_shift, right_shift);
 
     //
     // Load 2 aligned 8-byte chunks and use one from the previous iteration
     // to form 2 aligned 8-byte chunks to store.
     //
-      __ deccc(count, count_dec); // Pre-decrement 'count'
-      __ andn(from, 7, from);     // Align address
-      __ ldx(from, 0, O3);
-      __ inc(from, 8);
-      __ align(OptoLoopAlignment);
-    __ BIND(L_loop);
-      __ ldx(from, 0, O4);
-      __ deccc(count, count_dec); // Can we do next iteration after this one?
-      __ ldx(from, 8, G4);
-      __ inc(to, 16);
-      __ inc(from, 16);
-      __ sllx(O3, left_shift,  O3);
-      __ srlx(O4, right_shift, G3);
-      __ bset(G3, O3);
-      __ stx(O3, to, -16);
-      __ sllx(O4, left_shift,  O4);
-      __ srlx(G4, right_shift, G3);
-      __ bset(G3, O4);
-      __ stx(O4, to, -8);
-      __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
-      __ delayed()->mov(G4, O3);
-
-      __ inccc(count, count_dec>>1 ); // + 8 bytes
-      __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
-      __ delayed()->inc(count, count_dec>>1); // restore 'count'
-
-      // copy 8 bytes, part of them already loaded in O3
-      __ ldx(from, 0, O4);
-      __ inc(to, 8);
-      __ inc(from, 8);
-      __ sllx(O3, left_shift,  O3);
-      __ srlx(O4, right_shift, G3);
-      __ bset(O3, G3);
-      __ stx(G3, to, -8);
+    __ dec(count, count_dec);   // Pre-decrement 'count'
+    __ andn(from, 7, from);     // Align address
+    __ ldx(from, 0, O3);
+    __ inc(from, 8);
+    __ sllx(O3, left_shift,  O3);
+
+    disjoint_copy_core(from, to, count, log2_elem_size, 16, copy_16_bytes_shift_loop);
+
+    __ inccc(count, count_dec>>1 ); // + 8 bytes
+    __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
+    __ delayed()->inc(count, count_dec>>1); // restore 'count'
+
+    // copy 8 bytes, part of them already loaded in O3
+    __ ldx(from, 0, O4);
+    __ inc(to, 8);
+    __ inc(from, 8);
+    __ srlx(O4, right_shift, G3);
+    __ bset(O3, G3);
+    __ stx(G3, to, -8);
 
     __ BIND(L_copy_last_bytes);
-      __ srl(right_shift, LogBitsPerByte, right_shift); // misaligned bytes
-      __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
-      __ delayed()->sub(from, right_shift, from);       // restore address
+    __ srl(right_shift, LogBitsPerByte, right_shift); // misaligned bytes
+    __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
+    __ delayed()->sub(from, right_shift, from);       // restore address
 
     __ BIND(L_aligned_copy);
   }
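
The helper above realigns a misaligned source by reading aligned 8-byte words and splicing each output word together from two neighbors with a left/right shift pair. A standalone sketch of the splice (big-endian ordering, as on SPARC; zero misalignment is excluded because the stub branches to the aligned copy first):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Combine two aligned loads into one aligned store for a source that is
    // misaligned by 1..7 bytes (big-endian byte order).
    static uint64_t splice(uint64_t prev, uint64_t next, int misalign_bytes) {
      assert(misalign_bytes > 0 && misalign_bytes < 8);
      int left  = misalign_bytes * 8;  // bits dropped from the first word
      int right = 64 - left;           // bits taken from the second word
      return (prev << left) | (next >> right);
    }

    int main() {
      uint64_t a = 0x0011223344556677ULL, b = 0x8899AABBCCDDEEFFULL;
      // Misaligned by 3 bytes: yields bytes 33 44 55 66 77 88 99 AA.
      std::printf("%016llx\n", (unsigned long long)splice(a, b, 3));
      return 0;
    }
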
@@ -1348,7 +1455,7 @@
      // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
       // Also jump over aligned copy after the copy with shift completed.
 
-      copy_16_bytes_forward_with_shift(from, to, count, 16, L_copy_byte);
+      copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte);
     }
 
     // Both arrays are 8-byte aligned; copy 16 bytes at a time
@@ -1576,7 +1683,7 @@
      // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
       // Also jump over aligned copy after the copy with shift completed.
 
-      copy_16_bytes_forward_with_shift(from, to, count, 8, L_copy_2_bytes);
+      copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes);
     }
 
     // Both arrays are 8-byte aligned; copy 16 bytes at a time
@@ -1950,6 +2057,45 @@
   }
 
   //
+  // Helper methods for generate_disjoint_int_copy_core()
+  //
+  void copy_16_bytes_loop(Register from, Register to, Register count, int count_dec,
+                          Label& L_loop, bool use_prefetch, bool use_bis) {
+
+    __ align(OptoLoopAlignment);
+    __ BIND(L_loop);
+    if (use_prefetch) {
+      if (ArraycopySrcPrefetchDistance > 0) {
+        __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads);
+      }
+      if (ArraycopyDstPrefetchDistance > 0) {
+        __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads);
+      }
+    }
+    __ ldx(from, 4, O4);
+    __ ldx(from, 12, G4);
+    __ inc(to, 16);
+    __ inc(from, 16);
+    __ deccc(count, 4); // Can we do next iteration after this one?
+
+    __ srlx(O4, 32, G3);
+    __ bset(G3, O3);
+    __ sllx(O4, 32, O4);
+    __ srlx(G4, 32, G3);
+    __ bset(G3, O4);
+    if (use_bis) {
+      __ stxa(O3, to, -16);
+      __ stxa(O4, to, -8);
+    } else {
+      __ stx(O3, to, -16);
+      __ stx(O4, to, -8);
+    }
+    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
+    __ delayed()->sllx(G4, 32,  O3);
+
+  }
+
+  //
   //  Generate core code for disjoint int copy (and oop copy on 32-bit).
   //  If "aligned" is true, the "from" and "to" addresses are assumed
   //  to be heapword aligned.
@@ -1962,7 +2108,7 @@
   void generate_disjoint_int_copy_core(bool aligned) {
 
     Label L_skip_alignment, L_aligned_copy;
-    Label L_copy_16_bytes,  L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;
+    Label L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;
 
     const Register from      = O0;   // source array address
     const Register to        = O1;   // destination array address
@@ -2013,30 +2159,16 @@
 
     // copy with shift 4 elements (16 bytes) at a time
       __ dec(count, 4);   // The cmp at the beginning guarantees count >= 4
-
-      __ align(OptoLoopAlignment);
-    __ BIND(L_copy_16_bytes);
-      __ ldx(from, 4, O4);
-      __ deccc(count, 4); // Can we do next iteration after this one?
-      __ ldx(from, 12, G4);
-      __ inc(to, 16);
-      __ inc(from, 16);
-      __ sllx(O3, 32, O3);
-      __ srlx(O4, 32, G3);
-      __ bset(G3, O3);
-      __ stx(O3, to, -16);
-      __ sllx(O4, 32, O4);
-      __ srlx(G4, 32, G3);
-      __ bset(G3, O4);
-      __ stx(O4, to, -8);
-      __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
-      __ delayed()->mov(G4, O3);
+      __ sllx(O3, 32,  O3);
+
+      disjoint_copy_core(from, to, count, 2, 16, copy_16_bytes_loop);
 
       __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
       __ delayed()->inc(count, 4); // restore 'count'
 
     __ BIND(L_aligned_copy);
-    }
+    } // !aligned
+
     // copy 4 elements (16 bytes) at a time
       __ and3(count, 1, G4); // Save
       __ srl(count, 1, count);
@@ -2223,6 +2355,38 @@
   }
 
   //
+  // Helper methods for generate_disjoint_long_copy_core()
+  //
+  void copy_64_bytes_loop(Register from, Register to, Register count, int count_dec,
+                          Label& L_loop, bool use_prefetch, bool use_bis) {
+    __ align(OptoLoopAlignment);
+    __ BIND(L_loop);
+    for (int off = 0; off < 64; off += 16) {
+      if (use_prefetch && (off & 31) == 0) {
+        if (ArraycopySrcPrefetchDistance > 0) {
+          __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads);
+        }
+        if (ArraycopyDstPrefetchDistance > 0) {
+          __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads);
+        }
+      }
+      __ ldx(from,  off+0, O4);
+      __ ldx(from,  off+8, O5);
+      if (use_bis) {
+        __ stxa(O4, to,  off+0);
+        __ stxa(O5, to,  off+8);
+      } else {
+        __ stx(O4, to,  off+0);
+        __ stx(O5, to,  off+8);
+      }
+    }
+    __ deccc(count, 8);
+    __ inc(from, 64);
+    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
+    __ delayed()->inc(to, 64);
+  }
+
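
copy_64_bytes_loop above unrolls the copy to 64 bytes per iteration and issues a source/destination prefetch pair every 32 bytes when enabled. A portable sketch of the same shape, using the GCC/Clang __builtin_prefetch intrinsic in place of the SPARC prefetch instruction (distances hypothetical):

    #include <cstddef>
    #include <cstdint>

    // Copy 64 bytes per iteration as eight 8-byte words, optionally
    // prefetching ahead every 32 bytes (every 4 words).
    static void copy_64_bytes(const uint64_t* from, uint64_t* to, size_t n_words,
                              bool use_prefetch, size_t dist_bytes) {
      while (n_words >= 8) {
        for (int off = 0; off < 8; off += 2) {
          if (use_prefetch && (off & 3) == 0) {  // like (off & 31) == 0 in bytes
            __builtin_prefetch(reinterpret_cast<const char*>(from) + dist_bytes, 0);
            __builtin_prefetch(reinterpret_cast<char*>(to) + dist_bytes, 1);
          }
          to[off]     = from[off];
          to[off + 1] = from[off + 1];
        }
        from += 8; to += 8; n_words -= 8;
      }
    }
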
+  //
   //  Generate core code for disjoint long copy (and oop copy on 64-bit).
   //  "aligned" is ignored, because we must make the stronger
   //  assumption that both addresses are always 64-bit aligned.
@@ -2261,38 +2425,28 @@
     const Register offset0 = O4;  // element offset
     const Register offset8 = O5;  // next element offset
 
-      __ deccc(count, 2);
-      __ mov(G0, offset0);   // offset from start of arrays (0)
-      __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
-      __ delayed()->add(offset0, 8, offset8);
+    __ deccc(count, 2);
+    __ mov(G0, offset0);   // offset from start of arrays (0)
+    __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
+    __ delayed()->add(offset0, 8, offset8);
 
     // Copy by 64 bytes chunks
-    Label L_copy_64_bytes;
+
     const Register from64 = O3;  // source address
     const Register to64   = G3;  // destination address
-      __ subcc(count, 6, O3);
-      __ brx(Assembler::negative, false, Assembler::pt, L_copy_16_bytes );
-      __ delayed()->mov(to,   to64);
-      // Now we can use O4(offset0), O5(offset8) as temps
-      __ mov(O3, count);
-      __ mov(from, from64);
-
-      __ align(OptoLoopAlignment);
-    __ BIND(L_copy_64_bytes);
-      for( int off = 0; off < 64; off += 16 ) {
-        __ ldx(from64,  off+0, O4);
-        __ ldx(from64,  off+8, O5);
-        __ stx(O4, to64,  off+0);
-        __ stx(O5, to64,  off+8);
-      }
-      __ deccc(count, 8);
-      __ inc(from64, 64);
-      __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_64_bytes);
-      __ delayed()->inc(to64, 64);
+    __ subcc(count, 6, O3);
+    __ brx(Assembler::negative, false, Assembler::pt, L_copy_16_bytes );
+    __ delayed()->mov(to,   to64);
+    // Now we can use O4(offset0), O5(offset8) as temps
+    __ mov(O3, count);
+    // count >= 0 (original count - 8)
+    __ mov(from, from64);
+
+    disjoint_copy_core(from64, to64, count, 3, 64, copy_64_bytes_loop);
 
       // Restore O4(offset0), O5(offset8)
       __ sub(from64, from, offset0);
-      __ inccc(count, 6);
+      __ inccc(count, 6); // restore count
       __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
       __ delayed()->add(offset0, 8, offset8);
 
@@ -3069,6 +3223,34 @@
     return start;
   }
 
+  //
+  //  Generate stub for heap zeroing.
+  //  "to" address is aligned to jlong (8 bytes).
+  //
+  // Arguments for generated stub:
+  //      to:    O0
+  //      count: O1 treated as signed (count of HeapWords)
+  //             count could be 0
+  //
+  address generate_zero_aligned_words(const char* name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    const Register to    = O0;   // source array address
+    const Register count = O1;   // HeapWords count
+    const Register temp  = O2;   // scratch
+
+    Label Ldone;
+    __ sllx(count, LogHeapWordSize, count); // to bytes count
+    // Use BIS for zeroing
+    __ bis_zeroing(to, count, temp, Ldone);
+    __ bind(Ldone);
+    __ retl();
+    __ delayed()->nop();
+    return start;
+  }
+
   void generate_arraycopy_stubs() {
     address entry;
     address entry_jbyte_arraycopy;
@@ -3195,6 +3377,10 @@
     StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
     StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
     StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
+
+    if (UseBlockZeroing) {
+      StubRoutines::_zero_aligned_words = generate_zero_aligned_words("zero_aligned_words");
+    }
   }
 
   void generate_initial() {
--- a/src/cpu/sparc/vm/templateTable_sparc.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/cpu/sparc/vm/templateTable_sparc.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -3374,7 +3374,7 @@
 
   if(UseTLAB) {
     Register RoldTopValue = RallocatedObject;
-    Register RtopAddr = G3_scratch, RtlabWasteLimitValue = G3_scratch;
+    Register RtlabWasteLimitValue = G3_scratch;
     Register RnewTopValue = G1_scratch;
     Register RendValue = Rscratch;
     Register RfreeValue = RnewTopValue;
@@ -3455,7 +3455,11 @@
     __ delayed()->add(RallocatedObject, sizeof(oopDesc), G3_scratch);
 
     // initialize remaining object fields
-    { Label loop;
+    if (UseBlockZeroing) {
+      // Use BIS for zeroing
+      __ bis_zeroing(G3_scratch, Roffset, G1_scratch, initialize_header);
+    } else {
+      Label loop;
       __ subcc(Roffset, wordSize, Roffset);
       __ bind(loop);
       //__ subcc(Roffset, wordSize, Roffset);      // executed above loop or in delay slot
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -75,6 +75,24 @@
     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1);
   }
 
+  if (has_v9()) {
+    assert(ArraycopySrcPrefetchDistance < 4096, "invalid value");
+    if (ArraycopySrcPrefetchDistance >= 4096)
+      ArraycopySrcPrefetchDistance = 4064;
+    assert(ArraycopyDstPrefetchDistance < 4096, "invalid value");
+    if (ArraycopyDstPrefetchDistance >= 4096)
+      ArraycopyDstPrefetchDistance = 4064;
+  } else {
+    if (ArraycopySrcPrefetchDistance > 0) {
+      warning("prefetch instructions are not available on this CPU");
+      FLAG_SET_DEFAULT(ArraycopySrcPrefetchDistance, 0);
+    }
+    if (ArraycopyDstPrefetchDistance > 0) {
+      warning("prefetch instructions are not available on this CPU");
+      FLAG_SET_DEFAULT(ArraycopyDstPrefetchDistance, 0);
+    }
+  }
+
   UseSSE = 0; // Only on x86 and x64
 
   _supports_cx8 = has_v9();
@@ -170,6 +188,26 @@
     FLAG_SET_DEFAULT(UseCBCond, false);
   }
 
+  assert(BlockZeroingLowLimit > 0, "invalid value");
+  if (has_block_zeroing()) {
+    if (FLAG_IS_DEFAULT(UseBlockZeroing)) {
+      FLAG_SET_DEFAULT(UseBlockZeroing, true);
+    }
+  } else if (UseBlockZeroing) {
+    warning("BIS zeroing instructions are not available on this CPU");
+    FLAG_SET_DEFAULT(UseBlockZeroing, false);
+  }
+
+  assert(BlockCopyLowLimit > 0, "invalid value");
+  if (has_block_zeroing()) { // has_blk_init() && is_T4(): core's local L2 cache
+    if (FLAG_IS_DEFAULT(UseBlockCopy)) {
+      FLAG_SET_DEFAULT(UseBlockCopy, true);
+    }
+  } else if (UseBlockCopy) {
+    warning("BIS instructions are not available or expensive on this CPU");
+    FLAG_SET_DEFAULT(UseBlockCopy, false);
+  }
+
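
Both flag blocks above follow the same ergonomics pattern: enable the flag by default when the CPU supports the feature, and warn-and-clear when the user forced it on without support. A minimal sketch of the pattern (names hypothetical, not the VM flag machinery):

    #include <cstdio>

    // If the CPU supports the feature, turn the flag on by default; if the
    // user explicitly enabled it on unsupported hardware, warn and reset it.
    static void init_block_zeroing_flag(bool has_block_zeroing,
                                        bool flag_is_default,
                                        bool& use_block_zeroing) {
      if (has_block_zeroing) {
        if (flag_is_default) use_block_zeroing = true;
      } else if (use_block_zeroing) {
        std::fprintf(stderr,
            "warning: BIS zeroing instructions are not available on this CPU\n");
        use_block_zeroing = false;
      }
    }
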
 #ifdef COMPILER2
   // T4 and newer Sparc cpus have fast RDPC.
   if (has_fast_rdpc() && FLAG_IS_DEFAULT(UseRDPCForConstantTableBase)) {
--- a/src/cpu/sparc/vm/vm_version_sparc.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -135,8 +135,8 @@
   // T4 and newer Sparc have fast RDPC instruction.
   static bool has_fast_rdpc()           { return is_T4(); }
 
-  // T4 and newer Sparc have Most-Recently-Used (MRU) BIS.
-  static bool has_mru_blk_init()        { return has_blk_init() && is_T4(); }
+  // On T4 and newer Sparc, a BIS store to the beginning of a cache line always zeros it.
+  static bool has_block_zeroing()       { return has_blk_init() && is_T4(); }
 
   static const char* cpu_features()     { return _features_str; }
 
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -129,10 +129,6 @@
   return FrameMap::receiver_opr;
 }
 
-LIR_Opr LIR_Assembler::incomingReceiverOpr() {
-  return receiverOpr();
-}
-
 LIR_Opr LIR_Assembler::osrBufferPointer() {
   return FrameMap::as_pointer_opr(receiverOpr()->as_register());
 }
@@ -371,55 +367,6 @@
 }
 
 
-void LIR_Assembler::monitorexit(LIR_Opr obj_opr, LIR_Opr lock_opr, Register new_hdr, int monitor_no, Register exception) {
-  if (exception->is_valid()) {
-    // preserve exception
-    // note: the monitor_exit runtime call is a leaf routine
-    //       and cannot block => no GC can happen
-    // The slow case (MonitorAccessStub) uses the first two stack slots
-    // ([esp+0] and [esp+4]), therefore we store the exception at [esp+8]
-    __ movptr (Address(rsp, 2*wordSize), exception);
-  }
-
-  Register obj_reg  = obj_opr->as_register();
-  Register lock_reg = lock_opr->as_register();
-
-  // setup registers (lock_reg must be rax, for lock_object)
-  assert(obj_reg != SYNC_header && lock_reg != SYNC_header, "rax, must be available here");
-  Register hdr = lock_reg;
-  assert(new_hdr == SYNC_header, "wrong register");
-  lock_reg = new_hdr;
-  // compute pointer to BasicLock
-  Address lock_addr = frame_map()->address_for_monitor_lock(monitor_no);
-  __ lea(lock_reg, lock_addr);
-  // unlock object
-  MonitorAccessStub* slow_case = new MonitorExitStub(lock_opr, true, monitor_no);
-  // _slow_case_stubs->append(slow_case);
-  // temporary fix: must be created after exceptionhandler, therefore as call stub
-  _slow_case_stubs->append(slow_case);
-  if (UseFastLocking) {
-    // try inlined fast unlocking first, revert to slow locking if it fails
-    // note: lock_reg points to the displaced header since the displaced header offset is 0!
-    assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
-    __ unlock_object(hdr, obj_reg, lock_reg, *slow_case->entry());
-  } else {
-    // always do slow unlocking
-    // note: the slow unlocking code could be inlined here, however if we use
-    //       slow unlocking, speed doesn't matter anyway and this solution is
-    //       simpler and requires less duplicated code - additionally, the
-    //       slow unlocking code is the same in either case which simplifies
-    //       debugging
-    __ jmp(*slow_case->entry());
-  }
-  // done
-  __ bind(*slow_case->continuation());
-
-  if (exception->is_valid()) {
-    // restore exception
-    __ movptr (exception, Address(rsp, 2 * wordSize));
-  }
-}
-
 // This specifies the rsp decrement needed to build the frame
 int LIR_Assembler::initial_frame_size_in_bytes() {
   // if rounding, must let FrameMap know!
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -29,8 +29,6 @@
 
   Address::ScaleFactor array_element_size(BasicType type) const;
 
-  void monitorexit(LIR_Opr obj_opr, LIR_Opr lock_opr, Register new_hdr, int monitor_no, Register exception);
-
   void arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack);
 
   // helper functions which checks for overflow and sets bailout if it
--- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -1465,19 +1465,6 @@
       }
       break;
 
-    case jvmti_exception_throw_id:
-      { // rax,: exception oop
-        StubFrame f(sasm, "jvmti_exception_throw", dont_gc_arguments);
-        // Preserve all registers across this potentially blocking call
-        const int num_rt_args = 2;  // thread, exception oop
-        OopMap* map = save_live_registers(sasm, num_rt_args);
-        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, Runtime1::post_jvmti_exception_throw), rax);
-        oop_maps = new OopMapSet();
-        oop_maps->add_gc_map(call_offset, map);
-        restore_live_registers(sasm);
-      }
-      break;
-
     case dtrace_object_alloc_id:
       { // rax,: object
         StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments);
--- a/src/cpu/x86/vm/methodHandles_x86.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/cpu/x86/vm/methodHandles_x86.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -1343,6 +1343,13 @@
     }
     break;
 
+  case _adapter_opt_profiling:
+    if (java_lang_invoke_CountingMethodHandle::vmcount_offset_in_bytes() != 0) {
+      Address rcx_mh_vmcount(rcx_recv, java_lang_invoke_CountingMethodHandle::vmcount_offset_in_bytes());
+      __ incrementl(rcx_mh_vmcount);
+    }
+    // fall through
+
   case _adapter_retype_only:
   case _adapter_retype_raw:
     // immediately jump to the next MH layer:
--- a/src/cpu/x86/vm/methodHandles_x86.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/cpu/x86/vm/methodHandles_x86.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -110,6 +110,7 @@
 
 class RicochetFrame {
   friend class MethodHandles;
+  friend class VMStructs;
 
  private:
   intptr_t* _continuation;          // what to do when control gets back here
--- a/src/os/linux/vm/os_linux.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/os/linux/vm/os_linux.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -125,10 +125,6 @@
 # include <inttypes.h>
 # include <sys/ioctl.h>
 
-#ifdef AMD64
-#include <asm/vsyscall.h>
-#endif
-
 #define MAX_PATH    (2 * K)
 
 // for timer info max values which include all bits
@@ -2502,7 +2498,13 @@
   int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
   uintptr_t res = (uintptr_t) ::mmap(addr, size, prot,
                                    MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
-  return res != (uintptr_t) MAP_FAILED;
+  if (res != (uintptr_t) MAP_FAILED) {
+    if (UseNUMAInterleaving) {
+      numa_make_global(addr, size);
+    }
+    return true;
+  }
+  return false;
 }
 
 // Define MAP_HUGETLB here so we can build HotSpot on old systems.
@@ -2523,10 +2525,20 @@
       (uintptr_t) ::mmap(addr, size, prot,
                          MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB,
                          -1, 0);
-    return res != (uintptr_t) MAP_FAILED;
-  }
-
-  return commit_memory(addr, size, exec);
+    if (res != (uintptr_t) MAP_FAILED) {
+      if (UseNUMAInterleaving) {
+        numa_make_global(addr, size);
+      }
+      return true;
+    }
+    // Fall through and try to use small pages
+  }
+
+  if (commit_memory(addr, size, exec)) {
+    realign_memory(addr, size, alignment_hint);
+    return true;
+  }
+  return false;
 }
 
 void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
@@ -2588,8 +2600,17 @@
   int retval = -1;
 
 #if defined(IA32)
+# ifndef SYS_getcpu
+# define SYS_getcpu 318
+# endif
   retval = syscall(SYS_getcpu, &cpu, NULL, NULL);
 #elif defined(AMD64)
+// Unfortunately we have to bring all these macros here from vsyscall.h
+// to be able to compile on old Linux releases.
+# define __NR_vgetcpu 2
+# define VSYSCALL_START (-10UL << 20)
+# define VSYSCALL_SIZE 1024
+# define VSYSCALL_ADDR(vsyscall_nr) (VSYSCALL_START+VSYSCALL_SIZE*(vsyscall_nr))
   typedef long (*vgetcpu_t)(unsigned int *cpu, unsigned int *node, unsigned long *tcache);
   vgetcpu_t vgetcpu = (vgetcpu_t)VSYSCALL_ADDR(__NR_vgetcpu);
   retval = vgetcpu(&cpu, NULL, NULL);
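
On kernels where the vsyscall page cannot be assumed, the raw getcpu syscall gives the same answer on both 32- and 64-bit. A Linux-only sketch of that route (this is an alternative, not the code above, which deliberately goes through the vsyscall page to support old AMD64 kernels):

    #include <cstdio>
    #include <unistd.h>
    #include <sys/syscall.h>

    int main() {
    #ifdef SYS_getcpu
      unsigned cpu = 0, node = 0;
      // Third argument (tcache) is unused on modern kernels; NULL is fine.
      if (syscall(SYS_getcpu, &cpu, &node, nullptr) == 0)
        std::printf("running on cpu %u, numa node %u\n", cpu, node);
    #endif
      return 0;
    }
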
@@ -3115,6 +3136,10 @@
      return NULL;
   }
 
+  if ((addr != NULL) && UseNUMAInterleaving) {
+    numa_make_global(addr, bytes);
+  }
+
   return addr;
 }
 
--- a/src/os/solaris/vm/os_solaris.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/os/solaris/vm/os_solaris.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -2777,8 +2777,14 @@
 bool os::commit_memory(char* addr, size_t bytes, bool exec) {
   int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
   size_t size = bytes;
-  return
-     NULL != Solaris::mmap_chunk(addr, size, MAP_PRIVATE|MAP_FIXED, prot);
+  char *res = Solaris::mmap_chunk(addr, size, MAP_PRIVATE|MAP_FIXED, prot);
+  if (res != NULL) {
+    if (UseNUMAInterleaving) {
+      numa_make_global(addr, bytes);
+    }
+    return true;
+  }
+  return false;
 }
 
 bool os::commit_memory(char* addr, size_t bytes, size_t alignment_hint,
@@ -3389,12 +3395,11 @@
   return true;
 }
 
-char* os::reserve_memory_special(size_t bytes, char* addr, bool exec) {
+char* os::reserve_memory_special(size_t size, char* addr, bool exec) {
   // "exec" is passed in but not used.  Creating the shared image for
   // the code cache doesn't have an SHM_X executable permission to check.
   assert(UseLargePages && UseISM, "only for ISM large pages");
 
-  size_t size = bytes;
   char* retAddr = NULL;
   int shmid;
   key_t ismKey;
@@ -3436,7 +3441,9 @@
     }
     return NULL;
   }
-
+  if ((retAddr != NULL) && UseNUMAInterleaving) {
+    numa_make_global(retAddr, size);
+  }
   return retAddr;
 }
 
--- a/src/os/windows/vm/os_windows.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/os/windows/vm/os_windows.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -2614,6 +2614,57 @@
 static HANDLE    _hProcess;
 static HANDLE    _hToken;
 
+// Container for NUMA node list info
+class NUMANodeListHolder {
+private:
+  int *_numa_used_node_list;  // allocated below
+  int _numa_used_node_count;
+
+  void free_node_list() {
+    if (_numa_used_node_list != NULL) {
+      FREE_C_HEAP_ARRAY(int, _numa_used_node_list);
+    }
+  }
+
+public:
+  NUMANodeListHolder() {
+    _numa_used_node_count = 0;
+    _numa_used_node_list = NULL;
+    // do the rest of the initialization in build() (after function pointers are set up)
+  }
+
+  ~NUMANodeListHolder() {
+    free_node_list();
+  }
+
+  bool build() {
+    DWORD_PTR proc_aff_mask;
+    DWORD_PTR sys_aff_mask;
+    if (!GetProcessAffinityMask(GetCurrentProcess(), &proc_aff_mask, &sys_aff_mask)) return false;
+    ULONG highest_node_number;
+    if (!os::Kernel32Dll::GetNumaHighestNodeNumber(&highest_node_number)) return false;
+    free_node_list();
+    _numa_used_node_list = NEW_C_HEAP_ARRAY(int, highest_node_number + 1); // node ids run 0..highest_node_number
+    for (unsigned int i = 0; i <= highest_node_number; i++) {
+      ULONGLONG proc_mask_numa_node;
+      if (!os::Kernel32Dll::GetNumaNodeProcessorMask(i, &proc_mask_numa_node)) return false;
+      if ((proc_aff_mask & proc_mask_numa_node)!=0) {
+        _numa_used_node_list[_numa_used_node_count++] = i;
+      }
+    }
+    return (_numa_used_node_count > 1);
+  }
+
+  int get_count() {return _numa_used_node_count;}
+  int get_node_list_entry(int n) {
+    // for indexes out of range, returns -1
+    return (n < _numa_used_node_count ? _numa_used_node_list[n] : -1);
+  }
+
+} numa_node_list_holder;
+
+
+
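
build() keeps only the NUMA nodes whose processor mask intersects the process affinity mask, and reports success only when more than one node remains. A platform-neutral sketch of the filtering step, with plain 64-bit masks standing in for DWORD_PTR/ULONGLONG:

    #include <cstdint>
    #include <vector>

    // Keep the NUMA nodes that have at least one CPU our process may run on.
    static std::vector<int> build_used_node_list(uint64_t proc_aff_mask,
                                                 const std::vector<uint64_t>& node_masks) {
      std::vector<int> used;
      for (int node = 0; node < (int)node_masks.size(); node++) {
        if ((proc_aff_mask & node_masks[node]) != 0) {
          used.push_back(node);
        }
      }
      return used;  // interleaving only pays off if used.size() > 1
    }
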
 static size_t _large_page_size = 0;
 
 static bool resolve_functions_for_large_page_init() {
@@ -2653,6 +2704,153 @@
   _hToken = NULL;
 }
 
+static bool numa_interleaving_init() {
+  bool success = false;
+  bool use_numa_interleaving_specified = !FLAG_IS_DEFAULT(UseNUMAInterleaving);
+
+  // print a warning if the UseNUMAInterleaving flag is specified on the command line
+  bool warn_on_failure = use_numa_interleaving_specified;
+# define WARN(msg) if (warn_on_failure) { warning(msg); }
+
+  // NUMAInterleaveGranularity cannot be less than vm_allocation_granularity (or _large_page_size if using large pages)
+  size_t min_interleave_granularity = UseLargePages ? _large_page_size : os::vm_allocation_granularity();
+  NUMAInterleaveGranularity = align_size_up(NUMAInterleaveGranularity, min_interleave_granularity);
+
+  if (os::Kernel32Dll::NumaCallsAvailable()) {
+    if (numa_node_list_holder.build()) {
+      if (PrintMiscellaneous && Verbose) {
+        tty->print("NUMA UsedNodeCount=%d, namely ", numa_node_list_holder.get_count());
+        for (int i = 0; i < numa_node_list_holder.get_count(); i++) {
+          tty->print("%d ", numa_node_list_holder.get_node_list_entry(i));
+        }
+        tty->print("\n");
+      }
+      success = true;
+    } else {
+      WARN("Process does not cover multiple NUMA nodes.");
+    }
+  } else {
+    WARN("NUMA Interleaving is not supported by the operating system.");
+  }
+  if (!success) {
+    if (use_numa_interleaving_specified) WARN("...Ignoring UseNUMAInterleaving flag.");
+  }
+  return success;
+#undef WARN
+}
+
+// This routine is used whenever we need to reserve a contiguous VA range
+// but must make separate VirtualAlloc calls for each piece of the range.
+// Reasons for doing this:
+//  * UseLargePagesIndividualAllocation was set (normally only needed on WS2003 but possible to be set otherwise)
+//  * UseNUMAInterleaving requires a separate node for each piece
+static char* allocate_pages_individually(size_t bytes, char* addr, DWORD flags, DWORD prot,
+                                         bool should_inject_error=false) {
+  char * p_buf;
+  // note: at setup time we guaranteed that NUMAInterleaveGranularity was aligned up to a page size
+  size_t page_size = UseLargePages ? _large_page_size : os::vm_allocation_granularity();
+  size_t chunk_size = UseNUMAInterleaving ? NUMAInterleaveGranularity : page_size;
+
+  // First reserve enough address space in advance, since we want to be able
+  // to break a single contiguous virtual address range into multiple
+  // large-page commits but WS2003 does not allow reserving large-page space.
+  // Reserving with 4K pages gives us a legal contiguous address range; we
+  // then deallocate that reservation and re-allocate using large pages.
+  const size_t size_of_reserve = bytes + chunk_size;
+  if (bytes > size_of_reserve) {
+    // Overflowed.
+    return NULL;
+  }
+  p_buf = (char *) VirtualAlloc(addr,
+                                size_of_reserve,  // size of Reserve
+                                MEM_RESERVE,
+                                PAGE_READWRITE);
+  // If reservation failed, return NULL
+  if (p_buf == NULL) return NULL;
+
+  os::release_memory(p_buf, bytes + chunk_size);
+
+  // we still need to round up to a page boundary (in case we are using large pages)
+  // but not to a chunk boundary (in case NUMAInterleaveGranularity doesn't align with the page size)
+  // instead we handle this in the bytes_to_rq computation below
+  p_buf = (char *) align_size_up((size_t)p_buf, page_size);
+
+  // now go through and allocate one chunk at a time until all bytes are
+  // allocated
+  size_t  bytes_remaining = bytes;
+  // An overflow of align_size_up() would have been caught above
+  // in the calculation of size_of_reserve.
+  char * next_alloc_addr = p_buf;
+  HANDLE hProc = GetCurrentProcess();
+
+#ifdef ASSERT
+  // Variable for the failure injection
+  long ran_num = os::random();
+  size_t fail_after = ran_num % bytes;
+#endif
+
+  int count = 0;
+  while (bytes_remaining) {
+    // select bytes_to_rq to get to the next chunk_size boundary
+
+    size_t bytes_to_rq = MIN2(bytes_remaining, chunk_size - ((size_t)next_alloc_addr % chunk_size));
+    // Note allocate and commit
+    char * p_new;
+
+#ifdef ASSERT
+    bool inject_error_now = should_inject_error && (bytes_remaining <= fail_after);
+#else
+    const bool inject_error_now = false;
+#endif
+
+    if (inject_error_now) {
+      p_new = NULL;
+    } else {
+      if (!UseNUMAInterleaving) {
+        p_new = (char *) VirtualAlloc(next_alloc_addr,
+                                      bytes_to_rq,
+                                      flags,
+                                      prot);
+      } else {
+        // get the next node to use from the used_node_list
+        assert(numa_node_list_holder.get_count() > 0, "Multiple NUMA nodes expected");
+        DWORD node = numa_node_list_holder.get_node_list_entry(count % numa_node_list_holder.get_count());
+        p_new = (char *)os::Kernel32Dll::VirtualAllocExNuma(hProc,
+                                                            next_alloc_addr,
+                                                            bytes_to_rq,
+                                                            flags,
+                                                            prot,
+                                                            node);
+      }
+    }
+
+    if (p_new == NULL) {
+      // Free any allocated pages
+      if (next_alloc_addr > p_buf) {
+        // Some memory was committed so release it.
+        size_t bytes_to_release = bytes - bytes_remaining;
+        os::release_memory(p_buf, bytes_to_release);
+      }
+#ifdef ASSERT
+      if (should_inject_error) {
+        if (TracePageSizes && Verbose) {
+          tty->print_cr("Reserving pages individually failed.");
+        }
+      }
+#endif
+      return NULL;
+    }
+    bytes_remaining -= bytes_to_rq;
+    next_alloc_addr += bytes_to_rq;
+    count++;
+  }
+  // made it this far, success
+  return p_buf;
+}
+
+
+
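
The core of allocate_pages_individually() is the carve-up arithmetic: size each request so the next one starts on a chunk (NUMAInterleaveGranularity) boundary, and cycle through the used-node list round robin. A standalone sketch of just that arithmetic (all values hypothetical):

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint64_t chunk_size = 64 * 1024;     // hypothetical granularity
      const int node_count = 3;                  // hypothetical used-node count
      uint64_t next_addr = 0x10000 + 24 * 1024;  // deliberately mid-chunk
      uint64_t remaining = 200 * 1024;

      int count = 0;
      while (remaining > 0) {
        // Size this request to land on the next chunk boundary.
        uint64_t to_boundary = chunk_size - (next_addr % chunk_size);
        uint64_t bytes_to_rq = remaining < to_boundary ? remaining : to_boundary;
        int node = count % node_count;           // round-robin node choice
        std::printf("alloc %7llu bytes at %#llx on node %d\n",
                    (unsigned long long)bytes_to_rq,
                    (unsigned long long)next_addr, node);
        next_addr += bytes_to_rq;
        remaining -= bytes_to_rq;
        count++;
      }
      return 0;
    }
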
 void os::large_page_init() {
   if (!UseLargePages) return;
 
@@ -2722,9 +2920,30 @@
   assert((size_t)addr % os::vm_allocation_granularity() == 0,
          "reserve alignment");
   assert(bytes % os::vm_allocation_granularity() == 0, "reserve block size");
-  char* res = (char*)VirtualAlloc(addr, bytes, MEM_RESERVE, PAGE_READWRITE);
+  char* res;
+  // note that if UseLargePages is on, all the areas that require interleaving
+  // will go through reserve_memory_special rather than through here.
+  bool use_individual = (UseNUMAInterleaving && !UseLargePages);
+  if (!use_individual) {
+    res = (char*)VirtualAlloc(addr, bytes, MEM_RESERVE, PAGE_READWRITE);
+  } else {
+    elapsedTimer reserveTimer;
+    if (Verbose && PrintMiscellaneous) reserveTimer.start();
+    // in numa interleaving, we have to allocate pages individually
+    // (well really chunks of NUMAInterleaveGranularity size)
+    res = allocate_pages_individually(bytes, addr, MEM_RESERVE, PAGE_READWRITE);
+    if (res == NULL) {
+      warning("NUMA page allocation failed");
+    }
+    if (Verbose && PrintMiscellaneous) {
+      reserveTimer.stop();
+      tty->print_cr("reserve_memory of %Ix bytes took %ld ms (%ld ticks)", bytes,
+                    reserveTimer.milliseconds(), reserveTimer.ticks());
+    }
+  }
   assert(res == NULL || addr == NULL || addr == res,
          "Unexpected address from reserve.");
+
   return res;
 }
 
@@ -2754,92 +2973,27 @@
 char* os::reserve_memory_special(size_t bytes, char* addr, bool exec) {
 
   const DWORD prot = exec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE;
-
-  if (UseLargePagesIndividualAllocation) {
+  const DWORD flags = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES;
+
+  // with large pages, there are two cases where we need to use Individual Allocation
+  // 1) the UseLargePagesIndividualAllocation flag is set (set by default on WS2003)
+  // 2) NUMA Interleaving is enabled, in which case we use a different node for each page
+  if (UseLargePagesIndividualAllocation || UseNUMAInterleaving) {
     if (TracePageSizes && Verbose) {
        tty->print_cr("Reserving large pages individually.");
     }
-    char * p_buf;
-    // first reserve enough address space in advance since we want to be
-    // able to break a single contiguous virtual address range into multiple
-    // large page commits but WS2003 does not allow reserving large page space
-    // so we just use 4K pages for reserve, this gives us a legal contiguous
-    // address space. then we will deallocate that reservation, and re alloc
-    // using large pages
-    const size_t size_of_reserve = bytes + _large_page_size;
-    if (bytes > size_of_reserve) {
-      // Overflowed.
-      warning("Individually allocated large pages failed, "
-        "use -XX:-UseLargePagesIndividualAllocation to turn off");
+    char * p_buf = allocate_pages_individually(bytes, addr, flags, prot, LargePagesIndividualAllocationInjectError);
+    if (p_buf == NULL) {
+      // give an appropriate warning message
+      if (UseNUMAInterleaving) {
+        warning("NUMA large page allocation failed, UseLargePages flag ignored");
+      }
+      if (UseLargePagesIndividualAllocation) {
+        warning("Individually allocated large pages failed, "
+                "use -XX:-UseLargePagesIndividualAllocation to turn off");
+      }
       return NULL;
     }
-    p_buf = (char *) VirtualAlloc(addr,
-                                 size_of_reserve,  // size of Reserve
-                                 MEM_RESERVE,
-                                 PAGE_READWRITE);
-    // If reservation failed, return NULL
-    if (p_buf == NULL) return NULL;
-
-    release_memory(p_buf, bytes + _large_page_size);
-    // round up to page boundary.  If the size_of_reserve did not
-    // overflow and the reservation did not fail, this align up
-    // should not overflow.
-    p_buf = (char *) align_size_up((size_t)p_buf, _large_page_size);
-
-    // now go through and allocate one page at a time until all bytes are
-    // allocated
-    size_t  bytes_remaining = align_size_up(bytes, _large_page_size);
-    // An overflow of align_size_up() would have been caught above
-    // in the calculation of size_of_reserve.
-    char * next_alloc_addr = p_buf;
-
-#ifdef ASSERT
-    // Variable for the failure injection
-    long ran_num = os::random();
-    size_t fail_after = ran_num % bytes;
-#endif
-
-    while (bytes_remaining) {
-      size_t bytes_to_rq = MIN2(bytes_remaining, _large_page_size);
-      // Note allocate and commit
-      char * p_new;
-
-#ifdef ASSERT
-      bool inject_error = LargePagesIndividualAllocationInjectError &&
-          (bytes_remaining <= fail_after);
-#else
-      const bool inject_error = false;
-#endif
-
-      if (inject_error) {
-        p_new = NULL;
-      } else {
-        p_new = (char *) VirtualAlloc(next_alloc_addr,
-                                    bytes_to_rq,
-                                    MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
-                                    prot);
-      }
-
-      if (p_new == NULL) {
-        // Free any allocated pages
-        if (next_alloc_addr > p_buf) {
-          // Some memory was committed so release it.
-          size_t bytes_to_release = bytes - bytes_remaining;
-          release_memory(p_buf, bytes_to_release);
-        }
-#ifdef ASSERT
-        if (UseLargePagesIndividualAllocation &&
-            LargePagesIndividualAllocationInjectError) {
-          if (TracePageSizes && Verbose) {
-             tty->print_cr("Reserving large pages individually failed.");
-          }
-        }
-#endif
-        return NULL;
-      }
-      bytes_remaining -= bytes_to_rq;
-      next_alloc_addr += bytes_to_rq;
-    }
 
     return p_buf;
 
@@ -2867,14 +3021,43 @@
   assert(bytes % os::vm_page_size() == 0, "commit in page-sized chunks");
   // Don't attempt to print anything if the OS call fails. We're
   // probably low on resources, so the print itself may cause crashes.
-  bool result = VirtualAlloc(addr, bytes, MEM_COMMIT, PAGE_READWRITE) != 0;
-  if (result != NULL && exec) {
-    DWORD oldprot;
-    // Windows doc says to use VirtualProtect to get execute permissions
-    return VirtualProtect(addr, bytes, PAGE_EXECUTE_READWRITE, &oldprot) != 0;
+
+  // Unless NUMAInterleaving is enabled, the range of a commit is always
+  // within a reserve covered by a single VirtualAlloc; in that case we can
+  // just do a single commit for the requested size.
+  if (!UseNUMAInterleaving) {
+    if (VirtualAlloc(addr, bytes, MEM_COMMIT, PAGE_READWRITE) == NULL) return false;
+    if (exec) {
+      DWORD oldprot;
+      // Windows doc says to use VirtualProtect to get execute permissions
+      if (!VirtualProtect(addr, bytes, PAGE_EXECUTE_READWRITE, &oldprot)) return false;
+    }
+    return true;
   } else {
-    return result;
-  }
+
+    // when NUMAInterleaving is enabled, the commit might cover a range that
+    // came from multiple VirtualAlloc reserves (using allocate_pages_individually).
+    // VirtualQuery can help us determine that.  The RegionSize that VirtualQuery
+    // returns represents the number of bytes that can be committed in one step.
+    size_t bytes_remaining = bytes;
+    char * next_alloc_addr = addr;
+    while (bytes_remaining > 0) {
+      MEMORY_BASIC_INFORMATION alloc_info;
+      VirtualQuery(next_alloc_addr, &alloc_info, sizeof(alloc_info));
+      size_t bytes_to_rq = MIN2(bytes_remaining, (size_t)alloc_info.RegionSize);
+      if (VirtualAlloc(next_alloc_addr, bytes_to_rq, MEM_COMMIT, PAGE_READWRITE) == NULL)
+        return false;
+      if (exec) {
+        DWORD oldprot;
+        if (!VirtualProtect(next_alloc_addr, bytes_to_rq, PAGE_EXECUTE_READWRITE, &oldprot))
+          return false;
+      }
+      bytes_remaining -= bytes_to_rq;
+      next_alloc_addr += bytes_to_rq;
+    }
+  }
+  // if we made it this far, return true
+  return true;
 }
 
 bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
@@ -2948,14 +3131,21 @@
 void os::numa_make_global(char *addr, size_t bytes)    { }
 void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint)    { }
 bool os::numa_topology_changed()                       { return false; }
-size_t os::numa_get_groups_num()                       { return 1; }
+size_t os::numa_get_groups_num()                       { return MAX2(numa_node_list_holder.get_count(), 1); }
 int os::numa_get_group_id()                            { return 0; }
 size_t os::numa_get_leaf_groups(int *ids, size_t size) {
-  if (size > 0) {
+  if (numa_node_list_holder.get_count() == 0 && size > 0) {
+    // Provide an answer for UMA systems
     ids[0] = 0;
     return 1;
-  }
-  return 0;
+  } else {
+    // check for size bigger than actual groups_num
+    size = MIN2(size, numa_get_groups_num());
+    for (int i = 0; i < (int)size; i++) {
+      ids[i] = numa_node_list_holder.get_node_list_entry(i);
+    }
+    return size;
+  }
 }
 
 bool os::get_page_info(char *start, page_info* info) {
@@ -3480,7 +3670,7 @@
     if(Verbose && PrintMiscellaneous)
       tty->print("[Memory Serialize  Page address: " INTPTR_FORMAT "]\n", (intptr_t)mem_serialize_page);
 #endif
-}
+  }
 
   os::large_page_init();
 
@@ -3584,7 +3774,13 @@
   prio_init();
 
   if (UseNUMA && !ForceNUMA) {
-    UseNUMA = false; // Currently unsupported.
+    UseNUMA = false; // We don't fully support this yet
+  }
+
+  if (UseNUMAInterleaving) {
+    // first check whether this Windows OS supports VirtualAllocExNuma; if not, ignore this flag
+    bool success = numa_interleaving_init();
+    if (!success) UseNUMAInterleaving = false;
   }
 
   return JNI_OK;
@@ -4758,7 +4954,14 @@
 
 // Kernel32 API
 typedef SIZE_T (WINAPI* GetLargePageMinimum_Fn)(void);
+typedef LPVOID (WINAPI *VirtualAllocExNuma_Fn) (HANDLE, LPVOID, SIZE_T, DWORD, DWORD, DWORD);
+typedef BOOL (WINAPI *GetNumaHighestNodeNumber_Fn) (PULONG);
+typedef BOOL (WINAPI *GetNumaNodeProcessorMask_Fn) (UCHAR, PULONGLONG);
+
 GetLargePageMinimum_Fn      os::Kernel32Dll::_GetLargePageMinimum = NULL;
+VirtualAllocExNuma_Fn       os::Kernel32Dll::_VirtualAllocExNuma = NULL;
+GetNumaHighestNodeNumber_Fn os::Kernel32Dll::_GetNumaHighestNodeNumber = NULL;
+GetNumaNodeProcessorMask_Fn os::Kernel32Dll::_GetNumaNodeProcessorMask = NULL;
 BOOL                        os::Kernel32Dll::initialized = FALSE;
 SIZE_T os::Kernel32Dll::GetLargePageMinimum() {
   assert(initialized && _GetLargePageMinimum != NULL,
@@ -4773,19 +4976,56 @@
   return _GetLargePageMinimum != NULL;
 }
 
-
-#ifndef JDK6_OR_EARLIER
-
-void os::Kernel32Dll::initialize() {
+BOOL os::Kernel32Dll::NumaCallsAvailable() {
+  if (!initialized) {
+    initialize();
+  }
+  return _VirtualAllocExNuma != NULL;
+}
+
+LPVOID os::Kernel32Dll::VirtualAllocExNuma(HANDLE hProc, LPVOID addr, SIZE_T bytes, DWORD flags, DWORD prot, DWORD node) {
+  assert(initialized && _VirtualAllocExNuma != NULL,
+    "NUMACallsAvailable() not yet called");
+
+  return _VirtualAllocExNuma(hProc, addr, bytes, flags, prot, node);
+}
+
+BOOL os::Kernel32Dll::GetNumaHighestNodeNumber(PULONG ptr_highest_node_number) {
+  assert(initialized && _GetNumaHighestNodeNumber != NULL,
+    "NUMACallsAvailable() not yet called");
+
+  return _GetNumaHighestNodeNumber(ptr_highest_node_number);
+}
+
+BOOL os::Kernel32Dll::GetNumaNodeProcessorMask(UCHAR node, PULONGLONG proc_mask) {
+  assert(initialized && _GetNumaNodeProcessorMask != NULL,
+    "NUMACallsAvailable() not yet called");
+
+  return _GetNumaNodeProcessorMask(node, proc_mask);
+}
+
+
+void os::Kernel32Dll::initializeCommon() {
   if (!initialized) {
     HMODULE handle = ::GetModuleHandle("Kernel32.dll");
     assert(handle != NULL, "Just check");
     _GetLargePageMinimum = (GetLargePageMinimum_Fn)::GetProcAddress(handle, "GetLargePageMinimum");
+    _VirtualAllocExNuma = (VirtualAllocExNuma_Fn)::GetProcAddress(handle, "VirtualAllocExNuma");
+    _GetNumaHighestNodeNumber = (GetNumaHighestNodeNumber_Fn)::GetProcAddress(handle, "GetNumaHighestNodeNumber");
+    _GetNumaNodeProcessorMask = (GetNumaNodeProcessorMask_Fn)::GetProcAddress(handle, "GetNumaNodeProcessorMask");
     initialized = TRUE;
   }
 }
 
 
+
+#ifndef JDK6_OR_EARLIER
+
+void os::Kernel32Dll::initialize() {
+  initializeCommon();
+}
+
+
 // Kernel32 API
 inline BOOL os::Kernel32Dll::SwitchToThread() {
   return ::SwitchToThread();
@@ -4887,18 +5127,19 @@
 Module32Next_Fn             os::Kernel32Dll::_Module32Next = NULL;
 GetNativeSystemInfo_Fn      os::Kernel32Dll::_GetNativeSystemInfo = NULL;
 
+
 void os::Kernel32Dll::initialize() {
   if (!initialized) {
     HMODULE handle = ::GetModuleHandle("Kernel32.dll");
     assert(handle != NULL, "Just check");
 
     _SwitchToThread = (SwitchToThread_Fn)::GetProcAddress(handle, "SwitchToThread");
-    _GetLargePageMinimum = (GetLargePageMinimum_Fn)::GetProcAddress(handle, "GetLargePageMinimum");
     _CreateToolhelp32Snapshot = (CreateToolhelp32Snapshot_Fn)
       ::GetProcAddress(handle, "CreateToolhelp32Snapshot");
     _Module32First = (Module32First_Fn)::GetProcAddress(handle, "Module32First");
     _Module32Next = (Module32Next_Fn)::GetProcAddress(handle, "Module32Next");
     _GetNativeSystemInfo = (GetNativeSystemInfo_Fn)::GetProcAddress(handle, "GetNativeSystemInfo");
+    initializeCommon();  // resolve the functions that always need resolving
 
     initialized = TRUE;
   }
@@ -4964,6 +5205,8 @@
   _GetNativeSystemInfo(lpSystemInfo);
 }
 
+
+
 // PSAPI API
 
 
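The Kernel32Dll wrappers above exist so the VM can run on Windows versions that lack the NUMA entry points: the symbols are resolved once via GetProcAddress and a NULL pointer means "feature unavailable". A minimal Windows-only sketch of that pattern:

    #include <windows.h>
    #include <cstdio>

    typedef BOOL (WINAPI *GetNumaHighestNodeNumber_Fn)(PULONG);

    int main() {
      HMODULE k32 = GetModuleHandleA("Kernel32.dll");
      GetNumaHighestNodeNumber_Fn fn = (k32 == NULL) ? NULL :
          (GetNumaHighestNodeNumber_Fn)GetProcAddress(k32, "GetNumaHighestNodeNumber");
      if (fn == NULL) {
        std::printf("NUMA calls not available on this OS\n");  // e.g. pre-Vista
        return 0;
      }
      ULONG highest = 0;
      if (fn(&highest)) std::printf("highest NUMA node: %lu\n", highest);
      return 0;
    }
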
--- a/src/os/windows/vm/os_windows.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/os/windows/vm/os_windows.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -173,13 +173,25 @@
   static BOOL GetNativeSystemInfoAvailable();
   static void GetNativeSystemInfo(LPSYSTEM_INFO);
 
+  // NUMA calls
+  static BOOL NumaCallsAvailable();
+  static LPVOID VirtualAllocExNuma(HANDLE, LPVOID, SIZE_T, DWORD, DWORD, DWORD);
+  static BOOL GetNumaHighestNodeNumber(PULONG);
+  static BOOL GetNumaNodeProcessorMask(UCHAR, PULONGLONG);
+
 private:
   // GetLargePageMinimum available on Windows Vista/Windows Server 2003
   // and later
+  // NUMA calls available on Windows Vista/WS2008 and later
+
   static SIZE_T (WINAPI *_GetLargePageMinimum)(void);
+  static LPVOID (WINAPI *_VirtualAllocExNuma) (HANDLE, LPVOID, SIZE_T, DWORD, DWORD, DWORD);
+  static BOOL (WINAPI *_GetNumaHighestNodeNumber) (PULONG);
+  static BOOL (WINAPI *_GetNumaNodeProcessorMask) (UCHAR, PULONGLONG);
   static BOOL initialized;
 
   static void initialize();
+  static void initializeCommon();
 
 #ifdef JDK6_OR_EARLIER
 private:
--- a/src/share/vm/c1/c1_Compilation.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/c1/c1_Compilation.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -346,7 +346,6 @@
     implicit_exception_table(),
     compiler(),
     _env->comp_level(),
-    true,
     has_unsafe_access()
   );
 }
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -28,8 +28,10 @@
 #include "c1/c1_Compilation.hpp"
 #include "c1/c1_GraphBuilder.hpp"
 #include "c1/c1_InstructionPrinter.hpp"
+#include "ci/ciCallSite.hpp"
 #include "ci/ciField.hpp"
 #include "ci/ciKlass.hpp"
+#include "ci/ciMethodHandle.hpp"
 #include "compiler/compileBroker.hpp"
 #include "interpreter/bytecode.hpp"
 #include "runtime/sharedRuntime.hpp"
@@ -1424,7 +1426,7 @@
     // See whether this is the first return; if so, store off some
     // of the state for later examination
     if (num_returns() == 0) {
-      set_inline_cleanup_info(_block, _last, state());
+      set_inline_cleanup_info();
     }
 
     // The current bci() is in the wrong scope, so use the bci() of
@@ -1582,6 +1584,8 @@
     code = Bytecodes::_invokespecial;
   }
 
+  bool is_invokedynamic = code == Bytecodes::_invokedynamic;
+
   // NEEDS_CLEANUP
  // I've added the target->is_loaded() test below but I don't really understand
   // how klass->is_loaded() can be true and yet target->is_loaded() is false.
@@ -1693,26 +1697,31 @@
       && target->will_link(klass, callee_holder, code)) {
     // callee is known => check if we have static binding
     assert(target->is_loaded(), "callee must be known");
-    if (code == Bytecodes::_invokestatic
-     || code == Bytecodes::_invokespecial
-     || code == Bytecodes::_invokevirtual && target->is_final_method()
-    ) {
-      // static binding => check if callee is ok
-      ciMethod* inline_target = (cha_monomorphic_target != NULL)
-                                  ? cha_monomorphic_target
-                                  : target;
-      bool res = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL));
+    if (code == Bytecodes::_invokestatic  ||
+        code == Bytecodes::_invokespecial ||
+        code == Bytecodes::_invokevirtual && target->is_final_method() ||
+        code == Bytecodes::_invokedynamic) {
+      ciMethod* inline_target = (cha_monomorphic_target != NULL) ? cha_monomorphic_target : target;
+      bool success = false;
+      if (target->is_method_handle_invoke()) {
+        // method handle invokes
+        success = !is_invokedynamic ? for_method_handle_inline(target) : for_invokedynamic_inline(target);
+      }
+      if (!success) {
+        // static binding => check if callee is ok
+        success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL));
+      }
       CHECK_BAILOUT();
 
 #ifndef PRODUCT
       // printing
-      if (PrintInlining && !res) {
+      if (PrintInlining && !success) {
         // if it was successfully inlined, then it was already printed.
-        print_inline_result(inline_target, res);
+        print_inline_result(inline_target, success);
       }
 #endif
       clear_inline_bailout();
-      if (res) {
+      if (success) {
         // Register dependence if JVMTI has either breakpoint
         // setting or hotswapping of methods capabilities since they may
         // cause deoptimization.
@@ -1740,7 +1749,6 @@
     code == Bytecodes::_invokespecial   ||
     code == Bytecodes::_invokevirtual   ||
     code == Bytecodes::_invokeinterface;
-  bool is_invokedynamic = code == Bytecodes::_invokedynamic;
   ValueType* result_type = as_ValueType(target->return_type());
 
   // We require the debug info to be the "state before" because
@@ -3038,7 +3046,7 @@
     INLINE_BAILOUT("disallowed by CompilerOracle")
   } else if (!callee->can_be_compiled()) {
     // callee is not compilable (prob. has breakpoints)
-    INLINE_BAILOUT("not compilable")
+    INLINE_BAILOUT("not compilable (disabled)")
   } else if (callee->intrinsic_id() != vmIntrinsics::_none && try_inline_intrinsics(callee)) {
     // intrinsics can be native or not
     return true;
@@ -3397,7 +3405,7 @@
 }
 
 
-bool GraphBuilder::try_inline_full(ciMethod* callee, bool holder_known) {
+bool GraphBuilder::try_inline_full(ciMethod* callee, bool holder_known, BlockBegin* cont_block) {
   assert(!callee->is_native(), "callee must not be native");
   if (CompilationPolicy::policy()->should_not_inline(compilation()->env(), callee)) {
     INLINE_BAILOUT("inlining prohibited by policy");
@@ -3430,7 +3438,7 @@
   } else {
     if (inline_level() > MaxInlineLevel                         ) INLINE_BAILOUT("too-deep inlining");
     if (recursive_inline_level(callee) > MaxRecursiveInlineLevel) INLINE_BAILOUT("too-deep recursive inlining");
-    if (callee->code_size() > max_inline_size()                 ) INLINE_BAILOUT("callee is too large");
+    if (callee->code_size_for_inlining() > max_inline_size()    ) INLINE_BAILOUT("callee is too large");
 
     // don't inline throwable methods unless the inlining tree is rooted in a throwable class
     if (callee->name() == ciSymbol::object_initializer_name() &&
@@ -3468,7 +3476,8 @@
 
   // Insert null check if necessary
   Value recv = NULL;
-  if (code() != Bytecodes::_invokestatic) {
+  if (code() != Bytecodes::_invokestatic &&
+      code() != Bytecodes::_invokedynamic) {
     // note: null check must happen even if first instruction of callee does
     //       an implicit null check since the callee is in a different scope
     //       and we must make sure exception handling does the right thing
@@ -3496,7 +3505,7 @@
   // fall-through of control flow, all return instructions of the
   // callee will need to be replaced by Goto's pointing to this
   // continuation point.
-  BlockBegin* cont = block_at(next_bci());
+  BlockBegin* cont = cont_block != NULL ? cont_block : block_at(next_bci());
   bool continuation_existed = true;
   if (cont == NULL) {
     cont = new BlockBegin(next_bci());
@@ -3608,27 +3617,29 @@
   // block merging. This allows load elimination and CSE to take place
   // across multiple callee scopes if they are relatively simple, and
   // is currently essential to making inlining profitable.
-  if (   num_returns() == 1
-      && block() == orig_block
-      && block() == inline_cleanup_block()) {
-    _last = inline_cleanup_return_prev();
-    _state = inline_cleanup_state();
-  } else if (continuation_preds == cont->number_of_preds()) {
-    // Inlining caused that the instructions after the invoke in the
-    // caller are not reachable any more. So skip filling this block
-    // with instructions!
-    assert (cont == continuation(), "");
-    assert(_last && _last->as_BlockEnd(), "");
-    _skip_block = true;
-  } else {
-    // Resume parsing in continuation block unless it was already parsed.
-    // Note that if we don't change _last here, iteration in
-    // iterate_bytecodes_for_block will stop when we return.
-    if (!continuation()->is_set(BlockBegin::was_visited_flag)) {
-      // add continuation to work list instead of parsing it immediately
+  if (cont_block == NULL) {
+    if (num_returns() == 1
+        && block() == orig_block
+        && block() == inline_cleanup_block()) {
+      _last  = inline_cleanup_return_prev();
+      _state = inline_cleanup_state();
+    } else if (continuation_preds == cont->number_of_preds()) {
+      // Inlining caused the instructions after the invoke in the
+      // caller to become unreachable. So skip filling this block
+      // with instructions!
+      assert(cont == continuation(), "");
       assert(_last && _last->as_BlockEnd(), "");
-      scope_data()->parent()->add_to_work_list(continuation());
       _skip_block = true;
+    } else {
+      // Resume parsing in continuation block unless it was already parsed.
+      // Note that if we don't change _last here, iteration in
+      // iterate_bytecodes_for_block will stop when we return.
+      if (!continuation()->is_set(BlockBegin::was_visited_flag)) {
+        // add continuation to work list instead of parsing it immediately
+        assert(_last && _last->as_BlockEnd(), "");
+        scope_data()->parent()->add_to_work_list(continuation());
+        _skip_block = true;
+      }
     }
   }
 
@@ -3645,6 +3656,114 @@
 }
 
 
+bool GraphBuilder::for_method_handle_inline(ciMethod* callee) {
+  assert(!callee->is_static(), "change next line");
+  int index = state()->stack_size() - (callee->arg_size_no_receiver() + 1);
+  Value receiver = state()->stack_at(index);
+
+  if (receiver->type()->is_constant()) {
+    ciMethodHandle* method_handle = receiver->type()->as_ObjectType()->constant_value()->as_method_handle();
+
+    // Set the callee to have access to the class and signature in
+    // the MethodHandleCompiler.
+    method_handle->set_callee(callee);
+    method_handle->set_caller(method());
+
+    // Get an adapter for the MethodHandle.
+    ciMethod* method_handle_adapter = method_handle->get_method_handle_adapter();
+    if (method_handle_adapter != NULL) {
+      return try_inline(method_handle_adapter, /*holder_known=*/ true);
+    }
+  } else if (receiver->as_CheckCast()) {
+    // Match MethodHandle.selectAlternative idiom
+    Phi* phi = receiver->as_CheckCast()->obj()->as_Phi();
+
+    if (phi != NULL && phi->operand_count() == 2) {
+      // Get the two MethodHandle inputs from the Phi.
+      Value op1 = phi->operand_at(0);
+      Value op2 = phi->operand_at(1);
+      ciMethodHandle* mh1 = op1->type()->as_ObjectType()->constant_value()->as_method_handle();
+      ciMethodHandle* mh2 = op2->type()->as_ObjectType()->constant_value()->as_method_handle();
+
+      // Set the callee to have access to the class and signature in
+      // the MethodHandleCompiler.
+      mh1->set_callee(callee);
+      mh1->set_caller(method());
+      mh2->set_callee(callee);
+      mh2->set_caller(method());
+
+      // Get adapters for the MethodHandles.
+      ciMethod* mh1_adapter = mh1->get_method_handle_adapter();
+      ciMethod* mh2_adapter = mh2->get_method_handle_adapter();
+
+      if (mh1_adapter != NULL && mh2_adapter != NULL) {
+        set_inline_cleanup_info();
+
+        // Build the If guard
+        BlockBegin* one = new BlockBegin(next_bci());
+        BlockBegin* two = new BlockBegin(next_bci());
+        BlockBegin* end = new BlockBegin(next_bci());
+        Instruction* iff = append(new If(phi, If::eql, false, op1, one, two, NULL, false));
+        block()->set_end(iff->as_BlockEnd());
+
+        // Connect up the states
+        one->merge(block()->end()->state());
+        two->merge(block()->end()->state());
+
+        // Save the state for the second inlinee
+        ValueStack* state_before = copy_state_before();
+
+        // Parse first adapter
+        _last = _block = one;
+        if (!try_inline_full(mh1_adapter, /*holder_known=*/ true, end)) {
+          restore_inline_cleanup_info();
+          block()->clear_end();  // remove appended iff
+          return false;
+        }
+
+        // Parse second adapter
+        _last = _block = two;
+        _state = state_before;
+        if (!try_inline_full(mh2_adapter, /*holder_known=*/ true, end)) {
+          restore_inline_cleanup_info();
+          block()->clear_end();  // remove appended iff
+          return false;
+        }
+
+        connect_to_end(end);
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+
+bool GraphBuilder::for_invokedynamic_inline(ciMethod* callee) {
+  // Get the MethodHandle from the CallSite.
+  ciCallSite*     call_site     = stream()->get_call_site();
+  ciMethodHandle* method_handle = call_site->get_target();
+
+  // Set the callee to have access to the class and signature in the
+  // MethodHandleCompiler.
+  method_handle->set_callee(callee);
+  method_handle->set_caller(method());
+
+  // Get an adapter for the MethodHandle.
+  ciMethod* method_handle_adapter = method_handle->get_invokedynamic_adapter();
+  if (method_handle_adapter != NULL) {
+    if (try_inline(method_handle_adapter, /*holder_known=*/ true)) {
+      // Add a dependence for invalidation of the optimization.
+      if (!call_site->is_constant_call_site()) {
+        dependency_recorder()->assert_call_site_target_value(call_site, method_handle);
+      }
+      return true;
+    }
+  }
+  return false;
+}
+
+
 void GraphBuilder::inline_bailout(const char* msg) {
   assert(msg != NULL, "inline bailout msg must exist");
   _inline_bailout_msg = msg;
--- a/src/share/vm/c1/c1_GraphBuilder.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/c1/c1_GraphBuilder.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -315,9 +315,17 @@
                                ValueStack* return_state) { scope_data()->set_inline_cleanup_info(block,
                                                                                                   return_prev,
                                                                                                   return_state); }
+  void set_inline_cleanup_info() {
+    set_inline_cleanup_info(_block, _last, _state);
+  }
   BlockBegin*  inline_cleanup_block() const              { return scope_data()->inline_cleanup_block();  }
   Instruction* inline_cleanup_return_prev() const        { return scope_data()->inline_cleanup_return_prev(); }
   ValueStack*  inline_cleanup_state() const              { return scope_data()->inline_cleanup_state();  }
+  void restore_inline_cleanup_info() {
+    _block = inline_cleanup_block();
+    _last  = inline_cleanup_return_prev();
+    _state = inline_cleanup_state();
+  }
   void incr_num_returns()                                { scope_data()->incr_num_returns();             }
   int  num_returns() const                               { return scope_data()->num_returns();           }
   intx max_inline_size() const                           { return scope_data()->max_inline_size();       }
@@ -329,11 +337,15 @@
   void fill_sync_handler(Value lock, BlockBegin* sync_handler, bool default_handler = false);
 
   // inliners
-  bool try_inline(ciMethod* callee, bool holder_known);
+  bool try_inline(           ciMethod* callee, bool holder_known);
   bool try_inline_intrinsics(ciMethod* callee);
-  bool try_inline_full      (ciMethod* callee, bool holder_known);
+  bool try_inline_full(      ciMethod* callee, bool holder_known, BlockBegin* cont_block = NULL);
   bool try_inline_jsr(int jsr_dest_bci);
 
+  // JSR 292 support
+  bool for_method_handle_inline(ciMethod* callee);
+  bool for_invokedynamic_inline(ciMethod* callee);
+
   // helpers
   void inline_bailout(const char* msg);
   BlockBegin* header_block(BlockBegin* entry, BlockBegin::Flag f, ValueStack* state);
--- a/src/share/vm/c1/c1_Instruction.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/c1/c1_Instruction.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -514,28 +514,17 @@
 
 void BlockBegin::set_end(BlockEnd* end) {
   assert(end != NULL, "should not reset block end to NULL");
-  BlockEnd* old_end = _end;
-  if (end == old_end) {
+  if (end == _end) {
     return;
   }
-  // Must make the predecessors/successors match up with the
-  // BlockEnd's notion.
-  int i, n;
-  if (old_end != NULL) {
-    // disconnect from the old end
-    old_end->set_begin(NULL);
+  clear_end();
 
-    // disconnect this block from it's current successors
-    for (i = 0; i < _successors.length(); i++) {
-      _successors.at(i)->remove_predecessor(this);
-    }
-  }
+  // Set the new end
   _end = end;
 
   _successors.clear();
   // Now reset successors list based on BlockEnd
-  n = end->number_of_sux();
-  for (i = 0; i < n; i++) {
+  for (int i = 0; i < end->number_of_sux(); i++) {
     BlockBegin* sux = end->sux_at(i);
     _successors.append(sux);
     sux->_predecessors.append(this);
@@ -544,6 +533,22 @@
 }
 
 
+void BlockBegin::clear_end() {
+  // Must make the predecessors/successors match up with the
+  // BlockEnd's notion.
+  if (_end != NULL) {
+    // disconnect from the old end
+    _end->set_begin(NULL);
+
+    // disconnect this block from its current successors
+    for (int i = 0; i < _successors.length(); i++) {
+      _successors.at(i)->remove_predecessor(this);
+    }
+    _end = NULL;
+  }
+}
+
+
 void BlockBegin::disconnect_edge(BlockBegin* from, BlockBegin* to) {
   // disconnect any edges between from and to
 #ifndef PRODUCT
--- a/src/share/vm/c1/c1_Instruction.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/c1/c1_Instruction.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -1601,6 +1601,7 @@
   void set_depth_first_number(int dfn)           { _depth_first_number = dfn; }
   void set_linear_scan_number(int lsn)           { _linear_scan_number = lsn; }
   void set_end(BlockEnd* end);
+  void clear_end();
   void disconnect_from_graph();
   static void disconnect_edge(BlockBegin* from, BlockBegin* to);
   BlockBegin* insert_block_between(BlockBegin* sux);
--- a/src/share/vm/c1/c1_LIRAssembler.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/c1/c1_LIRAssembler.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -121,7 +121,7 @@
 
 void LIR_Assembler::check_codespace() {
   CodeSection* cs = _masm->code_section();
-  if (cs->remaining() < (int)(1*K)) {
+  if (cs->remaining() < (int)(NOT_LP64(1*K)LP64_ONLY(2*K))) {
     BAILOUT("CodeBuffer overflow");
   }
 }
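
The bailout threshold is now word-size dependent: NOT_LP64(1*K)LP64_ONLY(2*K) evaluates to 1*K on 32-bit builds and 2*K on LP64 builds, presumably because 64-bit code is larger. These macros come from utilities/globalDefinitions.hpp and simply expand to their argument on the matching platform and to nothing on the other:

    #ifdef _LP64
    #define LP64_ONLY(code) code
    #define NOT_LP64(code)
    #else
    #define LP64_ONLY(code)
    #define NOT_LP64(code) code
    #endif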
--- a/src/share/vm/c1/c1_LIRAssembler.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/c1/c1_LIRAssembler.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -133,7 +133,6 @@
   static bool is_small_constant(LIR_Opr opr);
 
   static LIR_Opr receiverOpr();
-  static LIR_Opr incomingReceiverOpr();
   static LIR_Opr osrBufferPointer();
 
   // stubs
--- a/src/share/vm/c1/c1_LinearScan.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/c1/c1_LinearScan.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -2404,7 +2404,7 @@
       assert(!is_call_site || assigned_reg >= nof_regs || !is_caller_save(assigned_reg), "interval is in a caller-save register at a call -> register will be overwritten");
 
       VMReg name = vm_reg_for_interval(interval);
-      map->set_oop(name);
+      set_oop(map, name);
 
       // Spill optimization: when the stack value is guaranteed to be always correct,
       // then it must be added to the oop map even if the interval is currently in a register
@@ -2415,7 +2415,7 @@
         assert(interval->canonical_spill_slot() >= LinearScan::nof_regs, "no spill slot assigned");
         assert(interval->assigned_reg() < LinearScan::nof_regs, "interval is on stack, so stack slot is registered twice");
 
-        map->set_oop(frame_map()->slot_regname(interval->canonical_spill_slot() - LinearScan::nof_regs));
+        set_oop(map, frame_map()->slot_regname(interval->canonical_spill_slot() - LinearScan::nof_regs));
       }
     }
   }
@@ -2424,7 +2424,7 @@
   assert(info->stack() != NULL, "CodeEmitInfo must always have a stack");
   int locks_count = info->stack()->total_locks_size();
   for (int i = 0; i < locks_count; i++) {
-    map->set_oop(frame_map()->monitor_object_regname(i));
+    set_oop(map, frame_map()->monitor_object_regname(i));
   }
 
   return map;
--- a/src/share/vm/c1/c1_LinearScan.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/c1/c1_LinearScan.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -352,6 +352,13 @@
 
   MonitorValue*  location_for_monitor_index(int monitor_index);
   LocationValue* location_for_name(int name, Location::Type loc_type);
+  void set_oop(OopMap* map, VMReg name) {
+    if (map->legal_vm_reg_name(name)) {
+      map->set_oop(name);
+    } else {
+      bailout("illegal oopMap register name");
+    }
+  }
 
   int append_scope_value_for_constant(LIR_Opr opr, GrowableArray<ScopeValue*>* scope_values);
   int append_scope_value_for_operand(LIR_Opr opr, GrowableArray<ScopeValue*>* scope_values);
--- a/src/share/vm/c1/c1_Runtime1.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/c1/c1_Runtime1.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -375,14 +375,6 @@
 JRT_END
 
 
-JRT_ENTRY(void, Runtime1::post_jvmti_exception_throw(JavaThread* thread))
-  if (JvmtiExport::can_post_on_exceptions()) {
-    vframeStream vfst(thread, true);
-    address bcp = vfst.method()->bcp_from(vfst.bci());
-    JvmtiExport::post_exception_throw(thread, vfst.method(), bcp, thread->exception_oop());
-  }
-JRT_END
-
 // counter_overflow() is called from within C1-compiled methods. The enclosing method is the method
 // associated with the top activation record. The inlinee (that is possibly included in the enclosing
 // method) method oop is passed as an argument. In order to do that it is embedded in the code as
--- a/src/share/vm/c1/c1_Runtime1.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/c1/c1_Runtime1.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -65,7 +65,6 @@
   stub(monitorexit_nofpu)              /* optimized version that does not preserve fpu registers */ \
   stub(access_field_patching)        \
   stub(load_klass_patching)          \
-  stub(jvmti_exception_throw)        \
   stub(g1_pre_barrier_slow)          \
   stub(g1_post_barrier_slow)         \
   stub(fpu2long_stub)                \
@@ -141,7 +140,6 @@
   static void unimplemented_entry   (JavaThread* thread, StubID id);
 
   static address exception_handler_for_pc(JavaThread* thread);
-  static void post_jvmti_exception_throw(JavaThread* thread);
 
   static void throw_range_check_exception(JavaThread* thread, int index);
   static void throw_index_exception(JavaThread* thread, int index);
--- a/src/share/vm/c1/c1_globals.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/c1/c1_globals.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -278,7 +278,7 @@
   product(intx, CompilationRepeat, 0,                                       \
           "Number of times to recompile method before returning result")    \
                                                                             \
-  develop(intx, NMethodSizeLimit, (32*K)*wordSize,                          \
+  develop(intx, NMethodSizeLimit, (64*K)*wordSize,                          \
           "Maximum size of a compiled method.")                             \
                                                                             \
   develop(bool, TraceFPUStack, false,                                       \
--- a/src/share/vm/ci/ciCallProfile.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/ci/ciCallProfile.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -79,6 +79,17 @@
     assert(i < _limit, "out of Call Profile MorphismLimit");
     return _receiver[i];
   }
+
+  // Rescale the current profile based on the incoming scale
+  ciCallProfile rescale(double scale) {
+    assert(scale >= 0 && scale <= 1.0, "out of range");
+    ciCallProfile call = *this;
+    call._count = (int)(call._count * scale);
+    for (int i = 0; i < _morphism; i++) {
+      call._receiver_count[i] = (int)(call._receiver_count[i] * scale);
+    }
+    return call;
+  }
 };
 
 #endif // SHARE_VM_CI_CICALLPROFILE_HPP
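
rescale() returns a copy of the profile with the call count and all receiver counts scaled by the same factor; the original is left untouched. A plausible use (hypothetical numbers and variables, not from this changeset) is attributing part of a call site's profile to each branch of a guarded pair of inlinees, given a ciMethod* caller and an int bci:

    // Hypothetical: split one profile 70/30 between two inlinees.
    ciCallProfile profile = caller->call_profile_at_bci(bci);
    ciCallProfile hot     = profile.rescale(0.7);  // counts scaled by 0.7
    ciCallProfile cold    = profile.rescale(0.3);  // the remaining 30%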
--- a/src/share/vm/ci/ciConstant.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/ci/ciConstant.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -46,9 +46,6 @@
     ciObject* _object;
   } _value;
 
-  // Implementation of the print method.
-  void print_impl(outputStream* st);
-
 public:
 
   ciConstant() {
--- a/src/share/vm/ci/ciEnv.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/ci/ciEnv.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -884,19 +884,31 @@
 }
 
 // ------------------------------------------------------------------
-// ciEnv::check_for_system_dictionary_modification
-// Check for changes to the system dictionary during compilation
-// class loads, evolution, breakpoints
-void ciEnv::check_for_system_dictionary_modification(ciMethod* target) {
+// ciEnv::validate_compile_task_dependencies
+//
+// Check for changes during compilation (e.g. class loads, evolution,
+// breakpoints, call site invalidation).
+void ciEnv::validate_compile_task_dependencies(ciMethod* target) {
   if (failing())  return;  // no need for further checks
 
-  // Dependencies must be checked when the system dictionary changes.
-  // If logging is enabled all violated dependences will be recorded in
-  // the log.  In debug mode check dependencies even if the system
-  // dictionary hasn't changed to verify that no invalid dependencies
-  // were inserted.  Any violated dependences in this case are dumped to
-  // the tty.
+  // First, check non-klass dependencies as we might return early and
+  // not check klass dependencies if the system dictionary
+  // modification counter hasn't changed (see below).
+  for (Dependencies::DepStream deps(dependencies()); deps.next(); ) {
+    if (deps.is_klass_type())  continue;  // skip klass dependencies
+    klassOop witness = deps.check_dependency();
+    if (witness != NULL) {
+      record_failure("invalid non-klass dependency");
+      return;
+    }
+  }
 
+  // Klass dependencies must be checked when the system dictionary
+  // changes.  If logging is enabled all violated dependences will be
+  // recorded in the log.  In debug mode check dependencies even if
+  // the system dictionary hasn't changed to verify that no invalid
+  // dependencies were inserted.  Any violated dependences in this
+  // case are dumped to the tty.
   bool counter_changed = system_dictionary_modification_counter_changed();
   bool test_deps = counter_changed;
   DEBUG_ONLY(test_deps = true);
@@ -904,22 +916,21 @@
 
   bool print_failures = false;
   DEBUG_ONLY(print_failures = !counter_changed);
-
   bool keep_going = (print_failures || xtty != NULL);
-
-  int violated = 0;
+  int klass_violations = 0;
 
   for (Dependencies::DepStream deps(dependencies()); deps.next(); ) {
+    if (!deps.is_klass_type())  continue;  // skip non-klass dependencies
     klassOop witness = deps.check_dependency();
     if (witness != NULL) {
-      ++violated;
+      klass_violations++;
       if (print_failures)  deps.print_dependency(witness, /*verbose=*/ true);
-      // If there's no log and we're not sanity-checking, we're done.
-      if (!keep_going)     break;
     }
+    // If there's no log and we're not sanity-checking, we're done.
+    if (!keep_going)  break;
   }
 
-  if (violated != 0) {
+  if (klass_violations != 0) {
     assert(counter_changed, "failed dependencies, but counter didn't change");
     record_failure("concurrent class loading");
   }
@@ -938,7 +949,6 @@
                             ImplicitExceptionTable* inc_table,
                             AbstractCompiler* compiler,
                             int comp_level,
-                            bool has_debug_info,
                             bool has_unsafe_access) {
   VM_ENTRY_MARK;
   nmethod* nm = NULL;
@@ -978,8 +988,8 @@
       // Encode the dependencies now, so we can check them right away.
       dependencies()->encode_content_bytes();
 
-      // Check for {class loads, evolution, breakpoints} during compilation
-      check_for_system_dictionary_modification(target);
+      // Check for {class loads, evolution, breakpoints, ...} during compilation
+      validate_compile_task_dependencies(target);
     }
 
     methodHandle method(THREAD, target->get_methodOop());
@@ -1033,7 +1043,6 @@
         CompileBroker::handle_full_code_cache();
       }
     } else {
-      NOT_PRODUCT(nm->set_has_debug_info(has_debug_info); )
       nm->set_has_unsafe_access(has_unsafe_access);
 
       // Record successful registration.
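
The renamed check now runs in two passes: non-klass dependencies (currently only call_site_target_value) are not covered by the system dictionary modification counter, so they must be re-checked unconditionally, while klass dependencies can be skipped when the counter has not moved. A condensed sketch of the method's shape, not its verbatim body:

    void ciEnv::validate_compile_task_dependencies(ciMethod* target) {
      if (failing())  return;
      // Pass 1: non-klass deps are never guarded by the counter.
      for (Dependencies::DepStream deps(dependencies()); deps.next(); ) {
        if (deps.is_klass_type())  continue;
        if (deps.check_dependency() != NULL) {
          record_failure("invalid non-klass dependency");
          return;
        }
      }
      // Pass 2: klass deps need re-checking only if classes were loaded.
      bool test_deps = system_dictionary_modification_counter_changed();
      DEBUG_ONLY(test_deps = true);  // debug builds always sanity-check
      if (!test_deps)  return;
      for (Dependencies::DepStream deps(dependencies()); deps.next(); ) {
        if (!deps.is_klass_type())  continue;
        if (deps.check_dependency() != NULL) {
          record_failure("concurrent class loading");
          return;
        }
      }
    }

(The real body also handles logging and the debug-mode printing of violated dependences, as the hunks above show.)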
--- a/src/share/vm/ci/ciEnv.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/ci/ciEnv.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -247,9 +247,9 @@
   // Is this thread currently in the VM state?
   static bool is_in_vm();
 
-  // Helper routine for determining the validity of a compilation
-  // with respect to concurrent class loading.
-  void check_for_system_dictionary_modification(ciMethod* target);
+  // Helper routine for determining the validity of a compilation with
+  // respect to method dependencies (e.g. concurrent class loading).
+  void validate_compile_task_dependencies(ciMethod* target);
 
 public:
   enum {
@@ -317,8 +317,7 @@
                        ImplicitExceptionTable*   inc_table,
                        AbstractCompiler*         compiler,
                        int                       comp_level,
-                       bool                      has_debug_info = true,
-                       bool                      has_unsafe_access = false);
+                       bool                      has_unsafe_access);
 
 
   // Access to certain well known ciObjects.
--- a/src/share/vm/ci/ciField.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/ci/ciField.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -64,9 +64,6 @@
   // shared constructor code
   void initialize_from(fieldDescriptor* fd);
 
-  // The implementation of the print method.
-  void print_impl(outputStream* st);
-
 public:
   ciFlags flags() { return _flags; }
 
@@ -178,7 +175,12 @@
   bool is_volatile    () { return flags().is_volatile(); }
   bool is_transient   () { return flags().is_transient(); }
 
-  bool is_call_site_target() { return ((holder() == CURRENT_ENV->CallSite_klass()) && (name() == ciSymbol::target_name())); }
+  bool is_call_site_target() {
+    ciInstanceKlass* callsite_klass = CURRENT_ENV->CallSite_klass();
+    if (callsite_klass == NULL)
+      return false;
+    return (holder()->is_subclass_of(callsite_klass) && (name() == ciSymbol::target_name()));
+  }
 
   // Debugging output
   void print();
--- a/src/share/vm/ci/ciMethod.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/ci/ciMethod.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -1017,6 +1017,34 @@
 }
 
 // ------------------------------------------------------------------
+// ciMethod::code_size_for_inlining
+//
+// Code size for inlining decisions.
+//
+// Don't fully count method handle adapters against inlining budgets:
+// the metric we use here is the number of call sites in the adapter
+// as they are probably the instructions which generate some code.
+int ciMethod::code_size_for_inlining() {
+  check_is_loaded();
+
+  // Method handle adapters
+  if (is_method_handle_adapter()) {
+    // Count call sites
+    int call_site_count = 0;
+    ciBytecodeStream iter(this);
+    while (iter.next() != ciBytecodeStream::EOBC()) {
+      if (Bytecodes::is_invoke(iter.cur_bc())) {
+        call_site_count++;
+      }
+    }
+    return call_site_count;
+  }
+
+  // Normal method
+  return code_size();
+}
+
+// ------------------------------------------------------------------
 // ciMethod::instructions_size
 //
 // This is a rough metric for "fat" methods, compared before inlining
--- a/src/share/vm/ci/ciMethod.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/ci/ciMethod.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -157,6 +157,9 @@
   int interpreter_invocation_count() const       { check_is_loaded(); return _interpreter_invocation_count; }
   int interpreter_throwout_count() const         { check_is_loaded(); return _interpreter_throwout_count; }
 
+  // Code size for inlining decisions.
+  int code_size_for_inlining();
+
   int comp_level();
   int highest_osr_comp_level();
 
--- a/src/share/vm/ci/ciMethodHandle.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/ci/ciMethodHandle.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -37,7 +37,7 @@
 // ciMethodHandle::get_adapter
 //
 // Return an adapter for this MethodHandle.
-ciMethod* ciMethodHandle::get_adapter_impl(bool is_invokedynamic) const {
+ciMethod* ciMethodHandle::get_adapter_impl(bool is_invokedynamic) {
   VM_ENTRY_MARK;
   Handle h(get_oop());
   methodHandle callee(_callee->get_methodOop());
@@ -73,7 +73,7 @@
 // ciMethodHandle::get_adapter
 //
 // Return an adapter for this MethodHandle.
-ciMethod* ciMethodHandle::get_adapter(bool is_invokedynamic) const {
+ciMethod* ciMethodHandle::get_adapter(bool is_invokedynamic) {
   ciMethod* result = get_adapter_impl(is_invokedynamic);
   if (result) {
     // Fake up the MDO maturity.
@@ -86,11 +86,22 @@
 }
 
 
+#ifndef PRODUCT
 // ------------------------------------------------------------------
-// ciMethodHandle::print_impl
+// ciMethodHandle::print_chain_impl
 //
 // Implementation of the print method.
-void ciMethodHandle::print_impl(outputStream* st) {
-  st->print(" type=");
-  get_oop()->print();
+void ciMethodHandle::print_chain_impl(outputStream* st) {
+  ASSERT_IN_VM;
+  MethodHandleChain::print(get_oop());
 }
+
+
+// ------------------------------------------------------------------
+// ciMethodHandle::print_chain
+//
+// Implementation of the print_chain method.
+void ciMethodHandle::print_chain(outputStream* st) {
+  GUARDED_VM_ENTRY(print_chain_impl(st););
+}
+#endif
--- a/src/share/vm/ci/ciMethodHandle.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/ci/ciMethodHandle.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -37,19 +37,23 @@
   ciMethod*      _callee;
   ciMethod*      _caller;
   ciCallProfile  _profile;
+  ciMethod*      _method_handle_adapter;
+  ciMethod*      _invokedynamic_adapter;
 
   // Return an adapter for this MethodHandle.
-  ciMethod* get_adapter_impl(bool is_invokedynamic) const;
-  ciMethod* get_adapter(     bool is_invokedynamic) const;
+  ciMethod* get_adapter_impl(bool is_invokedynamic);
+  ciMethod* get_adapter(     bool is_invokedynamic);
 
 protected:
-  void print_impl(outputStream* st);
+  void print_chain_impl(outputStream* st) PRODUCT_RETURN;
 
 public:
   ciMethodHandle(instanceHandle h_i) :
     ciInstance(h_i),
     _callee(NULL),
-    _caller(NULL)
+    _caller(NULL),
+    _method_handle_adapter(NULL),
+    _invokedynamic_adapter(NULL)
   {}
 
   // What kind of ciObject is this?
@@ -60,10 +64,22 @@
   void set_call_profile(ciCallProfile profile)  { _profile = profile; }
 
   // Return an adapter for a MethodHandle call.
-  ciMethod* get_method_handle_adapter() const { return get_adapter(false); }
+  ciMethod* get_method_handle_adapter() {
+    if (_method_handle_adapter == NULL) {
+      _method_handle_adapter = get_adapter(false);
+    }
+    return _method_handle_adapter;
+  }
 
   // Return an adapter for an invokedynamic call.
-  ciMethod* get_invokedynamic_adapter() const { return get_adapter(true);  }
+  ciMethod* get_invokedynamic_adapter() {
+    if (_invokedynamic_adapter == NULL) {
+      _invokedynamic_adapter = get_adapter(true);
+    }
+    return _invokedynamic_adapter;
+  }
+
+  void print_chain(outputStream* st = tty) PRODUCT_RETURN;
 };
 
 #endif // SHARE_VM_CI_CIMETHODHANDLE_HPP
--- a/src/share/vm/ci/ciObject.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/ci/ciObject.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -194,16 +194,26 @@
 // ciObject::should_be_constant()
 bool ciObject::should_be_constant() {
   if (ScavengeRootsInCode >= 2)  return true;  // force everybody to be a constant
-  if (!JavaObjectsInPerm && !is_null_object()) {
+  if (is_null_object()) return true;
+
+  ciEnv* env = CURRENT_ENV;
+  if (!JavaObjectsInPerm) {
     // We want Strings and Classes to be embeddable by default since
     // they used to be in the perm world.  Not all Strings used to be
     // embeddable but there's no easy way to distinguish the interned
    // from the regular ones so just treat them all that way.
-    ciEnv* env = CURRENT_ENV;
     if (klass() == env->String_klass() || klass() == env->Class_klass()) {
       return true;
     }
   }
+  if (EnableInvokeDynamic &&
+      (klass()->is_subclass_of(env->MethodHandle_klass()) ||
+       klass()->is_subclass_of(env->CallSite_klass()))) {
+    assert(ScavengeRootsInCode >= 1, "must be");
+    // We want to treat these aggressively.
+    return true;
+  }
+
   return handle() == NULL || is_perm();
 }
 
--- a/src/share/vm/ci/ciStreams.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/ci/ciStreams.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -129,7 +129,8 @@
   // Return current ByteCode and increment PC to next bytecode, skipping all
   // intermediate constants.  Returns EOBC at end.
   // Expected usage:
-  //     while( (bc = iter.next()) != EOBC() ) { ... }
+  //     ciBytecodeStream iter(m);
+  //     while (iter.next() != ciBytecodeStream::EOBC()) { ... }
   Bytecodes::Code next() {
     _bc_start = _pc;                        // Capture start of bc
     if( _pc >= _end ) return EOBC();        // End-Of-Bytecodes
--- a/src/share/vm/classfile/javaClasses.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/classfile/javaClasses.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -28,6 +28,7 @@
 #include "classfile/vmSymbols.hpp"
 #include "code/debugInfo.hpp"
 #include "code/pcDesc.hpp"
+#include "compiler/compilerOracle.hpp"
 #include "interpreter/interpreter.hpp"
 #include "memory/oopFactory.hpp"
 #include "memory/resourceArea.hpp"
@@ -2323,6 +2324,8 @@
 
 int java_lang_invoke_AdapterMethodHandle::_conversion_offset;
 
+int java_lang_invoke_CountingMethodHandle::_vmcount_offset;
+
 void java_lang_invoke_MethodHandle::compute_offsets() {
   klassOop k = SystemDictionary::MethodHandle_klass();
   if (k != NULL && EnableInvokeDynamic) {
@@ -2371,6 +2374,23 @@
   }
 }
 
+void java_lang_invoke_CountingMethodHandle::compute_offsets() {
+  klassOop k = SystemDictionary::CountingMethodHandle_klass();
+  if (k != NULL && EnableInvokeDynamic) {
+    compute_offset(_vmcount_offset, k, vmSymbols::vmcount_name(), vmSymbols::int_signature(), true);
+  }
+}
+
+int java_lang_invoke_CountingMethodHandle::vmcount(oop mh) {
+  assert(is_instance(mh), "CMH only");
+  return mh->int_field(_vmcount_offset);
+}
+
+void java_lang_invoke_CountingMethodHandle::set_vmcount(oop mh, int count) {
+  assert(is_instance(mh), "CMH only");
+  mh->int_field_put(_vmcount_offset, count);
+}
+
 oop java_lang_invoke_MethodHandle::type(oop mh) {
   return mh->obj_field(_type_offset);
 }
@@ -2674,6 +2694,17 @@
   if (k != NULL) {
     compute_offset(_target_offset, k, vmSymbols::target_name(), vmSymbols::java_lang_invoke_MethodHandle_signature());
   }
+
+  // Disallow compilation of CallSite.setTargetNormal and CallSite.setTargetVolatile
+  // (For C2:  keep this until we have throttling logic for uncommon traps.)
+  if (k != NULL) {
+    instanceKlass* ik = instanceKlass::cast(k);
+    methodOop m_normal   = ik->lookup_method(vmSymbols::setTargetNormal_name(),   vmSymbols::setTarget_signature());
+    methodOop m_volatile = ik->lookup_method(vmSymbols::setTargetVolatile_name(), vmSymbols::setTarget_signature());
+    guarantee(m_normal && m_volatile, "must exist");
+    m_normal->set_not_compilable_quietly();
+    m_volatile->set_not_compilable_quietly();
+  }
 }
 
 oop java_lang_invoke_CallSite::target(oop site) {
@@ -3031,6 +3062,7 @@
     java_lang_invoke_MethodType::compute_offsets();
     java_lang_invoke_MethodTypeForm::compute_offsets();
     java_lang_invoke_CallSite::compute_offsets();
+    java_lang_invoke_CountingMethodHandle::compute_offsets();
   }
   java_security_AccessControlContext::compute_offsets();
   // Initialize reflection classes. The layouts of these classes
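
vmcount is an ordinary int field on the Java-side CountingMethodHandle object, so once the offset is computed the VM can read and update the counter directly, with no Java upcall. A hypothetical helper (not part of this changeset) built on the two accessors above would look like:

    // Hypothetical: bump the invocation counter from VM code.
    static void increment_vmcount(oop counting_mh) {
      int count = java_lang_invoke_CountingMethodHandle::vmcount(counting_mh);
      java_lang_invoke_CountingMethodHandle::set_vmcount(counting_mh, count + 1);
    }

Compiled code can reach the same field through vmcount_offset_in_bytes(), declared in the header diff that follows.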
--- a/src/share/vm/classfile/javaClasses.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/classfile/javaClasses.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -981,6 +981,34 @@
 };
 
 
+// A simple class that maintains an invocation count
+class java_lang_invoke_CountingMethodHandle: public java_lang_invoke_MethodHandle {
+  friend class JavaClasses;
+
+ private:
+  static int _vmcount_offset;
+  static void compute_offsets();
+
+ public:
+  // Accessors
+  static int            vmcount(oop mh);
+  static void       set_vmcount(oop mh, int count);
+
+  // Testers
+  static bool is_subclass(klassOop klass) {
+    return SystemDictionary::CountingMethodHandle_klass() != NULL &&
+      Klass::cast(klass)->is_subclass_of(SystemDictionary::CountingMethodHandle_klass());
+  }
+  static bool is_instance(oop obj) {
+    return obj != NULL && is_subclass(obj->klass());
+  }
+
+  // Accessors for code generation:
+  static int vmcount_offset_in_bytes()          { return _vmcount_offset; }
+};
+
+
+
 // Interface to java.lang.invoke.MemberName objects
 // (These are a private interface for Java code to query the class hierarchy.)
 
--- a/src/share/vm/classfile/systemDictionary.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/classfile/systemDictionary.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -133,14 +133,14 @@
   template(reflect_Method_klass,         java_lang_reflect_Method,       Pre) \
   template(reflect_Constructor_klass,    java_lang_reflect_Constructor,  Pre) \
                                                                               \
-  /* NOTE: needed too early in bootstrapping process to have checks based on JDK version */ \
-  /* Universe::is_gte_jdk14x_version() is not set up by this point. */        \
-  /* It's okay if this turns out to be NULL in non-1.4 JDKs. */               \
-  template(reflect_MagicAccessorImpl_klass,          sun_reflect_MagicAccessorImpl,  Opt) \
-  template(reflect_MethodAccessorImpl_klass, sun_reflect_MethodAccessorImpl, Opt_Only_JDK14NewRef) \
-  template(reflect_ConstructorAccessorImpl_klass, sun_reflect_ConstructorAccessorImpl, Opt_Only_JDK14NewRef) \
-  template(reflect_DelegatingClassLoader_klass, sun_reflect_DelegatingClassLoader, Opt) \
-  template(reflect_ConstantPool_klass,  sun_reflect_ConstantPool,       Opt_Only_JDK15) \
+  /* NOTE: needed too early in bootstrapping process to have checks based on JDK version */                        \
+  /* Universe::is_gte_jdk14x_version() is not set up by this point. */                                             \
+  /* It's okay if this turns out to be NULL in non-1.4 JDKs. */                                                    \
+  template(reflect_MagicAccessorImpl_klass,          sun_reflect_MagicAccessorImpl,  Opt)                          \
+  template(reflect_MethodAccessorImpl_klass, sun_reflect_MethodAccessorImpl, Opt_Only_JDK14NewRef)                 \
+  template(reflect_ConstructorAccessorImpl_klass, sun_reflect_ConstructorAccessorImpl, Opt_Only_JDK14NewRef)       \
+  template(reflect_DelegatingClassLoader_klass, sun_reflect_DelegatingClassLoader, Opt)                            \
+  template(reflect_ConstantPool_klass,  sun_reflect_ConstantPool,       Opt_Only_JDK15)                            \
   template(reflect_UnsafeStaticFieldAccessorImpl_klass, sun_reflect_UnsafeStaticFieldAccessorImpl, Opt_Only_JDK15) \
                                                                               \
   /* support for dynamic typing; it's OK if these are NULL in earlier JDKs */ \
@@ -155,6 +155,7 @@
   template(BootstrapMethodError_klass,     java_lang_BootstrapMethodError,            Pre_JSR292) \
   template(WrongMethodTypeException_klass, java_lang_invoke_WrongMethodTypeException, Pre_JSR292) \
   template(CallSite_klass,                 java_lang_invoke_CallSite,                 Pre_JSR292) \
+  template(CountingMethodHandle_klass,     java_lang_invoke_CountingMethodHandle,     Opt)        \
   template(ConstantCallSite_klass,         java_lang_invoke_ConstantCallSite,         Pre_JSR292) \
   template(MutableCallSite_klass,          java_lang_invoke_MutableCallSite,          Pre_JSR292) \
   template(VolatileCallSite_klass,         java_lang_invoke_VolatileCallSite,         Pre_JSR292) \
--- a/src/share/vm/classfile/vmSymbols.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/classfile/vmSymbols.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -218,6 +218,7 @@
   template(returnType_name,                           "returnType")                               \
   template(signature_name,                            "signature")                                \
   template(slot_name,                                 "slot")                                     \
+  template(selectAlternative_name,                    "selectAlternative")                        \
                                                                                                   \
   /* Support for annotations (JDK 1.5 and above) */                                               \
                                                                                                   \
@@ -246,9 +247,11 @@
   template(java_lang_invoke_MethodTypeForm_signature, "Ljava/lang/invoke/MethodTypeForm;")        \
   template(java_lang_invoke_MemberName,               "java/lang/invoke/MemberName")              \
   template(java_lang_invoke_MethodHandleNatives,      "java/lang/invoke/MethodHandleNatives")     \
+  template(java_lang_invoke_MethodHandleImpl,         "java/lang/invoke/MethodHandleImpl")        \
   template(java_lang_invoke_AdapterMethodHandle,      "java/lang/invoke/AdapterMethodHandle")     \
   template(java_lang_invoke_BoundMethodHandle,        "java/lang/invoke/BoundMethodHandle")       \
   template(java_lang_invoke_DirectMethodHandle,       "java/lang/invoke/DirectMethodHandle")      \
+  template(java_lang_invoke_CountingMethodHandle,     "java/lang/invoke/CountingMethodHandle")    \
   /* internal up-calls made only by the JVM, via class sun.invoke.MethodHandleNatives: */         \
   template(findMethodHandleType_name,                 "findMethodHandleType")                     \
   template(findMethodHandleType_signature,       "(Ljava/lang/Class;[Ljava/lang/Class;)Ljava/lang/invoke/MethodType;") \
@@ -258,8 +261,12 @@
   template(linkMethodHandleConstant_signature, "(Ljava/lang/Class;ILjava/lang/Class;Ljava/lang/String;Ljava/lang/Object;)Ljava/lang/invoke/MethodHandle;") \
   template(makeDynamicCallSite_name,                  "makeDynamicCallSite")                      \
   template(makeDynamicCallSite_signature, "(Ljava/lang/invoke/MethodHandle;Ljava/lang/String;Ljava/lang/invoke/MethodType;Ljava/lang/Object;Ljava/lang/invoke/MemberName;I)Ljava/lang/invoke/CallSite;") \
+  template(setTargetNormal_name,                      "setTargetNormal")                          \
+  template(setTargetVolatile_name,                    "setTargetVolatile")                        \
+  template(setTarget_signature,                       "(Ljava/lang/invoke/MethodHandle;)V")       \
   NOT_LP64(  do_alias(machine_word_signature,         int_signature)  )                           \
   LP64_ONLY( do_alias(machine_word_signature,         long_signature) )                           \
+  template(selectAlternative_signature, "(ZLjava/lang/invoke/MethodHandle;Ljava/lang/invoke/MethodHandle;)Ljava/lang/invoke/MethodHandle;") \
                                                                                                   \
   /* common method and field names */                                                             \
   template(object_initializer_name,                   "<init>")                                   \
@@ -344,6 +351,7 @@
   template(vmmethod_name,                             "vmmethod")                                 \
   template(vmtarget_name,                             "vmtarget")                                 \
   template(vmentry_name,                              "vmentry")                                  \
+  template(vmcount_name,                              "vmcount")                                  \
   template(vmslots_name,                              "vmslots")                                  \
   template(vmlayout_name,                             "vmlayout")                                 \
   template(vmindex_name,                              "vmindex")                                  \
@@ -907,6 +915,8 @@
   do_intrinsic(_invokeVarargs,            java_lang_invoke_MethodHandle, invokeVarargs_name, object_array_object_signature, F_R)  \
   do_intrinsic(_invokeDynamic,            java_lang_invoke_InvokeDynamic, star_name,         object_array_object_signature, F_SN) \
                                                                                                                         \
+  do_intrinsic(_selectAlternative,        java_lang_invoke_MethodHandleImpl, selectAlternative_name, selectAlternative_signature, F_S)  \
+                                                                                                                        \
   /* unboxing methods: */                                                                                               \
   do_intrinsic(_booleanValue,             java_lang_Boolean,      booleanValue_name, void_boolean_signature, F_R)       \
    do_name(     booleanValue_name,       "booleanValue")                                                                \
--- a/src/share/vm/code/dependencies.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/code/dependencies.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -113,9 +113,9 @@
   assert_common_1(no_finalizable_subclasses, ctxk);
 }
 
-void Dependencies::assert_call_site_target_value(ciKlass* ctxk, ciCallSite* call_site, ciMethodHandle* method_handle) {
-  check_ctxk(ctxk);
-  assert_common_3(call_site_target_value, ctxk, call_site, method_handle);
+void Dependencies::assert_call_site_target_value(ciCallSite* call_site, ciMethodHandle* method_handle) {
+  check_ctxk(call_site->klass());
+  assert_common_2(call_site_target_value, call_site, method_handle);
 }
 
 // Helper function.  If we are adding a new dep. under ctxk2,
@@ -135,7 +135,7 @@
   }
 }
 
-void Dependencies::assert_common_1(Dependencies::DepType dept, ciObject* x) {
+void Dependencies::assert_common_1(DepType dept, ciObject* x) {
   assert(dep_args(dept) == 1, "sanity");
   log_dependency(dept, x);
   GrowableArray<ciObject*>* deps = _deps[dept];
@@ -148,21 +148,37 @@
   }
 }
 
-void Dependencies::assert_common_2(Dependencies::DepType dept,
-                                   ciKlass* ctxk, ciObject* x) {
-  assert(dep_context_arg(dept) == 0, "sanity");
+void Dependencies::assert_common_2(DepType dept,
+                                   ciObject* x0, ciObject* x1) {
   assert(dep_args(dept) == 2, "sanity");
-  log_dependency(dept, ctxk, x);
+  log_dependency(dept, x0, x1);
   GrowableArray<ciObject*>* deps = _deps[dept];
 
   // see if the same (or a similar) dep is already recorded
-  if (note_dep_seen(dept, x)) {
-    // look in this bucket for redundant assertions
-    const int stride = 2;
-    for (int i = deps->length(); (i -= stride) >= 0; ) {
-      ciObject* x1 = deps->at(i+1);
-      if (x == x1) {  // same subject; check the context
-        if (maybe_merge_ctxk(deps, i+0, ctxk)) {
+  bool has_ctxk = has_explicit_context_arg(dept);
+  if (has_ctxk) {
+    assert(dep_context_arg(dept) == 0, "sanity");
+    if (note_dep_seen(dept, x1)) {
+      // look in this bucket for redundant assertions
+      const int stride = 2;
+      for (int i = deps->length(); (i -= stride) >= 0; ) {
+        ciObject* y1 = deps->at(i+1);
+        if (x1 == y1) {  // same subject; check the context
+          if (maybe_merge_ctxk(deps, i+0, x0->as_klass())) {
+            return;
+          }
+        }
+      }
+    }
+  } else {
+    assert(dep_implicit_context_arg(dept) == 0, "sanity");
+    if (note_dep_seen(dept, x0) && note_dep_seen(dept, x1)) {
+      // look in this bucket for redundant assertions
+      const int stride = 2;
+      for (int i = deps->length(); (i -= stride) >= 0; ) {
+        ciObject* y0 = deps->at(i+0);
+        ciObject* y1 = deps->at(i+1);
+        if (x0 == y0 && x1 == y1) {
           return;
         }
       }
@@ -170,11 +186,11 @@
   }
 
   // append the assertion in the correct bucket:
-  deps->append(ctxk);
-  deps->append(x);
+  deps->append(x0);
+  deps->append(x1);
 }
 
-void Dependencies::assert_common_3(Dependencies::DepType dept,
+void Dependencies::assert_common_3(DepType dept,
                                    ciKlass* ctxk, ciObject* x, ciObject* x2) {
   assert(dep_context_arg(dept) == 0, "sanity");
   assert(dep_args(dept) == 3, "sanity");
@@ -361,7 +377,7 @@
   3, // unique_concrete_subtypes_2 ctxk, k1, k2
   3, // unique_concrete_methods_2 ctxk, m1, m2
   1, // no_finalizable_subclasses ctxk
-  3  // call_site_target_value ctxk, call_site, method_handle
+  2  // call_site_target_value call_site, method_handle
 };
 
 const char* Dependencies::dep_name(Dependencies::DepType dept) {
@@ -375,10 +391,7 @@
 }
 
 void Dependencies::check_valid_dependency_type(DepType dept) {
-  for (int deptv = (int) FIRST_TYPE; deptv < (int) TYPE_LIMIT; deptv++) {
-    if (dept == ((DepType) deptv))  return;
-  }
-  ShouldNotReachHere();
+  guarantee(FIRST_TYPE <= dept && dept < TYPE_LIMIT, err_msg("invalid dependency type: %d", (int) dept));
 }
 
 // for the sake of the compiler log, print out current dependencies:
@@ -586,8 +599,7 @@
     code_byte -= ctxk_bit;
     DepType dept = (DepType)code_byte;
     _type = dept;
-    guarantee((dept - FIRST_TYPE) < (TYPE_LIMIT - FIRST_TYPE),
-              "bad dependency type tag");
+    Dependencies::check_valid_dependency_type(dept);
     int stride = _dep_args[dept];
     assert(stride == dep_args(dept), "sanity");
     int skipj = -1;
@@ -615,18 +627,35 @@
 
 klassOop Dependencies::DepStream::context_type() {
   assert(must_be_in_vm(), "raw oops here");
-  int ctxkj = dep_context_arg(_type);  // -1 if no context arg
-  if (ctxkj < 0) {
-    return NULL;           // for example, evol_method
-  } else {
-    oop k = recorded_oop_at(_xi[ctxkj]);
-    if (k != NULL) {       // context type was not compressed away
+
+  // Most dependencies have an explicit context type argument.
+  {
+    int ctxkj = dep_context_arg(_type);  // -1 if no explicit context arg
+    if (ctxkj >= 0) {
+      oop k = argument(ctxkj);
+      if (k != NULL) {       // context type was not compressed away
+        assert(k->is_klass(), "type check");
+        return (klassOop) k;
+      }
+      // recompute "default" context type
+      return ctxk_encoded_as_null(_type, argument(ctxkj+1));
+    }
+  }
+
+  // Some dependencies are using the klass of the first object
+  // argument as implicit context type (e.g. call_site_target_value).
+  {
+    int ctxkj = dep_implicit_context_arg(_type);
+    if (ctxkj >= 0) {
+      oop k = argument(ctxkj)->klass();
       assert(k->is_klass(), "type check");
       return (klassOop) k;
-    } else {               // recompute "default" context type
-      return ctxk_encoded_as_null(_type, recorded_oop_at(_xi[ctxkj+1]));
     }
   }
+
+  // And some dependencies don't have a context type at all,
+  // e.g. evol_method.
+  return NULL;
 }
 
 /// Checking dependencies:
@@ -1409,21 +1438,20 @@
 }
 
 
-klassOop Dependencies::check_call_site_target_value(klassOop ctxk, oop call_site, oop method_handle, CallSiteDepChange* changes) {
+klassOop Dependencies::check_call_site_target_value(oop call_site, oop method_handle, CallSiteDepChange* changes) {
   assert(call_site    ->is_a(SystemDictionary::CallSite_klass()),     "sanity");
   assert(method_handle->is_a(SystemDictionary::MethodHandle_klass()), "sanity");
   if (changes == NULL) {
     // Validate all CallSites
     if (java_lang_invoke_CallSite::target(call_site) != method_handle)
-      return ctxk;  // assertion failed
+      return call_site->klass();  // assertion failed
   } else {
     // Validate the given CallSite
     if (call_site == changes->call_site() && java_lang_invoke_CallSite::target(call_site) != changes->method_handle()) {
       assert(method_handle != changes->method_handle(), "must be");
-      return ctxk;  // assertion failed
+      return call_site->klass();  // assertion failed
     }
   }
-  assert(java_lang_invoke_CallSite::target(call_site) == method_handle, "should still be valid");
   return NULL;  // assertion still valid
 }
 
@@ -1488,7 +1516,7 @@
   klassOop witness = NULL;
   switch (type()) {
   case call_site_target_value:
-    witness = check_call_site_target_value(context_type(), argument(1), argument(2), changes);
+    witness = check_call_site_target_value(argument(0), argument(1), changes);
     break;
   default:
     witness = NULL;
--- a/src/share/vm/code/dependencies.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/code/dependencies.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -166,9 +166,14 @@
     LG2_TYPE_LIMIT = 4,  // assert(TYPE_LIMIT <= (1<<LG2_TYPE_LIMIT))
 
     // handy categorizations of dependency types:
-    all_types      = ((1<<TYPE_LIMIT)-1) & ((-1)<<FIRST_TYPE),
-    non_ctxk_types = (1<<evol_method),
-    ctxk_types     = all_types & ~non_ctxk_types,
+    all_types           = ((1 << TYPE_LIMIT) - 1) & ((-1) << FIRST_TYPE),
+
+    non_klass_types     = (1 << call_site_target_value),
+    klass_types         = all_types & ~non_klass_types,
+
+    non_ctxk_types      = (1 << evol_method),
+    implicit_ctxk_types = (1 << call_site_target_value),
+    explicit_ctxk_types = all_types & ~(non_ctxk_types | implicit_ctxk_types),
 
     max_arg_count = 3,   // current maximum number of arguments (incl. ctxk)
 
@@ -184,9 +189,15 @@
 
   static const char* dep_name(DepType dept);
   static int         dep_args(DepType dept);
-  static int  dep_context_arg(DepType dept) {
-    return dept_in_mask(dept, ctxk_types)? 0: -1;
-  }
+
+  static bool is_klass_type(           DepType dept) { return dept_in_mask(dept, klass_types        ); }
+
+  static bool has_explicit_context_arg(DepType dept) { return dept_in_mask(dept, explicit_ctxk_types); }
+  static bool has_implicit_context_arg(DepType dept) { return dept_in_mask(dept, implicit_ctxk_types); }
+
+  static int           dep_context_arg(DepType dept) { return has_explicit_context_arg(dept) ? 0 : -1; }
+  static int  dep_implicit_context_arg(DepType dept) { return has_implicit_context_arg(dept) ? 0 : -1; }
+
   static void check_valid_dependency_type(DepType dept);
 
  private:
@@ -250,8 +261,8 @@
   }
 
   void assert_common_1(DepType dept, ciObject* x);
-  void assert_common_2(DepType dept, ciKlass* ctxk, ciObject* x);
-  void assert_common_3(DepType dept, ciKlass* ctxk, ciObject* x, ciObject* x2);
+  void assert_common_2(DepType dept, ciObject* x0, ciObject* x1);
+  void assert_common_3(DepType dept, ciKlass* ctxk, ciObject* x1, ciObject* x2);
 
  public:
   // Adding assertions to a new dependency set at compile time:
@@ -264,7 +275,7 @@
   void assert_abstract_with_exclusive_concrete_subtypes(ciKlass* ctxk, ciKlass* k1, ciKlass* k2);
   void assert_exclusive_concrete_methods(ciKlass* ctxk, ciMethod* m1, ciMethod* m2);
   void assert_has_no_finalizable_subclasses(ciKlass* ctxk);
-  void assert_call_site_target_value(ciKlass* ctxk, ciCallSite* call_site, ciMethodHandle* method_handle);
+  void assert_call_site_target_value(ciCallSite* call_site, ciMethodHandle* method_handle);
 
   // Define whether a given method or type is concrete.
   // These methods define the term "concrete" as used in this module.
@@ -318,7 +329,7 @@
   static klassOop check_exclusive_concrete_methods(klassOop ctxk, methodOop m1, methodOop m2,
                                                    KlassDepChange* changes = NULL);
   static klassOop check_has_no_finalizable_subclasses(klassOop ctxk, KlassDepChange* changes = NULL);
-  static klassOop check_call_site_target_value(klassOop ctxk, oop call_site, oop method_handle, CallSiteDepChange* changes = NULL);
+  static klassOop check_call_site_target_value(oop call_site, oop method_handle, CallSiteDepChange* changes = NULL);
   // A returned klassOop is NULL if the dependency assertion is still
   // valid.  A non-NULL klassOop is a 'witness' to the assertion
   // failure, a point in the class hierarchy where the assertion has
@@ -455,6 +466,8 @@
     oop argument(int i);         // => recorded_oop_at(argument_index(i))
     klassOop context_type();
 
+    bool is_klass_type()         { return Dependencies::is_klass_type(type()); }
+
     methodOop method_argument(int i) {
       oop x = argument(i);
       assert(x->is_method(), "type");
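
[Reviewer note] Every category introduced above is a bitmask over DepType, and
membership reduces to a single AND via dept_in_mask(). A compile-and-run sketch
of the scheme with a trimmed type list (the enum values are illustrative):

    #include <cassert>

    enum DepType {
      evol_method,
      leaf_type,
      call_site_target_value,
      TYPE_LIMIT
    };

    enum {
      all_types           = (1 << TYPE_LIMIT) - 1,
      non_klass_types     = (1 << call_site_target_value),
      klass_types         = all_types & ~non_klass_types,

      non_ctxk_types      = (1 << evol_method),
      implicit_ctxk_types = (1 << call_site_target_value),
      explicit_ctxk_types = all_types & ~(non_ctxk_types | implicit_ctxk_types)
    };

    static bool dept_in_mask(DepType dept, int mask) {
      return ((1 << dept) & mask) != 0;
    }

    int main() {
      assert( dept_in_mask(leaf_type,              explicit_ctxk_types));
      assert( dept_in_mask(call_site_target_value, implicit_ctxk_types));
      assert(!dept_in_mask(call_site_target_value, klass_types));
      assert( dept_in_mask(evol_method,            klass_types));
      return 0;
    }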
--- a/src/share/vm/code/nmethod.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/code/nmethod.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -451,7 +451,6 @@
   _stack_traversal_mark       = 0;
   _unload_reported            = false;           // jvmti state
 
-  NOT_PRODUCT(_has_debug_info = false);
 #ifdef ASSERT
   _oops_are_stale             = false;
 #endif
--- a/src/share/vm/code/nmethod.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/code/nmethod.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -191,8 +191,6 @@
 
   jbyte _scavenge_root_state;
 
-  NOT_PRODUCT(bool _has_debug_info; )
-
   // Nmethod Flushing lock. If non-zero, then the nmethod is not removed
   // and is not made into a zombie. However, once the nmethod is made into
   // a zombie, it will be locked one final time if CompiledMethodUnload
@@ -329,11 +327,6 @@
   methodOop method() const                        { return _method; }
   AbstractCompiler* compiler() const              { return _compiler; }
 
-#ifndef PRODUCT
-  bool has_debug_info() const                     { return _has_debug_info; }
-  void set_has_debug_info(bool f)                 { _has_debug_info = false; }
-#endif // NOT PRODUCT
-
   // type info
   bool is_nmethod() const                         { return true; }
   bool is_java_method() const                     { return !method()->is_native(); }
--- a/src/share/vm/code/pcDesc.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/code/pcDesc.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -30,11 +30,10 @@
 #include "memory/resourceArea.hpp"
 
 PcDesc::PcDesc(int pc_offset, int scope_decode_offset, int obj_decode_offset) {
-  assert(sizeof(PcDescFlags) <= 4, "occupies more than a word");
   _pc_offset           = pc_offset;
   _scope_decode_offset = scope_decode_offset;
   _obj_decode_offset   = obj_decode_offset;
-  _flags.word          = 0;
+  _flags               = 0;
 }
 
 address PcDesc::real_pc(const nmethod* code) const {
@@ -44,7 +43,7 @@
 void PcDesc::print(nmethod* code) {
 #ifndef PRODUCT
   ResourceMark rm;
-  tty->print_cr("PcDesc(pc=0x%lx offset=%x bits=%x):", real_pc(code), pc_offset(), _flags.bits);
+  tty->print_cr("PcDesc(pc=0x%lx offset=%x bits=%x):", real_pc(code), pc_offset(), _flags);
 
   if (scope_decode_offset() == DebugInformationRecorder::serialized_null) {
     return;
--- a/src/share/vm/code/pcDesc.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/code/pcDesc.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -39,15 +39,17 @@
   int _scope_decode_offset; // offset for scope in nmethod
   int _obj_decode_offset;
 
-  union PcDescFlags {
-    int word;
-    struct {
-      unsigned int reexecute: 1;
-      unsigned int is_method_handle_invoke: 1;
-      unsigned int return_oop: 1;
-    } bits;
-    bool operator ==(const PcDescFlags& other) { return word == other.word; }
-  } _flags;
+  enum {
+    PCDESC_reexecute               = 1 << 0,
+    PCDESC_is_method_handle_invoke = 1 << 1,
+    PCDESC_return_oop              = 1 << 2
+  };
+
+  int _flags;
+
+  void set_flag(int mask, bool z) {
+    _flags = z ? (_flags | mask) : (_flags & ~mask);
+  }
 
  public:
   int pc_offset() const           { return _pc_offset;   }
@@ -69,8 +71,8 @@
   };
 
   // Flags
-  bool     should_reexecute()              const { return _flags.bits.reexecute; }
-  void set_should_reexecute(bool z)              { _flags.bits.reexecute = z;    }
+  bool     should_reexecute()              const { return (_flags & PCDESC_reexecute) != 0; }
+  void set_should_reexecute(bool z)              { set_flag(PCDESC_reexecute, z); }
 
   // Does pd refer to the same information as pd?
   bool is_same_info(const PcDesc* pd) {
@@ -79,11 +81,11 @@
       _flags == pd->_flags;
   }
 
-  bool     is_method_handle_invoke()       const { return _flags.bits.is_method_handle_invoke;     }
-  void set_is_method_handle_invoke(bool z)       {        _flags.bits.is_method_handle_invoke = z; }
+  bool     is_method_handle_invoke()       const { return (_flags & PCDESC_is_method_handle_invoke) != 0;     }
+  void set_is_method_handle_invoke(bool z)       { set_flag(PCDESC_is_method_handle_invoke, z); }
 
-  bool     return_oop()                    const { return _flags.bits.return_oop;     }
-  void set_return_oop(bool z)                    {        _flags.bits.return_oop = z; }
+  bool     return_oop()                    const { return (_flags & PCDESC_return_oop) != 0;     }
+  void set_return_oop(bool z)                    { set_flag(PCDESC_return_oop, z); }
 
   // Returns the real pc
   address real_pc(const nmethod* code) const;
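
[Reviewer note] The union-of-bitfields this replaces had implementation-defined
layout and needed a sizeof assertion plus an operator== just to compare; a
plain int with explicit masks gets both for free. A self-contained sketch of
the resulting accessor pattern:

    #include <cassert>

    class Flags {
      enum {
        FLAG_reexecute  = 1 << 0,
        FLAG_mh_invoke  = 1 << 1,
        FLAG_return_oop = 1 << 2
      };
      int _flags;

      void set_flag(int mask, bool z) {
        _flags = z ? (_flags | mask) : (_flags & ~mask);
      }

     public:
      Flags() : _flags(0) {}

      bool should_reexecute() const     { return (_flags & FLAG_reexecute) != 0; }
      void set_should_reexecute(bool z) { set_flag(FLAG_reexecute, z); }

      // A plain int makes "same info?" checks trivial, as in is_same_info():
      bool same_flags(const Flags& other) const { return _flags == other._flags; }
    };

    int main() {
      Flags a, b;
      a.set_should_reexecute(true);
      assert(a.should_reexecute() && !a.same_flags(b));
      a.set_should_reexecute(false);
      assert(a.same_flags(b));
    }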
--- a/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,7 @@
 #include "gc_implementation/g1/collectionSetChooser.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
+#include "gc_implementation/g1/g1ErgoVerbose.hpp"
 #include "memory/space.inline.hpp"
 
 CSetChooserCache::CSetChooserCache() {
@@ -358,6 +359,9 @@
   if (_cache.is_empty()) {
     assert(_curMarkedIndex == _numMarkedRegions,
            "if cache is empty, list should also be empty");
+    ergo_verbose0(ErgoCSetConstruction,
+                  "stop adding old regions to CSet",
+                  ergo_format_reason("cache is empty"));
     return NULL;
   }
 
@@ -368,10 +372,23 @@
   if (g1p->adaptive_young_list_length()) {
     if (time_remaining - predicted_time < 0.0) {
       g1h->check_if_region_is_too_expensive(predicted_time);
+      ergo_verbose2(ErgoCSetConstruction,
+                    "stop adding old regions to CSet",
+                    ergo_format_reason("predicted old region time higher than remaining time")
+                    ergo_format_ms("predicted old region time")
+                    ergo_format_ms("remaining time"),
+                    predicted_time, time_remaining);
       return NULL;
     }
   } else {
-    if (predicted_time > 2.0 * avg_prediction) {
+    double threshold = 2.0 * avg_prediction;
+    if (predicted_time > threshold) {
+      ergo_verbose2(ErgoCSetConstruction,
+                    "stop adding old regions to CSet",
+                    ergo_format_reason("predicted old region time higher than threshold")
+                    ergo_format_ms("predicted old region time")
+                    ergo_format_ms("threshold"),
+                    predicted_time, threshold);
       return NULL;
     }
   }
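
[Reviewer note] g1ErgoVerbose.hpp is not part of this changeset's visible
hunks, but the call sites only parse once you notice the ergo_format_* pieces
are string literals concatenated into one printf-style format (hence no commas
between them). A plausible reduced sketch of the mechanism — not the actual
HotSpot definitions:

    #include <cstdio>

    // Reduced stand-ins: each ergo_format_* piece is a string literal, so
    // adjacent pieces concatenate into a single printf-style format.
    #define ergo_format_reason(s) ", reason: " s
    #define ergo_format_ms(s)     ", " s ": %1.2f ms"

    #define ergo_verbose2(tag, action, fmt, a0, a1) \
      std::printf("%s (%s" fmt ")\n", action, tag, a0, a1)

    int main() {
      double predicted = 12.5, remaining = 8.0;
      ergo_verbose2("CSet Construction",
                    "stop adding old regions to CSet",
                    ergo_format_reason("predicted old region time higher than remaining time")
                    ergo_format_ms("predicted old region time")
                    ergo_format_ms("remaining time"),
                    predicted, remaining);
    }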
--- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -91,7 +91,7 @@
       }
     }
 
-    g1p->check_prediction_validity();
+    g1p->revise_young_list_target_length_if_necessary();
   }
 }
 
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -28,6 +28,7 @@
 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
+#include "gc_implementation/g1/g1ErgoVerbose.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
@@ -1727,18 +1728,21 @@
 
   size_t known_garbage_bytes =
     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
-#if 0
-  gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
-                         (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
-                         (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
-                         (double) known_garbage_bytes / (double) (1024 * 1024));
-#endif // 0
   g1p->set_known_garbage_bytes(known_garbage_bytes);
 
   size_t start_used_bytes = g1h->used();
   _at_least_one_mark_complete = true;
   g1h->set_marking_complete();
 
+  ergo_verbose4(ErgoConcCycles,
+           "finish cleanup",
+           ergo_format_byte("occupancy")
+           ergo_format_byte("capacity")
+           ergo_format_byte_perc("known garbage"),
+           start_used_bytes, g1h->capacity(),
+           known_garbage_bytes,
+           ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);
+
   double count_end = os::elapsedTime();
   double this_final_counting_time = (count_end - start);
   if (G1PrintParCleanupStats) {
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -31,6 +31,7 @@
 #include "gc_implementation/g1/g1AllocRegion.inline.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
+#include "gc_implementation/g1/g1ErgoVerbose.hpp"
 #include "gc_implementation/g1/g1MarkSweep.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.inline.hpp"
@@ -577,6 +578,11 @@
     res = new_region_try_secondary_free_list();
   }
   if (res == NULL && do_expand) {
+    ergo_verbose1(ErgoHeapSizing,
+                  "attempt heap expansion",
+                  ergo_format_reason("region allocation request failed")
+                  ergo_format_byte("allocation request"),
+                  word_size * HeapWordSize);
     if (expand(word_size * HeapWordSize)) {
       // Even though the heap was expanded, it might not have reached
       // the desired size. So, we cannot assume that the allocation
@@ -790,6 +796,11 @@
       // room available.
       assert(num_regions > fs, "earlier allocation should have succeeded");
 
+      ergo_verbose1(ErgoHeapSizing,
+                    "attempt heap expansion",
+                    ergo_format_reason("humongous allocation request failed")
+                    ergo_format_byte("allocation request"),
+                    word_size * HeapWordSize);
       if (expand((num_regions - fs) * HeapRegion::GrainBytes)) {
         // Even though the heap was expanded, it might not have
         // reached the desired size. So, we cannot assume that the
@@ -906,6 +917,8 @@
 
       if (GC_locker::is_active_and_needs_gc()) {
         if (g1_policy()->can_expand_young_list()) {
+          // No need for an ergo verbose message here,
+          // can_expand_young_list() does this when it returns true.
           result = _mutator_alloc_region.attempt_allocation_force(word_size,
                                                       false /* bot_updates */);
           if (result != NULL) {
@@ -1477,63 +1490,34 @@
   // we'll try to make the capacity smaller than it, not greater).
   maximum_desired_capacity =  MAX2(maximum_desired_capacity, min_heap_size);
 
-  if (PrintGC && Verbose) {
-    const double free_percentage =
-      (double) free_after_gc / (double) capacity_after_gc;
-    gclog_or_tty->print_cr("Computing new size after full GC ");
-    gclog_or_tty->print_cr("  "
-                           "  minimum_free_percentage: %6.2f",
-                           minimum_free_percentage);
-    gclog_or_tty->print_cr("  "
-                           "  maximum_free_percentage: %6.2f",
-                           maximum_free_percentage);
-    gclog_or_tty->print_cr("  "
-                           "  capacity: %6.1fK"
-                           "  minimum_desired_capacity: %6.1fK"
-                           "  maximum_desired_capacity: %6.1fK",
-                           (double) capacity_after_gc / (double) K,
-                           (double) minimum_desired_capacity / (double) K,
-                           (double) maximum_desired_capacity / (double) K);
-    gclog_or_tty->print_cr("  "
-                           "  free_after_gc: %6.1fK"
-                           "  used_after_gc: %6.1fK",
-                           (double) free_after_gc / (double) K,
-                           (double) used_after_gc / (double) K);
-    gclog_or_tty->print_cr("  "
-                           "   free_percentage: %6.2f",
-                           free_percentage);
-  }
   if (capacity_after_gc < minimum_desired_capacity) {
     // Don't expand unless it's significant
     size_t expand_bytes = minimum_desired_capacity - capacity_after_gc;
-    if (expand(expand_bytes)) {
-      if (PrintGC && Verbose) {
-        gclog_or_tty->print_cr("  "
-                               "  expanding:"
-                               "  max_heap_size: %6.1fK"
-                               "  minimum_desired_capacity: %6.1fK"
-                               "  expand_bytes: %6.1fK",
-                               (double) max_heap_size / (double) K,
-                               (double) minimum_desired_capacity / (double) K,
-                               (double) expand_bytes / (double) K);
-      }
-    }
+    ergo_verbose4(ErgoHeapSizing,
+                  "attempt heap expansion",
+                  ergo_format_reason("capacity lower than "
+                                     "min desired capacity after Full GC")
+                  ergo_format_byte("capacity")
+                  ergo_format_byte("occupancy")
+                  ergo_format_byte_perc("min desired capacity"),
+                  capacity_after_gc, used_after_gc,
+                  minimum_desired_capacity, (double) MinHeapFreeRatio);
+    expand(expand_bytes);
 
     // No expansion, now see if we want to shrink
   } else if (capacity_after_gc > maximum_desired_capacity) {
     // Capacity too large, compute shrinking size
     size_t shrink_bytes = capacity_after_gc - maximum_desired_capacity;
+    ergo_verbose4(ErgoHeapSizing,
+                  "attempt heap shrinking",
+                  ergo_format_reason("capacity higher than "
+                                     "max desired capacity after Full GC")
+                  ergo_format_byte("capacity")
+                  ergo_format_byte("occupancy")
+                  ergo_format_byte_perc("max desired capacity"),
+                  capacity_after_gc, used_after_gc,
+                  maximum_desired_capacity, (double) MaxHeapFreeRatio);
     shrink(shrink_bytes);
-    if (PrintGC && Verbose) {
-      gclog_or_tty->print_cr("  "
-                             "  shrinking:"
-                             "  min_heap_size: %6.1fK"
-                             "  maximum_desired_capacity: %6.1fK"
-                             "  shrink_bytes: %6.1fK",
-                             (double) min_heap_size / (double) K,
-                             (double) maximum_desired_capacity / (double) K,
-                             (double) shrink_bytes / (double) K);
-    }
   }
 }
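
[Reviewer note] minimum_desired_capacity and maximum_desired_capacity are
computed just above this hunk, outside the context shown, presumably in the
usual MinHeapFreeRatio/MaxHeapFreeRatio shape, capacity = used / (1 - ratio).
A worked sketch under that assumption (the numbers are examples only):

    #include <cstdio>

    int main() {
      // Assumed shape of the computation feeding the hunk above:
      // keep at least MinHeapFreeRatio% and at most MaxHeapFreeRatio% free.
      double used_after_gc = 600.0;   // MB, example value
      double min_free = 0.40;         // MinHeapFreeRatio = 40
      double max_free = 0.70;         // MaxHeapFreeRatio = 70

      double minimum_desired = used_after_gc / (1.0 - min_free);  // 1000 MB
      double maximum_desired = used_after_gc / (1.0 - max_free);  // 2000 MB

      // capacity < 1000 MB  -> attempt heap expansion
      // capacity > 2000 MB  -> attempt heap shrinking
      std::printf("expand below %.0f MB, shrink above %.0f MB\n",
                  minimum_desired, maximum_desired);
    }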
 
@@ -1619,6 +1603,11 @@
   verify_region_sets_optional();
 
   size_t expand_bytes = MAX2(word_size * HeapWordSize, MinHeapDeltaBytes);
+  ergo_verbose1(ErgoHeapSizing,
+                "attempt heap expansion",
+                ergo_format_reason("allocation request failed")
+                ergo_format_byte("allocation request"),
+                word_size * HeapWordSize);
   if (expand(expand_bytes)) {
     _hrs.verify_optional();
     verify_region_sets_optional();
@@ -1646,11 +1635,11 @@
   size_t aligned_expand_bytes = ReservedSpace::page_align_size_up(expand_bytes);
   aligned_expand_bytes = align_size_up(aligned_expand_bytes,
                                        HeapRegion::GrainBytes);
-
-  if (Verbose && PrintGC) {
-    gclog_or_tty->print("Expanding garbage-first heap from %ldK by %ldK",
-                           old_mem_size/K, aligned_expand_bytes/K);
-  }
+  ergo_verbose2(ErgoHeapSizing,
+                "expand the heap",
+                ergo_format_byte("requested expansion amount")
+                ergo_format_byte("attempted expansion amount"),
+                expand_bytes, aligned_expand_bytes);
 
   // First commit the memory.
   HeapWord* old_end = (HeapWord*) _g1_storage.high();
@@ -1693,7 +1682,11 @@
       }
       assert(curr == mr.end(), "post-condition");
     }
+    g1_policy()->record_new_heap_size(n_regions());
   } else {
+    ergo_verbose0(ErgoHeapSizing,
+                  "did not expand the heap",
+                  ergo_format_reason("heap expansion operation failed"));
     // The expansion of the virtual storage space was unsuccessful.
     // Let's see if it was because we ran out of swap.
     if (G1ExitOnExpansionFailure &&
@@ -1702,13 +1695,6 @@
       vm_exit_out_of_memory(aligned_expand_bytes, "G1 heap expansion");
     }
   }
-
-  if (Verbose && PrintGC) {
-    size_t new_mem_size = _g1_storage.committed_size();
-    gclog_or_tty->print_cr("...%s, expanded to %ldK",
-                           (successful ? "Successful" : "Failed"),
-                           new_mem_size/K);
-  }
   return successful;
 }
 
@@ -1722,6 +1708,13 @@
   MemRegion mr = _hrs.shrink_by(aligned_shrink_bytes, &num_regions_deleted);
   HeapWord* old_end = (HeapWord*) _g1_storage.high();
   assert(mr.end() == old_end, "post-condition");
+
+  ergo_verbose3(ErgoHeapSizing,
+                "shrink the heap",
+                ergo_format_byte("requested shrinking amount")
+                ergo_format_byte("aligned shrinking amount")
+                ergo_format_byte("attempted shrinking amount"),
+                shrink_bytes, aligned_shrink_bytes, mr.byte_size());
   if (mr.byte_size() > 0) {
     if (_hr_printer.is_active()) {
       HeapWord* curr = mr.end();
@@ -1740,13 +1733,11 @@
     _expansion_regions += num_regions_deleted;
     update_committed_space(old_end, new_end);
     HeapRegionRemSet::shrink_heap(n_regions());
-
-    if (Verbose && PrintGC) {
-      size_t new_mem_size = _g1_storage.committed_size();
-      gclog_or_tty->print_cr("Shrinking garbage-first heap from %ldK by %ldK to %ldK",
-                             old_mem_size/K, aligned_shrink_bytes/K,
-                             new_mem_size/K);
-    }
+    g1_policy()->record_new_heap_size(n_regions());
+  } else {
+    ergo_verbose0(ErgoHeapSizing,
+                  "did not shrink the heap",
+                  ergo_format_reason("heap shrinking operation failed"));
   }
 }
 
@@ -3534,6 +3525,19 @@
 
       init_mutator_alloc_region();
 
+      {
+        size_t expand_bytes = g1_policy()->expansion_amount();
+        if (expand_bytes > 0) {
+          size_t bytes_before = capacity();
+          if (!expand(expand_bytes)) {
+            // We failed to expand the heap so let's verify that
+            // committed/uncommitted amount match the backing store
+            assert(capacity() == _g1_storage.committed_size(), "committed size mismatch");
+            assert(max_capacity() == _g1_storage.reserved_size(), "reserved size mismatch");
+          }
+        }
+      }
+
       double end_time_sec = os::elapsedTime();
       double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
       g1_policy()->record_pause_time_ms(pause_time_ms);
@@ -3579,6 +3583,8 @@
         size_t expand_bytes = g1_policy()->expansion_amount();
         if (expand_bytes > 0) {
           size_t bytes_before = capacity();
+          // No need for an ergo verbose message here,
+          // expansion_amount() does this when it returns a value > 0.
           if (!expand(expand_bytes)) {
             // We failed to expand the heap so let's verify that
             // committed/uncommitted amount match the backing store
@@ -3732,13 +3738,6 @@
   bool do_object_b(oop p) {
     // It is reachable if it is outside the collection set, or is inside
     // and forwarded.
-
-#ifdef G1_DEBUG
-    gclog_or_tty->print_cr("is alive "PTR_FORMAT" in CS %d forwarded %d overall %d",
-                           (void*) p, _g1->obj_in_cs(p), p->is_forwarded(),
-                           !_g1->obj_in_cs(p) || p->is_forwarded());
-#endif // G1_DEBUG
-
     return !_g1->obj_in_cs(p) || p->is_forwarded();
   }
 };
@@ -3750,20 +3749,9 @@
   void do_oop(narrowOop* p) { guarantee(false, "Not needed"); }
   void do_oop(      oop* p) {
     oop obj = *p;
-#ifdef G1_DEBUG
-    if (PrintGC && Verbose) {
-      gclog_or_tty->print_cr("keep alive *"PTR_FORMAT" = "PTR_FORMAT" "PTR_FORMAT,
-                             p, (void*) obj, (void*) *p);
-    }
-#endif // G1_DEBUG
-
     if (_g1->obj_in_cs(obj)) {
       assert( obj->is_forwarded(), "invariant" );
       *p = obj->forwardee();
-#ifdef G1_DEBUG
-      gclog_or_tty->print_cr("     in CSet: moved "PTR_FORMAT" -> "PTR_FORMAT,
-                             (void*) obj, (void*) *p);
-#endif // G1_DEBUG
     }
   }
 };
@@ -4069,6 +4057,23 @@
 }
 #endif // PRODUCT
 
+G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) :
+  ParGCAllocBuffer(gclab_word_size),
+  _should_mark_objects(false),
+  _bitmap(G1CollectedHeap::heap()->reserved_region().start(), gclab_word_size),
+  _retired(false)
+{
+  // _should_mark_objects is set to true when G1ParCopyHelper needs to
+  // mark the forwarded location of an evacuated object.
+  // We set _should_mark_objects to true if marking is active, i.e. when we
+  // need to propagate a mark, or during an initial mark pause, i.e. when we
+  // need to mark objects immediately reachable by the roots.
+  if (G1CollectedHeap::heap()->mark_in_progress() ||
+      G1CollectedHeap::heap()->g1_policy()->during_initial_mark_pause()) {
+    _should_mark_objects = true;
+  }
+}
+
 G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num)
   : _g1h(g1h),
     _refs(g1h->task_queue(queue_num)),
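
[Reviewer note] Moving this constructor out of g1CollectedHeap.hpp (see the
header hunk below) is plausibly forced by the new
g1_policy()->during_initial_mark_pause() call, which needs the full
G1CollectorPolicy definition that the header should not pull in. A minimal
sketch of the out-of-line-constructor pattern (names illustrative):

    #include <cstdio>

    // --- widget.hpp: only a forward declaration of Policy is needed ---
    class Policy;                 // illustrative stand-in
    class Widget {
      bool _flag;
     public:
      Widget();                   // defined out of line, in the .cpp
      bool flag() const { return _flag; }
    };

    // --- widget.cpp: the full Policy definition is visible here ---
    class Policy {
     public:
      bool active() const { return true; }
    };
    static Policy g_policy;

    Widget::Widget() : _flag(g_policy.active()) {}  // legal: Policy is complete

    int main() {
      Widget w;
      std::printf("%d\n", w.flag());  // 1
    }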
@@ -4184,12 +4189,14 @@
 
 G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
   _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()),
-  _par_scan_state(par_scan_state) { }
-
-template <class T> void G1ParCopyHelper::mark_forwardee(T* p) {
-  // This is called _after_ do_oop_work has been called, hence after
-  // the object has been relocated to its new location and *p points
-  // to its new location.
+  _par_scan_state(par_scan_state),
+  _during_initial_mark(_g1->g1_policy()->during_initial_mark_pause()),
+  _mark_in_progress(_g1->mark_in_progress()) { }
+
+template <class T> void G1ParCopyHelper::mark_object(T* p) {
+  // This is called from do_oop_work for objects that are not
+  // in the collection set. Objects in the collection set
+  // are marked after they have been evacuated.
 
   T heap_oop = oopDesc::load_heap_oop(p);
   if (!oopDesc::is_null(heap_oop)) {
@@ -4201,7 +4208,7 @@
   }
 }
 
-oop G1ParCopyHelper::copy_to_survivor_space(oop old) {
+oop G1ParCopyHelper::copy_to_survivor_space(oop old, bool should_mark_copy) {
   size_t    word_sz = old->size();
   HeapRegion* from_region = _g1->heap_region_containing_raw(old);
   // +1 to make the -1 indexes valid...
@@ -4257,8 +4264,8 @@
       obj->set_mark(m);
     }
 
-    // preserve "next" mark bit
-    if (_g1->mark_in_progress() && !_g1->is_obj_ill(old)) {
+    // Mark the evacuated object or propagate "next" mark bit
+    if (should_mark_copy) {
       if (!use_local_bitmaps ||
           !_par_scan_state->alloc_buffer(alloc_purpose)->mark(obj_ptr)) {
         // if we couldn't mark it on the local bitmap (this happens when
@@ -4266,11 +4273,12 @@
         // the bullet and do the standard parallel mark
         _cm->markAndGrayObjectIfNecessary(obj);
       }
-#if 1
+
       if (_g1->isMarkedNext(old)) {
+        // Unmark the object's old location so that marking
+        // doesn't think the old object is alive.
         _cm->nextMarkBitMap()->parClear((HeapWord*)old);
       }
-#endif
     }
 
     size_t* surv_young_words = _par_scan_state->surviving_young_words();
@@ -4293,26 +4301,62 @@
   return obj;
 }
 
-template <bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
+template <bool do_gen_barrier, G1Barrier barrier, bool do_mark_object>
 template <class T>
-void G1ParCopyClosure <do_gen_barrier, barrier, do_mark_forwardee>
+void G1ParCopyClosure<do_gen_barrier, barrier, do_mark_object>
 ::do_oop_work(T* p) {
   oop obj = oopDesc::load_decode_heap_oop(p);
   assert(barrier != G1BarrierRS || obj != NULL,
          "Precondition: G1BarrierRS implies obj is nonNull");
 
+  // Marking:
+  // If the object is in the collection set, then the thread
+  // that copies the object should mark, or propagate the
+  // mark to, the evacuated object.
+  // If the object is not in the collection set then we
+  // should call the mark_object() method depending on the
+  // value of the template parameter do_mark_object (which will
+  // be true for root scanning closures during an initial mark
+  // pause).
+  // The mark_object() method first checks whether the object
+  // is marked and, if not, attempts to mark the object.
+
   // here the null check is implicit in the cset_fast_test() test
   if (_g1->in_cset_fast_test(obj)) {
     if (obj->is_forwarded()) {
       oopDesc::encode_store_heap_oop(p, obj->forwardee());
+      // If we are a root scanning closure during an initial
+      // mark pause (i.e. do_mark_object will be true) then
+      // we also need to handle marking of roots in the
+      // event of an evacuation failure. In the event of an
+      // evacuation failure, the object is forwarded to itself
+      // and not copied so let's mark it here.
+      if (do_mark_object && obj->forwardee() == obj) {
+        mark_object(p);
+      }
     } else {
-      oop copy_oop = copy_to_survivor_space(obj);
+      // We need to mark the copied object if we're a root scanning
+      // closure during an initial mark pause (i.e. do_mark_object
+      // will be true), or the object is already marked and we need
+      // to propagate the mark to the evacuated copy.
+      bool should_mark_copy = do_mark_object ||
+                              _during_initial_mark ||
+                              (_mark_in_progress && !_g1->is_obj_ill(obj));
+
+      oop copy_oop = copy_to_survivor_space(obj, should_mark_copy);
       oopDesc::encode_store_heap_oop(p, copy_oop);
     }
     // When scanning the RS, we only care about objs in CS.
     if (barrier == G1BarrierRS) {
       _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
     }
+  } else {
+    // The object is not in collection set. If we're a root scanning
+    // closure during an initial mark pause (i.e. do_mark_object will
+    // be true) then attempt to mark the object.
+    if (do_mark_object) {
+      mark_object(p);
+    }
   }
 
   if (barrier == G1BarrierEvac && obj != NULL) {
@@ -4557,7 +4601,6 @@
     // keep entries (which are added by the marking threads) on them
     // live until they can be processed at the end of marking.
     ref_processor()->weak_oops_do(&buf_scan_non_heap_roots);
-    ref_processor()->oops_do(&buf_scan_non_heap_roots);
   }
 
   // Finish up any enqueued closure apps (attributed as object copy time).
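
[Reviewer note] The comment blocks added to do_oop_work above spell out several
marking cases during evacuation; the should_mark_copy decision can be
summarized as one small predicate. A restatement for clarity, with
free-standing stand-ins for the closure's fields:

    #include <cstdio>

    // Stand-ins for the closure state described in the comments above.
    struct MarkDecision {
      bool do_mark_object;      // template param: root scanning at initial mark
      bool during_initial_mark; // _during_initial_mark
      bool mark_in_progress;    // _mark_in_progress
    };

    // Mirrors the should_mark_copy computation for a not-yet-forwarded object
    // in the collection set (obj_is_live stands in for !is_obj_ill(obj)).
    static bool should_mark_copy(const MarkDecision& d, bool obj_is_live) {
      return d.do_mark_object ||
             d.during_initial_mark ||
             (d.mark_in_progress && obj_is_live);
    }

    int main() {
      MarkDecision root_scan_initial_mark = { true, true, false };
      MarkDecision plain_pause            = { false, false, false };
      std::printf("%d %d\n",
                  should_mark_copy(root_scan_initial_mark, false),  // 1
                  should_mark_copy(plain_pause, true));             // 0
    }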
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -1715,26 +1715,22 @@
 class G1ParGCAllocBuffer: public ParGCAllocBuffer {
 private:
   bool        _retired;
-  bool        _during_marking;
+  bool        _should_mark_objects;
   GCLabBitMap _bitmap;
 
 public:
-  G1ParGCAllocBuffer(size_t gclab_word_size) :
-    ParGCAllocBuffer(gclab_word_size),
-    _during_marking(G1CollectedHeap::heap()->mark_in_progress()),
-    _bitmap(G1CollectedHeap::heap()->reserved_region().start(), gclab_word_size),
-    _retired(false)
-  { }
+  G1ParGCAllocBuffer(size_t gclab_word_size);
 
   inline bool mark(HeapWord* addr) {
     guarantee(use_local_bitmaps, "invariant");
-    assert(_during_marking, "invariant");
+    assert(_should_mark_objects, "invariant");
     return _bitmap.mark(addr);
   }
 
   inline void set_buf(HeapWord* buf) {
-    if (use_local_bitmaps && _during_marking)
+    if (use_local_bitmaps && _should_mark_objects) {
       _bitmap.set_buffer(buf);
+    }
     ParGCAllocBuffer::set_buf(buf);
     _retired = false;
   }
@@ -1742,7 +1738,7 @@
   inline void retire(bool end_of_gc, bool retain) {
     if (_retired)
       return;
-    if (use_local_bitmaps && _during_marking) {
+    if (use_local_bitmaps && _should_mark_objects) {
       _bitmap.retire();
     }
     ParGCAllocBuffer::retire(end_of_gc, retain);
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -28,6 +28,7 @@
 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
+#include "gc_implementation/g1/g1ErgoVerbose.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "gc_implementation/shared/gcPolicyCounters.hpp"
 #include "runtime/arguments.hpp"
@@ -145,6 +146,7 @@
   _stop_world_start(0.0),
   _all_stop_world_times_ms(new NumberSeq()),
   _all_yield_times_ms(new NumberSeq()),
+  _using_new_ratio_calculations(false),
 
   _all_mod_union_times_ms(new NumberSeq()),
 
@@ -271,15 +273,26 @@
   _recorded_survivor_tail(NULL),
   _survivors_age_table(true),
 
-  _gc_overhead_perc(0.0)
-
-{
+  _gc_overhead_perc(0.0) {
+
   // Set up the region size and associated fields. Given that the
   // policy is created before the heap, we have to set this up here,
   // so it's done as soon as possible.
   HeapRegion::setup_heap_region_size(Arguments::min_heap_size());
   HeapRegionRemSet::setup_remset_size();
 
+  G1ErgoVerbose::initialize();
+  if (PrintAdaptiveSizePolicy) {
+    // Currently, we only use a single switch for all the heuristics.
+    G1ErgoVerbose::set_enabled(true);
+    // Given that we don't currently have a verboseness level
+    // parameter, we'll hardcode this to high. This can be easily
+    // changed in the future.
+    G1ErgoVerbose::set_level(ErgoHigh);
+  } else {
+    G1ErgoVerbose::set_enabled(false);
+  }
+
   // Verify PLAB sizes
   const uint region_size = HeapRegion::GrainWords;
   if (YoungPLABSize > region_size || OldPLABSize > region_size) {
@@ -402,7 +415,7 @@
   _concurrent_mark_cleanup_times_ms->add(0.20);
   _tenuring_threshold = MaxTenuringThreshold;
   // _max_survivor_regions will be calculated by
-  // calculate_young_list_target_length() during initialization.
+  // update_young_list_target_length() during initialization.
   _max_survivor_regions = 0;
 
   assert(GCTimeRatio > 0,
@@ -410,6 +423,18 @@
          "if a user set it to 0");
   _gc_overhead_perc = 100.0 * (1.0 / (1.0 + GCTimeRatio));
 
+  uintx reserve_perc = G1ReservePercent;
+  // Put an artificial ceiling on this so that it's not set to a silly value.
+  if (reserve_perc > 50) {
+    reserve_perc = 50;
+    warning("G1ReservePercent is set to a value that is too large, "
+            "it's been updated to %u", reserve_perc);
+  }
+  _reserve_factor = (double) reserve_perc / 100.0;
+  // This will be set when the heap is expanded
+  // for the first time during initialization.
+  _reserve_regions = 0;
+
   initialize_all();
 }
 
@@ -434,16 +459,15 @@
 // ParallelScavengeHeap::initialize()). We might change this in the
 // future, but it's a good start.
 class G1YoungGenSizer : public TwoGenerationCollectorPolicy {
-  size_t size_to_region_num(size_t byte_size) {
-    return MAX2((size_t) 1, byte_size / HeapRegion::GrainBytes);
-  }
 
 public:
   G1YoungGenSizer() {
     initialize_flags();
     initialize_size_info();
   }
-
+  size_t size_to_region_num(size_t byte_size) {
+    return MAX2((size_t) 1, byte_size / HeapRegion::GrainBytes);
+  }
   size_t min_young_region_num() {
     return size_to_region_num(_min_gen0_size);
   }
@@ -455,6 +479,13 @@
   }
 };
 
+void G1CollectorPolicy::update_young_list_size_using_newratio(size_t number_of_heap_regions) {
+  assert(number_of_heap_regions > 0, "Heap must be initialized");
+  size_t young_size = number_of_heap_regions / (NewRatio + 1);
+  _min_desired_young_length = young_size;
+  _max_desired_young_length = young_size;
+}
+
 void G1CollectorPolicy::init() {
   // Set aside an initial future to_space.
   _g1 = G1CollectedHeap::heap();
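
[Reviewer note] update_young_list_size_using_newratio() above pins both the
min and max desired young length to the same NewRatio-derived value, so the
young gen becomes a fixed fraction of the current region count. Worked
numbers:

    #include <cstdio>

    int main() {
      // With -XX:NewRatio=2 the heap is split old:young = 2:1,
      // i.e. young = regions / (NewRatio + 1).
      size_t number_of_heap_regions = 256;
      size_t NewRatio = 2;                      // stand-in for the VM flag
      size_t young_size = number_of_heap_regions / (NewRatio + 1);  // 85
      std::printf("young regions: %u of %u\n",
                  (unsigned) young_size, (unsigned) number_of_heap_regions);
    }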
@@ -465,18 +496,35 @@
 
   G1YoungGenSizer sizer;
   size_t initial_region_num = sizer.initial_young_region_num();
-
-  if (UseAdaptiveSizePolicy) {
-    set_adaptive_young_list_length(true);
+  _min_desired_young_length = sizer.min_young_region_num();
+  _max_desired_young_length = sizer.max_young_region_num();
+
+  if (FLAG_IS_CMDLINE(NewRatio)) {
+    if (FLAG_IS_CMDLINE(NewSize) || FLAG_IS_CMDLINE(MaxNewSize)) {
+      gclog_or_tty->print_cr("-XX:NewSize and -XX:MaxNewSize overrides -XX:NewRatio");
+    } else {
+      // Treat NewRatio as a fixed size that is only recalculated when the heap size changes
+      size_t heap_regions = sizer.size_to_region_num(_g1->n_regions());
+      update_young_list_size_using_newratio(heap_regions);
+      _using_new_ratio_calculations = true;
+    }
+  }
+
+  // GenCollectorPolicy guarantees that min <= initial <= max.
+  // Asserting here just to state that we rely on this property.
+  assert(_min_desired_young_length <= _max_desired_young_length, "Invalid min/max young gen size values");
+  assert(initial_region_num <= _max_desired_young_length, "Initial young gen size too large");
+  assert(_min_desired_young_length <= initial_region_num, "Initial young gen size too small");
+
+  set_adaptive_young_list_length(_min_desired_young_length < _max_desired_young_length);
+  if (adaptive_young_list_length()) {
     _young_list_fixed_length = 0;
   } else {
-    set_adaptive_young_list_length(false);
     _young_list_fixed_length = initial_region_num;
   }
   _free_regions_at_end_of_collection = _g1->free_regions();
-  calculate_young_list_min_length();
-  guarantee( _young_list_min_length == 0, "invariant, not enough info" );
-  calculate_young_list_target_length();
+  update_young_list_target_length();
+  _prev_eden_capacity = _young_list_target_length * HeapRegion::GrainBytes;
 
   // We may immediately start allocating regions and placing them on the
   // collection set list. Initialize the per-collection set info
@@ -484,236 +532,259 @@
 }
 
 // Create the jstat counters for the policy.
-void G1CollectorPolicy::initialize_gc_policy_counters()
-{
+void G1CollectorPolicy::initialize_gc_policy_counters() {
   _gc_policy_counters = new GCPolicyCounters("GarbageFirst", 1, 3);
 }
 
-void G1CollectorPolicy::calculate_young_list_min_length() {
-  _young_list_min_length = 0;
-
-  if (!adaptive_young_list_length())
-    return;
-
-  if (_alloc_rate_ms_seq->num() > 3) {
-    double now_sec = os::elapsedTime();
-    double when_ms = _mmu_tracker->when_max_gc_sec(now_sec) * 1000.0;
-    double alloc_rate_ms = predict_alloc_rate_ms();
-    size_t min_regions = (size_t) ceil(alloc_rate_ms * when_ms);
-    size_t current_region_num = _g1->young_list()->length();
-    _young_list_min_length = min_regions + current_region_num;
+bool G1CollectorPolicy::predict_will_fit(size_t young_length,
+                                         double base_time_ms,
+                                         size_t base_free_regions,
+                                         double target_pause_time_ms) {
+  if (young_length >= base_free_regions) {
+    // end condition 1: not enough space for the young regions
+    return false;
+  }
+
+  double accum_surv_rate = accum_yg_surv_rate_pred((int)(young_length - 1));
+  size_t bytes_to_copy =
+               (size_t) (accum_surv_rate * (double) HeapRegion::GrainBytes);
+  double copy_time_ms = predict_object_copy_time_ms(bytes_to_copy);
+  double young_other_time_ms = predict_young_other_time_ms(young_length);
+  double pause_time_ms = base_time_ms + copy_time_ms + young_other_time_ms;
+  if (pause_time_ms > target_pause_time_ms) {
+    // end condition 2: prediction is over the target pause time
+    return false;
+  }
+
+  size_t free_bytes =
+                  (base_free_regions - young_length) * HeapRegion::GrainBytes;
+  if ((2.0 * sigma()) * (double) bytes_to_copy > (double) free_bytes) {
+    // end condition 3: out-of-space (conservatively!)
+    return false;
+  }
+
+  // success!
+  return true;
+}
+
+void G1CollectorPolicy::record_new_heap_size(size_t new_number_of_regions) {
+  // re-calculate the necessary reserve
+  double reserve_regions_d = (double) new_number_of_regions * _reserve_factor;
+  // We use ceiling so that if reserve_regions_d is > 0.0 (but
+  // smaller than 1.0) we'll get 1.
+  _reserve_regions = (size_t) ceil(reserve_regions_d);
+
+  if (_using_new_ratio_calculations) {
+    // -XX:NewRatio was specified so we need to update the
+    // young gen length when the heap size has changed.
+    update_young_list_size_using_newratio(new_number_of_regions);
   }
 }
 
-void G1CollectorPolicy::calculate_young_list_target_length() {
+size_t G1CollectorPolicy::calculate_young_list_desired_min_length(
+                                                     size_t base_min_length) {
+  size_t desired_min_length = 0;
   if (adaptive_young_list_length()) {
-    size_t rs_lengths = (size_t) get_new_prediction(_rs_lengths_seq);
-    calculate_young_list_target_length(rs_lengths);
-  } else {
-    if (full_young_gcs())
-      _young_list_target_length = _young_list_fixed_length;
-    else
-      _young_list_target_length = _young_list_fixed_length / 2;
+    if (_alloc_rate_ms_seq->num() > 3) {
+      double now_sec = os::elapsedTime();
+      double when_ms = _mmu_tracker->when_max_gc_sec(now_sec) * 1000.0;
+      double alloc_rate_ms = predict_alloc_rate_ms();
+      desired_min_length = (size_t) ceil(alloc_rate_ms * when_ms);
+    } else {
+      // otherwise we don't have enough info to make the prediction
+    }
   }
-
-  // Make sure we allow the application to allocate at least one
-  // region before we need to do a collection again.
-  size_t min_length = _g1->young_list()->length() + 1;
-  _young_list_target_length = MAX2(_young_list_target_length, min_length);
-  calculate_max_gc_locker_expansion();
-  calculate_survivors_policy();
+  desired_min_length += base_min_length;
+  // make sure we don't go below any user-defined minimum bound
+  return MAX2(_min_desired_young_length, desired_min_length);
 }
 
-void G1CollectorPolicy::calculate_young_list_target_length(size_t rs_lengths) {
-  guarantee( adaptive_young_list_length(), "pre-condition" );
-  guarantee( !_in_marking_window || !_last_full_young_gc, "invariant" );
-
-  double start_time_sec = os::elapsedTime();
-  size_t min_reserve_perc = MAX2((size_t)2, (size_t)G1ReservePercent);
-  min_reserve_perc = MIN2((size_t) 50, min_reserve_perc);
-  size_t reserve_regions =
-    (size_t) ((double) min_reserve_perc * (double) _g1->n_regions() / 100.0);
-
-  if (full_young_gcs() && _free_regions_at_end_of_collection > 0) {
-    // we are in fully-young mode and there are free regions in the heap
-
-    double survivor_regions_evac_time =
-        predict_survivor_regions_evac_time();
-
-    double target_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0;
-    size_t pending_cards = (size_t) get_new_prediction(_pending_cards_seq);
-    size_t adj_rs_lengths = rs_lengths + predict_rs_length_diff();
-    size_t scanned_cards = predict_young_card_num(adj_rs_lengths);
-    double base_time_ms = predict_base_elapsed_time_ms(pending_cards, scanned_cards)
-                          + survivor_regions_evac_time;
-
-    // the result
-    size_t final_young_length = 0;
-
-    size_t init_free_regions =
-      MAX2((size_t)0, _free_regions_at_end_of_collection - reserve_regions);
-
-    // if we're still under the pause target...
-    if (base_time_ms <= target_pause_time_ms) {
-      // We make sure that the shortest young length that makes sense
-      // fits within the target pause time.
-      size_t min_young_length = 1;
-
-      if (predict_will_fit(min_young_length, base_time_ms,
-                                     init_free_regions, target_pause_time_ms)) {
-        // The shortest young length will fit within the target pause time;
-        // we'll now check whether the absolute maximum number of young
-        // regions will fit in the target pause time. If not, we'll do
-        // a binary search between min_young_length and max_young_length
-        size_t abs_max_young_length = _free_regions_at_end_of_collection - 1;
-        size_t max_young_length = abs_max_young_length;
-
-        if (max_young_length > min_young_length) {
-          // Let's check if the initial max young length will fit within the
-          // target pause. If so then there is no need to search for a maximal
-          // young length - we'll return the initial maximum
-
-          if (predict_will_fit(max_young_length, base_time_ms,
-                                init_free_regions, target_pause_time_ms)) {
-            // The maximum young length will satisfy the target pause time.
-            // We are done so set min young length to this maximum length.
-            // The code after the loop will then set final_young_length using
-            // the value cached in the minimum length.
-            min_young_length = max_young_length;
-          } else {
-            // The maximum possible number of young regions will not fit within
-            // the target pause time so let's search....
-
-            size_t diff = (max_young_length - min_young_length) / 2;
-            max_young_length = min_young_length + diff;
-
-            while (max_young_length > min_young_length) {
-              if (predict_will_fit(max_young_length, base_time_ms,
-                                        init_free_regions, target_pause_time_ms)) {
-
-                // The current max young length will fit within the target
-                // pause time. Note we do not exit the loop here. By setting
-                // min = max, and then increasing the max below means that
-                // we will continue searching for an upper bound in the
-                // range [max..max+diff]
-                min_young_length = max_young_length;
-              }
-              diff = (max_young_length - min_young_length) / 2;
-              max_young_length = min_young_length + diff;
-            }
-            // the above loop found a maximal young length that will fit
-            // within the target pause time.
-          }
-          assert(min_young_length <= abs_max_young_length, "just checking");
-        }
-        final_young_length = min_young_length;
-      }
-    }
-    // and we're done!
-
-    // we should have at least one region in the target young length
-    _young_list_target_length =
-                              final_young_length + _recorded_survivor_regions;
-
-    // let's keep an eye of how long we spend on this calculation
-    // right now, I assume that we'll print it when we need it; we
-    // should really adde it to the breakdown of a pause
-    double end_time_sec = os::elapsedTime();
-    double elapsed_time_ms = (end_time_sec - start_time_sec) * 1000.0;
-
-#ifdef TRACE_CALC_YOUNG_LENGTH
-    // leave this in for debugging, just in case
-    gclog_or_tty->print_cr("target = %1.1lf ms, young = " SIZE_FORMAT ", "
-                           "elapsed %1.2lf ms, (%s%s) " SIZE_FORMAT SIZE_FORMAT,
-                           target_pause_time_ms,
-                           _young_list_target_length
-                           elapsed_time_ms,
-                           full_young_gcs() ? "full" : "partial",
-                           during_initial_mark_pause() ? " i-m" : "",
-                           _in_marking_window,
-                           _in_marking_window_im);
-#endif // TRACE_CALC_YOUNG_LENGTH
-
-    if (_young_list_target_length < _young_list_min_length) {
-      // bummer; this means that, if we do a pause when the maximal
-      // length dictates, we'll violate the pause spacing target (the
-      // min length was calculate based on the application's current
-      // alloc rate);
-
-      // so, we have to bite the bullet, and allocate the minimum
-      // number. We'll violate our target, but we just can't meet it.
-
-#ifdef TRACE_CALC_YOUNG_LENGTH
-      // leave this in for debugging, just in case
-      gclog_or_tty->print_cr("adjusted target length from "
-                             SIZE_FORMAT " to " SIZE_FORMAT,
-                             _young_list_target_length, _young_list_min_length);
-#endif // TRACE_CALC_YOUNG_LENGTH
-
-      _young_list_target_length = _young_list_min_length;
+size_t G1CollectorPolicy::calculate_young_list_desired_max_length() {
+  // Here, we might want to also take into account any additional
+  // constraints (i.e., user-defined minimum bound). Currently, we
+  // effectively don't set this bound.
+  return _max_desired_young_length;
+}
+
+void G1CollectorPolicy::update_young_list_target_length(size_t rs_lengths) {
+  if (rs_lengths == (size_t) -1) {
+    // if it's set to the default value (-1), we should predict it;
+    // otherwise, use the given value.
+    rs_lengths = (size_t) get_new_prediction(_rs_lengths_seq);
+  }
+
+  // Calculate the absolute and desired min bounds.
+
+  // This is how many young regions we already have (currently: the survivors).
+  size_t base_min_length = recorded_survivor_regions();
+  // This is the absolute minimum young length, which ensures that we
+  // can allocate one eden region in the worst-case.
+  size_t absolute_min_length = base_min_length + 1;
+  size_t desired_min_length =
+                     calculate_young_list_desired_min_length(base_min_length);
+  if (desired_min_length < absolute_min_length) {
+    desired_min_length = absolute_min_length;
+  }
+
+  // Calculate the absolute and desired max bounds.
+
+  // We will try our best not to "eat" into the reserve.
+  size_t absolute_max_length = 0;
+  if (_free_regions_at_end_of_collection > _reserve_regions) {
+    absolute_max_length = _free_regions_at_end_of_collection - _reserve_regions;
+  }
+  size_t desired_max_length = calculate_young_list_desired_max_length();
+  if (desired_max_length > absolute_max_length) {
+    desired_max_length = absolute_max_length;
+  }
+
+  size_t young_list_target_length = 0;
+  if (adaptive_young_list_length()) {
+    if (full_young_gcs()) {
+      young_list_target_length =
+                        calculate_young_list_target_length(rs_lengths,
+                                                           base_min_length,
+                                                           desired_min_length,
+                                                           desired_max_length);
+      _rs_lengths_prediction = rs_lengths;
+    } else {
+      // Don't calculate anything and let the code below bound it to
+      // the desired_min_length, i.e., do the next GC as soon as
+      // possible to maximize how many old regions we can add to it.
     }
   } else {
-    // we are in a partially-young mode or we've run out of regions (due
-    // to evacuation failure)
-
-#ifdef TRACE_CALC_YOUNG_LENGTH
-    // leave this in for debugging, just in case
-    gclog_or_tty->print_cr("(partial) setting target to " SIZE_FORMAT
-                           _young_list_min_length);
-#endif // TRACE_CALC_YOUNG_LENGTH
-    // we'll do the pause as soon as possible by choosing the minimum
-    _young_list_target_length = _young_list_min_length;
+    if (full_young_gcs()) {
+      young_list_target_length = _young_list_fixed_length;
+    } else {
+      // A bit arbitrary: during partially-young GCs we allocate half
+      // the young regions to try to add old regions to the CSet.
+      young_list_target_length = _young_list_fixed_length / 2;
+      // We choose to accept that we might go under the desired min
+      // length given that we intentionally ask for a smaller young gen.
+      desired_min_length = absolute_min_length;
+    }
   }
 
-  _rs_lengths_prediction = rs_lengths;
+  // Make sure we don't go over the desired max length, nor under the
+  // desired min length. In case they clash, desired_min_length wins
+  // which is why that test is second.
+  if (young_list_target_length > desired_max_length) {
+    young_list_target_length = desired_max_length;
+  }
+  if (young_list_target_length < desired_min_length) {
+    young_list_target_length = desired_min_length;
+  }
+
+  assert(young_list_target_length > recorded_survivor_regions(),
+         "we should be able to allocate at least one eden region");
+  assert(young_list_target_length >= absolute_min_length, "post-condition");
+  _young_list_target_length = young_list_target_length;
+
+  update_max_gc_locker_expansion();
 }
 
-// This is used by: calculate_young_list_target_length(rs_length). It
-// returns true iff:
-//   the predicted pause time for the given young list will not overflow
-//   the target pause time
-// and:
-//   the predicted amount of surviving data will not overflow the
-//   the amount of free space available for survivor regions.
-//
-bool
-G1CollectorPolicy::predict_will_fit(size_t young_length,
-                                    double base_time_ms,
-                                    size_t init_free_regions,
-                                    double target_pause_time_ms) {
-
-  if (young_length >= init_free_regions)
-    // end condition 1: not enough space for the young regions
-    return false;
-
-  double accum_surv_rate_adj = 0.0;
-  double accum_surv_rate =
-    accum_yg_surv_rate_pred((int)(young_length - 1)) - accum_surv_rate_adj;
-
-  size_t bytes_to_copy =
-    (size_t) (accum_surv_rate * (double) HeapRegion::GrainBytes);
-
-  double copy_time_ms = predict_object_copy_time_ms(bytes_to_copy);
-
-  double young_other_time_ms =
-                       predict_young_other_time_ms(young_length);
-
-  double pause_time_ms =
-                   base_time_ms + copy_time_ms + young_other_time_ms;
-
-  if (pause_time_ms > target_pause_time_ms)
-    // end condition 2: over the target pause time
-    return false;
-
-  size_t free_bytes =
-                 (init_free_regions - young_length) * HeapRegion::GrainBytes;
-
-  if ((2.0 + sigma()) * (double) bytes_to_copy > (double) free_bytes)
-    // end condition 3: out of to-space (conservatively)
-    return false;
-
-  // success!
-  return true;
+size_t
+G1CollectorPolicy::calculate_young_list_target_length(size_t rs_lengths,
+                                                   size_t base_min_length,
+                                                   size_t desired_min_length,
+                                                   size_t desired_max_length) {
+  assert(adaptive_young_list_length(), "pre-condition");
+  assert(full_young_gcs(), "only call this for fully-young GCs");
+
+  // In case some edge-condition makes the desired max length too small...
+  if (desired_max_length <= desired_min_length) {
+    return desired_min_length;
+  }
+
+  // We'll adjust min_young_length and max_young_length not to include
+  // the already allocated young regions (i.e., so they reflect the
+  // min and max eden regions we'll allocate). The base_min_length
+  // will be reflected in the predictions by the
+  // survivor_regions_evac_time prediction.
+  assert(desired_min_length > base_min_length, "invariant");
+  size_t min_young_length = desired_min_length - base_min_length;
+  assert(desired_max_length > base_min_length, "invariant");
+  size_t max_young_length = desired_max_length - base_min_length;
+
+  double target_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0;
+  double survivor_regions_evac_time = predict_survivor_regions_evac_time();
+  size_t pending_cards = (size_t) get_new_prediction(_pending_cards_seq);
+  size_t adj_rs_lengths = rs_lengths + predict_rs_length_diff();
+  size_t scanned_cards = predict_young_card_num(adj_rs_lengths);
+  double base_time_ms =
+    predict_base_elapsed_time_ms(pending_cards, scanned_cards) +
+    survivor_regions_evac_time;
+  size_t available_free_regions = _free_regions_at_end_of_collection;
+  size_t base_free_regions = 0;
+  if (available_free_regions > _reserve_regions) {
+    base_free_regions = available_free_regions - _reserve_regions;
+  }
+
+  // Here, we will make sure that the shortest young length that
+  // makes sense fits within the target pause time.
+
+  if (predict_will_fit(min_young_length, base_time_ms,
+                       base_free_regions, target_pause_time_ms)) {
+    // The shortest young length will fit into the target pause time;
+    // we'll now check whether the absolute maximum number of young
+    // regions will fit in the target pause time. If not, we'll do
+    // a binary search between min_young_length and max_young_length.
+    if (predict_will_fit(max_young_length, base_time_ms,
+                         base_free_regions, target_pause_time_ms)) {
+      // The maximum young length will fit into the target pause time.
+      // We are done so set min young length to the maximum length (as
+      // the result is assumed to be returned in min_young_length).
+      min_young_length = max_young_length;
+    } else {
+      // The maximum possible number of young regions will not fit within
+      // the target pause time so we'll search for the optimal
+      // length. The loop invariants are:
+      //
+      // min_young_length < max_young_length
+      // min_young_length is known to fit into the target pause time
+      // max_young_length is known not to fit into the target pause time
+      //
+      // Going into the loop we know the above hold as we've just
+      // checked them. Every time around the loop we check whether
+      // the middle value between min_young_length and
+      // max_young_length fits into the target pause time. If it
+      // does, it becomes the new min. If it doesn't, it becomes
+      // the new max. This way we maintain the loop invariants.
+
+      assert(min_young_length < max_young_length, "invariant");
+      size_t diff = (max_young_length - min_young_length) / 2;
+      while (diff > 0) {
+        size_t young_length = min_young_length + diff;
+        if (predict_will_fit(young_length, base_time_ms,
+                             base_free_regions, target_pause_time_ms)) {
+          min_young_length = young_length;
+        } else {
+          max_young_length = young_length;
+        }
+        assert(min_young_length <  max_young_length, "invariant");
+        diff = (max_young_length - min_young_length) / 2;
+      }
+      // The result is min_young_length, which, according to the
+      // loop invariants, should fit within the target pause time.
+
+      // These are the post-conditions of the binary search above:
+      assert(min_young_length < max_young_length,
+             "otherwise we should have discovered that max_young_length "
+             "fits into the pause target and not done the binary search");
+      assert(predict_will_fit(min_young_length, base_time_ms,
+                              base_free_regions, target_pause_time_ms),
+             "min_young_length, the result of the binary search, should "
+             "fit into the pause target");
+      assert(!predict_will_fit(min_young_length + 1, base_time_ms,
+                               base_free_regions, target_pause_time_ms),
+             "min_young_length, the result of the binary search, should be "
+             "optimal, so no larger length should fit into the pause target");
+    }
+  } else {
+    // Even the minimum length doesn't fit into the pause time
+    // target; return it as the result nevertheless.
+  }
+  return base_min_length + min_young_length;
 }
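
The binary search above is compact enough to study in isolation. Below is a minimal, self-contained sketch of the same search, assuming a hypothetical fits() predicate in place of predict_will_fit(); the function name and test harness are illustrative, not from the HotSpot sources.

#include <cassert>
#include <cstdio>
#include <cstddef>

// Given fits(min_len) and !fits(max_len) with min_len < max_len, return
// the largest value in [min_len, max_len) satisfying fits(). The loop
// keeps the same invariants as calculate_young_list_target_length().
template <typename Pred>
size_t largest_fitting_length(size_t min_len, size_t max_len, Pred fits) {
  assert(min_len < max_len && fits(min_len) && !fits(max_len));
  size_t diff = (max_len - min_len) / 2;
  while (diff > 0) {
    size_t mid = min_len + diff;
    if (fits(mid)) {
      min_len = mid;  // mid fits: it becomes the new lower bound
    } else {
      max_len = mid;  // mid does not fit: it becomes the new upper bound
    }
    diff = (max_len - min_len) / 2;
  }
  // Post-conditions, as asserted in the method above: fits(min_len)
  // holds and fits(min_len + 1) does not, so min_len is optimal.
  return min_len;
}

int main() {
  // Hypothetical predicate: at most 17 regions fit the pause target.
  auto fits = [](size_t len) { return len <= 17; };
  std::printf("%zu\n", largest_fitting_length(1, 100, fits));  // prints 17
}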
 
 double G1CollectorPolicy::predict_survivor_regions_evac_time() {
@@ -726,17 +797,19 @@
   return survivor_regions_evac_time;
 }
 
-void G1CollectorPolicy::check_prediction_validity() {
+void G1CollectorPolicy::revise_young_list_target_length_if_necessary() {
   guarantee( adaptive_young_list_length(), "should not call this otherwise" );
 
   size_t rs_lengths = _g1->young_list()->sampled_rs_lengths();
   if (rs_lengths > _rs_lengths_prediction) {
     // add 10% to avoid having to recalculate often
     size_t rs_lengths_prediction = rs_lengths * 1100 / 1000;
-    calculate_young_list_target_length(rs_lengths_prediction);
+    update_young_list_target_length(rs_lengths_prediction);
   }
 }
 
+
+
 HeapWord* G1CollectorPolicy::mem_allocate_work(size_t size,
                                                bool is_tlab,
                                                bool* gc_overhead_limit_was_exceeded) {
@@ -843,8 +916,7 @@
   _free_regions_at_end_of_collection = _g1->free_regions();
   // Reset survivors SurvRateGroup.
   _survivor_surv_rate_group->reset();
-  calculate_young_list_min_length();
-  calculate_young_list_target_length();
+  update_young_list_target_length();
 }
 
 void G1CollectorPolicy::record_stop_world_start() {
@@ -859,6 +931,11 @@
     gclog_or_tty->print(" (%s)", full_young_gcs() ? "young" : "partial");
   }
 
+  // We only need to do this here as the policy will only be applied
+  // to the GC we're about to start. So there is no point in
+  // calculating this every time we calculate / recalculate the target
+  // young length.
+  update_survivors_policy();
+
   assert(_g1->used() == _g1->recalculate_used(),
          err_msg("sanity, used: "SIZE_FORMAT" recalculate_used: "SIZE_FORMAT,
                  _g1->used(), _g1->recalculate_used()));
@@ -959,11 +1036,9 @@
 G1CollectorPolicy::
 record_concurrent_mark_cleanup_end_work1(size_t freed_bytes,
                                          size_t max_live_bytes) {
-  if (_n_marks < 2) _n_marks++;
-  if (G1PolicyVerbose > 0)
-    gclog_or_tty->print_cr("At end of marking, max_live is " SIZE_FORMAT " MB "
-                           " (of " SIZE_FORMAT " MB heap).",
-                           max_live_bytes/M, _g1->capacity()/M);
+  if (_n_marks < 2) {
+    _n_marks++;
+  }
 }
 
 // The important thing about this is that it includes "os::elapsedTime".
@@ -977,14 +1052,6 @@
   _mmu_tracker->add_pause(_mark_cleanup_start_sec, end_time_sec, true);
 
   _num_markings++;
-
-  // We did a marking, so reset the "since_last_mark" variables.
-  double considerConcMarkCost = 1.0;
-  // If there are available processors, concurrent activity is free...
-  if (Threads::number_of_non_daemon_threads() * 2 <
-      os::active_processor_count()) {
-    considerConcMarkCost = 0.0;
-  }
   _n_pauses_at_mark_end = _n_pauses;
   _n_marks_since_last_pause++;
 }
@@ -994,8 +1061,6 @@
   _should_revert_to_full_young_gcs = false;
   _last_full_young_gc = true;
   _in_marking_window = false;
-  if (adaptive_young_list_length())
-    calculate_young_list_target_length();
 }
 
 void G1CollectorPolicy::record_concurrent_pause() {
@@ -1148,20 +1213,37 @@
   if (last_pause_included_initial_mark)
     record_concurrent_mark_init_end(0.0);
 
-  size_t min_used_targ =
+  size_t marking_initiating_used_threshold =
     (_g1->capacity() / 100) * InitiatingHeapOccupancyPercent;
 
-
   if (!_g1->mark_in_progress() && !_last_full_young_gc) {
     assert(!last_pause_included_initial_mark, "invariant");
-    if (cur_used_bytes > min_used_targ &&
-      cur_used_bytes > _prev_collection_pause_used_at_end_bytes) {
+    if (cur_used_bytes > marking_initiating_used_threshold) {
+      if (cur_used_bytes > _prev_collection_pause_used_at_end_bytes) {
         assert(!during_initial_mark_pause(), "we should not see this here");
 
+        ergo_verbose3(ErgoConcCycles,
+                      "request concurrent cycle initiation",
+                      ergo_format_reason("occupancy higher than threshold")
+                      ergo_format_byte("occupancy")
+                      ergo_format_byte_perc("threshold"),
+                      cur_used_bytes,
+                      marking_initiating_used_threshold,
+                      (double) InitiatingHeapOccupancyPercent);
+
         // Note: this might have already been set, if during the last
         // pause we decided to start a cycle but at the beginning of
         // this pause we decided to postpone it. That's OK.
         set_initiate_conc_mark_if_possible();
+      } else {
+        ergo_verbose2(ErgoConcCycles,
+                  "do not request concurrent cycle initiation",
+                  ergo_format_reason("occupancy lower than previous occupancy")
+                  ergo_format_byte("occupancy")
+                  ergo_format_byte("previous occupancy"),
+                  cur_used_bytes,
+                  _prev_collection_pause_used_at_end_bytes);
+      }
     }
   }
 
@@ -1437,16 +1519,45 @@
   }
 
   if (_last_full_young_gc) {
+    ergo_verbose2(ErgoPartiallyYoungGCs,
+                  "start partially-young GCs",
+                  ergo_format_byte_perc("known garbage"),
+                  _known_garbage_bytes, _known_garbage_ratio * 100.0);
     set_full_young_gcs(false);
     _last_full_young_gc = false;
   }
 
   if ( !_last_young_gc_full ) {
-    if ( _should_revert_to_full_young_gcs ||
-      _known_garbage_ratio < 0.05 ||
-      (adaptive_young_list_length() &&
-      (get_gc_eff_factor() * cur_efficiency < predict_young_gc_eff())) ) {
-        set_full_young_gcs(true);
+    if (_should_revert_to_full_young_gcs) {
+      ergo_verbose2(ErgoPartiallyYoungGCs,
+                    "end partially-young GCs",
+                    ergo_format_reason("partially-young GCs end requested")
+                    ergo_format_byte_perc("known garbage"),
+                    _known_garbage_bytes, _known_garbage_ratio * 100.0);
+      set_full_young_gcs(true);
+    } else if (_known_garbage_ratio < 0.05) {
+      ergo_verbose3(ErgoPartiallyYoungGCs,
+               "end partially-young GCs",
+               ergo_format_reason("known garbage percent lower than threshold")
+               ergo_format_byte_perc("known garbage")
+               ergo_format_perc("threshold"),
+               _known_garbage_bytes, _known_garbage_ratio * 100.0,
+               0.05 * 100.0);
+      set_full_young_gcs(true);
+    } else if (adaptive_young_list_length() &&
+              (get_gc_eff_factor() * cur_efficiency < predict_young_gc_eff())) {
+      ergo_verbose5(ErgoPartiallyYoungGCs,
+                    "end partially-young GCs",
+                    ergo_format_reason("current GC efficiency lower than "
+                                       "predicted fully-young GC efficiency")
+                    ergo_format_double("GC efficiency factor")
+                    ergo_format_double("current GC efficiency")
+                    ergo_format_double("predicted fully-young GC efficiency")
+                    ergo_format_byte_perc("known garbage"),
+                    get_gc_eff_factor(), cur_efficiency,
+                    predict_young_gc_eff(),
+                    _known_garbage_bytes, _known_garbage_ratio * 100.0);
+      set_full_young_gcs(true);
     }
   }
   _should_revert_to_full_young_gcs = false;
@@ -1600,8 +1711,7 @@
   _in_marking_window = new_in_marking_window;
   _in_marking_window_im = new_in_marking_window_im;
   _free_regions_at_end_of_collection = _g1->free_regions();
-  calculate_young_list_min_length();
-  calculate_young_list_target_length();
+  update_young_list_target_length();
 
   // Note that _mmu_tracker->max_gc_time() returns the time in seconds.
   double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
@@ -1622,20 +1732,26 @@
     size_t used_before_gc = _cur_collection_pause_used_at_start_bytes;
     size_t used = _g1->used();
     size_t capacity = _g1->capacity();
+    size_t eden_capacity =
+      (_young_list_target_length * HeapRegion::GrainBytes) - survivor_bytes;
 
     gclog_or_tty->print_cr(
-         "   [Eden: "EXT_SIZE_FORMAT"->"EXT_SIZE_FORMAT" "
-             "Survivors: "EXT_SIZE_FORMAT"->"EXT_SIZE_FORMAT" "
-             "Heap: "EXT_SIZE_FORMAT"("EXT_SIZE_FORMAT")->"
-                     EXT_SIZE_FORMAT"("EXT_SIZE_FORMAT")]",
-             EXT_SIZE_PARAMS(_eden_bytes_before_gc),
-               EXT_SIZE_PARAMS(eden_bytes),
-             EXT_SIZE_PARAMS(_survivor_bytes_before_gc),
-               EXT_SIZE_PARAMS(survivor_bytes),
-             EXT_SIZE_PARAMS(used_before_gc),
-             EXT_SIZE_PARAMS(_capacity_before_gc),
-               EXT_SIZE_PARAMS(used),
-               EXT_SIZE_PARAMS(capacity));
+      "   [Eden: "EXT_SIZE_FORMAT"("EXT_SIZE_FORMAT")->"EXT_SIZE_FORMAT"("EXT_SIZE_FORMAT") "
+      "Survivors: "EXT_SIZE_FORMAT"->"EXT_SIZE_FORMAT" "
+      "Heap: "EXT_SIZE_FORMAT"("EXT_SIZE_FORMAT")->"
+      EXT_SIZE_FORMAT"("EXT_SIZE_FORMAT")]",
+      EXT_SIZE_PARAMS(_eden_bytes_before_gc),
+      EXT_SIZE_PARAMS(_prev_eden_capacity),
+      EXT_SIZE_PARAMS(eden_bytes),
+      EXT_SIZE_PARAMS(eden_capacity),
+      EXT_SIZE_PARAMS(_survivor_bytes_before_gc),
+      EXT_SIZE_PARAMS(survivor_bytes),
+      EXT_SIZE_PARAMS(used_before_gc),
+      EXT_SIZE_PARAMS(_capacity_before_gc),
+      EXT_SIZE_PARAMS(used),
+      EXT_SIZE_PARAMS(capacity));
+
+    _prev_eden_capacity = eden_capacity;
   } else if (PrintGC) {
     _g1->print_size_transition(gclog_or_tty,
                                _cur_collection_pause_used_at_start_bytes,
@@ -1877,6 +1993,12 @@
   // I don't think we need to do this when in young GC mode since
   // marking will be initiated next time we hit the soft limit anyway...
   if (predicted_time_ms > _expensive_region_limit_ms) {
+    ergo_verbose2(ErgoPartiallyYoungGCs,
+              "request partially-young GCs end",
+              ergo_format_reason("predicted region time higher than threshold")
+              ergo_format_ms("predicted region time")
+              ergo_format_ms("threshold"),
+              predicted_time_ms, _expensive_region_limit_ms);
     // no point in doing another partial one
     _should_revert_to_full_young_gcs = true;
   }
@@ -1986,7 +2108,9 @@
 }
 
 size_t G1CollectorPolicy::expansion_amount() {
-  if ((recent_avg_pause_time_ratio() * 100.0) > _gc_overhead_perc) {
+  double recent_gc_overhead = recent_avg_pause_time_ratio() * 100.0;
+  double threshold = _gc_overhead_perc;
+  if (recent_gc_overhead > threshold) {
     // We will double the existing space, or take
     // G1ExpandByPercentOfAvailable % of the available expansion
     // space, whichever is smaller, bounded below by a minimum
@@ -2001,20 +2125,19 @@
     expand_bytes = MIN2(expand_bytes_via_pct, committed_bytes);
     expand_bytes = MAX2(expand_bytes, min_expand_bytes);
     expand_bytes = MIN2(expand_bytes, uncommitted_bytes);
-    if (G1PolicyVerbose > 1) {
-      gclog_or_tty->print("Decided to expand: ratio = %5.2f, "
-                 "committed = %d%s, uncommited = %d%s, via pct = %d%s.\n"
-                 "                   Answer = %d.\n",
-                 recent_avg_pause_time_ratio(),
-                 byte_size_in_proper_unit(committed_bytes),
-                 proper_unit_for_byte_size(committed_bytes),
-                 byte_size_in_proper_unit(uncommitted_bytes),
-                 proper_unit_for_byte_size(uncommitted_bytes),
-                 byte_size_in_proper_unit(expand_bytes_via_pct),
-                 proper_unit_for_byte_size(expand_bytes_via_pct),
-                 byte_size_in_proper_unit(expand_bytes),
-                 proper_unit_for_byte_size(expand_bytes));
-    }
+
+    ergo_verbose5(ErgoHeapSizing,
+                  "attempt heap expansion",
+                  ergo_format_reason("recent GC overhead higher than "
+                                     "threshold after GC")
+                  ergo_format_perc("recent GC overhead")
+                  ergo_format_perc("threshold")
+                  ergo_format_byte("uncommitted")
+                  ergo_format_byte_perc("calculated expansion amount"),
+                  recent_gc_overhead, threshold,
+                  uncommitted_bytes,
+                  expand_bytes_via_pct, (double) G1ExpandByPercentOfAvailable);
+
     return expand_bytes;
   } else {
     return 0;
@@ -2237,8 +2360,7 @@
 #endif // PRODUCT
 }
 
-void
-G1CollectorPolicy::update_region_num(bool young) {
+void G1CollectorPolicy::update_region_num(bool young) {
   if (young) {
     ++_region_num_young;
   } else {
@@ -2270,7 +2392,7 @@
   };
 }
 
-void G1CollectorPolicy::calculate_max_gc_locker_expansion() {
+void G1CollectorPolicy::update_max_gc_locker_expansion() {
   size_t expansion_region_num = 0;
   if (GCLockerEdenExpansionPercent > 0) {
     double perc = (double) GCLockerEdenExpansionPercent / 100.0;
@@ -2286,9 +2408,13 @@
 }
 
 // Calculates survivor space parameters.
-void G1CollectorPolicy::calculate_survivors_policy()
-{
-  _max_survivor_regions = _young_list_target_length / SurvivorRatio;
+void G1CollectorPolicy::update_survivors_policy() {
+  double max_survivor_regions_d =
+                 (double) _young_list_target_length / (double) SurvivorRatio;
+  // We use ceiling so that if max_survivor_regions_d is > 0.0 (but
+  // smaller than 1.0) we'll get 1.
+  _max_survivor_regions = (size_t) ceil(max_survivor_regions_d);
+
   _tenuring_threshold = _survivors_age_table.compute_tenuring_threshold(
         HeapRegion::GrainWords * _max_survivor_regions);
 }
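
A small sketch of the ceiling behavior described in the comment above; survivor_ratio stands in for the SurvivorRatio flag and the harness is hypothetical.

#include <cmath>
#include <cstdio>
#include <cstddef>

int main() {
  const size_t survivor_ratio = 8;  // stand-in for SurvivorRatio
  for (size_t target_length = 0; target_length <= 9; ++target_length) {
    double d = (double) target_length / (double) survivor_ratio;
    size_t max_survivor_regions = (size_t) ceil(d);
    std::printf("target=%zu -> max_survivor_regions=%zu\n",
                target_length, max_survivor_regions);
  }
  // Targets 1..8 all yield 1 region and target 0 yields 0: any non-zero
  // young target reserves at least one survivor region.
  return 0;
}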
@@ -2315,13 +2441,23 @@
 }
 #endif
 
-bool
-G1CollectorPolicy::force_initial_mark_if_outside_cycle() {
+bool G1CollectorPolicy::force_initial_mark_if_outside_cycle(
+                                                     GCCause::Cause gc_cause) {
   bool during_cycle = _g1->concurrent_mark()->cmThread()->during_cycle();
   if (!during_cycle) {
+    ergo_verbose1(ErgoConcCycles,
+                  "request concurrent cycle initiation",
+                  ergo_format_reason("requested by GC cause")
+                  ergo_format_str("GC cause"),
+                  GCCause::to_string(gc_cause));
     set_initiate_conc_mark_if_possible();
     return true;
   } else {
+    ergo_verbose1(ErgoConcCycles,
+                  "do not request concurrent cycle initiation",
+                  ergo_format_reason("concurrent cycle already in progress")
+                  ergo_format_str("GC cause"),
+                  GCCause::to_string(gc_cause));
     return false;
   }
 }
@@ -2353,6 +2489,10 @@
       // And we can now clear initiate_conc_mark_if_possible() as
       // we've already acted on it.
       clear_initiate_conc_mark_if_possible();
+
+      ergo_verbose0(ErgoConcCycles,
+                  "initiate concurrent cycle",
+                  ergo_format_reason("concurrent cycle initiation requested"));
     } else {
       // The concurrent marking thread is still finishing up the
       // previous cycle. If we start one right now the two cycles
@@ -2366,6 +2506,9 @@
       // and, if it's in a yield point, it's waiting for us to
       // finish. So, at this point we will not start a cycle and we'll
       // let the concurrent marking thread complete the last one.
+      ergo_verbose0(ErgoConcCycles,
+                    "do not initiate concurrent cycle",
+                    ergo_format_reason("concurrent cycle already in progress"));
     }
   }
 }
@@ -2756,6 +2899,8 @@
   // Set this here - in case we're not doing young collections.
   double non_young_start_time_sec = os::elapsedTime();
 
+  YoungList* young_list = _g1->young_list();
+
   start_recording_regions();
 
   guarantee(target_pause_time_ms > 0.0,
@@ -2768,61 +2913,62 @@
 
   double time_remaining_ms = target_pause_time_ms - base_time_ms;
 
+  ergo_verbose3(ErgoCSetConstruction | ErgoHigh,
+                "start choosing CSet",
+                ergo_format_ms("predicted base time")
+                ergo_format_ms("remaining time")
+                ergo_format_ms("target pause time"),
+                base_time_ms, time_remaining_ms, target_pause_time_ms);
+
   // the 10% and 50% values are arbitrary...
-  if (time_remaining_ms < 0.10 * target_pause_time_ms) {
+  double threshold = 0.10 * target_pause_time_ms;
+  if (time_remaining_ms < threshold) {
+    double prev_time_remaining_ms = time_remaining_ms;
     time_remaining_ms = 0.50 * target_pause_time_ms;
     _within_target = false;
+    ergo_verbose3(ErgoCSetConstruction,
+                  "adjust remaining time",
+                  ergo_format_reason("remaining time lower than threshold")
+                  ergo_format_ms("remaining time")
+                  ergo_format_ms("threshold")
+                  ergo_format_ms("adjusted remaining time"),
+                  prev_time_remaining_ms, threshold, time_remaining_ms);
   } else {
     _within_target = true;
   }
 
-  // We figure out the number of bytes available for future to-space.
-  // For new regions without marking information, we must assume the
-  // worst-case of complete survival.  If we have marking information for a
-  // region, we can bound the amount of live data.  We can add a number of
-  // such regions, as long as the sum of the live data bounds does not
-  // exceed the available evacuation space.
-  size_t max_live_bytes = _g1->free_regions() * HeapRegion::GrainBytes;
-
-  size_t expansion_bytes =
-    _g1->expansion_regions() * HeapRegion::GrainBytes;
+  size_t expansion_bytes = _g1->expansion_regions() * HeapRegion::GrainBytes;
+
+  HeapRegion* hr;
+  double young_start_time_sec = os::elapsedTime();
 
   _collection_set_bytes_used_before = 0;
   _collection_set_size = 0;
-
-  // Adjust for expansion and slop.
-  max_live_bytes = max_live_bytes + expansion_bytes;
-
-  HeapRegion* hr;
-  double young_start_time_sec = os::elapsedTime();
-
-  if (G1PolicyVerbose > 0) {
-    gclog_or_tty->print_cr("Adding %d young regions to the CSet",
-      _g1->young_list()->length());
-  }
-
   _young_cset_length  = 0;
   _last_young_gc_full = full_young_gcs() ? true : false;
 
-  if (_last_young_gc_full)
+  if (_last_young_gc_full) {
     ++_full_young_pause_num;
-  else
+  } else {
     ++_partial_young_pause_num;
+  }
 
   // The young list is laid out with the survivor regions from the
   // previous pause appended to the RHS of the young list, i.e.
   //   [Newly Young Regions ++ Survivors from last pause].
 
-  hr = _g1->young_list()->first_survivor_region();
+  size_t survivor_region_num = young_list->survivor_length();
+  size_t eden_region_num = young_list->length() - survivor_region_num;
+  size_t old_region_num = 0;
+  hr = young_list->first_survivor_region();
   while (hr != NULL) {
     assert(hr->is_survivor(), "badly formed young list");
     hr->set_young();
     hr = hr->get_next_young_region();
   }
 
-  // Clear the fields that point to the survivor list - they are
-  // all young now.
-  _g1->young_list()->clear_survivors();
+  // Clear the fields that point to the survivor list - they are all young now.
+  young_list->clear_survivors();
 
   if (_g1->mark_in_progress())
     _g1->concurrent_mark()->register_collection_set_finger(_inc_cset_max_finger);
@@ -2831,14 +2977,17 @@
   _collection_set = _inc_cset_head;
   _collection_set_size = _inc_cset_size;
   _collection_set_bytes_used_before = _inc_cset_bytes_used_before;
-
-  // For young regions in the collection set, we assume the worst
-  // case of complete survival
-  max_live_bytes -= _inc_cset_size * HeapRegion::GrainBytes;
-
   time_remaining_ms -= _inc_cset_predicted_elapsed_time_ms;
   predicted_pause_time_ms += _inc_cset_predicted_elapsed_time_ms;
 
+  ergo_verbose3(ErgoCSetConstruction | ErgoHigh,
+                "add young regions to CSet",
+                ergo_format_region("eden")
+                ergo_format_region("survivors")
+                ergo_format_ms("predicted young region time"),
+                eden_region_num, survivor_region_num,
+                _inc_cset_predicted_elapsed_time_ms);
+
   // The number of recorded young regions is the incremental
   // collection set's current size
   set_recorded_young_regions(_inc_cset_size);
@@ -2848,14 +2997,7 @@
   set_predicted_bytes_to_copy(_inc_cset_predicted_bytes_to_copy);
 #endif // PREDICTIONS_VERBOSE
 
-  if (G1PolicyVerbose > 0) {
-    gclog_or_tty->print_cr("  Added " PTR_FORMAT " Young Regions to CS.",
-      _inc_cset_size);
-    gclog_or_tty->print_cr("    (" SIZE_FORMAT " KB left in heap.)",
-      max_live_bytes/K);
-  }
-
-  assert(_inc_cset_size == _g1->young_list()->length(), "Invariant");
+  assert(_inc_cset_size == young_list->length(), "Invariant");
 
   double young_end_time_sec = os::elapsedTime();
   _recorded_young_cset_choice_time_ms =
@@ -2869,6 +3011,8 @@
     NumberSeq seq;
     double avg_prediction = 100000000000000000.0; // something very large
 
+    size_t prev_collection_set_size = _collection_set_size;
+    double prev_predicted_pause_time_ms = predicted_pause_time_ms;
     do {
       hr = _collectionSetChooser->getNextMarkedRegion(time_remaining_ms,
                                                       avg_prediction);
@@ -2878,23 +3022,58 @@
         predicted_pause_time_ms += predicted_time_ms;
         add_to_collection_set(hr);
         record_non_young_cset_region(hr);
-        max_live_bytes -= MIN2(hr->max_live_bytes(), max_live_bytes);
-        if (G1PolicyVerbose > 0) {
-          gclog_or_tty->print_cr("    (" SIZE_FORMAT " KB left in heap.)",
-                        max_live_bytes/K);
-        }
         seq.add(predicted_time_ms);
         avg_prediction = seq.avg() + seq.sd();
       }
-      should_continue =
-        ( hr != NULL) &&
-        ( (adaptive_young_list_length()) ? time_remaining_ms > 0.0
-          : _collection_set_size < _young_list_fixed_length );
+
+      should_continue = true;
+      if (hr == NULL) {
+        // No need for an ergo verbose message here,
+        // getNextMarkedRegion() does this when it returns NULL.
+        should_continue = false;
+      } else {
+        if (adaptive_young_list_length()) {
+          if (time_remaining_ms < 0.0) {
+            ergo_verbose1(ErgoCSetConstruction,
+                          "stop adding old regions to CSet",
+                          ergo_format_reason("remaining time is lower than 0")
+                          ergo_format_ms("remaining time"),
+                          time_remaining_ms);
+            should_continue = false;
+          }
+        } else {
+          if (_collection_set_size >= _young_list_fixed_length) {
+            ergo_verbose2(ErgoCSetConstruction,
+                          "stop adding old regions to CSet",
+                          ergo_format_reason("CSet length reached target")
+                          ergo_format_region("CSet")
+                          ergo_format_region("young target"),
+                          _collection_set_size, _young_list_fixed_length);
+            should_continue = false;
+          }
+        }
+      }
     } while (should_continue);
 
     if (!adaptive_young_list_length() &&
-        _collection_set_size < _young_list_fixed_length)
+        _collection_set_size < _young_list_fixed_length) {
+      ergo_verbose2(ErgoCSetConstruction,
+                    "request partially-young GCs end",
+                    ergo_format_reason("CSet length lower than target")
+                    ergo_format_region("CSet")
+                    ergo_format_region("young target"),
+                    _collection_set_size, _young_list_fixed_length);
       _should_revert_to_full_young_gcs  = true;
+    }
+
+    old_region_num = _collection_set_size - prev_collection_set_size;
+
+    ergo_verbose2(ErgoCSetConstruction | ErgoHigh,
+                  "add old regions to CSet",
+                  ergo_format_region("old")
+                  ergo_format_ms("predicted old region time"),
+                  old_region_num,
+                  predicted_pause_time_ms - prev_predicted_pause_time_ms);
   }
 
   stop_incremental_cset_building();
@@ -2903,6 +3082,16 @@
 
   end_recording_regions();
 
+  ergo_verbose5(ErgoCSetConstruction,
+                "finish choosing CSet",
+                ergo_format_region("eden")
+                ergo_format_region("survivors")
+                ergo_format_region("old")
+                ergo_format_ms("predicted pause time")
+                ergo_format_ms("target pause time"),
+                eden_region_num, survivor_region_num, old_region_num,
+                predicted_pause_time_ms, target_pause_time_ms);
+
   double non_young_end_time_sec = os::elapsedTime();
   _recorded_non_young_cset_choice_time_ms =
     (non_young_end_time_sec - non_young_start_time_sec) * 1000.0;
@@ -2914,12 +3103,6 @@
 }
 
 void G1CollectorPolicy_BestRegionsFirst::
-expand_if_possible(size_t numRegions) {
-  size_t expansion_bytes = numRegions * HeapRegion::GrainBytes;
-  _g1->expand(expansion_bytes);
-}
-
-void G1CollectorPolicy_BestRegionsFirst::
 record_collection_pause_end() {
   G1CollectorPolicy::record_collection_pause_end();
   assert(assertMarkedBytesDataOK(), "Marked regions not OK at pause end.");
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -183,9 +183,9 @@
   // if true, then it tries to dynamically adjust the length of the
   // young list
   bool _adaptive_young_list_length;
-  size_t _young_list_min_length;
   size_t _young_list_target_length;
   size_t _young_list_fixed_length;
+  size_t _prev_eden_capacity; // used for logging
 
   // The max number of regions we can extend the eden by while the GC
   // locker is active. This should be >= _young_list_target_length;
@@ -207,6 +207,9 @@
 
   double                _gc_overhead_perc;
 
+  double _reserve_factor;
+  size_t _reserve_regions;
+
   bool during_marking() {
     return _during_marking;
   }
@@ -243,6 +246,10 @@
 
   TruncatedSeq* _max_conc_overhead_seq;
 
+  bool   _using_new_ratio_calculations;
+  size_t _min_desired_young_length; // as set on the command line or default calculations
+  size_t _max_desired_young_length; // as set on the command line or default calculations
+
   size_t _recorded_young_regions;
   size_t _recorded_non_young_regions;
   size_t _recorded_region_num;
@@ -456,12 +463,6 @@
   size_t predict_bytes_to_copy(HeapRegion* hr);
   double predict_region_elapsed_time_ms(HeapRegion* hr, bool young);
 
-    // for use by: calculate_young_list_target_length(rs_length)
-  bool predict_will_fit(size_t young_region_num,
-                        double base_time_ms,
-                        size_t init_free_regions,
-                        double target_pause_time_ms);
-
   void start_recording_regions();
   void record_cset_region_info(HeapRegion* hr, bool young);
   void record_non_young_cset_region(HeapRegion* hr);
@@ -493,7 +494,6 @@
 
   // </NEW PREDICTION>
 
-public:
   void cset_regions_freed() {
     bool propagate = _last_young_gc_full && !_in_marking_window;
     _short_lived_surv_rate_group->all_surviving_words_recorded(propagate);
@@ -772,9 +772,41 @@
   double _mark_cleanup_start_sec;
   double _mark_closure_time_ms;
 
-  void   calculate_young_list_min_length();
-  void   calculate_young_list_target_length();
-  void   calculate_young_list_target_length(size_t rs_lengths);
+  // Update the young list target length either by setting it to the
+  // desired fixed value or by calculating it using G1's pause
+  // prediction model. If no rs_lengths parameter is passed, predict
+  // the RS lengths using the prediction model, otherwise use the
+  // given rs_lengths as the prediction.
+  void update_young_list_target_length(size_t rs_lengths = (size_t) -1);
+
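A brief sketch of the sentinel-default pattern used by the declaration above, with a hypothetical helper; the real logic lives in update_young_list_target_length() in g1CollectorPolicy.cpp.

#include <cstdio>
#include <cstddef>

// (size_t) -1 is the largest size_t value, so it can safely act as a
// "no prediction supplied" sentinel: no real RSet length can reach it.
static const size_t NO_RS_LENGTHS = (size_t) -1;

static void update_target(size_t rs_lengths = NO_RS_LENGTHS) {
  if (rs_lengths == NO_RS_LENGTHS) {
    std::puts("no rs_lengths given: predict them using the model");
  } else {
    std::printf("use the caller-supplied prediction: %zu\n", rs_lengths);
  }
}

int main() {
  update_target();      // let the prediction model supply rs_lengths
  update_target(2048);  // use a sampled/known value instead
}
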
+  // Calculate and return the minimum desired young list target
+  // length. This is the minimum desired young list length according
+  // to the user's inputs.
+  size_t calculate_young_list_desired_min_length(size_t base_min_length);
+
+  // Calculate and return the maximum desired young list target
+  // length. This is the maximum desired young list length according
+  // to the user's inputs.
+  size_t calculate_young_list_desired_max_length();
+
+  // Calculate and return the maximum young list target length that
+  // can fit into the pause time goal. The parameters are: rs_lengths
+  // represents the prediction of how large the young RSet lengths will
+  // be, base_min_length is the already existing number of regions in
+  // the young list, min_length and max_length are the desired min and
+  // max young list length according to the user's inputs.
+  size_t calculate_young_list_target_length(size_t rs_lengths,
+                                            size_t base_min_length,
+                                            size_t desired_min_length,
+                                            size_t desired_max_length);
+
+  // Check whether a given young length (young_length) fits into the
+  // given target pause time and whether the prediction for the number
+  // of objects to be copied for the given length will fit into the
+  // given free space (expressed by base_free_regions).  It is used by
+  // calculate_young_list_target_length().
+  bool predict_will_fit(size_t young_length, double base_time_ms,
+                        size_t base_free_regions, double target_pause_time_ms);
 
 public:
 
@@ -786,7 +818,10 @@
     return CollectorPolicy::G1CollectorPolicyKind;
   }
 
-  void check_prediction_validity();
+  // Check the current value of the young list RSet lengths and
+  // compare it against the last prediction. If the current value is
+  // higher, recalculate the young list target length prediction.
+  void revise_young_list_target_length_if_necessary();
 
   size_t bytes_in_collection_set() {
     return _bytes_in_collection_set_before_gc;
@@ -796,6 +831,9 @@
     return _all_pause_times_ms->num() + 1;
   }
 
+  // This should be called after the heap is resized.
+  void record_new_heap_size(size_t new_number_of_regions);
+
 protected:
 
   // Count the number of bytes used in the CS.
@@ -807,6 +845,8 @@
                                                 size_t max_live_bytes);
   void record_concurrent_mark_cleanup_end_work2();
 
+  void update_young_list_size_using_newratio(size_t number_of_heap_regions);
+
 public:
 
   virtual void init();
@@ -1045,7 +1085,7 @@
   // new cycle, as long as we are not already in one. It's best if it
   // is called during a safepoint when the test whether a cycle is in
   // progress or not is stable.
-  bool force_initial_mark_if_outside_cycle();
+  bool force_initial_mark_if_outside_cycle(GCCause::Cause gc_cause);
 
   // This is called at the very beginning of an evacuation pause (it
   // has to be the first thing that the pause does). If
@@ -1204,10 +1244,10 @@
     _survivors_age_table.merge_par(age_table);
   }
 
-  void calculate_max_gc_locker_expansion();
+  void update_max_gc_locker_expansion();
 
   // Calculates survivor space parameters.
-  void calculate_survivors_policy();
+  void update_survivors_policy();
 
 };
 
@@ -1234,8 +1274,6 @@
 
 class G1CollectorPolicy_BestRegionsFirst: public G1CollectorPolicy {
   CollectionSetChooser* _collectionSetChooser;
-  // If the estimated is less then desirable, resize if possible.
-  void expand_if_possible(size_t numRegions);
 
   virtual void choose_collection_set(double target_pause_time_ms);
   virtual void record_collection_pause_start(double start_time_sec,
@@ -1269,8 +1307,4 @@
   return (sum_of_squares - 2.0 * avg * sum + n_d * avg * avg) / n_d;
 }
 
-// Local Variables: ***
-// c-indentation-style: gnu ***
-// End: ***
-
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1COLLECTORPOLICY_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1ErgoVerbose.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/g1/g1ErgoVerbose.hpp"
+#include "utilities/ostream.hpp"
+
+ErgoLevel G1ErgoVerbose::_level;
+bool G1ErgoVerbose::_enabled[ErgoHeuristicNum];
+
+void G1ErgoVerbose::initialize() {
+  set_level(ErgoLow);
+  set_enabled(false);
+}
+
+void G1ErgoVerbose::set_level(ErgoLevel level) {
+  _level = level;
+}
+
+void G1ErgoVerbose::set_enabled(ErgoHeuristic n, bool enabled) {
+  assert(0 <= n && n < ErgoHeuristicNum, "pre-condition");
+  _enabled[n] = enabled;
+}
+
+void G1ErgoVerbose::set_enabled(bool enabled) {
+  for (int n = 0; n < ErgoHeuristicNum; n += 1) {
+    set_enabled((ErgoHeuristic) n, enabled);
+  }
+}
+
+const char* G1ErgoVerbose::to_string(int tag) {
+  ErgoHeuristic n = extract_heuristic(tag);
+  switch (n) {
+  case ErgoHeapSizing:            return "Heap Sizing";
+  case ErgoCSetConstruction:      return "CSet Construction";
+  case ErgoConcCycles:            return "Concurrent Cycles";
+  case ErgoPartiallyYoungGCs:     return "Partially-Young GCs";
+  default:
+    ShouldNotReachHere();
+    // Keep the Windows compiler happy
+    return NULL;
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1ERGOVERBOSE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1ERGOVERBOSE_HPP
+
+#include "memory/allocation.hpp"
+#include "utilities/debug.hpp"
+
+// The log of G1's heuristic decisions comprises a series of
+// records which have a similar format in order to maintain
+// consistency across records and, ultimately, to make the output
+// easier to parse, if we ever choose to do that. Each record consists of:
+// * A time stamp to be able to easily correlate each record with
+// other events.
+// * A unique string to allow us to easily identify such records.
+// * The name of the heuristic the record corresponds to.
+// * An action string which describes the action that G1 did or is
+// about to do.
+// * An optional reason string which describes the reason for the
+// action.
+// * An optional number of name/value pairs which contributed to the
+// decision to take the action described in the record.
+//
+// Each record is associated with a "tag" which is the combination of
+// the heuristic the record corresponds to, as well as the min level
+// of verboseness at which the record should be printed. The tag is
+// checked against the current settings to determine whether the record
+// should be printed or not.
+
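For orientation, a record produced by this machinery looks like the following single line (illustrative values, not captured from a real run):

 0.521: [G1Ergonomics (Heap Sizing) attempt heap expansion, reason: recent GC overhead higher than threshold after GC, recent GC overhead: 11.83 %, threshold: 10.00 %, uncommitted: 104857600 bytes, calculated expansion amount: 20971520 bytes (20.00 %)]
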
+// The available verboseness levels.
+typedef enum {
+  // Determines which part of the tag is occupied by the level.
+  ErgoLevelShift = 8,
+  ErgoLevelMask = ~((1 << ErgoLevelShift) - 1),
+
+  // ErgoLow is 0 so that we don't have to explicitly OR a heuristic
+  // id with ErgoLow, which keeps its use simpler.
+  ErgoLow = 0,
+  ErgoHigh = 1 << ErgoLevelShift,
+} ErgoLevel;
+
+// The available heuristics.
+typedef enum {
+  // Determines which part of the tag is occupied by the heuristic id.
+  ErgoHeuristicMask = ~ErgoLevelMask,
+
+  ErgoHeapSizing = 0,
+  ErgoCSetConstruction,
+  ErgoConcCycles,
+  ErgoPartiallyYoungGCs,
+
+  ErgoHeuristicNum
+} ErgoHeuristic;
+
+class G1ErgoVerbose : AllStatic {
+private:
+  // Determines the minimum verboseness level at which records will be
+  // printed.
+  static ErgoLevel _level;
+  // Determines which heuristics are currently enabled.
+  static bool _enabled[ErgoHeuristicNum];
+
+  static ErgoLevel extract_level(int tag) {
+    return (ErgoLevel) (tag & ErgoLevelMask);
+  }
+
+  static ErgoHeuristic extract_heuristic(int tag) {
+    return (ErgoHeuristic) (tag & ErgoHeuristicMask);
+  }
+
+public:
+  // Needs to be explicitly called at GC initialization.
+  static void initialize();
+
+  static void set_level(ErgoLevel level);
+  static void set_enabled(ErgoHeuristic h, bool enabled);
+  // This overload applies the setting to all heuristics.
+  static void set_enabled(bool enabled);
+
+  static bool enabled(int tag) {
+    ErgoLevel level = extract_level(tag);
+    ErgoHeuristic n = extract_heuristic(tag);
+    return level <= _level && _enabled[n];
+  }
+
+  // Extract the heuristic id from the tag and return a string with
+  // its name.
+  static const char* to_string(int tag);
+};
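
A standalone sketch of the tag layout, using the same shift/mask arithmetic as the class above; the shortened enum names and the test harness are illustrative stand-ins.

#include <cassert>

enum { LevelShift = 8,
       LevelMask  = ~((1 << LevelShift) - 1),
       HeuristicMask = ~LevelMask };
enum { Low = 0, High = 1 << LevelShift };
enum { HeapSizing = 0, CSetConstruction, ConcCycles, PartiallyYoungGCs };

int main() {
  int tag = CSetConstruction | High;  // like ErgoCSetConstruction | ErgoHigh
  assert((tag & HeuristicMask) == CSetConstruction);  // extract_heuristic()
  assert((tag & LevelMask) == High);                  // extract_level()
  // A bare heuristic id is implicitly at the low level, since Low == 0.
  assert((HeapSizing & LevelMask) == Low);
  return 0;
}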
+
+// The macros below generate the format string for values of different
+// types and/or metrics.
+
+// The reason for the action is optional and is handled specially: the
+// reason string is concatenated here so it's not necessary to pass it
+// as a parameter.
+#define ergo_format_reason(_reason_) ", reason: " _reason_
+
+// Single parameter format strings
+#define ergo_format_str(_name_)      ", " _name_ ": %s"
+#define ergo_format_region(_name_)   ", " _name_ ": "SIZE_FORMAT" regions"
+#define ergo_format_byte(_name_)     ", " _name_ ": "SIZE_FORMAT" bytes"
+#define ergo_format_double(_name_)   ", " _name_ ": %1.2f"
+#define ergo_format_perc(_name_)     ", " _name_ ": %1.2f %%"
+#define ergo_format_ms(_name_)       ", " _name_ ": %1.2f ms"
+
+// Double parameter format strings
+#define ergo_format_byte_perc(_name_)                                   \
+                             ", " _name_ ": "SIZE_FORMAT" bytes (%1.2f %%)"
+
+// Generates the format string
+#define ergo_format(_action_, _extra_format_)                   \
+  " %1.3f: [G1Ergonomics (%s) " _action_ _extra_format_ "]"
+
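As a hand-expanded illustration (relying on compile-time concatenation of adjacent string literals), a format built from these macros such as

  ergo_format("attempt heap expansion",
              ergo_format_reason("recent GC overhead higher than threshold")
              ergo_format_perc("threshold"))

yields the single literal

  " %1.3f: [G1Ergonomics (%s) attempt heap expansion, reason: recent GC overhead higher than threshold, threshold: %1.2f %%]"
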
+// Conditionally prints an ergonomic decision record. _extra_format_
+// is the format string for the optional items we'd like to print
+// (i.e., the decision's reason and any associated values). This
+// string should be built up using the ergo_format_* macros (see
+// above) to ensure consistency.
+//
+// Since we cannot rely on the compiler supporting variable argument
+// macros, this macro accepts a fixed number of arguments and passes
+// them to the print method. For convenience, we have wrapper macros
+// below which take a specific number of arguments and set the rest to
+// a default value.
+#define ergo_verbose_common(_tag_, _action_, _extra_format_,            \
+                            _arg0_, _arg1_, _arg2_, _arg3_, _arg4_, _arg5_) \
+  do {                                                                  \
+    if (G1ErgoVerbose::enabled((_tag_))) {                              \
+      gclog_or_tty->print_cr(ergo_format(_action_, _extra_format_),     \
+                             os::elapsedTime(),                         \
+                             G1ErgoVerbose::to_string((_tag_)),         \
+                             (_arg0_), (_arg1_), (_arg2_),              \
+                             (_arg3_), (_arg4_), (_arg5_));             \
+    }                                                                   \
+  } while (0)
+
+
+#define ergo_verbose(_tag_, _action_)                           \
+  ergo_verbose_common(_tag_, _action_, "", 0, 0, 0, 0, 0, 0)
+
+#define ergo_verbose0(_tag_, _action_, _extra_format_)                  \
+  ergo_verbose_common(_tag_, _action_, _extra_format_, 0, 0, 0, 0, 0, 0)
+
+#define ergo_verbose1(_tag_, _action_, _extra_format_,                  \
+                      _arg0_)                                           \
+  ergo_verbose_common(_tag_, _action_, _extra_format_,                  \
+                      _arg0_, 0, 0, 0, 0, 0)
+
+#define ergo_verbose2(_tag_, _action_, _extra_format_,                  \
+                      _arg0_, _arg1_)                                   \
+  ergo_verbose_common(_tag_, _action_, _extra_format_,                  \
+                      _arg0_, _arg1_, 0, 0, 0, 0)
+
+#define ergo_verbose3(_tag_, _action_, _extra_format_,                  \
+                      _arg0_, _arg1_, _arg2_)                           \
+  ergo_verbose_common(_tag_, _action_, _extra_format_,                  \
+                      _arg0_, _arg1_, _arg2_, 0, 0, 0)
+
+#define ergo_verbose4(_tag_, _action_, _extra_format_,                  \
+                      _arg0_, _arg1_, _arg2_, _arg3_)                   \
+  ergo_verbose_common(_tag_, _action_, _extra_format_,                  \
+                      _arg0_, _arg1_, _arg2_, _arg3_, 0, 0)
+
+#define ergo_verbose5(_tag_, _action_, _extra_format_,                  \
+                      _arg0_, _arg1_, _arg2_, _arg3_, _arg4_)           \
+  ergo_verbose_common(_tag_, _action_, _extra_format_,                  \
+                      _arg0_, _arg1_, _arg2_, _arg3_, _arg4_, 0)
+
+#define ergo_verbose6(_tag_, _action_, _extra_format_,                  \
+                      _arg0_, _arg1_, _arg2_, _arg3_, _arg4_, _arg5_)   \
+  ergo_verbose_common(_tag_, _action_, _extra_format_,                  \
+                      _arg0_, _arg1_, _arg2_, _arg3_, _arg4_, _arg5_)
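
A usage sketch mirroring a call from g1CollectorPolicy.cpp: the wrappers pad the unused trailing slots with the literal 0, which the format string simply never consumes.

//   ergo_verbose1(ErgoConcCycles,
//                 "request concurrent cycle initiation",
//                 ergo_format_reason("requested by GC cause")
//                 ergo_format_str("GC cause"),
//                 GCCause::to_string(gc_cause));
//
// expands to ergo_verbose_common(..., GCCause::to_string(gc_cause),
// 0, 0, 0, 0, 0); the guarded print_cr() consumes only the single "%s"
// conversion, so the five padding zeros are harmlessly ignored.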
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1ERGOVERBOSE_HPP
--- a/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -97,10 +97,6 @@
     // or performance (we are GC'ing most of the time anyway!),
     // simply overwrite the oldest entry in the tracker.
 
-    if (G1PolicyVerbose > 1) {
-      warning("MMU Tracker Queue overflow. Replacing earliest entry.");
-    }
-
     _head_index = trim_index(_head_index + 1);
     assert(_head_index == _tail_index, "Because we have a full circular buffer");
     _tail_index = trim_index(_tail_index + 1);
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -50,6 +50,8 @@
   G1RemSet* _g1_rem;
   ConcurrentMark* _cm;
   G1ParScanThreadState* _par_scan_state;
+  bool _during_initial_mark;
+  bool _mark_in_progress;
 public:
   G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state);
   bool apply_to_weak_ref_discovered_field() { return true; }
@@ -102,8 +104,8 @@
 class G1ParCopyHelper : public G1ParClosureSuper {
   G1ParScanClosure *_scanner;
 protected:
-  template <class T> void mark_forwardee(T* p);
-  oop copy_to_survivor_space(oop obj);
+  template <class T> void mark_object(T* p);
+  oop copy_to_survivor_space(oop obj, bool should_mark_copy);
 public:
   G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
                   G1ParScanClosure *scanner) :
@@ -111,7 +113,7 @@
 };
 
 template<bool do_gen_barrier, G1Barrier barrier,
-         bool do_mark_forwardee>
+         bool do_mark_object>
 class G1ParCopyClosure : public G1ParCopyHelper {
   G1ParScanClosure _scanner;
   template <class T> void do_oop_work(T* p);
@@ -120,8 +122,6 @@
     _scanner(g1, par_scan_state), G1ParCopyHelper(g1, par_scan_state, &_scanner) { }
   template <class T> void do_oop_nv(T* p) {
     do_oop_work(p);
-    if (do_mark_forwardee)
-      mark_forwardee(p);
   }
   virtual void do_oop(oop* p)       { do_oop_nv(p); }
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -124,9 +124,6 @@
   develop(bool, G1RSBarrierNullFilter, true,                                \
           "If true, generate null-pointer filtering code in RS barrier")    \
                                                                             \
-  develop(bool, G1PrintCTFilterStats, false,                                \
-          "If true, print stats on RS filtering effectiveness")             \
-                                                                            \
   develop(bool, G1DeferredRSUpdate, true,                                   \
           "If true, use deferred RS updates")                               \
                                                                             \
@@ -137,9 +134,9 @@
   develop(bool, G1RSCountHisto, false,                                      \
           "If true, print a histogram of RS occupancies after each pause")  \
                                                                             \
-  product(bool, G1PrintRegionLivenessInfo, false,                           \
-          "Prints the liveness information for all regions in the heap "    \
-          "at the end of a marking cycle.")                                 \
+  diagnostic(bool, G1PrintRegionLivenessInfo, false,                        \
+            "Prints the liveness information for all regions in the heap "  \
+            "at the end of a marking cycle.")                               \
                                                                             \
   develop(bool, G1PrintParCleanupStats, false,                              \
           "When true, print extra stats about parallel cleanup.")           \
@@ -231,7 +228,7 @@
           "the number of regions for which we'll print a surv rate "        \
           "summary.")                                                       \
                                                                             \
-  product(intx, G1ReservePercent, 10,                                       \
+  product(uintx, G1ReservePercent, 10,                                      \
           "It determines the minimum reserve we should have in the heap "   \
           "to minimize the probability of promotion failure.")              \
                                                                             \
--- a/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -36,7 +36,7 @@
 };
 
 template<bool do_gen_barrier, G1Barrier barrier,
-         bool do_mark_forwardee>
+         bool do_mark_object>
 class G1ParCopyClosure;
 class G1ParScanClosure;
 class G1ParPushHeapRSClosure;
--- a/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -98,7 +98,7 @@
 
     // At this point we are supposed to start a concurrent cycle. We
     // will do so if one is not already in progress.
-    bool res = g1h->g1_policy()->force_initial_mark_if_outside_cycle();
+    bool res = g1h->g1_policy()->force_initial_mark_if_outside_cycle(_gc_cause);
 
     // The above routine returns true if we were able to force the
     // next GC pause to be an initial mark; it returns false if a
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -909,10 +909,6 @@
     }
     young_gen()->verify(allow_dirty);
   }
-  if (!silent) {
-    gclog_or_tty->print("ref_proc ");
-  }
-  ReferenceProcessor::verify();
 }
 
 void ParallelScavengeHeap::print_heap_change(size_t prev_used) {
--- a/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -80,10 +80,6 @@
       Universe::oops_do(&mark_and_push_closure);
       break;
 
-    case reference_processing:
-      ReferenceProcessor::oops_do(&mark_and_push_closure);
-      break;
-
     case jni_handles:
       JNIHandles::oops_do(&mark_and_push_closure);
       break;
--- a/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -98,8 +98,7 @@
     management            = 6,
     jvmti                 = 7,
     system_dictionary     = 8,
-    reference_processing  = 9,
-    code_cache            = 10
+    code_cache            = 9
   };
  private:
   RootType _root_type;
--- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -516,7 +516,6 @@
   {
     ParallelScavengeHeap::ParStrongRootsScope psrs;
     Universe::oops_do(mark_and_push_closure());
-    ReferenceProcessor::oops_do(mark_and_push_closure());
     JNIHandles::oops_do(mark_and_push_closure());   // Global (strong) JNI handles
     CodeBlobToOopClosure each_active_code_blob(mark_and_push_closure(), /*do_marking=*/ true);
     Threads::oops_do(mark_and_push_closure(), &each_active_code_blob);
@@ -623,7 +622,6 @@
 
   // General strong roots.
   Universe::oops_do(adjust_root_pointer_closure());
-  ReferenceProcessor::oops_do(adjust_root_pointer_closure());
   JNIHandles::oops_do(adjust_root_pointer_closure());   // Global (strong) JNI handles
   Threads::oops_do(adjust_root_pointer_closure(), NULL);
   ObjectSynchronizer::oops_do(adjust_root_pointer_closure());
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -2445,7 +2445,6 @@
 
   // General strong roots.
   Universe::oops_do(adjust_root_pointer_closure());
-  ReferenceProcessor::oops_do(adjust_root_pointer_closure());
   JNIHandles::oops_do(adjust_root_pointer_closure());   // Global (strong) JNI handles
   Threads::oops_do(adjust_root_pointer_closure(), NULL);
   ObjectSynchronizer::oops_do(adjust_root_pointer_closure());
--- a/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -55,7 +55,6 @@
   switch (_root_type) {
     case universe:
       Universe::oops_do(&roots_closure);
-      ReferenceProcessor::oops_do(&roots_closure);
       break;
 
     case jni_handles:
--- a/src/share/vm/gc_interface/collectedHeap.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/gc_interface/collectedHeap.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -157,8 +157,14 @@
     // ..and clear it.
     Copy::zero_to_words(obj, new_tlab_size);
   } else {
-    // ...and clear just the allocated object.
-    Copy::zero_to_words(obj, size);
+    // ...and zap just the allocated object.
+#ifdef ASSERT
+    // Skip mangling the space corresponding to the object header to
+    // ensure that the returned space is not considered parsable by
+    // any concurrent GC thread.
+    size_t hdr_size = oopDesc::header_size();
+    Copy::fill_to_words(obj + hdr_size, new_tlab_size - hdr_size, badHeapWordVal);
+#endif // ASSERT
   }
   thread->tlab().fill(obj, obj + size, new_tlab_size);
   return obj;
--- a/src/share/vm/gc_interface/collectedHeap.inline.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/gc_interface/collectedHeap.inline.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -287,7 +287,10 @@
   assert(size >= 0, "int won't convert to size_t");
   HeapWord* obj = common_permanent_mem_allocate_init(size, CHECK_NULL);
   post_allocation_setup_no_klass_install(klass, obj, size);
-  NOT_PRODUCT(Universe::heap()->check_for_bad_heap_word_value(obj, size));
+#ifndef PRODUCT
+  const size_t hs = oopDesc::header_size();
+  Universe::heap()->check_for_bad_heap_word_value(obj+hs, size-hs);
+#endif
   return (oop)obj;
 }
 
--- a/src/share/vm/interpreter/bytecodes.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/interpreter/bytecodes.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -419,6 +419,8 @@
 
   static bool        is_zero_const  (Code code)    { return (code == _aconst_null || code == _iconst_0
                                                            || code == _fconst_0 || code == _dconst_0); }
+  static bool        is_invoke      (Code code)    { return (_invokevirtual <= code && code <= _invokedynamic); }
+
   static int         compute_flags  (const char* format, int more_flags = 0);  // compute the flags
   static int         flags          (int code, bool is_wide) {
     assert(code == (u_char)code, "must be a byte");
--- a/src/share/vm/interpreter/interpreterRuntime.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/interpreter/interpreterRuntime.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -555,7 +555,7 @@
     assert(method_handle->is_a(SystemDictionary::MethodHandle_klass()), "must be");
 
     {
-      // Walk all nmethods depending on CallSite
+      // Walk all nmethods depending on this call site.
       MutexLocker mu(Compile_lock, thread);
       Universe::flush_dependents_on(call_site, method_handle);
     }
@@ -1244,7 +1244,7 @@
   // preparing the same method will be sure to see non-null entry & mirror.
 IRT_END
 
-#if defined(IA32) || defined(AMD64)
+#if defined(IA32) || defined(AMD64) || defined(ARM)
 IRT_LEAF(void, InterpreterRuntime::popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address))
   if (src_address == dest_address) {
     return;
--- a/src/share/vm/interpreter/interpreterRuntime.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/interpreter/interpreterRuntime.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -141,8 +141,8 @@
                                         methodOopDesc* method,
                                         intptr_t* from, intptr_t* to);
 
-#if defined(IA32) || defined(AMD64)
-  // Popframe support (only needed on x86 and AMD64)
+#if defined(IA32) || defined(AMD64) || defined(ARM)
+  // Popframe support (only needed on x86, AMD64 and ARM)
   static void popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address);
 #endif
 
--- a/src/share/vm/memory/genCollectedHeap.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/memory/genCollectedHeap.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -1269,10 +1269,6 @@
     gclog_or_tty->print("remset ");
   }
   rem_set()->verify();
-  if (!silent) {
-     gclog_or_tty->print("ref_proc ");
-  }
-  ReferenceProcessor::verify();
 }
 
 void GenCollectedHeap::print() const { print_on(tty); }
--- a/src/share/vm/memory/referenceProcessor.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/memory/referenceProcessor.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -35,15 +35,15 @@
 
 ReferencePolicy* ReferenceProcessor::_always_clear_soft_ref_policy = NULL;
 ReferencePolicy* ReferenceProcessor::_default_soft_ref_policy      = NULL;
-oop              ReferenceProcessor::_sentinelRef = NULL;
 const int        subclasses_of_ref                = REF_PHANTOM - REF_OTHER;
+bool             ReferenceProcessor::_pending_list_uses_discovered_field = false;
 
 // List of discovered references.
 class DiscoveredList {
 public:
   DiscoveredList() : _len(0), _compressed_head(0), _oop_head(NULL) { }
   oop head() const     {
-     return UseCompressedOops ?  oopDesc::decode_heap_oop_not_null(_compressed_head) :
+     return UseCompressedOops ?  oopDesc::decode_heap_oop(_compressed_head) :
                                 _oop_head;
   }
   HeapWord* adr_head() {
@@ -53,12 +53,12 @@
   void   set_head(oop o) {
     if (UseCompressedOops) {
       // Must compress the head ptr.
-      _compressed_head = oopDesc::encode_heap_oop_not_null(o);
+      _compressed_head = oopDesc::encode_heap_oop(o);
     } else {
       _oop_head = o;
     }
   }
-  bool   empty() const          { return head() == ReferenceProcessor::sentinel_ref(); }
+  bool   empty() const          { return head() == NULL; }
   size_t length()               { return _len; }
   void   set_length(size_t len) { _len = len;  }
   void   inc_length(size_t inc) { _len += inc; assert(_len > 0, "Error"); }
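
With the sentinel object gone, an empty DiscoveredList is one whose head is NULL, which is why the accessors above must switch from the *_not_null encode/decode variants to the NULL-tolerant ones. A standalone model of the head handling (oop_t, narrow_t and the encode/decode helpers are simplified stand-ins for HotSpot's compressed-oops machinery; narrow_t is kept at full pointer width here for simplicity):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    typedef void*     oop_t;    // stand-in for oop
    typedef uintptr_t narrow_t; // stand-in for narrowOop

    static bool use_compressed = true; // stand-in for UseCompressedOops

    // Simplified encode/decode; unlike the *_not_null variants they
    // must round-trip NULL, since NULL now means "empty list".
    static narrow_t encode(oop_t o)    { return (narrow_t)o; }
    static oop_t    decode(narrow_t n) { return (oop_t)n; }

    struct DiscoveredList {
      narrow_t compressed_head;
      oop_t    oop_head;

      oop_t head() const {
        return use_compressed ? decode(compressed_head) : oop_head;
      }
      void set_head(oop_t o) {
        if (use_compressed) compressed_head = encode(o);
        else                oop_head = o;
      }
      bool empty() const { return head() == NULL; } // NULL, not a sentinel
    };

    int main() {
      DiscoveredList l = { 0, NULL };
      assert(l.empty());
      int dummy;
      l.set_head(&dummy);
      assert(!l.empty());
      l.set_head(NULL);   // a not-null encoder would be wrong here
      assert(l.empty());
      return 0;
    }
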
@@ -76,21 +76,9 @@
 }
 
 void ReferenceProcessor::init_statics() {
-  assert(_sentinelRef == NULL, "should be initialized precisely once");
-  EXCEPTION_MARK;
-  _sentinelRef = instanceKlass::cast(
-                    SystemDictionary::Reference_klass())->
-                      allocate_permanent_instance(THREAD);
-
   // Initialize the master soft ref clock.
   java_lang_ref_SoftReference::set_clock(os::javaTimeMillis());
 
-  if (HAS_PENDING_EXCEPTION) {
-      Handle ex(THREAD, PENDING_EXCEPTION);
-      vm_exit_during_initialization(ex);
-  }
-  assert(_sentinelRef != NULL && _sentinelRef->is_oop(),
-         "Just constructed it!");
   _always_clear_soft_ref_policy = new AlwaysClearPolicy();
   _default_soft_ref_policy      = new COMPILER2_PRESENT(LRUMaxHeapPolicy())
                                       NOT_COMPILER2(LRUCurrentHeapPolicy());
@@ -100,6 +88,7 @@
   guarantee(RefDiscoveryPolicy == ReferenceBasedDiscovery ||
             RefDiscoveryPolicy == ReferentBasedDiscovery,
             "Unrecongnized RefDiscoveryPolicy");
+  _pending_list_uses_discovered_field = JDK_Version::current().pending_list_uses_discovered_field();
 }
 
 ReferenceProcessor::ReferenceProcessor(MemRegion span,
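
init_statics now samples the embedding JDK's capabilities once, caching a plain boolean that GC code can read on hot paths without repeated version checks. A minimal model of that once-at-startup pattern (jdk_supports_discovered_chaining and RefProcessorStatics are illustrative stand-ins for JDK_Version::current().pending_list_uses_discovered_field() and the real class):

    #include <cassert>

    // Stand-in: true for JDKs that contain the fix for 4965777.
    static bool jdk_supports_discovered_chaining() { return true; }

    struct RefProcessorStatics {
      static bool _pending_list_uses_discovered_field;

      static void init_statics() {
        // Sampled once at VM startup; read thereafter with no version checks.
        _pending_list_uses_discovered_field = jdk_supports_discovered_chaining();
      }
      static bool pending_list_uses_discovered_field() {
        return _pending_list_uses_discovered_field;
      }
    };
    bool RefProcessorStatics::_pending_list_uses_discovered_field = false;

    int main() {
      RefProcessorStatics::init_statics();
      assert(RefProcessorStatics::pending_list_uses_discovered_field());
      return 0;
    }
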
@@ -130,13 +119,12 @@
   _discoveredWeakRefs    = &_discoveredSoftRefs[_max_num_q];
   _discoveredFinalRefs   = &_discoveredWeakRefs[_max_num_q];
   _discoveredPhantomRefs = &_discoveredFinalRefs[_max_num_q];
-  assert(sentinel_ref() != NULL, "_sentinelRef is NULL");
-  // Initialized all entries to _sentinelRef
+  // Initialize all entries to NULL
   for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) {
-        _discoveredSoftRefs[i].set_head(sentinel_ref());
+    _discoveredSoftRefs[i].set_head(NULL);
     _discoveredSoftRefs[i].set_length(0);
   }
-  // If we do barreirs, cache a copy of the barrier set.
+  // If we do barriers, cache a copy of the barrier set.
   if (discovered_list_needs_barrier) {
     _bs = Universe::heap()->barrier_set();
   }
@@ -167,10 +155,6 @@
   }
 }
 
-void ReferenceProcessor::oops_do(OopClosure* f) {
-  f->do_oop(adr_sentinel_ref());
-}
-
 void ReferenceProcessor::update_soft_ref_master_clock() {
   // Update (advance) the soft ref master clock field. This must be done
   // after processing the soft ref list.
@@ -283,8 +267,6 @@
   }
 #endif
   JNIHandles::weak_oops_do(is_alive, keep_alive);
-  // Finally remember to keep sentinel around
-  keep_alive->do_oop(adr_sentinel_ref());
   complete_gc->do_void();
 }
 
@@ -327,46 +309,77 @@
 void ReferenceProcessor::enqueue_discovered_reflist(DiscoveredList& refs_list,
                                                     HeapWord* pending_list_addr) {
   // Given a list of refs linked through the "discovered" field
-  // (java.lang.ref.Reference.discovered) chain them through the
-  // "next" field (java.lang.ref.Reference.next) and prepend
-  // to the pending list.
+  // (java.lang.ref.Reference.discovered), self-loop their "next" field
+  // thus distinguishing them from active References, then
+  // prepend them to the pending list.
+  // BKWRD COMPATIBILITY NOTE: For older JDKs (prior to the fix for 4965777),
+  // the "next" field is used to chain the pending list, not the discovered
+  // field.
+
   if (TraceReferenceGC && PrintGCDetails) {
     gclog_or_tty->print_cr("ReferenceProcessor::enqueue_discovered_reflist list "
                            INTPTR_FORMAT, (address)refs_list.head());
   }
-  oop obj = refs_list.head();
-  // Walk down the list, copying the discovered field into
-  // the next field and clearing it (except for the last
-  // non-sentinel object which is treated specially to avoid
-  // confusion with an active reference).
-  while (obj != sentinel_ref()) {
-    assert(obj->is_instanceRef(), "should be reference object");
-    oop next = java_lang_ref_Reference::discovered(obj);
-    if (TraceReferenceGC && PrintGCDetails) {
-      gclog_or_tty->print_cr("        obj " INTPTR_FORMAT "/next " INTPTR_FORMAT,
-                             obj, next);
+
+  oop obj = NULL;
+  oop next_d = refs_list.head();
+  if (pending_list_uses_discovered_field()) { // New behaviour
+    // Walk down the list, self-looping the next field
+    // so that the References are not considered active.
+    while (obj != next_d) {
+      obj = next_d;
+      assert(obj->is_instanceRef(), "should be reference object");
+      next_d = java_lang_ref_Reference::discovered(obj);
+      if (TraceReferenceGC && PrintGCDetails) {
+        gclog_or_tty->print_cr("        obj " INTPTR_FORMAT "/next_d " INTPTR_FORMAT,
+                               obj, next_d);
+      }
+      assert(java_lang_ref_Reference::next(obj) == NULL,
+             "Reference not active; should not be discovered");
+      // Self-loop next, so as to make Ref not active.
+      java_lang_ref_Reference::set_next(obj, obj);
+      if (next_d == obj) {  // obj is last
+        // Swap refs_list into pending_list_addr and
+        // set obj's discovered to what we read from pending_list_addr.
+        oop old = oopDesc::atomic_exchange_oop(refs_list.head(), pending_list_addr);
+        // Need oop_check on pending_list_addr above;
+        // see special oop-check code at the end of
+        // enqueue_discovered_reflists() further below.
+        java_lang_ref_Reference::set_discovered(obj, old); // old may be NULL
+      }
     }
-    assert(java_lang_ref_Reference::next(obj) == NULL,
-           "The reference should not be enqueued");
-    if (next == sentinel_ref()) {  // obj is last
-      // Swap refs_list into pendling_list_addr and
-      // set obj's next to what we read from pending_list_addr.
-      oop old = oopDesc::atomic_exchange_oop(refs_list.head(), pending_list_addr);
-      // Need oop_check on pending_list_addr above;
-      // see special oop-check code at the end of
-      // enqueue_discovered_reflists() further below.
-      if (old == NULL) {
-        // obj should be made to point to itself, since
-        // pending list was empty.
-        java_lang_ref_Reference::set_next(obj, obj);
+  } else { // Old behaviour
+    // Walk down the list, copying the discovered field into
+    // the next field and clearing the discovered field.
+    while (obj != next_d) {
+      obj = next_d;
+      assert(obj->is_instanceRef(), "should be reference object");
+      next_d = java_lang_ref_Reference::discovered(obj);
+      if (TraceReferenceGC && PrintGCDetails) {
+        gclog_or_tty->print_cr("        obj " INTPTR_FORMAT "/next_d " INTPTR_FORMAT,
+                               obj, next_d);
+      }
+      assert(java_lang_ref_Reference::next(obj) == NULL,
+             "The reference should not be enqueued");
+      if (next_d == obj) {  // obj is last
+        // Swap refs_list into pending_list_addr and
+        // set obj's next to what we read from pending_list_addr.
+        oop old = oopDesc::atomic_exchange_oop(refs_list.head(), pending_list_addr);
+        // Need oop_check on pending_list_addr above;
+        // see special oop-check code at the end of
+        // enqueue_discovered_reflists() further below.
+        if (old == NULL) {
+          // obj should be made to point to itself, since
+          // pending list was empty.
+          java_lang_ref_Reference::set_next(obj, obj);
+        } else {
+          java_lang_ref_Reference::set_next(obj, old);
+        }
       } else {
-        java_lang_ref_Reference::set_next(obj, old);
+        java_lang_ref_Reference::set_next(obj, next_d);
       }
-    } else {
-      java_lang_ref_Reference::set_next(obj, next);
+      java_lang_ref_Reference::set_discovered(obj, (oop) NULL);
     }
-    java_lang_ref_Reference::set_discovered(obj, (oop) NULL);
-    obj = next;
   }
 }
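
The rewritten walk above encodes the two invariants of the sentinel-free scheme: a discovered list is terminated by a tail whose discovered field points to itself, and each Reference is made inactive by self-looping its next field before the list is handed to the pending list. A standalone model of the new-behaviour branch (Ref models the two java.lang.ref.Reference fields, and the plain pointer swap stands in for oopDesc::atomic_exchange_oop):

    #include <cassert>
    #include <cstddef>

    struct Ref {
      Ref* next;        // models java.lang.ref.Reference.next
      Ref* discovered;  // models java.lang.ref.Reference.discovered
    };

    // Prepend a discovered list (self-looped at its tail) onto the pending
    // list, self-looping each Reference's next field so it reads as inactive.
    static void enqueue(Ref*& list_head, Ref*& pending_list) {
      Ref* obj    = NULL;
      Ref* next_d = list_head;
      while (obj != next_d) {           // the tail's discovered field self-loops
        obj = next_d;
        next_d = obj->discovered;
        assert(obj->next == NULL && "must still be active when enqueued");
        obj->next = obj;                // self-loop next: Ref is no longer active
        if (next_d == obj) {            // obj is last
          Ref* old = pending_list;      // models atomic_exchange_oop(head, addr)
          pending_list = list_head;
          obj->discovered = old;        // old may be NULL
        }
      }
      list_head = NULL;
    }

    int main() {
      Ref a = { NULL, NULL }, b = { NULL, NULL };
      a.discovered = &b;
      b.discovered = &b;                // tail self-loop
      Ref* list    = &a;
      Ref* pending = NULL;
      enqueue(list, pending);
      assert(pending == &a && a.next == &a && b.next == &b);
      assert(b.discovered == NULL);     // NULL now ends the pending list
      return 0;
    }

Note that an initially NULL list head falls straight through the loop, which is exactly why the DiscoveredList accessors must tolerate NULL.
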
 
@@ -376,10 +389,9 @@
   RefProcEnqueueTask(ReferenceProcessor& ref_processor,
                      DiscoveredList      discovered_refs[],
                      HeapWord*           pending_list_addr,
-                     oop                 sentinel_ref,
                      int                 n_queues)
     : EnqueueTask(ref_processor, discovered_refs,
-                  pending_list_addr, sentinel_ref, n_queues)
+                  pending_list_addr, n_queues)
   { }
 
   virtual void work(unsigned int work_id) {
@@ -396,7 +408,7 @@
          j++, index += _n_queues) {
       _ref_processor.enqueue_discovered_reflist(
         _refs_lists[index], _pending_list_addr);
-      _refs_lists[index].set_head(_sentinel_ref);
+      _refs_lists[index].set_head(NULL);
       _refs_lists[index].set_length(0);
     }
   }
@@ -408,13 +420,13 @@
   if (_processing_is_mt && task_executor != NULL) {
     // Parallel code
     RefProcEnqueueTask tsk(*this, _discoveredSoftRefs,
-                           pending_list_addr, sentinel_ref(), _max_num_q);
+                           pending_list_addr, _max_num_q);
     task_executor->execute(tsk);
   } else {
     // Serial code: call the parent class's implementation
     for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) {
       enqueue_discovered_reflist(_discoveredSoftRefs[i], pending_list_addr);
-      _discoveredSoftRefs[i].set_head(sentinel_ref());
+      _discoveredSoftRefs[i].set_head(NULL);
       _discoveredSoftRefs[i].set_length(0);
     }
   }
@@ -428,7 +440,7 @@
                                 BoolObjectClosure* is_alive);
 
   // End Of List.
-  inline bool has_next() const { return _next != ReferenceProcessor::sentinel_ref(); }
+  inline bool has_next() const { return _ref != NULL; }
 
   // Get oop to the Reference object.
   inline oop obj() const { return _ref; }
@@ -468,9 +480,13 @@
   inline void update_discovered() {
     // First _prev_next ref actually points into DiscoveredList (gross).
     if (UseCompressedOops) {
-      _keep_alive->do_oop((narrowOop*)_prev_next);
+      if (!oopDesc::is_null(*(narrowOop*)_prev_next)) {
+        _keep_alive->do_oop((narrowOop*)_prev_next);
+      }
     } else {
-      _keep_alive->do_oop((oop*)_prev_next);
+      if (!oopDesc::is_null(*(oop*)_prev_next)) {
+        _keep_alive->do_oop((oop*)_prev_next);
+      }
     }
   }
 
@@ -488,6 +504,7 @@
 private:
   DiscoveredList&    _refs_list;
   HeapWord*          _prev_next;
+  oop                _prev;
   oop                _ref;
   HeapWord*          _discovered_addr;
   oop                _next;
@@ -509,6 +526,7 @@
                                                       BoolObjectClosure* is_alive)
   : _refs_list(refs_list),
     _prev_next(refs_list.adr_head()),
+    _prev(NULL),
     _ref(refs_list.head()),
 #ifdef ASSERT
     _first_seen(refs_list.head()),
@@ -517,7 +535,7 @@
     _processed(0),
     _removed(0),
 #endif
-    _next(refs_list.head()),
+    _next(NULL),
     _keep_alive(keep_alive),
     _is_alive(is_alive)
 { }
@@ -544,26 +562,43 @@
 
 inline void DiscoveredListIterator::next() {
   _prev_next = _discovered_addr;
+  _prev = _ref;
   move_to_next();
 }
 
 inline void DiscoveredListIterator::remove() {
   assert(_ref->is_oop(), "Dropping a bad reference");
   oop_store_raw(_discovered_addr, NULL);
+
   // First _prev_next ref actually points into DiscoveredList (gross).
+  oop new_next;
+  if (_next == _ref) {
+    // At the end of the list, we should make _prev point to itself.
+    // If _ref is the first ref, then _prev_next will be in the DiscoveredList,
+    // and _prev will be NULL.
+    new_next = _prev;
+  } else {
+    new_next = _next;
+  }
+
   if (UseCompressedOops) {
     // Remove Reference object from list.
-    oopDesc::encode_store_heap_oop_not_null((narrowOop*)_prev_next, _next);
+    oopDesc::encode_store_heap_oop((narrowOop*)_prev_next, new_next);
   } else {
     // Remove Reference object from list.
-    oopDesc::store_heap_oop((oop*)_prev_next, _next);
+    oopDesc::store_heap_oop((oop*)_prev_next, new_next);
   }
   NOT_PRODUCT(_removed++);
   _refs_list.dec_length(1);
 }
 
 inline void DiscoveredListIterator::move_to_next() {
-  _ref = _next;
+  if (_ref == _next) {
+    // End of the list.
+    _ref = NULL;
+  } else {
+    _ref = _next;
+  }
   assert(_ref != _first_seen, "cyclic ref_list found");
   NOT_PRODUCT(_processed++);
 }
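
DiscoveredListIterator now detects the end of a list from the same self-loop: when the current ref's discovered field points back at the ref itself, move_to_next sets _ref to NULL and has_next() terminates the walk; remove() must correspondingly re-create the self-loop when it unlinks the tail. A compact standalone model of those operations (Iter, Ref and the field names are illustrative stand-ins, not the real iterator):

    #include <cassert>
    #include <cstddef>

    struct Ref { Ref* discovered; };

    struct Iter {
      Ref** prev_next;  // slot the current Ref is linked from
      Ref*  prev;       // previous Ref; NULL while still at the head
      Ref*  ref;        // current Ref; NULL once past the tail
      Ref*  next;       // current Ref's discovered field

      explicit Iter(Ref** head)
        : prev_next(head), prev(NULL), ref(*head), next(NULL) {}

      bool has_next() const { return ref != NULL; }

      void load_next() { next = ref->discovered; }

      // Unlink the current Ref.  At the tail the predecessor must be
      // re-self-looped; unlinking a lone head just NULLs the head slot.
      void remove() { *prev_next = (next == ref) ? prev : next; }

      void advance() {
        prev_next = &ref->discovered;
        prev = ref;
        ref = (ref == next) ? NULL : next;  // self-loop marks the tail
      }
    };

    int main() {
      Ref a = { NULL }, b = { NULL };
      a.discovered = &b;
      b.discovered = &b;                    // tail self-loop
      Ref* head = &a;

      Iter it(&head);
      it.load_next(); it.advance();         // keep a, move to b
      it.load_next(); it.remove();          // drop the tail b
      assert(a.discovered == &a);           // a is self-looped again
      it.advance();
      assert(!it.has_next());               // walk ends without a sentinel
      return 0;
    }
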
@@ -613,7 +648,7 @@
   NOT_PRODUCT(
     if (PrintGCDetails && TraceReferenceGC) {
       gclog_or_tty->print_cr(" Dropped %d dead Refs out of %d "
-        "discovered Refs by policy  list " INTPTR_FORMAT,
+        "discovered Refs by policy, from list " INTPTR_FORMAT,
         iter.removed(), iter.processed(), (address)refs_list.head());
     }
   )
@@ -725,22 +760,28 @@
     assert(iter.obj()->is_oop(UseConcMarkSweepGC), "Adding a bad reference");
     iter.next();
   }
-  // Remember to keep sentinel pointer around
+  // Remember to update the next pointer of the last ref.
   iter.update_discovered();
   // Close the reachable set
   complete_gc->do_void();
 }
 
 void
+ReferenceProcessor::clear_discovered_references(DiscoveredList& refs_list) {
+  oop obj = NULL;
+  oop next = refs_list.head();
+  while (next != obj) {
+    obj = next;
+    next = java_lang_ref_Reference::discovered(obj);
+    java_lang_ref_Reference::set_discovered_raw(obj, NULL);
+  }
+  refs_list.set_head(NULL);
+  refs_list.set_length(0);
+}
+
+void
 ReferenceProcessor::abandon_partial_discovered_list(DiscoveredList& refs_list) {
-  oop obj = refs_list.head();
-  while (obj != sentinel_ref()) {
-    oop discovered = java_lang_ref_Reference::discovered(obj);
-    java_lang_ref_Reference::set_discovered_raw(obj, NULL);
-    obj = discovered;
-  }
-  refs_list.set_head(sentinel_ref());
-  refs_list.set_length(0);
+  clear_discovered_references(refs_list);
 }
 
 void ReferenceProcessor::abandon_partial_discovery() {
@@ -859,6 +900,9 @@
           refs_to_move = MIN2(ref_lists[from_idx].length() - avg_refs,
                               avg_refs - ref_lists[to_idx].length());
         }
+
+        assert(refs_to_move > 0, "otherwise the code below will fail");
+
         oop move_head = ref_lists[from_idx].head();
         oop move_tail = move_head;
         oop new_head  = move_head;
@@ -867,10 +911,24 @@
           move_tail = new_head;
           new_head = java_lang_ref_Reference::discovered(new_head);
         }
-        java_lang_ref_Reference::set_discovered(move_tail, ref_lists[to_idx].head());
+
+        // Add the chain to the to list.
+        if (ref_lists[to_idx].head() == NULL) {
+          // to list is empty. Make a loop at the end.
+          java_lang_ref_Reference::set_discovered(move_tail, move_tail);
+        } else {
+          java_lang_ref_Reference::set_discovered(move_tail, ref_lists[to_idx].head());
+        }
         ref_lists[to_idx].set_head(move_head);
         ref_lists[to_idx].inc_length(refs_to_move);
-        ref_lists[from_idx].set_head(new_head);
+
+        // Remove the chain from the from list.
+        if (move_tail == new_head) {
+          // We found the end of the from list.
+          ref_lists[from_idx].set_head(NULL);
+        } else {
+          ref_lists[from_idx].set_head(new_head);
+        }
         ref_lists[from_idx].dec_length(refs_to_move);
         if (ref_lists[from_idx].length() == 0) {
           break;
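
Balancing splices a chain of refs_to_move entries from one queue onto another, and both ends now need self-loop repairs: a tail moved onto an empty destination must loop to itself, and a fully drained source must end up with a NULL head. A standalone model of the splice (same Ref stand-in as in the sketches above):

    #include <cassert>
    #include <cstddef>

    struct Ref { Ref* discovered; };

    // Move 'count' refs from the head of 'from' to the head of 'to'.
    static void splice(Ref*& from, Ref*& to, int count) {
      assert(count > 0 && "otherwise the code below will fail");
      Ref* move_head = from;
      Ref* move_tail = move_head;
      Ref* new_head  = move_head;
      while (count-- > 0 && new_head != NULL) {
        move_tail = new_head;
        new_head  = new_head->discovered;
      }
      // Attach the chain to the 'to' list; an empty destination means the
      // moved tail must self-loop instead of pointing at an old head.
      move_tail->discovered = (to == NULL) ? move_tail : to;
      to = move_head;
      // Detach from the 'from' list; the old tail was self-looped, so
      // new_head == move_tail means the source list is now empty.
      from = (move_tail == new_head) ? NULL : new_head;
    }

    int main() {
      Ref a = { NULL }, b = { NULL };
      a.discovered = &b;
      b.discovered = &b;          // tail self-loop
      Ref* from = &a;
      Ref* to   = NULL;
      splice(from, to, 2);
      assert(to == &a && from == NULL);
      assert(b.discovered == &b); // moved tail is still self-looped
      return 0;
    }
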
@@ -1082,42 +1140,40 @@
   // First we must make sure this object is only enqueued once. CAS in a non null
   // discovered_addr.
   oop current_head = refs_list.head();
+  // The last ref must have its discovered field pointing to itself.
+  oop next_discovered = (current_head != NULL) ? current_head : obj;
 
   // Note: In the case of G1, this specific pre-barrier is strictly
   // not necessary because the only case we are interested in
   // here is when *discovered_addr is NULL (see the CAS further below),
   // so this will expand to nothing. As a result, we have manually
   // elided this out for G1, but left in the test for some future
-  // collector that might have need for a pre-barrier here.
-  if (_discovered_list_needs_barrier && !UseG1GC) {
-    if (UseCompressedOops) {
-      _bs->write_ref_field_pre((narrowOop*)discovered_addr, current_head);
-    } else {
-      _bs->write_ref_field_pre((oop*)discovered_addr, current_head);
-    }
-    guarantee(false, "Need to check non-G1 collector");
-  }
-  oop retest = oopDesc::atomic_compare_exchange_oop(current_head, discovered_addr,
+  // collector that might need a pre-barrier here, e.g.:
+  // _bs->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered);
+  assert(!_discovered_list_needs_barrier || UseG1GC,
+         "Need to check non-G1 collector: "
+         "may need a pre-write-barrier for CAS from NULL below");
+  oop retest = oopDesc::atomic_compare_exchange_oop(next_discovered, discovered_addr,
                                                     NULL);
   if (retest == NULL) {
     // This thread just won the right to enqueue the object.
-    // We have separate lists for enqueueing so no synchronization
+    // We have separate lists for enqueueing, so no synchronization
     // is necessary.
     refs_list.set_head(obj);
     refs_list.inc_length(1);
     if (_discovered_list_needs_barrier) {
-      _bs->write_ref_field((void*)discovered_addr, current_head);
+      _bs->write_ref_field((void*)discovered_addr, next_discovered);
     }
 
     if (TraceReferenceGC) {
-      gclog_or_tty->print_cr("Enqueued reference (mt) (" INTPTR_FORMAT ": %s)",
+      gclog_or_tty->print_cr("Discovered reference (mt) (" INTPTR_FORMAT ": %s)",
                              obj, obj->blueprint()->internal_name());
     }
   } else {
     // If retest was non NULL, another thread beat us to it:
     // The reference has already been discovered...
     if (TraceReferenceGC) {
-      gclog_or_tty->print_cr("Already enqueued reference (" INTPTR_FORMAT ": %s)",
+      gclog_or_tty->print_cr("Already discovered reference (" INTPTR_FORMAT ": %s)",
                              obj, obj->blueprint()->internal_name());
     }
   }
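
In this MT path, threads race to claim a Reference by CASing its discovered field from NULL to the head of their private list; next_discovered substitutes the object itself when the list is empty, so a new tail is born self-looped. A standalone model using std::atomic in place of oopDesc::atomic_compare_exchange_oop (Ref, List and try_discover are illustrative):

    #include <atomic>
    #include <cassert>
    #include <cstddef>

    struct Ref  { std::atomic<Ref*> discovered; };
    struct List { Ref* head; size_t len; };

    // Try to claim obj for our private list; false if another thread won.
    static bool try_discover(Ref* obj, List& list) {
      // The last ref must have its discovered field pointing to itself,
      // so an empty list substitutes obj for the head.
      Ref* next_discovered = (list.head != NULL) ? list.head : obj;
      Ref* expected = NULL;
      if (!obj->discovered.compare_exchange_strong(expected, next_discovered)) {
        return false; // another thread discovered it first
      }
      // We won the CAS; lists are per-thread, so no further locking needed.
      list.head = obj;
      list.len++;
      return true;
    }

    int main() {
      Ref r1;
      r1.discovered.store(NULL);
      List mine = { NULL, 0 };
      assert( try_discover(&r1, mine));
      assert(!try_discover(&r1, mine));  // already discovered
      assert(mine.head == &r1 && r1.discovered.load() == &r1); // self-looped tail
      return 0;
    }
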
@@ -1142,7 +1198,7 @@
 //     (or part of the heap being collected, indicated by our "span"
 //     we don't treat it specially (i.e. we scan it as we would
 //     a normal oop, treating its references as strong references).
-//     This means that references can't be enqueued unless their
+//     This means that references can't be discovered unless their
 //     referent is also in the same span. This is the simplest,
 //     most "local" and most conservative approach, albeit one
 //     that may cause weak references to be enqueued least promptly.
@@ -1164,14 +1220,13 @@
 //     and complexity in processing these references.
 //     We call this choice the "ReferentBasedDiscovery" policy.
 bool ReferenceProcessor::discover_reference(oop obj, ReferenceType rt) {
-  // We enqueue references only if we are discovering refs
-  // (rather than processing discovered refs).
+  // Make sure we are discovering refs (rather than processing discovered refs).
   if (!_discovering_refs || !RegisterReferences) {
     return false;
   }
-  // We only enqueue active references.
+  // We only discover active references.
   oop next = java_lang_ref_Reference::next(obj);
-  if (next != NULL) {
+  if (next != NULL) {   // Ref is no longer active
     return false;
   }
 
@@ -1184,8 +1239,8 @@
     return false;
   }
 
-  // We only enqueue references whose referents are not (yet) strongly
-  // reachable.
+  // We only discover references whose referents are not (yet)
+  // known to be strongly reachable.
   if (is_alive_non_header() != NULL) {
     verify_referent(obj);
     if (is_alive_non_header()->do_object_b(java_lang_ref_Reference::referent(obj))) {
@@ -1211,7 +1266,7 @@
   if (discovered != NULL) {
     // The reference has already been discovered...
     if (TraceReferenceGC) {
-      gclog_or_tty->print_cr("Already enqueued reference (" INTPTR_FORMAT ": %s)",
+      gclog_or_tty->print_cr("Already discovered reference (" INTPTR_FORMAT ": %s)",
                              obj, obj->blueprint()->internal_name());
     }
     if (RefDiscoveryPolicy == ReferentBasedDiscovery) {
@@ -1233,9 +1288,9 @@
 
   if (RefDiscoveryPolicy == ReferentBasedDiscovery) {
     verify_referent(obj);
-    // enqueue if and only if either:
-    // reference is in our span or
-    // we are an atomic collector and referent is in our span
+    // Discover if and only if EITHER:
+    // .. reference is in our span, OR
+    // .. we are an atomic collector and referent is in our span
     if (_span.contains(obj_addr) ||
         (discovery_is_atomic() &&
          _span.contains(java_lang_ref_Reference::referent(obj)))) {
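
Under ReferentBasedDiscovery the test above reduces to a simple predicate: discover when the Reference object lies in the collected span, or when discovery is atomic and the referent does. A standalone model with the span as an address interval (Span and should_discover are illustrative stand-ins for _span and the surrounding logic):

    #include <cassert>
    #include <cstdint>

    struct Span {                     // right-open interval, as for _span
      uintptr_t lo, hi;
      bool contains(uintptr_t a) const { return lo <= a && a < hi; }
    };

    static bool should_discover(const Span& span, bool atomic,
                                uintptr_t ref_addr, uintptr_t referent_addr) {
      // Discover iff the reference is in our span, or we are an atomic
      // collector and the referent is in our span.
      return span.contains(ref_addr) ||
             (atomic && span.contains(referent_addr));
    }

    int main() {
      Span s = { 0x1000, 0x2000 };
      assert( should_discover(s, false, 0x1800, 0x3000)); // ref in span
      assert( should_discover(s, true,  0x3000, 0x1800)); // atomic + referent
      assert(!should_discover(s, false, 0x3000, 0x1800)); // non-atomic misses it
      return 0;
    }
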
@@ -1262,30 +1317,28 @@
     // here: the field will be visited later when processing the discovered
     // references.
     oop current_head = list->head();
+    // The last ref must have its discovered field pointing to itself.
+    oop next_discovered = (current_head != NULL) ? current_head : obj;
+
     // As in the case further above, since we are over-writing a NULL
     // pre-value, we can safely elide the pre-barrier here for the case of G1.
+    // e.g.: _bs->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered);
     assert(discovered == NULL, "control point invariant");
-    if (_discovered_list_needs_barrier && !UseG1GC) { // safe to elide for G1
-      if (UseCompressedOops) {
-        _bs->write_ref_field_pre((narrowOop*)discovered_addr, current_head);
-      } else {
-        _bs->write_ref_field_pre((oop*)discovered_addr, current_head);
-      }
-      guarantee(false, "Need to check non-G1 collector");
-    }
-    oop_store_raw(discovered_addr, current_head);
+    assert(!_discovered_list_needs_barrier || UseG1GC,
+           "For non-G1 collector, may need a pre-write-barrier for CAS from NULL below");
+    oop_store_raw(discovered_addr, next_discovered);
     if (_discovered_list_needs_barrier) {
-      _bs->write_ref_field((void*)discovered_addr, current_head);
+      _bs->write_ref_field((void*)discovered_addr, next_discovered);
     }
     list->set_head(obj);
     list->inc_length(1);
 
     if (TraceReferenceGC) {
-      gclog_or_tty->print_cr("Enqueued reference (" INTPTR_FORMAT ": %s)",
+      gclog_or_tty->print_cr("Discovered reference (" INTPTR_FORMAT ": %s)",
                                 obj, obj->blueprint()->internal_name());
     }
   }
-  assert(obj->is_oop(), "Enqueued a bad reference");
+  assert(obj->is_oop(), "Discovered a bad reference");
   verify_referent(obj);
   return true;
 }
@@ -1437,22 +1490,12 @@
 }
 #endif
 
-void ReferenceProcessor::verify() {
-  guarantee(sentinel_ref() != NULL && sentinel_ref()->is_oop(), "Lost _sentinelRef");
-}
-
 #ifndef PRODUCT
 void ReferenceProcessor::clear_discovered_references() {
   guarantee(!_discovering_refs, "Discovering refs?");
   for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) {
-    oop obj = _discoveredSoftRefs[i].head();
-    while (obj != sentinel_ref()) {
-      oop next = java_lang_ref_Reference::discovered(obj);
-      java_lang_ref_Reference::set_discovered(obj, (oop) NULL);
-      obj = next;
-    }
-    _discoveredSoftRefs[i].set_head(sentinel_ref());
-    _discoveredSoftRefs[i].set_length(0);
+    clear_discovered_references(_discoveredSoftRefs[i]);
   }
 }
+
 #endif // PRODUCT
--- a/src/share/vm/memory/referenceProcessor.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/memory/referenceProcessor.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -52,8 +52,8 @@
 
 class ReferenceProcessor : public CHeapObj {
  protected:
-  // End of list marker
-  static oop  _sentinelRef;
+  // Compatibility with pre-4965777 JDKs
+  static bool _pending_list_uses_discovered_field;
   MemRegion   _span; // (right-open) interval of heap
                      // subject to wkref discovery
   bool        _discovering_refs;      // true when discovery enabled
@@ -106,8 +106,6 @@
   int max_num_q()                        { return _max_num_q; }
   void set_active_mt_degree(int v)       { _num_q = v; }
   DiscoveredList* discovered_soft_refs() { return _discoveredSoftRefs; }
-  static oop  sentinel_ref()             { return _sentinelRef; }
-  static oop* adr_sentinel_ref()         { return &_sentinelRef; }
   ReferencePolicy* setup_policy(bool always_clear) {
     _current_soft_ref_policy = always_clear ?
       _always_clear_soft_ref_policy : _default_soft_ref_policy;
@@ -115,7 +113,6 @@
     return _current_soft_ref_policy;
   }
 
- public:
   // Process references with a certain reachability level.
   void process_discovered_reflist(DiscoveredList               refs_lists[],
                                   ReferencePolicy*             policy,
@@ -230,6 +227,7 @@
                                         HeapWord* discovered_addr);
   void verify_ok_to_handle_reflists() PRODUCT_RETURN;
 
+  void clear_discovered_references(DiscoveredList& refs_list);
   void abandon_partial_discovered_list(DiscoveredList& refs_list);
 
   // Calculate the number of jni handles.
@@ -300,6 +298,13 @@
   bool discovery_is_atomic() const { return _discovery_is_atomic; }
   void set_atomic_discovery(bool atomic) { _discovery_is_atomic = atomic; }
 
+  // Whether the JDK in which we are embedded post-dates the fix for
+  // 4965777, and thus uses the discovered field to chain the entries
+  // in the pending list.
+  static bool pending_list_uses_discovered_field() {
+    return _pending_list_uses_discovered_field;
+  }
+
   // whether discovery is done by multiple threads simultaneously
   bool discovery_is_mt() const { return _discovery_is_mt; }
   void set_mt_discovery(bool mt) { _discovery_is_mt = mt; }
@@ -314,7 +319,6 @@
 
   // iterate over oops
   void weak_oops_do(OopClosure* f);       // weak roots
-  static void oops_do(OopClosure* f);     // strong root(s)
 
   // Balance each of the discovered lists.
   void balance_all_queues();
@@ -340,7 +344,6 @@
   // debugging
   void verify_no_references_recorded() PRODUCT_RETURN;
   void verify_referent(oop obj)        PRODUCT_RETURN;
-  static void verify();
 
   // clear the discovered lists (unlinking each entry).
   void clear_discovered_references() PRODUCT_RETURN;
@@ -524,12 +527,10 @@
   EnqueueTask(ReferenceProcessor& ref_processor,
               DiscoveredList      refs_lists[],
               HeapWord*           pending_list_addr,
-              oop                 sentinel_ref,
               int                 n_queues)
     : _ref_processor(ref_processor),
       _refs_lists(refs_lists),
       _pending_list_addr(pending_list_addr),
-      _sentinel_ref(sentinel_ref),
       _n_queues(n_queues)
   { }
 
@@ -540,7 +541,6 @@
   ReferenceProcessor& _ref_processor;
   DiscoveredList*     _refs_lists;
   HeapWord*           _pending_list_addr;
-  oop                 _sentinel_ref;
   int                 _n_queues;
 };
 
--- a/src/share/vm/memory/sharedHeap.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/memory/sharedHeap.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -146,7 +146,6 @@
   assert(_strong_roots_parity != 0, "must have called prologue code");
   if (!_process_strong_tasks->is_task_claimed(SH_PS_Universe_oops_do)) {
     Universe::oops_do(roots);
-    ReferenceProcessor::oops_do(roots);
     // Consider perm-gen discovered lists to be strong.
     perm_gen()->ref_processor()->weak_oops_do(roots);
   }
--- a/src/share/vm/memory/universe.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/memory/universe.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -1203,12 +1203,12 @@
   // Compute the dependent nmethods that have a reference to a
   // CallSite object.  We use instanceKlass::mark_dependent_nmethod
   // directly instead of CodeCache::mark_for_deoptimization because we
-  // want dependents on the class CallSite only not all classes in the
-  // ContextStream.
+  // want dependents on the call site class only, not all classes in
+  // the ContextStream.
   int marked = 0;
   {
     MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
-    instanceKlass* call_site_klass = instanceKlass::cast(SystemDictionary::CallSite_klass());
+    instanceKlass* call_site_klass = instanceKlass::cast(call_site->klass());
     marked = call_site_klass->mark_dependent_nmethods(changes);
   }
   if (marked > 0) {
--- a/src/share/vm/oops/constMethodKlass.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/oops/constMethodKlass.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -172,11 +172,6 @@
 int constMethodKlass::oop_update_pointers(ParCompactionManager* cm, oop obj) {
   assert(obj->is_constMethod(), "should be constMethod");
   constMethodOop cm_oop = constMethodOop(obj);
-#if 0
-  PSParallelCompact::adjust_pointer(cm_oop->adr_method());
-  PSParallelCompact::adjust_pointer(cm_oop->adr_exception_table());
-  PSParallelCompact::adjust_pointer(cm_oop->adr_stackmap_data());
-#endif
   oop* const beg_oop = cm_oop->oop_block_beg();
   oop* const end_oop = cm_oop->oop_block_end();
   for (oop* cur_oop = beg_oop; cur_oop < end_oop; ++cur_oop) {
--- a/src/share/vm/oops/cpCacheKlass.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/oops/cpCacheKlass.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -63,8 +63,10 @@
   //   CollectedHeap::permanent_obj_allocate(klass, size, CHECK_NULL);
 
   oop obj = CollectedHeap::permanent_obj_allocate_no_klass_install(klass, size, CHECK_NULL);
-  NOT_PRODUCT(Universe::heap()->check_for_bad_heap_word_value((HeapWord*) obj,
-                                                              size));
+#ifndef PRODUCT
+  const size_t hs = oopDesc::header_size();
+  Universe::heap()->check_for_bad_heap_word_value(((HeapWord*) obj)+hs, size-hs);
+#endif
   constantPoolCacheOop cache = (constantPoolCacheOop) obj;
   assert(!UseConcMarkSweepGC || obj->klass_or_null() == NULL,
          "klass should be NULL here when using CMS");
--- a/src/share/vm/oops/instanceRefKlass.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/oops/instanceRefKlass.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -56,9 +56,8 @@
   if (!oopDesc::is_null(heap_oop)) {
     oop referent = oopDesc::decode_heap_oop_not_null(heap_oop);
     if (!referent->is_gc_marked() &&
-        MarkSweep::ref_processor()->
-          discover_reference(obj, ref->reference_type())) {
-      // reference already enqueued, referent will be traversed later
+        MarkSweep::ref_processor()->discover_reference(obj, ref->reference_type())) {
+      // reference was discovered, referent will be traversed later
       ref->instanceKlass::oop_follow_contents(obj);
       debug_only(
         if(TraceReferenceGC && PrintGCDetails) {
@@ -76,8 +75,34 @@
       MarkSweep::mark_and_push(referent_addr);
     }
   }
-  // treat next as normal oop.  next is a link in the pending list.
   T* next_addr = (T*)java_lang_ref_Reference::next_addr(obj);
+  if (ReferenceProcessor::pending_list_uses_discovered_field()) {
+    // Treat discovered as normal oop, if ref is not "active",
+    // i.e. if next is non-NULL.
+    T  next_oop = oopDesc::load_heap_oop(next_addr);
+    if (!oopDesc::is_null(next_oop)) { // i.e. ref is not "active"
+      T* discovered_addr = (T*)java_lang_ref_Reference::discovered_addr(obj);
+      debug_only(
+        if(TraceReferenceGC && PrintGCDetails) {
+          gclog_or_tty->print_cr("   Process discovered as normal "
+                                 INTPTR_FORMAT, discovered_addr);
+        }
+      )
+      MarkSweep::mark_and_push(discovered_addr);
+    }
+  } else {
+#ifdef ASSERT
+    // In the case of older JDKs which do not use the discovered
+    // field for the pending list, an inactive ref (next != NULL)
+    // must always have a NULL discovered field.
+    oop next = oopDesc::load_decode_heap_oop(next_addr);
+    oop discovered = java_lang_ref_Reference::discovered(obj);
+    assert(oopDesc::is_null(next) || oopDesc::is_null(discovered),
+           err_msg("Found an inactive reference " PTR_FORMAT " with a non-NULL discovered field",
+                   obj));
+#endif
+  }
+  // treat next as normal oop.  next is a link in the reference queue.
   debug_only(
     if(TraceReferenceGC && PrintGCDetails) {
       gclog_or_tty->print_cr("   Process next as normal " INTPTR_FORMAT, next_addr);
@@ -130,13 +155,33 @@
       PSParallelCompact::mark_and_push(cm, referent_addr);
     }
   }
-  // treat next as normal oop.  next is a link in the pending list.
   T* next_addr = (T*)java_lang_ref_Reference::next_addr(obj);
-  debug_only(
-    if(TraceReferenceGC && PrintGCDetails) {
-      gclog_or_tty->print_cr("   Process next as normal " INTPTR_FORMAT, next_addr);
+  if (ReferenceProcessor::pending_list_uses_discovered_field()) {
+    // Treat discovered as normal oop, if ref is not "active",
+    // i.e. if next is non-NULL.
+    T  next_oop = oopDesc::load_heap_oop(next_addr);
+    if (!oopDesc::is_null(next_oop)) { // i.e. ref is not "active"
+      T* discovered_addr = (T*)java_lang_ref_Reference::discovered_addr(obj);
+      debug_only(
+        if(TraceReferenceGC && PrintGCDetails) {
+          gclog_or_tty->print_cr("   Process discovered as normal "
+                                 INTPTR_FORMAT, discovered_addr);
+        }
+      )
+      PSParallelCompact::mark_and_push(cm, discovered_addr);
     }
-  )
+  } else {
+#ifdef ASSERT
+    // In the case of older JDKs which do not use the discovered
+    // field for the pending list, an inactive ref (next != NULL)
+    // must always have a NULL discovered field.
+    T next = oopDesc::load_heap_oop(next_addr);
+    oop discovered = java_lang_ref_Reference::discovered(obj);
+    assert(oopDesc::is_null(next) || oopDesc::is_null(discovered),
+           err_msg("Found an inactive reference " PTR_FORMAT " with a non-NULL discovered field",
+                   obj));
+#endif
+  }
   PSParallelCompact::mark_and_push(cm, next_addr);
   ref->instanceKlass::oop_follow_contents(cm, obj);
 }
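
Both follow-contents paths apply the same rule: once a Reference is inactive (next != NULL), its discovered field may be a pending-list link and must be traced like any other oop; on pre-fix JDKs an inactive ref must instead have a NULL discovered field. A standalone model of that decision (Ref, mark and uses_discovered_field are illustrative stand-ins for the oop fields, mark_and_push and pending_list_uses_discovered_field()):

    #include <cassert>
    #include <cstddef>

    struct Ref { Ref* next; Ref* discovered; };

    static bool uses_discovered_field = true; // per-JDK, sampled at startup
    static int  marked = 0;
    static void mark(Ref** slot) { if (*slot != NULL) marked++; }

    static void follow_reference(Ref* obj) {
      if (uses_discovered_field) {
        // Inactive ref (next != NULL): discovered may be a pending-list
        // link and must be treated as a normal oop.
        if (obj->next != NULL) {
          mark(&obj->discovered);
        }
      } else {
        // Older JDKs: an inactive ref never has a non-NULL discovered field.
        assert(obj->next == NULL || obj->discovered == NULL);
      }
      mark(&obj->next); // next is always treated as a normal oop
    }

    int main() {
      Ref pending = { NULL, NULL };
      pending.next = &pending;       // self-looped: inactive
      pending.discovered = &pending; // linked into the pending list
      follow_reference(&pending);
      assert(marked == 2);           // both next and discovered traced
      return 0;
    }
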
@@ -197,27 +242,53 @@
 }
 
 #define InstanceRefKlass_SPECIALIZED_OOP_ITERATE(T, nv_suffix, contains)        \
+  T* disc_addr = (T*)java_lang_ref_Reference::discovered_addr(obj);             \
   if (closure->apply_to_weak_ref_discovered_field()) {                          \
-    T* disc_addr = (T*)java_lang_ref_Reference::discovered_addr(obj);           \
     closure->do_oop##nv_suffix(disc_addr);                                      \
   }                                                                             \
                                                                                 \
   T* referent_addr = (T*)java_lang_ref_Reference::referent_addr(obj);           \
   T heap_oop = oopDesc::load_heap_oop(referent_addr);                           \
-  if (!oopDesc::is_null(heap_oop) && contains(referent_addr)) {                 \
-    ReferenceProcessor* rp = closure->_ref_processor;                           \
+  ReferenceProcessor* rp = closure->_ref_processor;                             \
+  if (!oopDesc::is_null(heap_oop)) {                                            \
     oop referent = oopDesc::decode_heap_oop_not_null(heap_oop);                 \
     if (!referent->is_gc_marked() && (rp != NULL) &&                            \
         rp->discover_reference(obj, reference_type())) {                        \
       return size;                                                              \
-    } else {                                                                    \
+    } else if (contains(referent_addr)) {                                       \
       /* treat referent as normal oop */                                        \
       SpecializationStats::record_do_oop_call##nv_suffix(SpecializationStats::irk);\
       closure->do_oop##nv_suffix(referent_addr);                                \
     }                                                                           \
   }                                                                             \
+  T* next_addr = (T*)java_lang_ref_Reference::next_addr(obj);                   \
+  if (ReferenceProcessor::pending_list_uses_discovered_field()) {               \
+    T next_oop  = oopDesc::load_heap_oop(next_addr);                            \
+    /* Treat discovered as normal oop, if ref is not "active" (next non-NULL) */\
+    if (!oopDesc::is_null(next_oop) && contains(disc_addr)) {                   \
+      /* i.e. ref is not "active" */                                            \
+      debug_only(                                                               \
+        if(TraceReferenceGC && PrintGCDetails) {                                \
+          gclog_or_tty->print_cr("   Process discovered as normal "             \
+                                 INTPTR_FORMAT, disc_addr);                     \
+        }                                                                       \
+      )                                                                         \
+      SpecializationStats::record_do_oop_call##nv_suffix(SpecializationStats::irk);\
+      closure->do_oop##nv_suffix(disc_addr);                                    \
+    }                                                                           \
+  } else {                                                                      \
+    /* In the case of older JDKs which do not use the discovered field for  */  \
+    /* the pending list, an inactive ref (next != NULL) must always have a  */  \
+    /* NULL discovered field. */                                                \
+    debug_only(                                                                 \
+      T next_oop = oopDesc::load_heap_oop(next_addr);                           \
+      T disc_oop = oopDesc::load_heap_oop(disc_addr);                           \
+      assert(oopDesc::is_null(next_oop) || oopDesc::is_null(disc_oop),          \
+           err_msg("Found an inactive reference " PTR_FORMAT " with a non-NULL" \
+                   " discovered field", obj));                                  \
+    )                                                                           \
+  }                                                                             \
   /* treat next as normal oop */                                                \
-  T* next_addr = (T*)java_lang_ref_Reference::next_addr(obj);                   \
   if (contains(next_addr)) {                                                    \
     SpecializationStats::record_do_oop_call##nv_suffix(SpecializationStats::irk); \
     closure->do_oop##nv_suffix(next_addr);                                      \
@@ -306,8 +377,37 @@
       pm->claim_or_forward_depth(referent_addr);
     }
   }
-  // treat next as normal oop
+  // Treat discovered as normal oop, if ref is not "active",
+  // i.e. if next is non-NULL.
   T* next_addr = (T*)java_lang_ref_Reference::next_addr(obj);
+  if (ReferenceProcessor::pending_list_uses_discovered_field()) {
+    T  next_oop = oopDesc::load_heap_oop(next_addr);
+    if (!oopDesc::is_null(next_oop)) { // i.e. ref is not "active"
+      T* discovered_addr = (T*)java_lang_ref_Reference::discovered_addr(obj);
+      debug_only(
+        if(TraceReferenceGC && PrintGCDetails) {
+          gclog_or_tty->print_cr("   Process discovered as normal "
+                                 INTPTR_FORMAT, discovered_addr);
+        }
+      )
+      if (PSScavenge::should_scavenge(discovered_addr)) {
+        pm->claim_or_forward_depth(discovered_addr);
+      }
+    }
+  } else {
+#ifdef ASSERT
+    // In the case of older JDKs which do not use the discovered
+    // field for the pending list, an inactive ref (next != NULL)
+    // must always have a NULL discovered field.
+    oop next = oopDesc::load_decode_heap_oop(next_addr);
+    oop discovered = java_lang_ref_Reference::discovered(obj);
+    assert(oopDesc::is_null(next) || oopDesc::is_null(discovered),
+           err_msg("Found an inactive reference " PTR_FORMAT " with a non-NULL discovered field",
+                   obj));
+#endif
+  }
+
+  // Treat next as normal oop;  next is a link in the reference queue.
   if (PSScavenge::should_scavenge(next_addr)) {
     pm->claim_or_forward_depth(next_addr);
   }
--- a/src/share/vm/oops/methodDataOop.hpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/oops/methodDataOop.hpp	Fri Sep 09 16:17:16 2011 -0700
@@ -600,6 +600,11 @@
   uint taken() {
     return uint_at(taken_off_set);
   }
+
+  void set_taken(uint cnt) {
+    set_uint_at(taken_off_set, cnt);
+  }
+
   // Saturating counter
   uint inc_taken() {
     uint cnt = taken() + 1;
@@ -926,6 +931,10 @@
     return uint_at(not_taken_off_set);
   }
 
+  void set_not_taken(uint cnt) {
+    set_uint_at(not_taken_off_set, cnt);
+  }
+
   uint inc_not_taken() {
     uint cnt = not_taken() + 1;
     // Did we wrap? Will compiler screw us??
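
The new setters expose the raw counter cells that inc_taken/inc_not_taken otherwise only move upward; the increments saturate by pinning a wrapped counter back at its maximum. A standalone model of the saturating increment (Counter is an illustrative stand-in for the profile-data cell accessors):

    #include <cassert>

    typedef unsigned int uint;

    struct Counter {
      uint cell;
      uint taken() const     { return cell; }
      void set_taken(uint c) { cell = c; }

      // Saturating counter: a wrapped increment is pinned at the maximum.
      uint inc_taken() {
        uint cnt = taken() + 1;
        if (cnt == 0) cnt--;  // did we wrap? stay at the maximum
        set_taken(cnt);
        return cnt;
      }
    };

    int main() {
      Counter c = { 0 };
      c.inc_taken();
      assert(c.taken() == 1);
      c.set_taken(~0u);             // force the counter to its maximum
      assert(c.inc_taken() == ~0u); // saturates instead of wrapping to 0
      return 0;
    }
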
--- a/src/share/vm/oops/methodOop.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/oops/methodOop.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -914,6 +914,7 @@
                                                Symbol* name,
                                                Symbol* signature,
                                                Handle method_type, TRAPS) {
+  ResourceMark rm;
   methodHandle empty;
 
   assert(holder() == SystemDictionary::MethodHandle_klass(),
--- a/src/share/vm/opto/bytecodeInfo.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/opto/bytecodeInfo.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -45,7 +45,7 @@
   _method(callee),
   _site_invoke_ratio(site_invoke_ratio),
   _max_inline_level(max_inline_level),
-  _count_inline_bcs(method()->code_size())
+  _count_inline_bcs(method()->code_size_for_inlining())
 {
   NOT_PRODUCT(_count_inlines = 0;)
   if (_caller_jvms != NULL) {
@@ -107,7 +107,7 @@
 
   // positive filter: should send be inlined?  returns NULL (--> yes)
   // or rejection msg
-  int size = callee_method->code_size();
+  int size = callee_method->code_size_for_inlining();
 
   // Check for too many throws (and not too huge)
   if(callee_method->interpreter_throwout_count() > InlineThrowCount &&
@@ -141,7 +141,21 @@
     assert(mha_profile, "must exist");
     CounterData* cd = mha_profile->as_CounterData();
     invoke_count = cd->count();
-    call_site_count = invoke_count;  // use the same value
+    if (invoke_count == 0) {
+      return "method handle not reached";
+    }
+
+    if (_caller_jvms != NULL && _caller_jvms->method() != NULL &&
+        _caller_jvms->method()->method_data() != NULL &&
+        !_caller_jvms->method()->method_data()->is_empty()) {
+      ciMethodData* mdo = _caller_jvms->method()->method_data();
+      ciProfileData* mha_profile = mdo->bci_to_data(_caller_jvms->bci());
+      assert(mha_profile, "must exist");
+      CounterData* cd = mha_profile->as_CounterData();
+      call_site_count = cd->count();
+    } else {
+      call_site_count = invoke_count;  // use the same value
+    }
   }
 
   assert(invoke_count != 0, "require invocation count greater than zero");
@@ -244,7 +258,7 @@
   }
 
   // use frequency-based objections only for non-trivial methods
-  if (callee_method->code_size() <= MaxTrivialSize) return NULL;
+  if (callee_method->code_size_for_inlining() <= MaxTrivialSize) return NULL;
 
   // don't use counts with -Xcomp or CTW
   if (UseInterpreter && !CompileTheWorld) {
@@ -305,7 +319,7 @@
   }
 
   // suppress a few checks for accessors and trivial methods
-  if (callee_method->code_size() > MaxTrivialSize) {
+  if (callee_method->code_size_for_inlining() > MaxTrivialSize) {
 
     // don't inline into giant methods
     if (C->unique() > (uint)NodeCountInliningCutoff) {
@@ -349,7 +363,7 @@
     }
   }
 
-  int size = callee_method->code_size();
+  int size = callee_method->code_size_for_inlining();
 
   if (UseOldInlining && ClipInlining
       && (int)count_inline_bcs() + size >= DesiredMethodLimit) {
@@ -394,6 +408,16 @@
   return true;
 }
 
+//------------------------------check_can_parse--------------------------------
+const char* InlineTree::check_can_parse(ciMethod* callee) {
+  // Certain methods cannot be parsed at all:
+  if ( callee->is_native())                     return "native method";
+  if (!callee->can_be_compiled())               return "not compilable (disabled)";
+  if (!callee->has_balanced_monitors())         return "not compilable (unbalanced monitors)";
+  if ( callee->get_flow_analysis()->failing())  return "not compilable (flow analysis failed)";
+  return NULL;
+}
+
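
check_can_parse centralizes the "can this callee be parsed at all" veto so that the inliner and the ParseGenerator assertion share one answer, using the file's convention that NULL means success and a non-NULL string is the rejection message. A standalone model of that convention (Callee and its fields are illustrative stand-ins for the ciMethod queries):

    #include <cassert>
    #include <cstddef>

    // Illustrative callee attributes standing in for ciMethod queries.
    struct Callee {
      bool is_native;
      bool can_be_compiled;
      bool has_balanced_monitors;
      bool flow_analysis_failing;
    };

    // NULL => parseable; otherwise a human-readable rejection message.
    static const char* check_can_parse(const Callee& m) {
      if ( m.is_native)             return "native method";
      if (!m.can_be_compiled)       return "not compilable (disabled)";
      if (!m.has_balanced_monitors) return "not compilable (unbalanced monitors)";
      if ( m.flow_analysis_failing) return "not compilable (flow analysis failed)";
      return NULL;
    }

    int main() {
      Callee ok     = { false, true, true, false };
      Callee native = { true,  true, true, false };
      assert(check_can_parse(ok) == NULL);
      assert(check_can_parse(native) != NULL);
      return 0;
    }
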
 //------------------------------print_inlining---------------------------------
 // Really, the failure_msg can be a success message also.
 void InlineTree::print_inlining(ciMethod* callee_method, int caller_bci, const char* failure_msg) const {
@@ -423,14 +447,22 @@
   int         caller_bci    = jvms->bci();
   ciMethod   *caller_method = jvms->method();
 
-  if( !pass_initial_checks(caller_method, caller_bci, callee_method)) {
-    if( PrintInlining ) {
+  // Do some initial checks.
+  if (!pass_initial_checks(caller_method, caller_bci, callee_method)) {
+    if (PrintInlining) {
       failure_msg = "failed_initial_checks";
-      print_inlining( callee_method, caller_bci, failure_msg);
+      print_inlining(callee_method, caller_bci, failure_msg);
     }
     return NULL;
   }
 
+  // Do some parse checks.
+  failure_msg = check_can_parse(callee_method);
+  if (failure_msg != NULL) {
+    if (PrintInlining)  print_inlining(callee_method, caller_bci, failure_msg);
+    return NULL;
+  }
+
   // Check if inlining policy says no.
   WarmCallInfo wci = *(initial_wci);
   failure_msg = try_to_inline(callee_method, caller_method, caller_bci, profile, &wci);
@@ -471,7 +503,7 @@
     if (failure_msg == NULL)  failure_msg = "inline (hot)";
 
     // Inline!
-    if( PrintInlining ) print_inlining( callee_method, caller_bci, failure_msg);
+    if (PrintInlining)  print_inlining(callee_method, caller_bci, failure_msg);
     if (UseOldInlining)
       build_inline_tree_for_callee(callee_method, jvms, caller_bci);
     if (InlineWarmCalls && !wci.is_hot())
@@ -481,7 +513,7 @@
 
   // Do not inline
   if (failure_msg == NULL)  failure_msg = "too cold to inline";
-  if( PrintInlining ) print_inlining( callee_method, caller_bci, failure_msg);
+  if (PrintInlining)  print_inlining(callee_method, caller_bci, failure_msg);
   return NULL;
 }
 
--- a/src/share/vm/opto/callGenerator.cpp	Thu Sep 08 16:59:27 2011 -0700
+++ b/src/share/vm/opto/callGenerator.cpp	Fri Sep 09 16:17:16 2011 -0700
@@ -61,12 +61,9 @@
   {
     _is_osr        = is_osr;
     _expected_uses = expected_uses;
-    assert(can_parse(method, is_osr), "parse must be possible");
+    assert(InlineTree::check_can_parse(method) == NULL, "parse must be possible");
   }
 
-  // Can we build either an OSR or a regular parser for this method?
-  static bool can_parse(ciMethod* method, int is_osr = false);
-
   virtual bool      is_parse() const           { return true; }
   virtual JVMState* generate(JVMState* jvms);
   int is_osr() { return _is_osr; }
@@ -152,7 +149,6 @@
     call->set_optimized_virtual(true);
     if (method()->is_method_handle_invoke()) {