changeset 879:494244ae0171

Merge
author coleenp
date Mon, 27 Jul 2009 17:23:52 -0400
parents abe076e3636f 0a83664f978b
children 16c930df1e9b
files src/share/vm/classfile/classFileParser.cpp src/share/vm/includeDB_core src/share/vm/oops/objArrayKlass.cpp
diffstat 141 files changed, 3996 insertions(+), 2121 deletions(-) [+]
line wrap: on
line diff
--- a/.hgtags	Mon Jul 27 09:06:22 2009 -0700
+++ b/.hgtags	Mon Jul 27 17:23:52 2009 -0400
@@ -35,3 +35,7 @@
 53d9bf689e80fcc76b221bbe6c5d58e08b80cbc6 jdk7-b58
 c55be0c7bd32c016c52218eb4c8b5da8a75450b5 jdk7-b59
 a77eddcd510c3972717c025cfcef9a60bfa4ecac jdk7-b60
+27b728fd1281ab62e9d7e4424f8bbb6ca438d803 jdk7-b61
+a88386380bdaaa5ab4ffbedf22c57bac5dbec034 jdk7-b62
+32c83fb84370a35344676991a48440378e6b6c8a jdk7-b63
+ba36394eb84b949b31212bdb32a518a8f92bab5b jdk7-b64
--- a/agent/src/share/classes/sun/jvm/hotspot/code/DebugInfoReadStream.java	Mon Jul 27 09:06:22 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/DebugInfoReadStream.java	Mon Jul 27 17:23:52 2009 -0400
@@ -24,23 +24,64 @@
 
 package sun.jvm.hotspot.code;
 
+import java.util.*;
+
 import sun.jvm.hotspot.debugger.*;
 import sun.jvm.hotspot.runtime.VM;
+import sun.jvm.hotspot.utilities.*;
 
 public class DebugInfoReadStream extends CompressedReadStream {
   private NMethod code;
   private int InvocationEntryBCI;
+  private List objectPool; // ArrayList<ObjectValue>
 
   public DebugInfoReadStream(NMethod code, int offset) {
     super(code.scopesDataBegin(), offset);
     InvocationEntryBCI = VM.getVM().getInvocationEntryBCI();
     this.code = code;
+    this.objectPool = null;
+  }
+
+  public DebugInfoReadStream(NMethod code, int offset, List objectPool) {
+    super(code.scopesDataBegin(), offset);
+    InvocationEntryBCI = VM.getVM().getInvocationEntryBCI();
+    this.code = code;
+    this.objectPool = objectPool;
   }
 
   public OopHandle readOopHandle() {
     return code.getOopAt(readInt());
   }
 
+  ScopeValue readObjectValue() {
+    int id = readInt();
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(objectPool != null, "object pool does not exist");
+      for (Iterator itr = objectPool.iterator(); itr.hasNext();) {
+        ObjectValue ov = (ObjectValue) itr.next();
+        Assert.that(ov.id() != id, "should not be read twice");
+      }
+    }
+    ObjectValue result = new ObjectValue(id);
+    // Cache the object since an object field could reference it.
+    objectPool.add(result);
+    result.readObject(this);
+    return result;
+  }
+
+  ScopeValue getCachedObject() {
+    int id = readInt();
+    Assert.that(objectPool != null, "object pool does not exist");
+    for (Iterator itr = objectPool.iterator(); itr.hasNext();) {
+      ObjectValue ov = (ObjectValue) itr.next();
+      if (ov.id() == id) {
+        return ov;
+      }
+    }
+    Assert.that(false, "should not reach here");
+    return null;
+  }
+
   public int readBCI() {
     return readInt() + InvocationEntryBCI;
   }
--- a/agent/src/share/classes/sun/jvm/hotspot/code/MonitorValue.java	Mon Jul 27 09:06:22 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/MonitorValue.java	Mon Jul 27 17:23:52 2009 -0400
@@ -29,6 +29,7 @@
 public class MonitorValue {
   private ScopeValue owner;
   private Location   basicLock;
+  private boolean    eliminated;
 
   // FIXME: not useful yet
   //  MonitorValue(ScopeValue* owner, Location basic_lock);
@@ -36,10 +37,12 @@
   public MonitorValue(DebugInfoReadStream stream) {
     basicLock = new Location(stream);
     owner     = ScopeValue.readFrom(stream);
+    eliminated= stream.readBoolean();
   }
 
   public ScopeValue owner()     { return owner; }
   public Location   basicLock() { return basicLock; }
+  public boolean   eliminated() { return eliminated; }
 
   // FIXME: not yet implementable
   //  void write_on(DebugInfoWriteStream* stream);
@@ -50,5 +53,8 @@
     tty.print(",");
     basicLock().printOn(tty);
     tty.print("}");
+    if (eliminated) {
+      tty.print(" (eliminated)");
+    }
   }
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/ObjectValue.java	Mon Jul 27 17:23:52 2009 -0400
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+package sun.jvm.hotspot.code;
+
+import java.io.*;
+import java.util.*;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.utilities.*;
+
+/** An ObjectValue describes an object eliminated by escape analysis. */
+
+public class ObjectValue extends ScopeValue {
+  private int        id;
+  private ScopeValue klass;
+  private List       fieldsValue; // ArrayList<ScopeValue>
+
+  // Field "boolean visited" is not implemented here since
+  // it is used only a during debug info creation.
+
+  public ObjectValue(int id) {
+    this.id = id;
+    klass   = null;
+    fieldsValue = new ArrayList();
+  }
+
+  public boolean isObject() { return true; }
+  public int id() { return id; }
+  public ScopeValue getKlass() { return klass; }
+  public List getFieldsValue() { return fieldsValue; }
+  public ScopeValue getFieldAt(int i) { return (ScopeValue)fieldsValue.get(i); }
+  public int fieldsSize() { return fieldsValue.size(); }
+
+  // Field "value" is always NULL here since it is used
+  // only during deoptimization of a compiled frame
+  // pointing to reallocated object.
+  public OopHandle getValue() { return null; }
+
+  /** Serialization of debugging information */
+
+  void readObject(DebugInfoReadStream stream) {
+    klass = readFrom(stream);
+    Assert.that(klass.isConstantOop(), "should be constant klass oop");
+    int length = stream.readInt();
+    for (int i = 0; i < length; i++) {
+      ScopeValue val = readFrom(stream);
+      fieldsValue.add(val);
+    }
+  }
+
+  // Printing
+
+  public void print() {
+    printOn(System.out);
+  }
+
+  public void printOn(PrintStream tty) {
+    tty.print("scalarObj[" + id + "]");
+  }
+
+  void printFieldsOn(PrintStream tty) {
+    if (fieldsValue.size() > 0) {
+      ((ScopeValue)fieldsValue.get(0)).printOn(tty);
+    }
+    for (int i = 1; i < fieldsValue.size(); i++) {
+      tty.print(", ");
+      ((ScopeValue)fieldsValue.get(i)).printOn(tty);
+    }
+  }
+
+};
--- a/agent/src/share/classes/sun/jvm/hotspot/code/ScopeDesc.java	Mon Jul 27 09:06:22 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/ScopeDesc.java	Mon Jul 27 17:23:52 2009 -0400
@@ -27,8 +27,10 @@
 import java.io.*;
 import java.util.*;
 
+import sun.jvm.hotspot.debugger.*;
 import sun.jvm.hotspot.oops.*;
 import sun.jvm.hotspot.runtime.*;
+import sun.jvm.hotspot.utilities.*;
 
 /** ScopeDescs contain the information that makes source-level
     debugging of nmethods possible; each scopeDesc describes a method
@@ -45,10 +47,31 @@
   private int     localsDecodeOffset;
   private int     expressionsDecodeOffset;
   private int     monitorsDecodeOffset;
+  /** Scalar replaced bjects pool */
+  private List    objects; // ArrayList<ScopeValue>
+
 
   public ScopeDesc(NMethod code, int decodeOffset) {
     this.code = code;
     this.decodeOffset = decodeOffset;
+    this.objects      = decodeObjectValues(DebugInformationRecorder.SERIALIZED_NULL);
+
+    // Decode header
+    DebugInfoReadStream stream  = streamAt(decodeOffset);
+
+    senderDecodeOffset = stream.readInt();
+    method = (Method) VM.getVM().getObjectHeap().newOop(stream.readOopHandle());
+    bci    = stream.readBCI();
+    // Decode offsets for body and sender
+    localsDecodeOffset      = stream.readInt();
+    expressionsDecodeOffset = stream.readInt();
+    monitorsDecodeOffset    = stream.readInt();
+  }
+
+  public ScopeDesc(NMethod code, int decodeOffset, int objectDecodeOffset) {
+    this.code = code;
+    this.decodeOffset = decodeOffset;
+    this.objects      = decodeObjectValues(objectDecodeOffset);
 
     // Decode header
     DebugInfoReadStream stream  = streamAt(decodeOffset);
@@ -81,6 +104,11 @@
     return decodeMonitorValues(monitorsDecodeOffset);
   }
 
+  /** Returns a List&lt;MonitorValue&gt; */
+  public List getObjects() {
+    return objects;
+  }
+
   /** Stack walking. Returns null if this is the outermost scope. */
   public ScopeDesc sender() {
     if (isTop()) {
@@ -131,7 +159,7 @@
   //
 
   private DebugInfoReadStream streamAt(int decodeOffset) {
-    return new DebugInfoReadStream(code, decodeOffset);
+    return new DebugInfoReadStream(code, decodeOffset, objects);
   }
 
   /** Returns a List&lt;ScopeValue&gt; or null if no values were present */
@@ -161,4 +189,22 @@
     }
     return res;
   }
+
+  /** Returns a List&lt;ObjectValue&gt; or null if no values were present */
+  private List decodeObjectValues(int decodeOffset) {
+    if (decodeOffset == DebugInformationRecorder.SERIALIZED_NULL) {
+      return null;
+    }
+    List res = new ArrayList();
+    DebugInfoReadStream stream = new DebugInfoReadStream(code, decodeOffset, res);
+    int length = stream.readInt();
+    for (int i = 0; i < length; i++) {
+      // Objects values are pushed to 'res' array during read so that
+      // object's fields could reference it (OBJECT_ID_CODE).
+      ScopeValue.readFrom(stream);
+      // res.add(ScopeValue.readFrom(stream));
+    }
+    Assert.that(res.size() == length, "inconsistent debug information");
+    return res;
+  }
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/code/ScopeValue.java	Mon Jul 27 09:06:22 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/ScopeValue.java	Mon Jul 27 17:23:52 2009 -0400
@@ -49,12 +49,15 @@
   static final int CONSTANT_OOP_CODE    = 2;
   static final int CONSTANT_LONG_CODE   = 3;
   static final int CONSTANT_DOUBLE_CODE = 4;
+  static final int CONSTANT_OBJECT_CODE = 5;
+  static final int CONSTANT_OBJECT_ID_CODE = 6;
 
   public boolean isLocation()       { return false; }
   public boolean isConstantInt()    { return false; }
   public boolean isConstantDouble() { return false; }
   public boolean isConstantLong()   { return false; }
   public boolean isConstantOop()    { return false; }
+  public boolean isObject()         { return false; }
 
   public static ScopeValue readFrom(DebugInfoReadStream stream) {
     switch (stream.readInt()) {
@@ -68,6 +71,10 @@
       return new ConstantLongValue(stream);
     case CONSTANT_DOUBLE_CODE:
       return new ConstantDoubleValue(stream);
+    case CONSTANT_OBJECT_CODE:
+      return stream.readObjectValue();
+    case CONSTANT_OBJECT_ID_CODE:
+      return stream.getCachedObject();
     default:
       Assert.that(false, "should not reach here");
       return null;
--- a/agent/src/share/classes/sun/jvm/hotspot/jdi/ObjectReferenceImpl.java	Mon Jul 27 09:06:22 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/jdi/ObjectReferenceImpl.java	Mon Jul 27 17:23:52 2009 -0400
@@ -249,6 +249,7 @@
             OopHandle givenHandle = obj.getHandle();
             for (Iterator itr = monitors.iterator(); itr.hasNext();) {
                 MonitorInfo mi = (MonitorInfo) itr.next();
+                if (mi.eliminated() && frame.isCompiledFrame()) continue; // skip eliminated monitor
                 if (givenHandle.equals(mi.owner())) {
                     res++;
                 }
--- a/agent/src/share/classes/sun/jvm/hotspot/jdi/ThreadReferenceImpl.java	Mon Jul 27 09:06:22 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/jdi/ThreadReferenceImpl.java	Mon Jul 27 17:23:52 2009 -0400
@@ -301,6 +301,9 @@
             List frameMonitors = frame.getMonitors();  // List<MonitorInfo>
             for (Iterator miItr = frameMonitors.iterator(); miItr.hasNext(); ) {
                 sun.jvm.hotspot.runtime.MonitorInfo mi = (sun.jvm.hotspot.runtime.MonitorInfo) miItr.next();
+                if (mi.eliminated() && frame.isCompiledFrame()) {
+                  continue; // skip eliminated monitor
+                }
                 OopHandle obj = mi.owner();
                 if (obj == null) {
                     // this monitor doesn't have an owning object so skip it
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/CompiledVFrame.java	Mon Jul 27 09:06:22 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/CompiledVFrame.java	Mon Jul 27 17:23:52 2009 -0400
@@ -131,8 +131,18 @@
     List result = new ArrayList(monitors.size());
     for (int i = 0; i < monitors.size(); i++) {
       MonitorValue mv = (MonitorValue) monitors.get(i);
-      StackValue ownerSV = createStackValue(mv.owner()); // it is an oop
-      result.add(new MonitorInfo(ownerSV.getObject(), resolveMonitorLock(mv.basicLock())));
+      ScopeValue ov = mv.owner();
+      StackValue ownerSV = createStackValue(ov); // it is an oop
+      if (ov.isObject()) { // The owner object was scalar replaced
+        Assert.that(mv.eliminated() && ownerSV.objIsScalarReplaced(), "monitor should be eliminated for scalar replaced object");
+        // Put klass for scalar replaced object.
+        ScopeValue kv = ((ObjectValue)ov).getKlass();
+        Assert.that(kv.isConstantOop(), "klass should be oop constant for scalar replaced object");
+        OopHandle k = ((ConstantOopReadValue)kv).getValue();
+        result.add(new MonitorInfo(k, resolveMonitorLock(mv.basicLock()), mv.eliminated(), true));
+      } else {
+        result.add(new MonitorInfo(ownerSV.getObject(), resolveMonitorLock(mv.basicLock()), mv.eliminated(), false));
+      }
     }
     return result;
   }
@@ -212,12 +222,12 @@
           // long or is unused.  He always saves a long.  Here we know
           // a long was saved, but we only want an narrow oop back.  Narrow the
           // saved long to the narrow oop that the JVM wants.
-          return new StackValue(valueAddr.getCompOopHandleAt(VM.getVM().getIntSize()));
+          return new StackValue(valueAddr.getCompOopHandleAt(VM.getVM().getIntSize()), 0);
         } else {
-          return new StackValue(valueAddr.getCompOopHandleAt(0));
+          return new StackValue(valueAddr.getCompOopHandleAt(0), 0);
         }
       } else if( loc.holdsOop() ) {  // Holds an oop?
-        return new StackValue(valueAddr.getOopHandleAt(0));
+        return new StackValue(valueAddr.getOopHandleAt(0), 0);
       } else if( loc.holdsDouble() ) {
         // Double value in a single stack slot
         return new StackValue(valueAddr.getJIntAt(0) & 0xFFFFFFFF);
@@ -277,7 +287,7 @@
       return new StackValue(((ConstantIntValue) sv).getValue() & 0xFFFFFFFF);
     } else if (sv.isConstantOop()) {
       // constant oop
-      return new StackValue(((ConstantOopReadValue) sv).getValue());
+      return new StackValue(((ConstantOopReadValue) sv).getValue(), 0);
     } else if (sv.isConstantDouble()) {
       // Constant double in a single stack slot
       double d = ((ConstantDoubleValue) sv).getValue();
@@ -285,6 +295,9 @@
     } else if (VM.getVM().isLP64() && sv.isConstantLong()) {
       // Constant long in a single stack slot
       return new StackValue(((ConstantLongValue) sv).getValue() & 0xFFFFFFFF);
+    } else if (sv.isObject()) {
+      // Scalar replaced object in compiled frame
+      return new StackValue(((ObjectValue)sv).getValue(), 1);
     }
 
     // Unknown ScopeValue type
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/InterpretedVFrame.java	Mon Jul 27 09:06:22 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/InterpretedVFrame.java	Mon Jul 27 17:23:52 2009 -0400
@@ -61,7 +61,7 @@
       StackValue sv;
       if (oopMask.isOop(i)) {
         // oop value
-        sv = new StackValue(addr.getOopHandleAt(0));
+        sv = new StackValue(addr.getOopHandleAt(0), 0);
       } else {
         // integer
         // Fetch a signed integer the size of a stack slot
@@ -95,7 +95,7 @@
       StackValue sv;
       if (oopMask.isOop(i + nofLocals)) {
         // oop value
-        sv = new StackValue(addr.getOopHandleAt(0));
+        sv = new StackValue(addr.getOopHandleAt(0), 0);
       } else {
         // integer
         // Fetch a signed integer the size of a stack slot
@@ -113,7 +113,7 @@
     for (BasicObjectLock current = getFrame().interpreterFrameMonitorEnd();
          current.address().lessThan(getFrame().interpreterFrameMonitorBegin().address());
          current = getFrame().nextMonitorInInterpreterFrame(current)) {
-      result.add(new MonitorInfo(current.obj(), current.lock()));
+      result.add(new MonitorInfo(current.obj(), current.lock(), false, false));
     }
     return result;
   }
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/MonitorInfo.java	Mon Jul 27 09:06:22 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/MonitorInfo.java	Mon Jul 27 17:23:52 2009 -0400
@@ -25,16 +25,39 @@
 package sun.jvm.hotspot.runtime;
 
 import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.utilities.*;
 
 public class MonitorInfo {
   private OopHandle owner;
   private BasicLock lock;
+  private OopHandle ownerKlass;
+  private boolean eliminated;
+  private boolean ownerIsScalarReplaced;
 
-  public MonitorInfo(OopHandle owner, BasicLock lock) {
-    this.owner = owner;
-    this.lock  = lock;
+  public MonitorInfo(OopHandle owner, BasicLock lock, boolean eliminated, boolean ownerIsScalarReplaced) {
+    if (!ownerIsScalarReplaced) {
+      this.owner = owner;
+      this.ownerKlass = null;
+    } else {
+      Assert.that(eliminated, "monitor should be eliminated for scalar replaced object");
+      this.owner = null;
+      this.ownerKlass = owner;
+    }
+    this.eliminated = eliminated;
+    this.ownerIsScalarReplaced = ownerIsScalarReplaced;
   }
 
-  public OopHandle owner() { return owner; }
+  public OopHandle owner() {
+   Assert.that(!ownerIsScalarReplaced, "should not be called for scalar replaced object");
+   return owner;
+  }
+
+  public OopHandle ownerKlass() {
+   Assert.that(ownerIsScalarReplaced, "should not be called for not scalar replaced object");
+   return ownerKlass;
+  }
+
   public BasicLock lock()  { return lock; }
+  public boolean eliminated() { return eliminated; }
+  public boolean ownerIsScalarReplaced() { return ownerIsScalarReplaced; }
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/StackValue.java	Mon Jul 27 09:06:22 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/StackValue.java	Mon Jul 27 17:23:52 2009 -0400
@@ -37,9 +37,11 @@
     type = BasicType.getTConflict();
   }
 
-  public StackValue(OopHandle h) {
+  public StackValue(OopHandle h, long scalar_replaced) {
     handleValue = h;
     type = BasicType.getTObject();
+    integerValue = scalar_replaced;
+    Assert.that(integerValue == 0 || handleValue == null, "not null object should not be marked as scalar replaced");
   }
 
   public StackValue(long i) {
@@ -59,6 +61,13 @@
     return handleValue;
   }
 
+  boolean objIsScalarReplaced() {
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(type == BasicType.getTObject(), "type check");
+    }
+    return integerValue != 0;
+  }
+
   public long getInteger() {
     if (Assert.ASSERTS_ENABLED) {
       Assert.that(type == BasicType.getTInt(), "type check");
--- a/agent/src/share/classes/sun/jvm/hotspot/ui/tree/OopTreeNodeAdapter.java	Mon Jul 27 09:06:22 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/ui/tree/OopTreeNodeAdapter.java	Mon Jul 27 17:23:52 2009 -0400
@@ -161,6 +161,8 @@
           child = new OopTreeNodeAdapter(field.getValue(getObj()), field.getID(), getTreeTableMode());
         } catch (AddressException e) {
           child = new BadOopTreeNodeAdapter(field.getValueAsOopHandle(getObj()), field.getID(), getTreeTableMode());
+        } catch (UnknownOopException e) {
+          child = new BadOopTreeNodeAdapter(field.getValueAsOopHandle(getObj()), field.getID(), getTreeTableMode());
         }
       }
       ++curField;
--- a/agent/src/share/classes/sun/jvm/hotspot/utilities/soql/JSJavaThread.java	Mon Jul 27 09:06:22 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/soql/JSJavaThread.java	Mon Jul 27 17:23:52 2009 -0400
@@ -135,6 +135,10 @@
                     List frameMonitors = frame.getMonitors();  // List<MonitorInfo>
                     for (Iterator miItr = frameMonitors.iterator(); miItr.hasNext(); ) {
                         MonitorInfo mi = (MonitorInfo) miItr.next();
+
+                        if (mi.eliminated() && frame.isCompiledFrame()) {
+                          continue; // skip eliminated monitor
+                        }
                         OopHandle obj = mi.owner();
                         if (obj == null) {
                             // this monitor doesn't have an owning object so skip it
--- a/make/hotspot_version	Mon Jul 27 09:06:22 2009 -0700
+++ b/make/hotspot_version	Mon Jul 27 17:23:52 2009 -0400
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=16
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=04
+HS_BUILD_NUMBER=06
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=7
--- a/make/solaris/makefiles/adlc.make	Mon Jul 27 09:06:22 2009 -0700
+++ b/make/solaris/makefiles/adlc.make	Mon Jul 27 17:23:52 2009 -0400
@@ -68,7 +68,9 @@
 
 # CFLAGS_WARN holds compiler options to suppress/enable warnings.
 # Compiler warnings are treated as errors
-CFLAGS_WARN = +w -errwarn
+ifeq ($(shell expr $(COMPILER_REV_NUMERIC) \>= 509), 1)
+  CFLAGS_WARN = +w -errwarn
+endif
 CFLAGS += $(CFLAGS_WARN)
 
 ifeq ("${Platform_compiler}", "sparcWorks")
--- a/make/solaris/makefiles/optimized.make	Mon Jul 27 09:06:22 2009 -0700
+++ b/make/solaris/makefiles/optimized.make	Mon Jul 27 17:23:52 2009 -0400
@@ -41,7 +41,7 @@
 endif
 
 # Workaround SS11 bug 6345274 (all platforms) (Fixed in SS11 patch and SS12)
-ifeq ($(COMPILER_REV_NUMERIC),508))
+ifeq ($(COMPILER_REV_NUMERIC),508)
 OPT_CFLAGS/ciTypeFlow.o = $(OPT_CFLAGS/O2)
 endif # COMPILER_REV_NUMERIC == 508
 
--- a/src/cpu/sparc/vm/assembler_sparc.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -4208,6 +4208,7 @@
                   PtrQueue::byte_offset_of_active()),
          tmp);
   }
+
   // Check on whether to annul.
   br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
   delayed() -> nop();
@@ -4215,13 +4216,13 @@
   // satb_log_barrier_work1(tmp, offset);
   if (index == noreg) {
     if (Assembler::is_simm13(offset)) {
-      ld_ptr(obj, offset, tmp);
+      load_heap_oop(obj, offset, tmp);
     } else {
       set(offset, tmp);
-      ld_ptr(obj, tmp, tmp);
+      load_heap_oop(obj, tmp, tmp);
     }
   } else {
-    ld_ptr(obj, index, tmp);
+    load_heap_oop(obj, index, tmp);
   }
 
   // satb_log_barrier_work2(obj, tmp, offset);
--- a/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -371,7 +371,7 @@
   }
   __ move(value.result(), array_addr, null_check_info);
   if (obj_store) {
-    // Is this precise?
+    // Precise card mark
     post_barrier(LIR_OprFact::address(array_addr), value.result());
   }
 }
@@ -685,11 +685,8 @@
   LIR_Opr result = rlock_result(x);
   __ cmove(lir_cond_equal, LIR_OprFact::intConst(1), LIR_OprFact::intConst(0), result);
   if (type == objectType) {  // Write-barrier needed for Object fields.
-#ifdef PRECISE_CARDMARK
+    // Precise card mark since could either be object or array
     post_barrier(addr, val.result());
-#else
-    post_barrier(obj.result(), val.result());
-#endif // PRECISE_CARDMARK
   }
 }
 
--- a/src/cpu/sparc/vm/sparc.ad	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Mon Jul 27 17:23:52 2009 -0400
@@ -1891,17 +1891,17 @@
 // The intptr_t operand types, defined by textual substitution.
 // (Cf. opto/type.hpp.  This lets us avoid many, many other ifdefs.)
 #ifdef _LP64
-#define immX     immL
-#define immX13   immL13
-#define immX13m7 immL13m7
-#define iRegX    iRegL
-#define g1RegX   g1RegL
+#define immX      immL
+#define immX13    immL13
+#define immX13m7  immL13m7
+#define iRegX     iRegL
+#define g1RegX    g1RegL
 #else
-#define immX     immI
-#define immX13   immI13
-#define immX13m7 immI13m7
-#define iRegX    iRegI
-#define g1RegX   g1RegI
+#define immX      immI
+#define immX13    immI13
+#define immX13m7  immI13m7
+#define iRegX     iRegI
+#define g1RegX    g1RegI
 #endif
 
 //----------ENCODING BLOCK-----------------------------------------------------
@@ -3446,6 +3446,15 @@
   interface(CONST_INTER);
 %}
 
+// Integer Immediate: 8-bit
+operand immI8() %{
+  predicate(Assembler::is_simm(n->get_int(), 8));
+  match(ConI);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
 // Integer Immediate: 13-bit
 operand immI13() %{
   predicate(Assembler::is_simm13(n->get_int()));
@@ -3466,6 +3475,15 @@
   interface(CONST_INTER);
 %}
 
+// Integer Immediate: 16-bit
+operand immI16() %{
+  predicate(Assembler::is_simm(n->get_int(), 16));
+  match(ConI);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
 // Unsigned (positive) Integer Immediate: 13-bit
 operand immU13() %{
   predicate((0 <= n->get_int()) && Assembler::is_simm13(n->get_int()));
@@ -5544,7 +5562,7 @@
   ins_encode %{
     __ ldub($mem$$Address, $dst$$Register);
   %}
-  ins_pipe(iload_mask_mem);
+  ins_pipe(iload_mem);
 %}
 
 // Load Unsigned Byte (8bit UNsigned) into a Long Register
@@ -5557,7 +5575,22 @@
   ins_encode %{
     __ ldub($mem$$Address, $dst$$Register);
   %}
-  ins_pipe(iload_mask_mem);
+  ins_pipe(iload_mem);
+%}
+
+// Load Unsigned Byte (8 bit UNsigned) with 8-bit mask into Long Register
+instruct loadUB2L_immI8(iRegL dst, memory mem, immI8 mask) %{
+  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
+  ins_cost(MEMORY_REF_COST + DEFAULT_COST);
+
+  size(2*4);
+  format %{ "LDUB   $mem,$dst\t# ubyte & 8-bit mask -> long\n\t"
+            "AND    $dst,$mask,$dst" %}
+  ins_encode %{
+    __ ldub($mem$$Address, $dst$$Register);
+    __ and3($dst$$Register, $mask$$constant, $dst$$Register);
+  %}
+  ins_pipe(iload_mem);
 %}
 
 // Load Short (16bit signed)
@@ -5610,7 +5643,7 @@
   ins_encode %{
     __ lduh($mem$$Address, $dst$$Register);
   %}
-  ins_pipe(iload_mask_mem);
+  ins_pipe(iload_mem);
 %}
 
 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
@@ -5636,7 +5669,56 @@
   ins_encode %{
     __ lduh($mem$$Address, $dst$$Register);
   %}
-  ins_pipe(iload_mask_mem);
+  ins_pipe(iload_mem);
+%}
+
+// Load Unsigned Short/Char (16bit UNsigned) with mask 0xFF into a Long Register
+instruct loadUS2L_immI_255(iRegL dst, indOffset13m7 mem, immI_255 mask) %{
+  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDUB   $mem+1,$dst\t! ushort/char & 0xFF -> long" %}
+  ins_encode %{
+    __ ldub($mem$$Address, $dst$$Register, 1);  // LSB is index+1 on BE
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Unsigned Short/Char (16bit UNsigned) with a 13-bit mask into a Long Register
+instruct loadUS2L_immI13(iRegL dst, memory mem, immI13 mask) %{
+  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
+  ins_cost(MEMORY_REF_COST + DEFAULT_COST);
+
+  size(2*4);
+  format %{ "LDUH   $mem,$dst\t! ushort/char & 13-bit mask -> long\n\t"
+            "AND    $dst,$mask,$dst" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    __ lduh($mem$$Address, Rdst);
+    __ and3(Rdst, $mask$$constant, Rdst);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Unsigned Short/Char (16bit UNsigned) with a 16-bit mask into a Long Register
+instruct loadUS2L_immI16(iRegL dst, memory mem, immI16 mask, iRegL tmp) %{
+  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
+  effect(TEMP dst, TEMP tmp);
+  ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST);
+
+  size(3*4);
+  format %{ "LDUH   $mem,$dst\t! ushort/char & 16-bit mask -> long\n\t"
+            "SET    $mask,$tmp\n\t"
+            "AND    $dst,$tmp,$dst" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rtmp = $tmp$$Register;
+    __ lduh($mem$$Address, Rdst);
+    __ set($mask$$constant, Rtmp);
+    __ and3(Rdst, Rtmp, Rdst);
+  %}
+  ins_pipe(iload_mem);
 %}
 
 // Load Integer
@@ -5718,6 +5800,68 @@
   ins_encode %{
     __ ldsw($mem$$Address, $dst$$Register);
   %}
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Integer with mask 0xFF into a Long Register
+instruct loadI2L_immI_255(iRegL dst, indOffset13m7 mem, immI_255 mask) %{
+  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDUB   $mem+3,$dst\t! int & 0xFF -> long" %}
+  ins_encode %{
+    __ ldub($mem$$Address, $dst$$Register, 3);  // LSB is index+3 on BE
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Integer with mask 0xFFFF into a Long Register
+instruct loadI2L_immI_65535(iRegL dst, indOffset13m7 mem, immI_65535 mask) %{
+  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDUH   $mem+2,$dst\t! int & 0xFFFF -> long" %}
+  ins_encode %{
+    __ lduh($mem$$Address, $dst$$Register, 2);  // LSW is index+2 on BE
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Integer with a 13-bit mask into a Long Register
+instruct loadI2L_immI13(iRegL dst, memory mem, immI13 mask) %{
+  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
+  ins_cost(MEMORY_REF_COST + DEFAULT_COST);
+
+  size(2*4);
+  format %{ "LDUW   $mem,$dst\t! int & 13-bit mask -> long\n\t"
+            "AND    $dst,$mask,$dst" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    __ lduw($mem$$Address, Rdst);
+    __ and3(Rdst, $mask$$constant, Rdst);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Integer with a 32-bit mask into a Long Register
+instruct loadI2L_immI(iRegL dst, memory mem, immI mask, iRegL tmp) %{
+  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
+  effect(TEMP dst, TEMP tmp);
+  ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST);
+
+  size(3*4);
+  format %{ "LDUW   $mem,$dst\t! int & 32-bit mask -> long\n\t"
+            "SET    $mask,$tmp\n\t"
+            "AND    $dst,$tmp,$dst" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rtmp = $tmp$$Register;
+    __ lduw($mem$$Address, Rdst);
+    __ set($mask$$constant, Rtmp);
+    __ and3(Rdst, Rtmp, Rdst);
+  %}
   ins_pipe(iload_mem);
 %}
 
--- a/src/cpu/x86/vm/assembler_x86.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -6805,14 +6805,18 @@
   jcc(Assembler::equal, done);
 
   // if (x.f == NULL) goto done;
-  cmpptr(Address(obj, 0), NULL_WORD);
+#ifdef _LP64
+  load_heap_oop(tmp2, Address(obj, 0));
+#else
+  movptr(tmp2, Address(obj, 0));
+#endif
+  cmpptr(tmp2, (int32_t) NULL_WORD);
   jcc(Assembler::equal, done);
 
   // Can we store original value in the thread's buffer?
 
-  LP64_ONLY(movslq(tmp, index);)
-  movptr(tmp2, Address(obj, 0));
 #ifdef _LP64
+  movslq(tmp, index);
   cmpq(tmp, 0);
 #else
   cmpl(index, 0);
@@ -6834,8 +6838,7 @@
   if(tosca_live) push(rax);
   push(obj);
 #ifdef _LP64
-  movq(c_rarg0, Address(obj, 0));
-  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread);
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, r15_thread);
 #else
   push(thread);
   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
--- a/src/cpu/x86/vm/interp_masm_x86_32.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -1372,6 +1372,8 @@
     // If no method data exists, go to profile_continue.
     test_method_data_pointer(mdp, profile_continue);
 
+    set_mdp_flag_at(mdp, BitData::null_seen_byte_constant());
+
     // The method data pointer needs to be updated.
     int mdp_delta = in_bytes(BitData::bit_data_size());
     if (TypeProfileCasts) {
--- a/src/cpu/x86/vm/interp_masm_x86_64.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -1409,6 +1409,8 @@
     // If no method data exists, go to profile_continue.
     test_method_data_pointer(mdp, profile_continue);
 
+    set_mdp_flag_at(mdp, BitData::null_seen_byte_constant());
+
     // The method data pointer needs to be updated.
     int mdp_delta = in_bytes(BitData::bit_data_size());
     if (TypeProfileCasts) {
--- a/src/cpu/x86/vm/methodHandles_x86.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/cpu/x86/vm/methodHandles_x86.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -269,11 +269,11 @@
 
 #ifndef PRODUCT
 void trace_method_handle_stub(const char* adaptername,
-                              oop mh,
+                              oopDesc* mh,
                               intptr_t* entry_sp,
                               intptr_t* saved_sp) {
   // called as a leaf from native code: do not block the JVM!
-  printf("MH %s "PTR_FORMAT" "PTR_FORMAT" "INTX_FORMAT"\n", adaptername, mh, entry_sp, entry_sp - saved_sp);
+  printf("MH %s "PTR_FORMAT" "PTR_FORMAT" "INTX_FORMAT"\n", adaptername, (void*)mh, entry_sp, entry_sp - saved_sp);
 }
 #endif //PRODUCT
 
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -1302,22 +1302,19 @@
 
   const Register ic_reg = rax;
   const Register receiver = j_rarg0;
-  const Register tmp = rdx;
 
   Label ok;
   Label exception_pending;
 
+  assert_different_registers(ic_reg, receiver, rscratch1);
   __ verify_oop(receiver);
-  __ push(tmp); // spill (any other registers free here???)
-  __ load_klass(tmp, receiver);
-  __ cmpq(ic_reg, tmp);
+  __ load_klass(rscratch1, receiver);
+  __ cmpq(ic_reg, rscratch1);
   __ jcc(Assembler::equal, ok);
 
-  __ pop(tmp);
   __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 
   __ bind(ok);
-  __ pop(tmp);
 
   // Verified entry point must be aligned
   __ align(8);
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -709,7 +709,7 @@
   //
   //  Input:
   //     start   -  starting address
-  //     end     -  element count
+  //     count   -  element count
   void  gen_write_ref_array_pre_barrier(Register start, Register count) {
     assert_different_registers(start, count);
     BarrierSet* bs = Universe::heap()->barrier_set();
@@ -757,7 +757,6 @@
           __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)));
           __ addptr(rsp, 2*wordSize);
           __ popa();
-
         }
         break;
 
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -1207,9 +1207,9 @@
           __ pusha();                      // push registers (overkill)
           // must compute element count unless barrier set interface is changed (other platforms supply count)
           assert_different_registers(start, end, scratch);
-          __ lea(scratch, Address(end, wordSize));
-          __ subptr(scratch, start);
-          __ shrptr(scratch, LogBytesPerWord);
+          __ lea(scratch, Address(end, BytesPerHeapOop));
+          __ subptr(scratch, start);               // subtract start to get #bytes
+          __ shrptr(scratch, LogBytesPerHeapOop);  // convert to element count
           __ mov(c_rarg0, start);
           __ mov(c_rarg1, scratch);
           __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)));
@@ -1225,6 +1225,7 @@
           Label L_loop;
 
            __ shrptr(start, CardTableModRefBS::card_shift);
+           __ addptr(end, BytesPerHeapOop);
            __ shrptr(end, CardTableModRefBS::card_shift);
            __ subptr(end, start); // number of bytes to copy
 
@@ -2251,6 +2252,7 @@
     // and report their number to the caller.
     assert_different_registers(rax, r14_length, count, to, end_to, rcx);
     __ lea(end_to, to_element_addr);
+    __ addptr(end_to, -heapOopSize);      // make an inclusive end pointer
     gen_write_ref_array_post_barrier(to, end_to, rscratch1);
     __ movptr(rax, r14_length);           // original oops
     __ addptr(rax, count);                // K = (original - remaining) oops
@@ -2259,7 +2261,7 @@
 
     // Come here on success only.
     __ BIND(L_do_card_marks);
-    __ addptr(end_to, -wordSize);         // make an inclusive end pointer
+    __ addptr(end_to, -heapOopSize);         // make an inclusive end pointer
     gen_write_ref_array_post_barrier(to, end_to, rscratch1);
     __ xorptr(rax, rax);                  // return 0 on success
 
--- a/src/cpu/x86/vm/x86_32.ad	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/cpu/x86/vm/x86_32.ad	Mon Jul 27 17:23:52 2009 -0400
@@ -6885,8 +6885,9 @@
 %}
 
 // Load Byte (8bit signed) into Long Register
-instruct loadB2L(eRegL dst, memory mem) %{
+instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
   match(Set dst (ConvI2L (LoadB mem)));
+  effect(KILL cr);
 
   ins_cost(375);
   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
@@ -6917,19 +6918,37 @@
 %}
 
 // Load Unsigned Byte (8 bit UNsigned) into Long Register
-instruct loadUB2L(eRegL dst, memory mem)
-%{
+instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
   match(Set dst (ConvI2L (LoadUB mem)));
+  effect(KILL cr);
 
   ins_cost(250);
   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
             "XOR    $dst.hi,$dst.hi" %}
 
   ins_encode %{
-    __ movzbl($dst$$Register, $mem$$Address);
-    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
-  %}
-
+    Register Rdst = $dst$$Register;
+    __ movzbl(Rdst, $mem$$Address);
+    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
+instruct loadUB2L_immI8(eRegL dst, memory mem, immI8 mask, eFlagsReg cr) %{
+  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
+  effect(KILL cr);
+
+  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 8-bit mask -> long\n\t"
+            "XOR    $dst.hi,$dst.hi\n\t"
+            "AND    $dst.lo,$mask" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    __ movzbl(Rdst, $mem$$Address);
+    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
+    __ andl(Rdst, $mask$$constant);
+  %}
   ins_pipe(ialu_reg_mem);
 %}
 
@@ -6960,8 +6979,9 @@
 %}
 
 // Load Short (16bit signed) into Long Register
-instruct loadS2L(eRegL dst, memory mem) %{
+instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
   match(Set dst (ConvI2L (LoadS mem)));
+  effect(KILL cr);
 
   ins_cost(375);
   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
@@ -7004,8 +7024,9 @@
 %}
 
 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
-instruct loadUS2L(eRegL dst, memory mem) %{
+instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
   match(Set dst (ConvI2L (LoadUS mem)));
+  effect(KILL cr);
 
   ins_cost(250);
   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
@@ -7019,6 +7040,38 @@
   ins_pipe(ialu_reg_mem);
 %}
 
+// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
+instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
+  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
+  effect(KILL cr);
+
+  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
+            "XOR    $dst.hi,$dst.hi" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    __ movzbl(Rdst, $mem$$Address);
+    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register
+instruct loadUS2L_immI16(eRegL dst, memory mem, immI16 mask, eFlagsReg cr) %{
+  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
+  effect(KILL cr);
+
+  format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 16-bit mask -> long\n\t"
+            "XOR    $dst.hi,$dst.hi\n\t"
+            "AND    $dst.lo,$mask" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    __ movzwl(Rdst, $mem$$Address);
+    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
+    __ andl(Rdst, $mask$$constant);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
 // Load Integer
 instruct loadI(eRegI dst, memory mem) %{
   match(Set dst (LoadI mem));
@@ -7082,8 +7135,9 @@
 %}
 
 // Load Integer into Long Register
-instruct loadI2L(eRegL dst, memory mem) %{
+instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
   match(Set dst (ConvI2L (LoadI mem)));
+  effect(KILL cr);
 
   ins_cost(375);
   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
@@ -7099,9 +7153,57 @@
   ins_pipe(ialu_reg_mem);
 %}
 
+// Load Integer with mask 0xFF into Long Register
+instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
+  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
+  effect(KILL cr);
+
+  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
+            "XOR    $dst.hi,$dst.hi" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    __ movzbl(Rdst, $mem$$Address);
+    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Integer with mask 0xFFFF into Long Register
+instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
+  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
+  effect(KILL cr);
+
+  format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
+            "XOR    $dst.hi,$dst.hi" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    __ movzwl(Rdst, $mem$$Address);
+    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Integer with 32-bit mask into Long Register
+instruct loadI2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
+  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
+  effect(KILL cr);
+
+  format %{ "MOV    $dst.lo,$mem\t# int & 32-bit mask -> long\n\t"
+            "XOR    $dst.hi,$dst.hi\n\t"
+            "AND    $dst.lo,$mask" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    __ movl(Rdst, $mem$$Address);
+    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
+    __ andl(Rdst, $mask$$constant);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
 // Load Unsigned Integer into Long Register
-instruct loadUI2L(eRegL dst, memory mem) %{
+instruct loadUI2L(eRegL dst, memory mem, eFlagsReg cr) %{
   match(Set dst (LoadUI2L mem));
+  effect(KILL cr);
 
   ins_cost(250);
   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
@@ -7695,6 +7797,17 @@
   ins_pipe( ialu_mem_long_reg );
 %}
 
+// Store Long to Integer
+instruct storeL2I(memory mem, eRegL src) %{
+  match(Set mem (StoreI mem (ConvL2I src)));
+
+  format %{ "MOV    $mem,$src.lo\t# long -> int" %}
+  ins_encode %{
+    __ movl($mem$$Address, $src$$Register);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 // Volatile Store Long.  Must be atomic, so move it into
 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 // target address before the store (for null-ptr checks)
--- a/src/cpu/x86/vm/x86_64.ad	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/cpu/x86/vm/x86_64.ad	Mon Jul 27 17:23:52 2009 -0400
@@ -6444,6 +6444,21 @@
   ins_pipe(ialu_reg_mem);
 %}
 
+// Load Unsigned Byte (8 bit UNsigned) with a 8-bit mask into Long Register
+instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
+  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
+  effect(KILL cr);
+
+  format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
+            "andl    $dst, $mask" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    __ movzbq(Rdst, $mem$$Address);
+    __ andl(Rdst, $mask$$constant);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
 // Load Short (16 bit signed)
 instruct loadS(rRegI dst, memory mem)
 %{
@@ -6528,6 +6543,32 @@
   ins_pipe(ialu_reg_mem);
 %}
 
+// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
+instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
+  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
+
+  format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
+  ins_encode %{
+    __ movzbq($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Unsigned Short/Char (16 bit UNsigned) with mask into Long Register
+instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
+  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
+  effect(KILL cr);
+
+  format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
+            "andl    $dst, $mask" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    __ movzwq(Rdst, $mem$$Address);
+    __ andl(Rdst, $mask$$constant);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
 // Load Integer
 instruct loadI(rRegI dst, memory mem)
 %{
@@ -6606,6 +6647,43 @@
   ins_pipe(ialu_reg_mem);
 %}
 
+// Load Integer with mask 0xFF into Long Register
+instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
+  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
+
+  format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
+  ins_encode %{
+    __ movzbq($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Integer with mask 0xFFFF into Long Register
+instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
+  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
+
+  format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
+  ins_encode %{
+    __ movzwq($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Integer with a 32-bit mask into Long Register
+instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
+  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
+  effect(KILL cr);
+
+  format %{ "movl    $dst, $mem\t# int & 32-bit mask -> long\n\t"
+            "andl    $dst, $mask" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    __ movl(Rdst, $mem$$Address);
+    __ andl(Rdst, $mask$$constant);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
 // Load Unsigned Integer into Long Register
 instruct loadUI2L(rRegL dst, memory mem)
 %{
@@ -11673,8 +11751,9 @@
 
   ins_cost(125);
   format %{ "movslq  $dst, $src\t# i2l" %}
-  opcode(0x63); // needs REX.W
-  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
+  ins_encode %{
+    __ movslq($dst$$Register, $src$$Register);
+  %}
   ins_pipe(ialu_reg_reg);
 %}
 
--- a/src/share/tools/MakeDeps/Database.java	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/tools/MakeDeps/Database.java	Mon Jul 27 17:23:52 2009 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright 1999-2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1999-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,7 +39,6 @@
   private HashMap<String,String> platformDepFiles;
   private long threshold;
   private int nOuterFiles;
-  private int nPrecompiledFiles;
   private boolean missingOk;
   private Platform plat;
   /** These allow you to specify files not in the include database
@@ -62,7 +61,6 @@
 
     threshold = t;
     nOuterFiles = 0;
-    nPrecompiledFiles = 0;
     missingOk = false;
     firstFile = null;
     lastFile = null;
@@ -343,7 +341,6 @@
             plat.getGIFileTemplate().getInvDir() +
             list.getName() +
             "\"");
-        nPrecompiledFiles += 1;
       }
     }
     inclFile.println();
@@ -408,22 +405,22 @@
       gd.println();
     }
 
-    if (nPrecompiledFiles > 0) {
-      // write Precompiled_Files = ...
-      gd.println("Precompiled_Files = \\");
-      for (Iterator iter = grandInclude.iterator(); iter.hasNext(); ) {
-        FileList list = (FileList) iter.next();
+    // write Precompiled_Files = ...
+    gd.println("Precompiled_Files = \\");
+    for (Iterator iter = grandInclude.iterator(); iter.hasNext(); ) {
+      FileList list = (FileList) iter.next();
+      if (list.getCount() >= threshold) {
         gd.println(list.getName() + " \\");
         String platformDep = platformDepFiles.get(list.getName());
         if (platformDep != null) {
-            // make sure changes to the platform dependent file will
-            // cause regeneration of the pch file.
-            gd.println(platformDep + " \\");
+          // make sure changes to the platform dependent file will
+          // cause regeneration of the pch file.
+          gd.println(platformDep + " \\");
         }
       }
-      gd.println();
-      gd.println();
     }
+    gd.println();
+    gd.println();
 
     gd.println("DTraced_Files = \\");
     for (Iterator iter = outerFiles.iterator(); iter.hasNext(); ) {
@@ -483,7 +480,6 @@
         }
 
         if (plat.includeGIDependencies()
-            && nPrecompiledFiles > 0
             && anII.getUseGrandInclude()) {
           gd.println("    $(Precompiled_Files) \\");
         }
--- a/src/share/vm/adlc/formssel.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/adlc/formssel.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -420,6 +420,13 @@
   return  _matrule->is_ideal_load();
 }
 
+// Return 'true' if this instruction matches an ideal 'LoadKlass' node
+bool InstructForm::skip_antidep_check() const {
+  if( _matrule == NULL ) return false;
+
+  return  _matrule->skip_antidep_check();
+}
+
 // Return 'true' if this instruction matches an ideal 'Load?' node
 Form::DataType InstructForm::is_ideal_store() const {
   if( _matrule == NULL ) return Form::none;
@@ -567,6 +574,8 @@
 
 // loads from memory, so must check for anti-dependence
 bool InstructForm::needs_anti_dependence_check(FormDict &globals) const {
+  if ( skip_antidep_check() ) return false;
+
   // Machine independent loads must be checked for anti-dependences
   if( is_ideal_load() != Form::none )  return true;
 
@@ -3957,6 +3966,28 @@
 }
 
 
+bool MatchRule::skip_antidep_check() const {
+  // Some loads operate on what is effectively immutable memory so we
+  // should skip the anti dep computations.  For some of these nodes
+  // the rewritable field keeps the anti dep logic from triggering but
+  // for certain kinds of LoadKlass it does not since they are
+  // actually reading memory which could be rewritten by the runtime,
+  // though never by generated code.  This disables it uniformly for
+  // the nodes that behave like this: LoadKlass, LoadNKlass and
+  // LoadRange.
+  if ( _opType && (strcmp(_opType,"Set") == 0) && _rChild ) {
+    const char *opType = _rChild->_opType;
+    if (strcmp("LoadKlass", opType) == 0 ||
+        strcmp("LoadNKlass", opType) == 0 ||
+        strcmp("LoadRange", opType) == 0) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+
 Form::DataType MatchRule::is_ideal_store() const {
   Form::DataType ideal_store = Form::none;
 
--- a/src/share/vm/adlc/formssel.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/adlc/formssel.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -158,6 +158,9 @@
 
   virtual Form::CallType is_ideal_call() const; // matches ideal 'Call'
   virtual Form::DataType is_ideal_load() const; // node matches ideal 'LoadXNode'
+  // Should antidep checks be disabled for this Instruct
+  // See definition of MatchRule::skip_antidep_check
+  bool skip_antidep_check() const;
   virtual Form::DataType is_ideal_store() const;// node matches ideal 'StoreXNode'
           bool        is_ideal_mem() const { return is_ideal_load() != Form::none || is_ideal_store() != Form::none; }
   virtual uint        two_address(FormDict &globals); // output reg must match input reg
@@ -1003,6 +1006,9 @@
   bool       is_ideal_loopEnd() const; // node matches ideal 'LoopEnd'
   bool       is_ideal_bool() const;    // node matches ideal 'Bool'
   Form::DataType is_ideal_load() const;// node matches ideal 'LoadXNode'
+  // Should antidep checks be disabled for this rule
+  // See definition of MatchRule::skip_antidep_check
+  bool skip_antidep_check() const;
   Form::DataType is_ideal_store() const;// node matches ideal 'StoreXNode'
 
   // Check if 'mRule2' is a cisc-spill variant of this MatchRule
--- a/src/share/vm/adlc/output_h.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/adlc/output_h.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -1367,11 +1367,11 @@
         else if (!strcmp(oper->ideal_type(_globalNames), "ConN")) {
           // Access the locally stored constant
           fprintf(fp,"  virtual intptr_t       constant() const {");
-          fprintf(fp,   " return _c0->make_oopptr()->get_con();");
+          fprintf(fp,   " return _c0->get_ptrtype()->get_con();");
           fprintf(fp, " }\n");
           // Generate query to determine if this pointer is an oop
           fprintf(fp,"  virtual bool           constant_is_oop() const {");
-          fprintf(fp,   " return _c0->make_oopptr()->isa_oop_ptr();");
+          fprintf(fp,   " return _c0->get_ptrtype()->isa_oop_ptr();");
           fprintf(fp, " }\n");
         }
         else if (!strcmp(oper->ideal_type(_globalNames), "ConL")) {
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -1534,12 +1534,8 @@
   }
 
   if (is_oop) {
-#ifdef PRECISE_CARDMARK
-    // Precise cardmarks don't work
-    post_barrier(LIR_OprFact::address(address), value.result());
-#else
+    // Store to object so mark the card of the header
     post_barrier(object.result(), value.result());
-#endif // PRECISE_CARDMARK
   }
 
   if (is_volatile && os::is_MP()) {
--- a/src/share/vm/classfile/classFileParser.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/classfile/classFileParser.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -3237,6 +3237,16 @@
     this_klass->set_minor_version(minor_version);
     this_klass->set_major_version(major_version);
 
+    // Set up methodOop::intrinsic_id as soon as we know the names of methods.
+    // (We used to do this lazily, but now we query it in Rewriter,
+    // which is eagerly done for every method, so we might as well do it now,
+    // when everything is fresh in memory.)
+    if (methodOopDesc::klass_id_for_intrinsics(this_klass->as_klassOop()) != vmSymbols::NO_SID) {
+      for (int j = 0; j < methods->length(); j++) {
+        ((methodOop)methods->obj_at(j))->init_intrinsic_id();
+      }
+    }
+
     if (cached_class_file_bytes != NULL) {
       // JVMTI: we have an instanceKlass now, tell it about the cached bytes
       this_klass->set_cached_class_file(cached_class_file_bytes,
--- a/src/share/vm/classfile/vmSymbols.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/classfile/vmSymbols.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -513,9 +513,6 @@
 //
 // for Emacs: (let ((c-backslash-column 120) (c-backslash-max-column 120)) (c-backslash-region (point) (point-max) nil t))
 #define VM_INTRINSICS_DO(do_intrinsic, do_class, do_name, do_signature, do_alias)                                       \
-  do_intrinsic(_Object_init,              java_lang_Object, object_initializer_name, void_method_signature,      F_R)   \
-  /*    (symbol object_initializer_name defined above) */                                                               \
-                                                                                                                        \
   do_intrinsic(_hashCode,                 java_lang_Object,       hashCode_name, void_int_signature,             F_R)   \
    do_name(     hashCode_name,                                   "hashCode")                                            \
   do_intrinsic(_getClass,                 java_lang_Object,       getClass_name, void_class_signature,           F_R)   \
@@ -635,9 +632,6 @@
   do_intrinsic(_equalsC,                  java_util_Arrays,       equals_name,    equalsC_signature,             F_S)   \
    do_signature(equalsC_signature,                               "([C[C)Z")                                             \
                                                                                                                         \
-  do_intrinsic(_invoke,                   java_lang_reflect_Method, invoke_name, object_array_object_object_signature, F_R) \
-  /*   (symbols invoke_name and invoke_signature defined above) */                                                      \
-                                                                                                                        \
   do_intrinsic(_compareTo,                java_lang_String,       compareTo_name, string_int_signature,          F_R)   \
    do_name(     compareTo_name,                                  "compareTo")                                           \
   do_intrinsic(_indexOf,                  java_lang_String,       indexOf_name, string_int_signature,            F_R)   \
@@ -656,8 +650,6 @@
    do_name(     attemptUpdate_name,                                 "attemptUpdate")                                    \
    do_signature(attemptUpdate_signature,                            "(JJ)Z")                                            \
                                                                                                                         \
-  do_intrinsic(_fillInStackTrace,         java_lang_Throwable, fillInStackTrace_name, void_throwable_signature,  F_RNY) \
-                                                                                                                        \
   /* support for sun.misc.Unsafe */                                                                                     \
   do_class(sun_misc_Unsafe,               "sun/misc/Unsafe")                                                            \
                                                                                                                         \
@@ -819,10 +811,22 @@
    do_name(     prefetchReadStatic_name,                         "prefetchReadStatic")                                  \
   do_intrinsic(_prefetchWriteStatic,      sun_misc_Unsafe,        prefetchWriteStatic_name, prefetch_signature,  F_SN)  \
    do_name(     prefetchWriteStatic_name,                        "prefetchWriteStatic")                                 \
+    /*== LAST_COMPILER_INLINE*/                                                                                         \
+    /*the compiler does have special inlining code for these; bytecode inline is just fine */                           \
+                                                                                                                        \
+  do_intrinsic(_fillInStackTrace,         java_lang_Throwable, fillInStackTrace_name, void_throwable_signature,  F_RNY) \
+                                                                                                                        \
+  do_intrinsic(_Object_init,              java_lang_Object, object_initializer_name, void_method_signature,      F_R)   \
+  /*    (symbol object_initializer_name defined above) */                                                               \
+                                                                                                                        \
+  do_intrinsic(_invoke,                   java_lang_reflect_Method, invoke_name, object_array_object_object_signature, F_R) \
+  /*   (symbols invoke_name and invoke_signature defined above) */                                                      \
+                                                                                                                        \
     /*end*/
 
 
 
+
 // Class vmSymbols
 
 class vmSymbols: AllStatic {
@@ -935,6 +939,7 @@
     #undef VM_INTRINSIC_ENUM
 
     ID_LIMIT,
+    LAST_COMPILER_INLINE = _prefetchWriteStatic,
     FIRST_ID = _none + 1
   };
 
@@ -972,4 +977,7 @@
   static Flags              flags_for(ID id);
 
   static const char* short_name_as_C_string(ID id, char* buf, int size);
+
+  // Access to intrinsic methods:
+  static methodOop method_for(ID id);
 };
--- a/src/share/vm/compiler/oopMap.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/compiler/oopMap.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -379,7 +379,15 @@
         if ( loc != NULL ) {
           oop *base_loc    = fr->oopmapreg_to_location(omv.content_reg(), reg_map);
           oop *derived_loc = loc;
-          derived_oop_fn(base_loc, derived_loc);
+          oop val = *base_loc;
+          if (val == (oop)NULL || Universe::is_narrow_oop_base(val)) {
+            // Ignore NULL oops and decoded NULL narrow oops which
+            // equal to Universe::narrow_oop_base when a narrow oop
+            // implicit null check is used in compiled code.
+            // The narrow_oop_base could be NULL or be the address
+            // of the page below heap depending on compressed oops mode.
+          } else
+            derived_oop_fn(base_loc, derived_loc);
         }
         oms.next();
       }  while (!oms.is_done());
@@ -394,6 +402,15 @@
       oop* loc = fr->oopmapreg_to_location(omv.reg(),reg_map);
       if ( loc != NULL ) {
         if ( omv.type() == OopMapValue::oop_value ) {
+          oop val = *loc;
+          if (val == (oop)NULL || Universe::is_narrow_oop_base(val)) {
+            // Ignore NULL oops and decoded NULL narrow oops which
+            // equal to Universe::narrow_oop_base when a narrow oop
+            // implicit null check is used in compiled code.
+            // The narrow_oop_base could be NULL or be the address
+            // of the page below heap depending on compressed oops mode.
+            continue;
+          }
 #ifdef ASSERT
           if ((((uintptr_t)loc & (sizeof(*loc)-1)) != 0) ||
              !Universe::heap()->is_in_or_null(*loc)) {
@@ -410,6 +427,8 @@
 #endif // ASSERT
           oop_fn->do_oop(loc);
         } else if ( omv.type() == OopMapValue::value_value ) {
+          assert((*loc) == (oop)NULL || !Universe::is_narrow_oop_base(*loc),
+                 "found invalid value pointer");
           value_fn->do_oop(loc);
         } else if ( omv.type() == OopMapValue::narrowoop_value ) {
           narrowOop *nl = (narrowOop*)loc;
--- a/src/share/vm/compiler/oopMap.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/compiler/oopMap.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -233,6 +233,10 @@
   int heap_size() const;
   void copy_to(address addr);
 
+  // Methods oops_do() and all_do() filter out NULL oops and
+  // oop == Universe::narrow_oop_base() before passing oops
+  // to closures.
+
   // Iterates through frame for a compiled method
   static void oops_do            (const frame* fr,
                                   const RegisterMap* reg_map, OopClosure* f);
--- a/src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -42,35 +42,40 @@
     BufferLength = 1024
   };
 
-  oop          *_buffer[BufferLength];
-  oop         **_buffer_top;
-  oop         **_buffer_curr;
+  StarTask  _buffer[BufferLength];
+  StarTask* _buffer_top;
+  StarTask* _buffer_curr;
 
-  OopClosure  *_oc;
-  double       _closure_app_seconds;
+  OopClosure* _oc;
+  double      _closure_app_seconds;
 
   void process_buffer () {
-
     double start = os::elapsedTime();
-    for (oop **curr = _buffer; curr < _buffer_curr; ++curr) {
-      _oc->do_oop(*curr);
+    for (StarTask* curr = _buffer; curr < _buffer_curr; ++curr) {
+      if (curr->is_narrow()) {
+        assert(UseCompressedOops, "Error");
+        _oc->do_oop((narrowOop*)(*curr));
+      } else {
+        _oc->do_oop((oop*)(*curr));
+      }
     }
     _buffer_curr = _buffer;
     _closure_app_seconds += (os::elapsedTime() - start);
   }
 
-public:
-  virtual void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-  virtual void do_oop(oop *p) {
+  template <class T> inline void do_oop_work(T* p) {
     if (_buffer_curr == _buffer_top) {
       process_buffer();
     }
-
-    *_buffer_curr = p;
+    StarTask new_ref(p);
+    *_buffer_curr = new_ref;
     ++_buffer_curr;
   }
+
+public:
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(oop* p)       { do_oop_work(p); }
+
   void done () {
     if (_buffer_curr > _buffer) {
       process_buffer();
@@ -88,18 +93,17 @@
 class BufferingOopsInGenClosure: public OopsInGenClosure {
   BufferingOopClosure _boc;
   OopsInGenClosure* _oc;
-public:
+ protected:
+  template <class T> inline void do_oop_work(T* p) {
+    assert(generation()->is_in_reserved((void*)p), "Must be in!");
+    _boc.do_oop(p);
+  }
+ public:
   BufferingOopsInGenClosure(OopsInGenClosure *oc) :
     _boc(oc), _oc(oc) {}
 
-  virtual void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-
-  virtual void do_oop(oop* p) {
-    assert(generation()->is_in_reserved(p), "Must be in!");
-    _boc.do_oop(p);
-  }
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(oop* p)       { do_oop_work(p); }
 
   void done() {
     _boc.done();
@@ -130,14 +134,14 @@
     BufferLength = 1024
   };
 
-  oop                      *_buffer[BufferLength];
-  oop                     **_buffer_top;
-  oop                     **_buffer_curr;
+  StarTask     _buffer[BufferLength];
+  StarTask*    _buffer_top;
+  StarTask*    _buffer_curr;
 
-  HeapRegion               *_hr_buffer[BufferLength];
-  HeapRegion              **_hr_curr;
+  HeapRegion*  _hr_buffer[BufferLength];
+  HeapRegion** _hr_curr;
 
-  OopsInHeapRegionClosure  *_oc;
+  OopsInHeapRegionClosure*  _oc;
   double                    _closure_app_seconds;
 
   void process_buffer () {
@@ -146,15 +150,20 @@
            "the two lengths should be the same");
 
     double start = os::elapsedTime();
-    HeapRegion **hr_curr = _hr_buffer;
-    HeapRegion *hr_prev = NULL;
-    for (oop **curr = _buffer; curr < _buffer_curr; ++curr) {
-      HeapRegion *region = *hr_curr;
+    HeapRegion** hr_curr = _hr_buffer;
+    HeapRegion*  hr_prev = NULL;
+    for (StarTask* curr = _buffer; curr < _buffer_curr; ++curr) {
+      HeapRegion* region = *hr_curr;
       if (region != hr_prev) {
         _oc->set_region(region);
         hr_prev = region;
       }
-      _oc->do_oop(*curr);
+      if (curr->is_narrow()) {
+        assert(UseCompressedOops, "Error");
+        _oc->do_oop((narrowOop*)(*curr));
+      } else {
+        _oc->do_oop((oop*)(*curr));
+      }
       ++hr_curr;
     }
     _buffer_curr = _buffer;
@@ -163,17 +172,16 @@
   }
 
 public:
-  virtual void do_oop(narrowOop *p) {
-    guarantee(false, "NYI");
-  }
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
 
-  virtual void do_oop(oop *p) {
+  template <class T> void do_oop_work(T* p) {
     if (_buffer_curr == _buffer_top) {
       assert(_hr_curr > _hr_buffer, "_hr_curr should be consistent with _buffer_curr");
       process_buffer();
     }
-
-    *_buffer_curr = p;
+    StarTask new_ref(p);
+    *_buffer_curr = new_ref;
     ++_buffer_curr;
     *_hr_curr = _from;
     ++_hr_curr;
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -452,13 +452,10 @@
   _regionStack.allocate(G1MarkRegionStackSize);
 
   // Create & start a ConcurrentMark thread.
-  if (G1ConcMark) {
-    _cmThread = new ConcurrentMarkThread(this);
-    assert(cmThread() != NULL, "CM Thread should have been created");
-    assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
-  } else {
-    _cmThread = NULL;
-  }
+  _cmThread = new ConcurrentMarkThread(this);
+  assert(cmThread() != NULL, "CM Thread should have been created");
+  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
+
   _g1h = G1CollectedHeap::heap();
   assert(CGC_lock != NULL, "Where's the CGC_lock?");
   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
@@ -783,18 +780,18 @@
                      bool do_barrier) : _cm(cm), _g1h(g1h),
                                         _do_barrier(do_barrier) { }
 
-  virtual void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-
-  virtual void do_oop(oop* p) {
-    oop thisOop = *p;
-    if (thisOop != NULL) {
-      assert(thisOop->is_oop() || thisOop->mark() == NULL,
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+
+  template <class T> void do_oop_work(T* p) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+      assert(obj->is_oop() || obj->mark() == NULL,
              "expected an oop, possibly with mark word displaced");
-      HeapWord* addr = (HeapWord*)thisOop;
+      HeapWord* addr = (HeapWord*)obj;
       if (_g1h->is_in_g1_reserved(addr)) {
-        _cm->grayRoot(thisOop);
+        _cm->grayRoot(obj);
       }
     }
     if (_do_barrier) {
@@ -850,16 +847,6 @@
   double start = os::elapsedTime();
   GCOverheadReporter::recordSTWStart(start);
 
-  // If there has not been a GC[n-1] since last GC[n] cycle completed,
-  // precede our marking with a collection of all
-  // younger generations to keep floating garbage to a minimum.
-  // YSR: we won't do this for now -- it's an optimization to be
-  // done post-beta.
-
-  // YSR:    ignoring weak refs for now; will do at bug fixing stage
-  // EVM:    assert(discoveredRefsAreClear());
-
-
   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
   g1p->record_concurrent_mark_init_start();
   checkpointRootsInitialPre();
@@ -1135,6 +1122,13 @@
     return;
   }
 
+  if (VerifyDuringGC) {
+    HandleMark hm;  // handle scope
+    gclog_or_tty->print(" VerifyDuringGC:(before)");
+    Universe::heap()->prepare_for_verify();
+    Universe::verify(true, false, true);
+  }
+
   G1CollectorPolicy* g1p = g1h->g1_policy();
   g1p->record_concurrent_mark_remark_start();
 
@@ -1159,10 +1153,12 @@
     JavaThread::satb_mark_queue_set().set_active_all_threads(false);
 
     if (VerifyDuringGC) {
-      g1h->prepare_for_verify();
-      g1h->verify(/* allow_dirty */      true,
-                  /* silent */           false,
-                  /* use_prev_marking */ false);
+      HandleMark hm;  // handle scope
+      gclog_or_tty->print(" VerifyDuringGC:(after)");
+      Universe::heap()->prepare_for_verify();
+      Universe::heap()->verify(/* allow_dirty */      true,
+                               /* silent */           false,
+                               /* use_prev_marking */ false);
     }
   }
 
@@ -1233,6 +1229,41 @@
                CardTableModRefBS::card_shift);
   }
 
+  // It takes a region that's not empty (i.e., it has at least one
+  // live object in it and sets its corresponding bit on the region
+  // bitmap to 1. If the region is "starts humongous" it will also set
+  // to 1 the bits on the region bitmap that correspond to its
+  // associated "continues humongous" regions.
+  void set_bit_for_region(HeapRegion* hr) {
+    assert(!hr->continuesHumongous(), "should have filtered those out");
+
+    size_t index = hr->hrs_index();
+    if (!hr->startsHumongous()) {
+      // Normal (non-humongous) case: just set the bit.
+      _region_bm->par_at_put((BitMap::idx_t) index, true);
+    } else {
+      // Starts humongous case: calculate how many regions are part of
+      // this humongous region and then set the bit range. It might
+      // have been a bit more efficient to look at the object that
+      // spans these humongous regions to calculate their number from
+      // the object's size. However, it's a good idea to calculate
+      // this based on the metadata itself, and not the region
+      // contents, so that this code is not aware of what goes into
+      // the humongous regions (in case this changes in the future).
+      G1CollectedHeap* g1h = G1CollectedHeap::heap();
+      size_t end_index = index + 1;
+      while (end_index < g1h->n_regions()) {
+        HeapRegion* chr = g1h->region_at(end_index);
+        if (!chr->continuesHumongous()) {
+          break;
+        }
+        end_index += 1;
+      }
+      _region_bm->par_at_put_range((BitMap::idx_t) index,
+                                   (BitMap::idx_t) end_index, true);
+    }
+  }
+
   bool doHeapRegion(HeapRegion* hr) {
     if (_co_tracker != NULL)
       _co_tracker->update();
@@ -1241,13 +1272,13 @@
       _start_vtime_sec = os::elapsedVTime();
 
     if (hr->continuesHumongous()) {
-      HeapRegion* hum_start = hr->humongous_start_region();
-      // If the head region of the humongous region has been determined
-      // to be alive, then all the tail regions should be marked
-      // such as well.
-      if (_region_bm->at(hum_start->hrs_index())) {
-        _region_bm->par_at_put(hr->hrs_index(), 1);
-      }
+      // We will ignore these here and process them when their
+      // associated "starts humongous" region is processed (see
+      // set_bit_for_heap_region()). Note that we cannot rely on their
+      // associated "starts humongous" region to have their bit set to
+      // 1 since, due to the region chunking in the parallel region
+      // iteration, a "continues humongous" region might be visited
+      // before its associated "starts humongous".
       return false;
     }
 
@@ -1343,14 +1374,14 @@
           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
         mark_card_num_range(start_card_num, last_card_num);
         // This definitely means the region has live objects.
-        _region_bm->par_at_put(hr->hrs_index(), 1);
+        set_bit_for_region(hr);
       }
     }
 
     hr->add_to_marked_bytes(marked_bytes);
     // Update the live region bitmap.
     if (marked_bytes > 0) {
-      _region_bm->par_at_put(hr->hrs_index(), 1);
+      set_bit_for_region(hr);
     }
     hr->set_top_at_conc_mark_count(nextTop);
     _tot_live += hr->next_live_bytes();
@@ -1623,6 +1654,15 @@
     return;
   }
 
+  if (VerifyDuringGC) {
+    HandleMark hm;  // handle scope
+    gclog_or_tty->print(" VerifyDuringGC:(before)");
+    Universe::heap()->prepare_for_verify();
+    Universe::verify(/* allow dirty  */ true,
+                     /* silent       */ false,
+                     /* prev marking */ true);
+  }
+
   _cleanup_co_tracker.disable();
 
   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
@@ -1755,10 +1795,12 @@
   g1h->increment_total_collections();
 
   if (VerifyDuringGC) {
-    g1h->prepare_for_verify();
-    g1h->verify(/* allow_dirty */      true,
-                /* silent */           false,
-                /* use_prev_marking */ true);
+    HandleMark hm;  // handle scope
+    gclog_or_tty->print(" VerifyDuringGC:(after)");
+    Universe::heap()->prepare_for_verify();
+    Universe::verify(/* allow dirty  */ true,
+                     /* silent       */ false,
+                     /* prev marking */ true);
   }
 }
 
@@ -1817,12 +1859,11 @@
     _g1(g1), _cm(cm),
     _bitMap(bitMap) {}
 
-  void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-
-  void do_oop(oop* p) {
-    oop thisOop = *p;
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+
+  template <class T> void do_oop_work(T* p) {
+    oop thisOop = oopDesc::load_decode_heap_oop(p);
     HeapWord* addr = (HeapWord*)thisOop;
     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
       _bitMap->mark(addr);
@@ -1981,12 +2022,11 @@
   ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) :
     _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { }
 
-  void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-
-  void do_oop(oop* p) {
-    oop         obj = *p;
+  void do_oop(narrowOop* p) { do_oop_work(p); }
+  void do_oop(      oop* p) { do_oop_work(p); }
+
+  template <class T> void do_oop_work(T* p) {
+    oop         obj = oopDesc::load_decode_heap_oop(p);
     const char* str = NULL;
     const char* str2 = "";
 
@@ -2128,6 +2168,7 @@
 
 
   HeapWord* objAddr = (HeapWord*) obj;
+  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
   if (_g1h->is_in_g1_reserved(objAddr)) {
     tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
     HeapRegion* hr = _g1h->heap_region_containing(obj);
@@ -2345,7 +2386,7 @@
     }
   }
 
-  bool drain() {
+  template <class T> bool drain() {
     while (_ms_ind > 0) {
       oop obj = pop();
       assert(obj != NULL, "Since index was non-zero.");
@@ -2359,9 +2400,8 @@
         }
         // Now process this portion of this one.
         int lim = MIN2(next_arr_ind, len);
-        assert(!UseCompressedOops, "This needs to be fixed");
         for (int j = arr_ind; j < lim; j++) {
-          do_oop(aobj->obj_at_addr<oop>(j));
+          do_oop(aobj->obj_at_addr<T>(j));
         }
 
       } else {
@@ -2388,13 +2428,13 @@
     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
   }
 
-  void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-
-  void do_oop(oop* p) {
-    oop obj = *p;
-    if (obj == NULL) return;
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+
+  template <class T> void do_oop_work(T* p) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (oopDesc::is_null(heap_oop)) return;
+    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
     if (obj->is_forwarded()) {
       // If the object has already been forwarded, we have to make sure
       // that it's marked.  So follow the forwarding pointer.  Note that
@@ -2443,7 +2483,11 @@
     oop obj = oop(addr);
     if (!obj->is_forwarded()) {
       if (!_oop_cl.push(obj)) return false;
-      if (!_oop_cl.drain()) return false;
+      if (UseCompressedOops) {
+        if (!_oop_cl.drain<narrowOop>()) return false;
+      } else {
+        if (!_oop_cl.drain<oop>()) return false;
+      }
     }
     // Otherwise...
     return true;
@@ -2601,9 +2645,6 @@
 
 // abandon current marking iteration due to a Full GC
 void ConcurrentMark::abort() {
-  // If we're not marking, nothing to do.
-  if (!G1ConcMark) return;
-
   // Clear all marks to force marking thread to do nothing
   _nextMarkBitMap->clearAll();
   // Empty mark stack
@@ -2779,14 +2820,14 @@
   CMTask*            _task;
 
 public:
-  void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-
-  void do_oop(oop* p) {
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+
+  template <class T> void do_oop_work(T* p) {
     tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" );
-
-    oop obj = *p;
+    tmp_guarantee_CM( !_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(), "invariant" );
+
+    oop obj = oopDesc::load_decode_heap_oop(p);
     if (_cm->verbose_high())
       gclog_or_tty->print_cr("[%d] we're looking at location "
                              "*"PTR_FORMAT" = "PTR_FORMAT,
@@ -2932,6 +2973,7 @@
   ++_refs_reached;
 
   HeapWord* objAddr = (HeapWord*) obj;
+  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
   if (_g1h->is_in_g1_reserved(objAddr)) {
     tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
     HeapRegion* hr =  _g1h->heap_region_containing(obj);
@@ -2995,6 +3037,7 @@
 void CMTask::push(oop obj) {
   HeapWord* objAddr = (HeapWord*) obj;
   tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" );
+  tmp_guarantee_CM( !_g1h->heap_region_containing(objAddr)->is_on_free_list(), "invariant" );
   tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" );
   tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" );
 
@@ -3240,6 +3283,8 @@
 
       tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj),
                         "invariant" );
+      tmp_guarantee_CM( !_g1h->heap_region_containing(obj)->is_on_free_list(),
+                        "invariant" );
 
       scan_object(obj);
 
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -763,6 +763,7 @@
   CMBitMap*                   _nextMarkBitMap;
   // the task queue of this task
   CMTaskQueue*                _task_queue;
+private:
   // the task queue set---needed for stealing
   CMTaskQueueSet*             _task_queues;
   // indicates whether the task has been claimed---this is only  for
--- a/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -424,7 +424,7 @@
       while (n <= next_boundary) {
         q = n;
         oop obj = oop(q);
-        if (obj->klass() == NULL) return q;
+        if (obj->klass_or_null() == NULL) return q;
         n += obj->size();
       }
       assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
@@ -436,7 +436,7 @@
       while (n <= next_boundary) {
         q = n;
         oop obj = oop(q);
-        if (obj->klass() == NULL) return q;
+        if (obj->klass_or_null() == NULL) return q;
         n += _sp->block_size(q);
       }
       assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
--- a/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -96,14 +96,14 @@
     while (n <= addr) {
       q = n;
       oop obj = oop(q);
-      if (obj->klass() == NULL) return q;
+      if (obj->klass_or_null() == NULL) return q;
       n += obj->size();
     }
   } else {
     while (n <= addr) {
       q = n;
       oop obj = oop(q);
-      if (obj->klass() == NULL) return q;
+      if (obj->klass_or_null() == NULL) return q;
       n += _sp->block_size(q);
     }
   }
@@ -115,7 +115,7 @@
 inline HeapWord*
 G1BlockOffsetArray::forward_to_block_containing_addr(HeapWord* q,
                                                      const void* addr) {
-  if (oop(q)->klass() == NULL) return q;
+  if (oop(q)->klass_or_null() == NULL) return q;
   HeapWord* n = q + _sp->block_size(q);
   // In the normal case, where the query "addr" is a card boundary, and the
   // offset table chunks are the same size as cards, the block starting at
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -902,6 +902,10 @@
                                     size_t word_size) {
   ResourceMark rm;
 
+  if (PrintHeapAtGC) {
+    Universe::print_heap_before_gc();
+  }
+
   if (full && DisableExplicitGC) {
     gclog_or_tty->print("\n\n\nDisabling Explicit GC\n\n\n");
     return;
@@ -927,7 +931,7 @@
     g1_policy()->record_full_collection_start();
 
     gc_prologue(true);
-    increment_total_collections();
+    increment_total_collections(true /* full gc */);
 
     size_t g1h_prev_used = used();
     assert(used() == recalculate_used(), "Should be equal");
@@ -1066,6 +1070,10 @@
     assert( check_young_list_empty(false, false),
             "young list should be empty at this point");
   }
+
+  if (PrintHeapAtGC) {
+    Universe::print_heap_after_gc();
+  }
 }
 
 void G1CollectedHeap::do_full_collection(bool clear_all_soft_refs) {
@@ -1650,8 +1658,15 @@
   assert(Heap_lock->owner() != NULL,
          "Should be owned on this thread's behalf.");
   size_t result = _summary_bytes_used;
-  if (_cur_alloc_region != NULL)
-    result += _cur_alloc_region->used();
+  // Read only once in case it is set to NULL concurrently
+  HeapRegion* hr = _cur_alloc_region;
+  if (hr != NULL)
+    result += hr->used();
+  return result;
+}
+
+size_t G1CollectedHeap::used_unlocked() const {
+  size_t result = _summary_bytes_used;
   return result;
 }
 
@@ -2125,13 +2140,13 @@
   VerifyLivenessOopClosure(G1CollectedHeap* _g1h) {
     g1h = _g1h;
   }
-  void do_oop(narrowOop *p) {
-    guarantee(false, "NYI");
-  }
-  void do_oop(oop *p) {
-    oop obj = *p;
-    assert(obj == NULL || !g1h->is_obj_dead(obj),
-           "Dead object referenced by a not dead object");
+  void do_oop(narrowOop *p) { do_oop_work(p); }
+  void do_oop(      oop *p) { do_oop_work(p); }
+
+  template <class T> void do_oop_work(T *p) {
+    oop obj = oopDesc::load_decode_heap_oop(p);
+    guarantee(obj == NULL || !g1h->is_obj_dead(obj),
+              "Dead object referenced by a not dead object");
   }
 };
 
@@ -2198,8 +2213,10 @@
   // use_prev_marking == true  -> use "prev" marking information,
   // use_prev_marking == false -> use "next" marking information
   VerifyRegionClosure(bool allow_dirty, bool par, bool use_prev_marking)
-    : _allow_dirty(allow_dirty), _par(par),
+    : _allow_dirty(allow_dirty),
+      _par(par),
       _use_prev_marking(use_prev_marking) {}
+
   bool doHeapRegion(HeapRegion* r) {
     guarantee(_par || r->claim_value() == HeapRegion::InitialClaimValue,
               "Should be unclaimed at verify points.");
@@ -2223,18 +2240,16 @@
   // use_prev_marking == true  -> use "prev" marking information,
   // use_prev_marking == false -> use "next" marking information
   VerifyRootsClosure(bool use_prev_marking) :
-    _g1h(G1CollectedHeap::heap()), _failures(false),
+    _g1h(G1CollectedHeap::heap()),
+    _failures(false),
     _use_prev_marking(use_prev_marking) { }
 
   bool failures() { return _failures; }
 
-  void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-
-  void do_oop(oop* p) {
-    oop obj = *p;
-    if (obj != NULL) {
+  template <class T> void do_oop_nv(T* p) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
       if (_g1h->is_obj_dead_cond(obj, _use_prev_marking)) {
         gclog_or_tty->print_cr("Root location "PTR_FORMAT" "
                                "points to dead obj "PTR_FORMAT, p, (void*) obj);
@@ -2243,6 +2258,9 @@
       }
     }
   }
+
+  void do_oop(oop* p)       { do_oop_nv(p); }
+  void do_oop(narrowOop* p) { do_oop_nv(p); }
 };
 
 // This is the task used for parallel heap verification.
@@ -2259,7 +2277,8 @@
   G1ParVerifyTask(G1CollectedHeap* g1h, bool allow_dirty,
                   bool use_prev_marking) :
     AbstractGangTask("Parallel verify task"),
-    _g1h(g1h), _allow_dirty(allow_dirty),
+    _g1h(g1h),
+    _allow_dirty(allow_dirty),
     _use_prev_marking(use_prev_marking) { }
 
   void work(int worker_i) {
@@ -2325,9 +2344,37 @@
   }
 };
 
-void G1CollectedHeap::print() const { print_on(gclog_or_tty); }
+void G1CollectedHeap::print() const { print_on(tty); }
 
 void G1CollectedHeap::print_on(outputStream* st) const {
+  print_on(st, PrintHeapAtGCExtended);
+}
+
+void G1CollectedHeap::print_on(outputStream* st, bool extended) const {
+  st->print(" %-20s", "garbage-first heap");
+  st->print(" total " SIZE_FORMAT "K, used " SIZE_FORMAT "K",
+            capacity()/K, used_unlocked()/K);
+  st->print(" [" INTPTR_FORMAT ", " INTPTR_FORMAT ", " INTPTR_FORMAT ")",
+            _g1_storage.low_boundary(),
+            _g1_storage.high(),
+            _g1_storage.high_boundary());
+  st->cr();
+  st->print("  region size " SIZE_FORMAT "K, ",
+            HeapRegion::GrainBytes/K);
+  size_t young_regions = _young_list->length();
+  st->print(SIZE_FORMAT " young (" SIZE_FORMAT "K), ",
+            young_regions, young_regions * HeapRegion::GrainBytes / K);
+  size_t survivor_regions = g1_policy()->recorded_survivor_regions();
+  st->print(SIZE_FORMAT " survivors (" SIZE_FORMAT "K)",
+            survivor_regions, survivor_regions * HeapRegion::GrainBytes / K);
+  st->cr();
+  perm()->as_gen()->print_on(st);
+  if (extended) {
+    print_on_extended(st);
+  }
+}
+
+void G1CollectedHeap::print_on_extended(outputStream* st) const {
   PrintRegionClosure blk(st);
   _hrs->iterate(&blk);
 }
@@ -2408,10 +2455,6 @@
 }
 
 void G1CollectedHeap::gc_prologue(bool full /* Ignored */) {
-  if (PrintHeapAtGC){
-    gclog_or_tty->print_cr(" {Heap before GC collections=%d:", total_collections());
-    Universe::print();
-  }
   assert(InlineCacheBuffer::is_empty(), "should have cleaned up ICBuffer");
   // Call allocation profiler
   AllocationProfiler::iterate_since_last_gc();
@@ -2425,12 +2468,6 @@
   // is set.
   COMPILER2_PRESENT(assert(DerivedPointerTable::is_empty(),
                         "derived pointer present"));
-
-  if (PrintHeapAtGC){
-    gclog_or_tty->print_cr(" Heap after GC collections=%d:", total_collections());
-    Universe::print();
-    gclog_or_tty->print("} ");
-  }
 }
 
 void G1CollectedHeap::do_collection_pause() {
@@ -2453,12 +2490,10 @@
 
 void
 G1CollectedHeap::doConcurrentMark() {
-  if (G1ConcMark) {
-    MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
-    if (!_cmThread->in_progress()) {
-      _cmThread->set_started();
-      CGC_lock->notify();
-    }
+  MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
+  if (!_cmThread->in_progress()) {
+    _cmThread->set_started();
+    CGC_lock->notify();
   }
 }
 
@@ -2535,9 +2570,11 @@
                           "Not enough space for young surv words summary.");
   }
   memset(_surviving_young_words, 0, array_length * sizeof(size_t));
+#ifdef ASSERT
   for (size_t i = 0;  i < array_length; ++i) {
-    guarantee( _surviving_young_words[i] == 0, "invariant" );
-  }
+    assert( _surviving_young_words[i] == 0, "memset above" );
+  }
+#endif // !ASSERT
 }
 
 void
@@ -2559,241 +2596,254 @@
 
 void
 G1CollectedHeap::do_collection_pause_at_safepoint() {
-  char verbose_str[128];
-  sprintf(verbose_str, "GC pause ");
-  if (g1_policy()->in_young_gc_mode()) {
-    if (g1_policy()->full_young_gcs())
-      strcat(verbose_str, "(young)");
-    else
-      strcat(verbose_str, "(partial)");
-  }
-  if (g1_policy()->should_initiate_conc_mark())
-    strcat(verbose_str, " (initial-mark)");
-
-  GCCauseSetter x(this, GCCause::_g1_inc_collection_pause);
-
-  // if PrintGCDetails is on, we'll print long statistics information
-  // in the collector policy code, so let's not print this as the output
-  // is messy if we do.
-  gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
-  TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
-  TraceTime t(verbose_str, PrintGC && !PrintGCDetails, true, gclog_or_tty);
-
-  ResourceMark rm;
-  assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint");
-  assert(Thread::current() == VMThread::vm_thread(), "should be in vm thread");
-  guarantee(!is_gc_active(), "collection is not reentrant");
-  assert(regions_accounted_for(), "Region leakage!");
-
-  increment_gc_time_stamp();
-
-  if (g1_policy()->in_young_gc_mode()) {
-    assert(check_young_list_well_formed(),
-                "young list should be well formed");
-  }
-
-  if (GC_locker::is_active()) {
-    return; // GC is disabled (e.g. JNI GetXXXCritical operation)
-  }
-
-  bool abandoned = false;
-  { // Call to jvmpi::post_class_unload_events must occur outside of active GC
-    IsGCActiveMark x;
-
-    gc_prologue(false);
-    increment_total_collections();
+  if (PrintHeapAtGC) {
+    Universe::print_heap_before_gc();
+  }
+
+  {
+    char verbose_str[128];
+    sprintf(verbose_str, "GC pause ");
+    if (g1_policy()->in_young_gc_mode()) {
+      if (g1_policy()->full_young_gcs())
+        strcat(verbose_str, "(young)");
+      else
+        strcat(verbose_str, "(partial)");
+    }
+    if (g1_policy()->should_initiate_conc_mark())
+      strcat(verbose_str, " (initial-mark)");
+
+    GCCauseSetter x(this, GCCause::_g1_inc_collection_pause);
+
+    // if PrintGCDetails is on, we'll print long statistics information
+    // in the collector policy code, so let's not print this as the output
+    // is messy if we do.
+    gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
+    TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
+    TraceTime t(verbose_str, PrintGC && !PrintGCDetails, true, gclog_or_tty);
+
+    ResourceMark rm;
+    assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint");
+    assert(Thread::current() == VMThread::vm_thread(), "should be in vm thread");
+    guarantee(!is_gc_active(), "collection is not reentrant");
+    assert(regions_accounted_for(), "Region leakage!");
+
+    increment_gc_time_stamp();
+
+    if (g1_policy()->in_young_gc_mode()) {
+      assert(check_young_list_well_formed(),
+             "young list should be well formed");
+    }
+
+    if (GC_locker::is_active()) {
+      return; // GC is disabled (e.g. JNI GetXXXCritical operation)
+    }
+
+    bool abandoned = false;
+    { // Call to jvmpi::post_class_unload_events must occur outside of active GC
+      IsGCActiveMark x;
+
+      gc_prologue(false);
+      increment_total_collections(false /* full gc */);
 
 #if G1_REM_SET_LOGGING
-    gclog_or_tty->print_cr("\nJust chose CS, heap:");
-    print();
-#endif
-
-    if (VerifyBeforeGC && total_collections() >= VerifyGCStartAt) {
-      HandleMark hm;  // Discard invalid handles created during verification
-      prepare_for_verify();
-      gclog_or_tty->print(" VerifyBeforeGC:");
-      Universe::verify(false);
-    }
-
-    COMPILER2_PRESENT(DerivedPointerTable::clear());
-
-    // We want to turn off ref discovery, if necessary, and turn it back on
-    // on again later if we do.
-    bool was_enabled = ref_processor()->discovery_enabled();
-    if (was_enabled) ref_processor()->disable_discovery();
-
-    // Forget the current alloc region (we might even choose it to be part
-    // of the collection set!).
-    abandon_cur_alloc_region();
-
-    // The elapsed time induced by the start time below deliberately elides
-    // the possible verification above.
-    double start_time_sec = os::elapsedTime();
-    GCOverheadReporter::recordSTWStart(start_time_sec);
-    size_t start_used_bytes = used();
-    if (!G1ConcMark) {
-      do_sync_mark();
-    }
-
-    g1_policy()->record_collection_pause_start(start_time_sec,
-                                               start_used_bytes);
-
-    guarantee(_in_cset_fast_test == NULL, "invariant");
-    guarantee(_in_cset_fast_test_base == NULL, "invariant");
-    _in_cset_fast_test_length = max_regions();
-    _in_cset_fast_test_base =
-                             NEW_C_HEAP_ARRAY(bool, _in_cset_fast_test_length);
-    memset(_in_cset_fast_test_base, false,
-                                     _in_cset_fast_test_length * sizeof(bool));
-    // We're biasing _in_cset_fast_test to avoid subtracting the
-    // beginning of the heap every time we want to index; basically
-    // it's the same with what we do with the card table.
-    _in_cset_fast_test = _in_cset_fast_test_base -
-              ((size_t) _g1_reserved.start() >> HeapRegion::LogOfHRGrainBytes);
-
-#if SCAN_ONLY_VERBOSE
-    _young_list->print();
-#endif // SCAN_ONLY_VERBOSE
-
-    if (g1_policy()->should_initiate_conc_mark()) {
-      concurrent_mark()->checkpointRootsInitialPre();
-    }
-    save_marks();
-
-    // We must do this before any possible evacuation that should propagate
-    // marks.
-    if (mark_in_progress()) {
-      double start_time_sec = os::elapsedTime();
-
-      _cm->drainAllSATBBuffers();
-      double finish_mark_ms = (os::elapsedTime() - start_time_sec) * 1000.0;
-      g1_policy()->record_satb_drain_time(finish_mark_ms);
-
-    }
-    // Record the number of elements currently on the mark stack, so we
-    // only iterate over these.  (Since evacuation may add to the mark
-    // stack, doing more exposes race conditions.)  If no mark is in
-    // progress, this will be zero.
-    _cm->set_oops_do_bound();
-
-    assert(regions_accounted_for(), "Region leakage.");
-
-    if (mark_in_progress())
-      concurrent_mark()->newCSet();
-
-    // Now choose the CS.
-    g1_policy()->choose_collection_set();
-
-    // We may abandon a pause if we find no region that will fit in the MMU
-    // pause.
-    bool abandoned = (g1_policy()->collection_set() == NULL);
-
-    // Nothing to do if we were unable to choose a collection set.
-    if (!abandoned) {
-#if G1_REM_SET_LOGGING
-      gclog_or_tty->print_cr("\nAfter pause, heap:");
+      gclog_or_tty->print_cr("\nJust chose CS, heap:");
       print();
 #endif
 
-      setup_surviving_young_words();
-
-      // Set up the gc allocation regions.
-      get_gc_alloc_regions();
-
-      // Actually do the work...
-      evacuate_collection_set();
-      free_collection_set(g1_policy()->collection_set());
-      g1_policy()->clear_collection_set();
-
-      FREE_C_HEAP_ARRAY(bool, _in_cset_fast_test_base);
-      // this is more for peace of mind; we're nulling them here and
-      // we're expecting them to be null at the beginning of the next GC
-      _in_cset_fast_test = NULL;
-      _in_cset_fast_test_base = NULL;
-
-      release_gc_alloc_regions(false /* totally */);
-
-      cleanup_surviving_young_words();
-
-      if (g1_policy()->in_young_gc_mode()) {
-        _young_list->reset_sampled_info();
-        assert(check_young_list_empty(true),
-               "young list should be empty");
+      if (VerifyBeforeGC && total_collections() >= VerifyGCStartAt) {
+        HandleMark hm;  // Discard invalid handles created during verification
+        prepare_for_verify();
+        gclog_or_tty->print(" VerifyBeforeGC:");
+        Universe::verify(false);
+      }
+
+      COMPILER2_PRESENT(DerivedPointerTable::clear());
+
+      // We want to turn off ref discovery, if necessary, and turn it back on
+      // on again later if we do. XXX Dubious: why is discovery disabled?
+      bool was_enabled = ref_processor()->discovery_enabled();
+      if (was_enabled) ref_processor()->disable_discovery();
+
+      // Forget the current alloc region (we might even choose it to be part
+      // of the collection set!).
+      abandon_cur_alloc_region();
+
+      // The elapsed time induced by the start time below deliberately elides
+      // the possible verification above.
+      double start_time_sec = os::elapsedTime();
+      GCOverheadReporter::recordSTWStart(start_time_sec);
+      size_t start_used_bytes = used();
+
+      g1_policy()->record_collection_pause_start(start_time_sec,
+                                                 start_used_bytes);
+
+      guarantee(_in_cset_fast_test == NULL, "invariant");
+      guarantee(_in_cset_fast_test_base == NULL, "invariant");
+      _in_cset_fast_test_length = max_regions();
+      _in_cset_fast_test_base =
+                             NEW_C_HEAP_ARRAY(bool, _in_cset_fast_test_length);
+      memset(_in_cset_fast_test_base, false,
+                                     _in_cset_fast_test_length * sizeof(bool));
+      // We're biasing _in_cset_fast_test to avoid subtracting the
+      // beginning of the heap every time we want to index; basically
+      // it's the same with what we do with the card table.
+      _in_cset_fast_test = _in_cset_fast_test_base -
+              ((size_t) _g1_reserved.start() >> HeapRegion::LogOfHRGrainBytes);
 
 #if SCAN_ONLY_VERBOSE
-        _young_list->print();
+      _young_list->print();
 #endif // SCAN_ONLY_VERBOSE
 
-        g1_policy()->record_survivor_regions(_young_list->survivor_length(),
-                                             _young_list->first_survivor_region(),
-                                             _young_list->last_survivor_region());
-        _young_list->reset_auxilary_lists();
+      if (g1_policy()->should_initiate_conc_mark()) {
+        concurrent_mark()->checkpointRootsInitialPre();
       }
-    } else {
-      COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
+      save_marks();
+
+      // We must do this before any possible evacuation that should propagate
+      // marks.
+      if (mark_in_progress()) {
+        double start_time_sec = os::elapsedTime();
+
+        _cm->drainAllSATBBuffers();
+        double finish_mark_ms = (os::elapsedTime() - start_time_sec) * 1000.0;
+        g1_policy()->record_satb_drain_time(finish_mark_ms);
+      }
+      // Record the number of elements currently on the mark stack, so we
+      // only iterate over these.  (Since evacuation may add to the mark
+      // stack, doing more exposes race conditions.)  If no mark is in
+      // progress, this will be zero.
+      _cm->set_oops_do_bound();
+
+      assert(regions_accounted_for(), "Region leakage.");
+
+      if (mark_in_progress())
+        concurrent_mark()->newCSet();
+
+      // Now choose the CS.
+      g1_policy()->choose_collection_set();
+
+      // We may abandon a pause if we find no region that will fit in the MMU
+      // pause.
+      bool abandoned = (g1_policy()->collection_set() == NULL);
+
+      // Nothing to do if we were unable to choose a collection set.
+      if (!abandoned) {
+#if G1_REM_SET_LOGGING
+        gclog_or_tty->print_cr("\nAfter pause, heap:");
+        print();
+#endif
+
+        setup_surviving_young_words();
+
+        // Set up the gc allocation regions.
+        get_gc_alloc_regions();
+
+        // Actually do the work...
+        evacuate_collection_set();
+        free_collection_set(g1_policy()->collection_set());
+        g1_policy()->clear_collection_set();
+
+        FREE_C_HEAP_ARRAY(bool, _in_cset_fast_test_base);
+        // this is more for peace of mind; we're nulling them here and
+        // we're expecting them to be null at the beginning of the next GC
+        _in_cset_fast_test = NULL;
+        _in_cset_fast_test_base = NULL;
+
+        release_gc_alloc_regions(false /* totally */);
+
+        cleanup_surviving_young_words();
+
+        if (g1_policy()->in_young_gc_mode()) {
+          _young_list->reset_sampled_info();
+          assert(check_young_list_empty(true),
+                 "young list should be empty");
+
+#if SCAN_ONLY_VERBOSE
+          _young_list->print();
+#endif // SCAN_ONLY_VERBOSE
+
+          g1_policy()->record_survivor_regions(_young_list->survivor_length(),
+                                          _young_list->first_survivor_region(),
+                                          _young_list->last_survivor_region());
+          _young_list->reset_auxilary_lists();
+        }
+      } else {
+        COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
+      }
+
+      if (evacuation_failed()) {
+        _summary_bytes_used = recalculate_used();
+      } else {
+        // The "used" of the the collection set have already been subtracted
+        // when they were freed.  Add in the bytes evacuated.
+        _summary_bytes_used += g1_policy()->bytes_in_to_space();
+      }
+
+      if (g1_policy()->in_young_gc_mode() &&
+          g1_policy()->should_initiate_conc_mark()) {
+        concurrent_mark()->checkpointRootsInitialPost();
+        set_marking_started();
+        // CAUTION: after the doConcurrentMark() call below,
+        // the concurrent marking thread(s) could be running
+        // concurrently with us. Make sure that anything after
+        // this point does not assume that we are the only GC thread
+        // running. Note: of course, the actual marking work will
+        // not start until the safepoint itself is released in
+        // ConcurrentGCThread::safepoint_desynchronize().
+        doConcurrentMark();
+      }
+
+#if SCAN_ONLY_VERBOSE
+      _young_list->print();
+#endif // SCAN_ONLY_VERBOSE
+
+      double end_time_sec = os::elapsedTime();
+      double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
+      g1_policy()->record_pause_time_ms(pause_time_ms);
+      GCOverheadReporter::recordSTWEnd(end_time_sec);
+      g1_policy()->record_collection_pause_end(abandoned);
+
+      assert(regions_accounted_for(), "Region leakage.");
+
+      if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) {
+        HandleMark hm;  // Discard invalid handles created during verification
+        gclog_or_tty->print(" VerifyAfterGC:");
+        prepare_for_verify();
+        Universe::verify(false);
+      }
+
+      if (was_enabled) ref_processor()->enable_discovery();
+
+      {
+        size_t expand_bytes = g1_policy()->expansion_amount();
+        if (expand_bytes > 0) {
+          size_t bytes_before = capacity();
+          expand(expand_bytes);
+        }
+      }
+
+      if (mark_in_progress()) {
+        concurrent_mark()->update_g1_committed();
+      }
+
+#ifdef TRACESPINNING
+      ParallelTaskTerminator::print_termination_counts();
+#endif
+
+      gc_epilogue(false);
     }
 
-    if (evacuation_failed()) {
-      _summary_bytes_used = recalculate_used();
-    } else {
-      // The "used" of the the collection set have already been subtracted
-      // when they were freed.  Add in the bytes evacuated.
-      _summary_bytes_used += g1_policy()->bytes_in_to_space();
+    assert(verify_region_lists(), "Bad region lists.");
+
+    if (ExitAfterGCNum > 0 && total_collections() == ExitAfterGCNum) {
+      gclog_or_tty->print_cr("Stopping after GC #%d", ExitAfterGCNum);
+      print_tracing_info();
+      vm_exit(-1);
     }
-
-    if (g1_policy()->in_young_gc_mode() &&
-        g1_policy()->should_initiate_conc_mark()) {
-      concurrent_mark()->checkpointRootsInitialPost();
-      set_marking_started();
-      doConcurrentMark();
-    }
-
-#if SCAN_ONLY_VERBOSE
-    _young_list->print();
-#endif // SCAN_ONLY_VERBOSE
-
-    double end_time_sec = os::elapsedTime();
-    double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
-    g1_policy()->record_pause_time_ms(pause_time_ms);
-    GCOverheadReporter::recordSTWEnd(end_time_sec);
-    g1_policy()->record_collection_pause_end(abandoned);
-
-    assert(regions_accounted_for(), "Region leakage.");
-
-    if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) {
-      HandleMark hm;  // Discard invalid handles created during verification
-      gclog_or_tty->print(" VerifyAfterGC:");
-      prepare_for_verify();
-      Universe::verify(false);
-    }
-
-    if (was_enabled) ref_processor()->enable_discovery();
-
-    {
-      size_t expand_bytes = g1_policy()->expansion_amount();
-      if (expand_bytes > 0) {
-        size_t bytes_before = capacity();
-        expand(expand_bytes);
-      }
-    }
-
-    if (mark_in_progress()) {
-      concurrent_mark()->update_g1_committed();
-    }
-
-#ifdef TRACESPINNING
-    ParallelTaskTerminator::print_termination_counts();
-#endif
-
-    gc_epilogue(false);
-  }
-
-  assert(verify_region_lists(), "Bad region lists.");
-
-  if (ExitAfterGCNum > 0 && total_collections() == ExitAfterGCNum) {
-    gclog_or_tty->print_cr("Stopping after GC #%d", ExitAfterGCNum);
-    print_tracing_info();
-    vm_exit(-1);
+  }
+
+  if (PrintHeapAtGC) {
+    Universe::print_heap_after_gc();
   }
 }
 
@@ -2930,6 +2980,7 @@
 
   for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
     assert(_gc_alloc_regions[ap] == NULL, "invariant");
+    assert(_gc_alloc_region_counts[ap] == 0, "invariant");
 
     // Create new GC alloc regions.
     HeapRegion* alloc_region = _retained_gc_alloc_regions[ap];
@@ -2958,6 +3009,9 @@
     if (alloc_region == NULL) {
       // we will get a new GC alloc region
       alloc_region = newAllocRegionWithExpansion(ap, 0);
+    } else {
+      // the region was retained from the last collection
+      ++_gc_alloc_region_counts[ap];
     }
 
     if (alloc_region != NULL) {
@@ -2996,11 +3050,11 @@
   for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
     HeapRegion* r = _gc_alloc_regions[ap];
     _retained_gc_alloc_regions[ap] = NULL;
+    _gc_alloc_region_counts[ap] = 0;
 
     if (r != NULL) {
       // we retain nothing on _gc_alloc_regions between GCs
       set_gc_alloc_region(ap, NULL);
-      _gc_alloc_region_counts[ap] = 0;
 
       if (r->is_empty()) {
         // we didn't actually allocate anything in it; let's just put
@@ -3088,10 +3142,8 @@
   G1CollectedHeap* _g1;
 public:
   G1KeepAliveClosure(G1CollectedHeap* g1) : _g1(g1) {}
-  void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-  void do_oop(oop* p) {
+  void do_oop(narrowOop* p) { guarantee(false, "Not needed"); }
+  void do_oop(      oop* p) {
     oop obj = *p;
 #ifdef G1_DEBUG
     if (PrintGC && Verbose) {
@@ -3103,7 +3155,6 @@
     if (_g1->obj_in_cs(obj)) {
       assert( obj->is_forwarded(), "invariant" );
       *p = obj->forwardee();
-
 #ifdef G1_DEBUG
       gclog_or_tty->print_cr("     in CSet: moved "PTR_FORMAT" -> "PTR_FORMAT,
                              (void*) obj, (void*) *p);
@@ -3120,12 +3171,12 @@
   UpdateRSetImmediate(G1CollectedHeap* g1) :
     _g1(g1), _g1_rem_set(g1->g1_rem_set()) {}
 
-  void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-  void do_oop(oop* p) {
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+  template <class T> void do_oop_work(T* p) {
     assert(_from->is_in_reserved(p), "paranoia");
-    if (*p != NULL && !_from->is_survivor()) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop) && !_from->is_survivor()) {
       _g1_rem_set->par_write_ref(_from, p, 0);
     }
   }
@@ -3141,12 +3192,12 @@
   UpdateRSetDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
     _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) {}
 
-  void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-  void do_oop(oop* p) {
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+  template <class T> void do_oop_work(T* p) {
     assert(_from->is_in_reserved(p), "paranoia");
-    if (!_from->is_in_reserved(*p) && !_from->is_survivor()) {
+    if (!_from->is_in_reserved(oopDesc::load_decode_heap_oop(p)) &&
+        !_from->is_survivor()) {
       size_t card_index = _ct_bs->index_for(p);
       if (_ct_bs->mark_card_deferred(card_index)) {
         _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index));
@@ -3501,614 +3552,66 @@
   fill_with_object(block, free_words);
 }
 
-#define use_local_bitmaps         1
-#define verify_local_bitmaps      0
-
 #ifndef PRODUCT
-
-class GCLabBitMap;
-class GCLabBitMapClosure: public BitMapClosure {
-private:
-  ConcurrentMark* _cm;
-  GCLabBitMap*    _bitmap;
-
-public:
-  GCLabBitMapClosure(ConcurrentMark* cm,
-                     GCLabBitMap* bitmap) {
-    _cm     = cm;
-    _bitmap = bitmap;
-  }
-
-  virtual bool do_bit(size_t offset);
-};
-
-#endif // PRODUCT
-
-#define oop_buffer_length 256
-
-class GCLabBitMap: public BitMap {
-private:
-  ConcurrentMark* _cm;
-
-  int       _shifter;
-  size_t    _bitmap_word_covers_words;
-
-  // beginning of the heap
-  HeapWord* _heap_start;
-
-  // this is the actual start of the GCLab
-  HeapWord* _real_start_word;
-
-  // this is the actual end of the GCLab
-  HeapWord* _real_end_word;
-
-  // this is the first word, possibly located before the actual start
-  // of the GCLab, that corresponds to the first bit of the bitmap
-  HeapWord* _start_word;
-
-  // size of a GCLab in words
-  size_t _gclab_word_size;
-
-  static int shifter() {
-    return MinObjAlignment - 1;
-  }
-
-  // how many heap words does a single bitmap word corresponds to?
-  static size_t bitmap_word_covers_words() {
-    return BitsPerWord << shifter();
-  }
-
-  static size_t gclab_word_size() {
-    return G1ParallelGCAllocBufferSize / HeapWordSize;
-  }
-
-  static size_t bitmap_size_in_bits() {
-    size_t bits_in_bitmap = gclab_word_size() >> shifter();
-    // We are going to ensure that the beginning of a word in this
-    // bitmap also corresponds to the beginning of a word in the
-    // global marking bitmap. To handle the case where a GCLab
-    // starts from the middle of the bitmap, we need to add enough
-    // space (i.e. up to a bitmap word) to ensure that we have
-    // enough bits in the bitmap.
-    return bits_in_bitmap + BitsPerWord - 1;
-  }
-public:
-  GCLabBitMap(HeapWord* heap_start)
-    : BitMap(bitmap_size_in_bits()),
-      _cm(G1CollectedHeap::heap()->concurrent_mark()),
-      _shifter(shifter()),
-      _bitmap_word_covers_words(bitmap_word_covers_words()),
-      _heap_start(heap_start),
-      _gclab_word_size(gclab_word_size()),
-      _real_start_word(NULL),
-      _real_end_word(NULL),
-      _start_word(NULL)
-  {
-    guarantee( size_in_words() >= bitmap_size_in_words(),
-               "just making sure");
-  }
-
-  inline unsigned heapWordToOffset(HeapWord* addr) {
-    unsigned offset = (unsigned) pointer_delta(addr, _start_word) >> _shifter;
-    assert(offset < size(), "offset should be within bounds");
-    return offset;
-  }
-
-  inline HeapWord* offsetToHeapWord(size_t offset) {
-    HeapWord* addr =  _start_word + (offset << _shifter);
-    assert(_real_start_word <= addr && addr < _real_end_word, "invariant");
-    return addr;
-  }
-
-  bool fields_well_formed() {
-    bool ret1 = (_real_start_word == NULL) &&
-                (_real_end_word == NULL) &&
-                (_start_word == NULL);
-    if (ret1)
-      return true;
-
-    bool ret2 = _real_start_word >= _start_word &&
-      _start_word < _real_end_word &&
-      (_real_start_word + _gclab_word_size) == _real_end_word &&
-      (_start_word + _gclab_word_size + _bitmap_word_covers_words)
-                                                              > _real_end_word;
-    return ret2;
-  }
-
-  inline bool mark(HeapWord* addr) {
-    guarantee(use_local_bitmaps, "invariant");
-    assert(fields_well_formed(), "invariant");
-
-    if (addr >= _real_start_word && addr < _real_end_word) {
-      assert(!isMarked(addr), "should not have already been marked");
-
-      // first mark it on the bitmap
-      at_put(heapWordToOffset(addr), true);
-
-      return true;
-    } else {
-      return false;
-    }
-  }
-
-  inline bool isMarked(HeapWord* addr) {
-    guarantee(use_local_bitmaps, "invariant");
-    assert(fields_well_formed(), "invariant");
-
-    return at(heapWordToOffset(addr));
-  }
-
-  void set_buffer(HeapWord* start) {
-    guarantee(use_local_bitmaps, "invariant");
-    clear();
-
-    assert(start != NULL, "invariant");
-    _real_start_word = start;
-    _real_end_word   = start + _gclab_word_size;
-
-    size_t diff =
-      pointer_delta(start, _heap_start) % _bitmap_word_covers_words;
-    _start_word = start - diff;
-
-    assert(fields_well_formed(), "invariant");
-  }
-
-#ifndef PRODUCT
-  void verify() {
-    // verify that the marks have been propagated
-    GCLabBitMapClosure cl(_cm, this);
-    iterate(&cl);
-  }
-#endif // PRODUCT
-
-  void retire() {
-    guarantee(use_local_bitmaps, "invariant");
-    assert(fields_well_formed(), "invariant");
-
-    if (_start_word != NULL) {
-      CMBitMap*       mark_bitmap = _cm->nextMarkBitMap();
-
-      // this means that the bitmap was set up for the GCLab
-      assert(_real_start_word != NULL && _real_end_word != NULL, "invariant");
-
-      mark_bitmap->mostly_disjoint_range_union(this,
-                                0, // always start from the start of the bitmap
-                                _start_word,
-                                size_in_words());
-      _cm->grayRegionIfNecessary(MemRegion(_real_start_word, _real_end_word));
-
-#ifndef PRODUCT
-      if (use_local_bitmaps && verify_local_bitmaps)
-        verify();
-#endif // PRODUCT
-    } else {
-      assert(_real_start_word == NULL && _real_end_word == NULL, "invariant");
-    }
-  }
-
-  static size_t bitmap_size_in_words() {
-    return (bitmap_size_in_bits() + BitsPerWord - 1) / BitsPerWord;
-  }
-};
-
-#ifndef PRODUCT
-
 bool GCLabBitMapClosure::do_bit(size_t offset) {
   HeapWord* addr = _bitmap->offsetToHeapWord(offset);
   guarantee(_cm->isMarked(oop(addr)), "it should be!");
   return true;
 }
-
 #endif // PRODUCT
 
-class G1ParGCAllocBuffer: public ParGCAllocBuffer {
-private:
-  bool        _retired;
-  bool        _during_marking;
-  GCLabBitMap _bitmap;
-
-public:
-  G1ParGCAllocBuffer() :
-    ParGCAllocBuffer(G1ParallelGCAllocBufferSize / HeapWordSize),
-    _during_marking(G1CollectedHeap::heap()->mark_in_progress()),
-    _bitmap(G1CollectedHeap::heap()->reserved_region().start()),
-    _retired(false)
-  { }
-
-  inline bool mark(HeapWord* addr) {
-    guarantee(use_local_bitmaps, "invariant");
-    assert(_during_marking, "invariant");
-    return _bitmap.mark(addr);
-  }
-
-  inline void set_buf(HeapWord* buf) {
-    if (use_local_bitmaps && _during_marking)
-      _bitmap.set_buffer(buf);
-    ParGCAllocBuffer::set_buf(buf);
-    _retired = false;
-  }
-
-  inline void retire(bool end_of_gc, bool retain) {
-    if (_retired)
-      return;
-    if (use_local_bitmaps && _during_marking) {
-      _bitmap.retire();
-    }
-    ParGCAllocBuffer::retire(end_of_gc, retain);
-    _retired = true;
-  }
-};
-
-
-class G1ParScanThreadState : public StackObj {
-protected:
-  G1CollectedHeap* _g1h;
-  RefToScanQueue*  _refs;
-  DirtyCardQueue   _dcq;
-  CardTableModRefBS* _ct_bs;
-  G1RemSet* _g1_rem;
-
-  typedef GrowableArray<oop*> OverflowQueue;
-  OverflowQueue* _overflowed_refs;
-
-  G1ParGCAllocBuffer _alloc_buffers[GCAllocPurposeCount];
-  ageTable           _age_table;
-
-  size_t           _alloc_buffer_waste;
-  size_t           _undo_waste;
-
-  OopsInHeapRegionClosure*      _evac_failure_cl;
-  G1ParScanHeapEvacClosure*     _evac_cl;
-  G1ParScanPartialArrayClosure* _partial_scan_cl;
-
-  int _hash_seed;
-  int _queue_num;
-
-  int _term_attempts;
+G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num)
+  : _g1h(g1h),
+    _refs(g1h->task_queue(queue_num)),
+    _dcq(&g1h->dirty_card_queue_set()),
+    _ct_bs((CardTableModRefBS*)_g1h->barrier_set()),
+    _g1_rem(g1h->g1_rem_set()),
+    _hash_seed(17), _queue_num(queue_num),
+    _term_attempts(0),
+    _age_table(false),
 #if G1_DETAILED_STATS
-  int _pushes, _pops, _steals, _steal_attempts;
-  int _overflow_pushes;
+    _pushes(0), _pops(0), _steals(0),
+    _steal_attempts(0),  _overflow_pushes(0),
 #endif
-
-  double _start;
-  double _start_strong_roots;
-  double _strong_roots_time;
-  double _start_term;
-  double _term_time;
-
-  // Map from young-age-index (0 == not young, 1 is youngest) to
-  // surviving words. base is what we get back from the malloc call
-  size_t* _surviving_young_words_base;
-  // this points into the array, as we use the first few entries for padding
-  size_t* _surviving_young_words;
-
-#define PADDING_ELEM_NUM (64 / sizeof(size_t))
-
-  void   add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; }
-
-  void   add_to_undo_waste(size_t waste)         { _undo_waste += waste; }
-
-  DirtyCardQueue& dirty_card_queue()             { return _dcq;  }
-  CardTableModRefBS* ctbs()                      { return _ct_bs; }
-
-  void immediate_rs_update(HeapRegion* from, oop* p, int tid) {
-    if (!from->is_survivor()) {
-      _g1_rem->par_write_ref(from, p, tid);
-    }
-  }
-
-  void deferred_rs_update(HeapRegion* from, oop* p, int tid) {
-    // If the new value of the field points to the same region or
-    // is the to-space, we don't need to include it in the Rset updates.
-    if (!from->is_in_reserved(*p) && !from->is_survivor()) {
-      size_t card_index = ctbs()->index_for(p);
-      // If the card hasn't been added to the buffer, do it.
-      if (ctbs()->mark_card_deferred(card_index)) {
-        dirty_card_queue().enqueue((jbyte*)ctbs()->byte_for_index(card_index));
-      }
-    }
-  }
-
-public:
-  G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num)
-    : _g1h(g1h),
-      _refs(g1h->task_queue(queue_num)),
-      _dcq(&g1h->dirty_card_queue_set()),
-      _ct_bs((CardTableModRefBS*)_g1h->barrier_set()),
-      _g1_rem(g1h->g1_rem_set()),
-      _hash_seed(17), _queue_num(queue_num),
-      _term_attempts(0),
-      _age_table(false),
-#if G1_DETAILED_STATS
-      _pushes(0), _pops(0), _steals(0),
-      _steal_attempts(0),  _overflow_pushes(0),
-#endif
-      _strong_roots_time(0), _term_time(0),
-      _alloc_buffer_waste(0), _undo_waste(0)
-  {
-    // we allocate G1YoungSurvRateNumRegions plus one entries, since
-    // we "sacrifice" entry 0 to keep track of surviving bytes for
-    // non-young regions (where the age is -1)
-    // We also add a few elements at the beginning and at the end in
-    // an attempt to eliminate cache contention
-    size_t real_length = 1 + _g1h->g1_policy()->young_cset_length();
-    size_t array_length = PADDING_ELEM_NUM +
-                          real_length +
-                          PADDING_ELEM_NUM;
-    _surviving_young_words_base = NEW_C_HEAP_ARRAY(size_t, array_length);
-    if (_surviving_young_words_base == NULL)
-      vm_exit_out_of_memory(array_length * sizeof(size_t),
-                            "Not enough space for young surv histo.");
-    _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM;
-    memset(_surviving_young_words, 0, real_length * sizeof(size_t));
-
-    _overflowed_refs = new OverflowQueue(10);
-
-    _start = os::elapsedTime();
-  }
-
-  ~G1ParScanThreadState() {
-    FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base);
-  }
-
-  RefToScanQueue*   refs()            { return _refs;             }
-  OverflowQueue*    overflowed_refs() { return _overflowed_refs;  }
-  ageTable*         age_table()       { return &_age_table;       }
-
-  G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) {
-    return &_alloc_buffers[purpose];
-  }
-
-  size_t alloc_buffer_waste()                    { return _alloc_buffer_waste; }
-  size_t undo_waste()                            { return _undo_waste; }
-
-  void push_on_queue(oop* ref) {
-    assert(ref != NULL, "invariant");
-    assert(has_partial_array_mask(ref) || _g1h->obj_in_cs(*ref), "invariant");
-
-    if (!refs()->push(ref)) {
-      overflowed_refs()->push(ref);
-      IF_G1_DETAILED_STATS(note_overflow_push());
-    } else {
-      IF_G1_DETAILED_STATS(note_push());
-    }
-  }
-
-  void pop_from_queue(oop*& ref) {
-    if (!refs()->pop_local(ref)) {
-      ref = NULL;
-    } else {
-      assert(ref != NULL, "invariant");
-      assert(has_partial_array_mask(ref) || _g1h->obj_in_cs(*ref),
-             "invariant");
-
-      IF_G1_DETAILED_STATS(note_pop());
-    }
-  }
-
-  void pop_from_overflow_queue(oop*& ref) {
-    ref = overflowed_refs()->pop();
-  }
-
-  int refs_to_scan()                             { return refs()->size();                 }
-  int overflowed_refs_to_scan()                  { return overflowed_refs()->length();    }
-
-  void update_rs(HeapRegion* from, oop* p, int tid) {
-    if (G1DeferredRSUpdate) {
-      deferred_rs_update(from, p, tid);
-    } else {
-      immediate_rs_update(from, p, tid);
-    }
-  }
-
-  HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz) {
-
-    HeapWord* obj = NULL;
-    if (word_sz * 100 <
-        (size_t)(G1ParallelGCAllocBufferSize / HeapWordSize) *
-                                                  ParallelGCBufferWastePct) {
-      G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose);
-      add_to_alloc_buffer_waste(alloc_buf->words_remaining());
-      alloc_buf->retire(false, false);
-
-      HeapWord* buf =
-        _g1h->par_allocate_during_gc(purpose, G1ParallelGCAllocBufferSize / HeapWordSize);
-      if (buf == NULL) return NULL; // Let caller handle allocation failure.
-      // Otherwise.
-      alloc_buf->set_buf(buf);
-
-      obj = alloc_buf->allocate(word_sz);
-      assert(obj != NULL, "buffer was definitely big enough...");
-    } else {
-      obj = _g1h->par_allocate_during_gc(purpose, word_sz);
-    }
-    return obj;
-  }
-
-  HeapWord* allocate(GCAllocPurpose purpose, size_t word_sz) {
-    HeapWord* obj = alloc_buffer(purpose)->allocate(word_sz);
-    if (obj != NULL) return obj;
-    return allocate_slow(purpose, word_sz);
-  }
-
-  void undo_allocation(GCAllocPurpose purpose, HeapWord* obj, size_t word_sz) {
-    if (alloc_buffer(purpose)->contains(obj)) {
-      guarantee(alloc_buffer(purpose)->contains(obj + word_sz - 1),
-                "should contain whole object");
-      alloc_buffer(purpose)->undo_allocation(obj, word_sz);
-    } else {
-      CollectedHeap::fill_with_object(obj, word_sz);
-      add_to_undo_waste(word_sz);
-    }
-  }
-
-  void set_evac_failure_closure(OopsInHeapRegionClosure* evac_failure_cl) {
-    _evac_failure_cl = evac_failure_cl;
-  }
-  OopsInHeapRegionClosure* evac_failure_closure() {
-    return _evac_failure_cl;
-  }
-
-  void set_evac_closure(G1ParScanHeapEvacClosure* evac_cl) {
-    _evac_cl = evac_cl;
-  }
-
-  void set_partial_scan_closure(G1ParScanPartialArrayClosure* partial_scan_cl) {
-    _partial_scan_cl = partial_scan_cl;
-  }
-
-  int* hash_seed() { return &_hash_seed; }
-  int  queue_num() { return _queue_num; }
-
-  int term_attempts()   { return _term_attempts; }
-  void note_term_attempt()  { _term_attempts++; }
-
-#if G1_DETAILED_STATS
-  int pushes()          { return _pushes; }
-  int pops()            { return _pops; }
-  int steals()          { return _steals; }
-  int steal_attempts()  { return _steal_attempts; }
-  int overflow_pushes() { return _overflow_pushes; }
-
-  void note_push()          { _pushes++; }
-  void note_pop()           { _pops++; }
-  void note_steal()         { _steals++; }
-  void note_steal_attempt() { _steal_attempts++; }
-  void note_overflow_push() { _overflow_pushes++; }
-#endif
-
-  void start_strong_roots() {
-    _start_strong_roots = os::elapsedTime();
-  }
-  void end_strong_roots() {
-    _strong_roots_time += (os::elapsedTime() - _start_strong_roots);
-  }
-  double strong_roots_time() { return _strong_roots_time; }
-
-  void start_term_time() {
-    note_term_attempt();
-    _start_term = os::elapsedTime();
-  }
-  void end_term_time() {
-    _term_time += (os::elapsedTime() - _start_term);
-  }
-  double term_time() { return _term_time; }
-
-  double elapsed() {
-    return os::elapsedTime() - _start;
-  }
-
-  size_t* surviving_young_words() {
-    // We add on to hide entry 0 which accumulates surviving words for
-    // age -1 regions (i.e. non-young ones)
-    return _surviving_young_words;
-  }
-
-  void retire_alloc_buffers() {
-    for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
-      size_t waste = _alloc_buffers[ap].words_remaining();
-      add_to_alloc_buffer_waste(waste);
-      _alloc_buffers[ap].retire(true, false);
-    }
-  }
-
-private:
-  void deal_with_reference(oop* ref_to_scan) {
-    if (has_partial_array_mask(ref_to_scan)) {
-      _partial_scan_cl->do_oop_nv(ref_to_scan);
-    } else {
-      // Note: we can use "raw" versions of "region_containing" because
-      // "obj_to_scan" is definitely in the heap, and is not in a
-      // humongous region.
-      HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan);
-      _evac_cl->set_region(r);
-      _evac_cl->do_oop_nv(ref_to_scan);
-    }
-  }
-
-public:
-  void trim_queue() {
-    // I've replicated the loop twice, first to drain the overflow
-    // queue, second to drain the task queue. This is better than
-    // having a single loop, which checks both conditions and, inside
-    // it, either pops the overflow queue or the task queue, as each
-    // loop is tighter. Also, the decision to drain the overflow queue
-    // first is not arbitrary, as the overflow queue is not visible
-    // to the other workers, whereas the task queue is. So, we want to
-    // drain the "invisible" entries first, while allowing the other
-    // workers to potentially steal the "visible" entries.
-
-    while (refs_to_scan() > 0 || overflowed_refs_to_scan() > 0) {
-      while (overflowed_refs_to_scan() > 0) {
-        oop *ref_to_scan = NULL;
-        pop_from_overflow_queue(ref_to_scan);
-        assert(ref_to_scan != NULL, "invariant");
-        // We shouldn't have pushed it on the queue if it was not
-        // pointing into the CSet.
-        assert(ref_to_scan != NULL, "sanity");
-        assert(has_partial_array_mask(ref_to_scan) ||
-                                      _g1h->obj_in_cs(*ref_to_scan), "sanity");
-
-        deal_with_reference(ref_to_scan);
-      }
-
-      while (refs_to_scan() > 0) {
-        oop *ref_to_scan = NULL;
-        pop_from_queue(ref_to_scan);
-
-        if (ref_to_scan != NULL) {
-          // We shouldn't have pushed it on the queue if it was not
-          // pointing into the CSet.
-          assert(has_partial_array_mask(ref_to_scan) ||
-                                      _g1h->obj_in_cs(*ref_to_scan), "sanity");
-
-          deal_with_reference(ref_to_scan);
-        }
-      }
-    }
-  }
-};
+    _strong_roots_time(0), _term_time(0),
+    _alloc_buffer_waste(0), _undo_waste(0)
+{
+  // we allocate G1YoungSurvRateNumRegions plus one entries, since
+  // we "sacrifice" entry 0 to keep track of surviving bytes for
+  // non-young regions (where the age is -1)
+  // We also add a few elements at the beginning and at the end in
+  // an attempt to eliminate cache contention
+  size_t real_length = 1 + _g1h->g1_policy()->young_cset_length();
+  size_t array_length = PADDING_ELEM_NUM +
+                        real_length +
+                        PADDING_ELEM_NUM;
+  _surviving_young_words_base = NEW_C_HEAP_ARRAY(size_t, array_length);
+  if (_surviving_young_words_base == NULL)
+    vm_exit_out_of_memory(array_length * sizeof(size_t),
+                          "Not enough space for young surv histo.");
+  _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM;
+  memset(_surviving_young_words, 0, real_length * sizeof(size_t));
+
+  _overflowed_refs = new OverflowQueue(10);
+
+  _start = os::elapsedTime();
+}
 
 G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
   _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()),
   _par_scan_state(par_scan_state) { }
 
-// This closure is applied to the fields of the objects that have just been copied.
-// Should probably be made inline and moved in g1OopClosures.inline.hpp.
-void G1ParScanClosure::do_oop_nv(oop* p) {
-  oop obj = *p;
-
-  if (obj != NULL) {
-    if (_g1->in_cset_fast_test(obj)) {
-      // We're not going to even bother checking whether the object is
-      // already forwarded or not, as this usually causes an immediate
-      // stall. We'll try to prefetch the object (for write, given that
-      // we might need to install the forwarding reference) and we'll
-      // get back to it when pop it from the queue
-      Prefetch::write(obj->mark_addr(), 0);
-      Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
-
-      // slightly paranoid test; I'm trying to catch potential
-      // problems before we go into push_on_queue to know where the
-      // problem is coming from
-      assert(obj == *p, "the value of *p should not have changed");
-      _par_scan_state->push_on_queue(p);
-    } else {
-      _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
-    }
-  }
-}
-
-void G1ParCopyHelper::mark_forwardee(oop* p) {
+template <class T> void G1ParCopyHelper::mark_forwardee(T* p) {
   // This is called _after_ do_oop_work has been called, hence after
   // the object has been relocated to its new location and *p points
   // to its new location.
 
-  oop thisOop = *p;
-  if (thisOop != NULL) {
-    assert((_g1->evacuation_failed()) || (!_g1->obj_in_cs(thisOop)),
+  T heap_oop = oopDesc::load_heap_oop(p);
+  if (!oopDesc::is_null(heap_oop)) {
+    oop obj = oopDesc::decode_heap_oop(heap_oop);
+    assert((_g1->evacuation_failed()) || (!_g1->obj_in_cs(obj)),
            "shouldn't still be in the CSet if evacuation didn't fail.");
-    HeapWord* addr = (HeapWord*)thisOop;
+    HeapWord* addr = (HeapWord*)obj;
     if (_g1->is_in_g1_reserved(addr))
       _cm->grayRoot(oop(addr));
   }
@@ -4191,7 +3694,8 @@
 
     if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
       arrayOop(old)->set_length(0);
-      _par_scan_state->push_on_queue(set_partial_array_mask(old));
+      oop* old_p = set_partial_array_mask(old);
+      _par_scan_state->push_on_queue(old_p);
     } else {
       // No point in using the slower heap_region_containing() method,
       // given that we know obj is in the heap.
@@ -4205,11 +3709,11 @@
   return obj;
 }
 
-template<bool do_gen_barrier, G1Barrier barrier,
-         bool do_mark_forwardee, bool skip_cset_test>
-void G1ParCopyClosure<do_gen_barrier, barrier,
-                      do_mark_forwardee, skip_cset_test>::do_oop_work(oop* p) {
-  oop obj = *p;
+template <bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee, bool skip_cset_test>
+template <class T>
+void G1ParCopyClosure <do_gen_barrier, barrier, do_mark_forwardee, skip_cset_test>
+::do_oop_work(T* p) {
+  oop obj = oopDesc::load_decode_heap_oop(p);
   assert(barrier != G1BarrierRS || obj != NULL,
          "Precondition: G1BarrierRS implies obj is nonNull");
 
@@ -4226,9 +3730,10 @@
                            "into CS.", p, (void*) obj);
 #endif
     if (obj->is_forwarded()) {
-      *p = obj->forwardee();
+      oopDesc::encode_store_heap_oop(p, obj->forwardee());
     } else {
-      *p = copy_to_survivor_space(obj);
+      oop copy_oop = copy_to_survivor_space(obj);
+      oopDesc::encode_store_heap_oop(p, copy_oop);
     }
     // When scanning the RS, we only care about objs in CS.
     if (barrier == G1BarrierRS) {
@@ -4247,21 +3752,9 @@
 }
 
 template void G1ParCopyClosure<false, G1BarrierEvac, false, true>::do_oop_work(oop* p);
-
-template<class T> void G1ParScanPartialArrayClosure::process_array_chunk(
-  oop obj, int start, int end) {
-  // process our set of indices (include header in first chunk)
-  assert(start < end, "invariant");
-  T* const base      = (T*)objArrayOop(obj)->base();
-  T* const start_addr = (start == 0) ? (T*) obj : base + start;
-  T* const end_addr   = base + end;
-  MemRegion mr((HeapWord*)start_addr, (HeapWord*)end_addr);
-  _scanner.set_region(_g1->heap_region_containing(obj));
-  obj->oop_iterate(&_scanner, mr);
-}
-
-void G1ParScanPartialArrayClosure::do_oop_nv(oop* p) {
-  assert(!UseCompressedOops, "Needs to be fixed to work with compressed oops");
+template void G1ParCopyClosure<false, G1BarrierEvac, false, true>::do_oop_work(narrowOop* p);
+
+template <class T> void G1ParScanPartialArrayClosure::do_oop_nv(T* p) {
   assert(has_partial_array_mask(p), "invariant");
   oop old = clear_partial_array_mask(p);
   assert(old->is_objArray(), "must be obj array");
@@ -4281,19 +3774,19 @@
     end = start + ParGCArrayScanChunk;
     arrayOop(old)->set_length(end);
     // Push remainder.
-    _par_scan_state->push_on_queue(set_partial_array_mask(old));
+    oop* old_p = set_partial_array_mask(old);
+    assert(arrayOop(old)->length() < obj->length(), "Empty push?");
+    _par_scan_state->push_on_queue(old_p);
   } else {
     // Restore length so that the heap remains parsable in
     // case of evacuation failure.
     arrayOop(old)->set_length(end);
   }
-
+  _scanner.set_region(_g1->heap_region_containing_raw(obj));
   // process our set of indices (include header in first chunk)
-  process_array_chunk<oop>(obj, start, end);
+  obj->oop_iterate_range(&_scanner, start, end);
 }
 
-int G1ScanAndBalanceClosure::_nq = 0;
-
 class G1ParEvacuateFollowersClosure : public VoidClosure {
 protected:
   G1CollectedHeap*              _g1h;
@@ -4316,21 +3809,28 @@
   void do_void() {
     G1ParScanThreadState* pss = par_scan_state();
     while (true) {
-      oop* ref_to_scan;
       pss->trim_queue();
       IF_G1_DETAILED_STATS(pss->note_steal_attempt());
-      if (queues()->steal(pss->queue_num(),
-                          pss->hash_seed(),
-                          ref_to_scan)) {
+
+      StarTask stolen_task;
+      if (queues()->steal(pss->queue_num(), pss->hash_seed(), stolen_task)) {
         IF_G1_DETAILED_STATS(pss->note_steal());
 
         // slightly paranoid tests; I'm trying to catch potential
         // problems before we go into push_on_queue to know where the
         // problem is coming from
-        assert(ref_to_scan != NULL, "invariant");
-        assert(has_partial_array_mask(ref_to_scan) ||
-                                   _g1h->obj_in_cs(*ref_to_scan), "invariant");
-        pss->push_on_queue(ref_to_scan);
+        assert((oop*)stolen_task != NULL, "Error");
+        if (stolen_task.is_narrow()) {
+          assert(UseCompressedOops, "Error");
+          narrowOop* p = (narrowOop*) stolen_task;
+          assert(has_partial_array_mask(p) ||
+                 _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "Error");
+          pss->push_on_queue(p);
+        } else {
+          oop* p = (oop*) stolen_task;
+          assert(has_partial_array_mask(p) || _g1h->obj_in_cs(*p), "Error");
+          pss->push_on_queue(p);
+        }
         continue;
       }
       pss->start_term_time();
@@ -4347,6 +3847,7 @@
   G1CollectedHeap*       _g1h;
   RefToScanQueueSet      *_queues;
   ParallelTaskTerminator _terminator;
+  int _n_workers;
 
   Mutex _stats_lock;
   Mutex* stats_lock() { return &_stats_lock; }
@@ -4362,7 +3863,8 @@
       _g1h(g1h),
       _queues(task_queues),
       _terminator(workers, _queues),
-      _stats_lock(Mutex::leaf, "parallel G1 stats lock", true)
+      _stats_lock(Mutex::leaf, "parallel G1 stats lock", true),
+      _n_workers(workers)
   {}
 
   RefToScanQueueSet* queues() { return _queues; }
@@ -4372,6 +3874,7 @@
   }
 
   void work(int i) {
+    if (i >= _n_workers) return;  // no work needed this round
     ResourceMark rm;
     HandleMark   hm;
 
@@ -4469,23 +3972,6 @@
 
 // *** Common G1 Evacuation Stuff
 
-class G1CountClosure: public OopsInHeapRegionClosure {
-public:
-  int n;
-  G1CountClosure() : n(0) {}
-  void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-  void do_oop(oop* p) {
-    oop obj = *p;
-    assert(obj != NULL && G1CollectedHeap::heap()->obj_in_cs(obj),
-           "Rem set closure called on non-rem-set pointer.");
-    n++;
-  }
-};
-
-static G1CountClosure count_closure;
-
 void
 G1CollectedHeap::
 g1_process_strong_roots(bool collecting_perm_gen,
@@ -5357,7 +4843,7 @@
   assert(_free_region_list == NULL, "Postcondition of loop.");
   if (_free_region_list_size != 0) {
     gclog_or_tty->print_cr("Size is %d.", _free_region_list_size);
-    print();
+    print_on(gclog_or_tty, true /* extended */);
   }
   assert(_free_region_list_size == 0, "Postconditions of loop.");
 }
@@ -5535,8 +5021,3 @@
 void G1CollectedHeap::g1_unimplemented() {
   // Unimplemented();
 }
-
-
-// Local Variables: ***
-// c-indentation-style: gnu ***
-// End: ***
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -56,8 +56,8 @@
 #  define IF_G1_DETAILED_STATS(code)
 #endif
 
-typedef GenericTaskQueue<oop*>    RefToScanQueue;
-typedef GenericTaskQueueSet<oop*> RefToScanQueueSet;
+typedef GenericTaskQueue<StarTask>    RefToScanQueue;
+typedef GenericTaskQueueSet<StarTask> RefToScanQueueSet;
 
 typedef int RegionIdx_t;   // needs to hold [ 0..max_regions() )
 typedef int CardIdx_t;     // needs to hold [ 0..CardsPerRegion )
@@ -700,6 +700,9 @@
   size_t g1_reserved_obj_bytes() { return _g1_reserved.byte_size(); }
   virtual size_t capacity() const;
   virtual size_t used() const;
+  // This should be called when we're not holding the heap lock. The
+  // result might be a bit inaccurate.
+  size_t used_unlocked() const;
   size_t recalculate_used() const;
 #ifndef PRODUCT
   size_t recalculate_used_regions() const;
@@ -1061,8 +1064,14 @@
 
   // Override; it uses the "prev" marking information
   virtual void verify(bool allow_dirty, bool silent);
+  // Default behavior by calling print(tty);
   virtual void print() const;
+  // This calls print_on(st, PrintHeapAtGCExtended).
   virtual void print_on(outputStream* st) const;
+  // If extended is true, it will print out information for all
+  // regions in the heap by calling print_on_extended(st).
+  virtual void print_on(outputStream* st, bool extended) const;
+  virtual void print_on_extended(outputStream* st) const;
 
   virtual void print_gc_threads_on(outputStream* st) const;
   virtual void gc_threads_do(ThreadClosure* tc) const;
@@ -1265,6 +1274,552 @@
 
 };
 
-// Local Variables: ***
-// c-indentation-style: gnu ***
-// End: ***
+#define use_local_bitmaps         1
+#define verify_local_bitmaps      0
+#define oop_buffer_length       256
+
+#ifndef PRODUCT
+class GCLabBitMap;
+class GCLabBitMapClosure: public BitMapClosure {
+private:
+  ConcurrentMark* _cm;
+  GCLabBitMap*    _bitmap;
+
+public:
+  GCLabBitMapClosure(ConcurrentMark* cm,
+                     GCLabBitMap* bitmap) {
+    _cm     = cm;
+    _bitmap = bitmap;
+  }
+
+  virtual bool do_bit(size_t offset);
+};
+#endif // !PRODUCT
+
+class GCLabBitMap: public BitMap {
+private:
+  ConcurrentMark* _cm;
+
+  int       _shifter;
+  size_t    _bitmap_word_covers_words;
+
+  // beginning of the heap
+  HeapWord* _heap_start;
+
+  // this is the actual start of the GCLab
+  HeapWord* _real_start_word;
+
+  // this is the actual end of the GCLab
+  HeapWord* _real_end_word;
+
+  // this is the first word, possibly located before the actual start
+  // of the GCLab, that corresponds to the first bit of the bitmap
+  HeapWord* _start_word;
+
+  // size of a GCLab in words
+  size_t _gclab_word_size;
+
+  static int shifter() {
+    return MinObjAlignment - 1;
+  }
+
+  // how many heap words does a single bitmap word corresponds to?
+  static size_t bitmap_word_covers_words() {
+    return BitsPerWord << shifter();
+  }
+
+  static size_t gclab_word_size() {
+    return G1ParallelGCAllocBufferSize / HeapWordSize;
+  }
+
+  static size_t bitmap_size_in_bits() {
+    size_t bits_in_bitmap = gclab_word_size() >> shifter();
+    // We are going to ensure that the beginning of a word in this
+    // bitmap also corresponds to the beginning of a word in the
+    // global marking bitmap. To handle the case where a GCLab
+    // starts from the middle of the bitmap, we need to add enough
+    // space (i.e. up to a bitmap word) to ensure that we have
+    // enough bits in the bitmap.
+    return bits_in_bitmap + BitsPerWord - 1;
+  }
+public:
+  GCLabBitMap(HeapWord* heap_start)
+    : BitMap(bitmap_size_in_bits()),
+      _cm(G1CollectedHeap::heap()->concurrent_mark()),
+      _shifter(shifter()),
+      _bitmap_word_covers_words(bitmap_word_covers_words()),
+      _heap_start(heap_start),
+      _gclab_word_size(gclab_word_size()),
+      _real_start_word(NULL),
+      _real_end_word(NULL),
+      _start_word(NULL)
+  {
+    guarantee( size_in_words() >= bitmap_size_in_words(),
+               "just making sure");
+  }
+
+  inline unsigned heapWordToOffset(HeapWord* addr) {
+    unsigned offset = (unsigned) pointer_delta(addr, _start_word) >> _shifter;
+    assert(offset < size(), "offset should be within bounds");
+    return offset;
+  }
+
+  inline HeapWord* offsetToHeapWord(size_t offset) {
+    HeapWord* addr =  _start_word + (offset << _shifter);
+    assert(_real_start_word <= addr && addr < _real_end_word, "invariant");
+    return addr;
+  }
+
+  bool fields_well_formed() {
+    bool ret1 = (_real_start_word == NULL) &&
+                (_real_end_word == NULL) &&
+                (_start_word == NULL);
+    if (ret1)
+      return true;
+
+    bool ret2 = _real_start_word >= _start_word &&
+      _start_word < _real_end_word &&
+      (_real_start_word + _gclab_word_size) == _real_end_word &&
+      (_start_word + _gclab_word_size + _bitmap_word_covers_words)
+                                                              > _real_end_word;
+    return ret2;
+  }
+
+  inline bool mark(HeapWord* addr) {
+    guarantee(use_local_bitmaps, "invariant");
+    assert(fields_well_formed(), "invariant");
+
+    if (addr >= _real_start_word && addr < _real_end_word) {
+      assert(!isMarked(addr), "should not have already been marked");
+
+      // first mark it on the bitmap
+      at_put(heapWordToOffset(addr), true);
+
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  inline bool isMarked(HeapWord* addr) {
+    guarantee(use_local_bitmaps, "invariant");
+    assert(fields_well_formed(), "invariant");
+
+    return at(heapWordToOffset(addr));
+  }
+
+  void set_buffer(HeapWord* start) {
+    guarantee(use_local_bitmaps, "invariant");
+    clear();
+
+    assert(start != NULL, "invariant");
+    _real_start_word = start;
+    _real_end_word   = start + _gclab_word_size;
+
+    size_t diff =
+      pointer_delta(start, _heap_start) % _bitmap_word_covers_words;
+    _start_word = start - diff;
+
+    assert(fields_well_formed(), "invariant");
+  }
+
+#ifndef PRODUCT
+  void verify() {
+    // verify that the marks have been propagated
+    GCLabBitMapClosure cl(_cm, this);
+    iterate(&cl);
+  }
+#endif // PRODUCT
+
+  void retire() {
+    guarantee(use_local_bitmaps, "invariant");
+    assert(fields_well_formed(), "invariant");
+
+    if (_start_word != NULL) {
+      CMBitMap*       mark_bitmap = _cm->nextMarkBitMap();
+
+      // this means that the bitmap was set up for the GCLab
+      assert(_real_start_word != NULL && _real_end_word != NULL, "invariant");
+
+      mark_bitmap->mostly_disjoint_range_union(this,
+                                0, // always start from the start of the bitmap
+                                _start_word,
+                                size_in_words());
+      _cm->grayRegionIfNecessary(MemRegion(_real_start_word, _real_end_word));
+
+#ifndef PRODUCT
+      if (use_local_bitmaps && verify_local_bitmaps)
+        verify();
+#endif // PRODUCT
+    } else {
+      assert(_real_start_word == NULL && _real_end_word == NULL, "invariant");
+    }
+  }
+
+  static size_t bitmap_size_in_words() {
+    return (bitmap_size_in_bits() + BitsPerWord - 1) / BitsPerWord;
+  }
+};
+
+class G1ParGCAllocBuffer: public ParGCAllocBuffer {
+private:
+  bool        _retired;
+  bool        _during_marking;
+  GCLabBitMap _bitmap;
+
+public:
+  G1ParGCAllocBuffer() :
+    ParGCAllocBuffer(G1ParallelGCAllocBufferSize / HeapWordSize),
+    _during_marking(G1CollectedHeap::heap()->mark_in_progress()),
+    _bitmap(G1CollectedHeap::heap()->reserved_region().start()),
+    _retired(false)
+  { }
+
+  inline bool mark(HeapWord* addr) {
+    guarantee(use_local_bitmaps, "invariant");
+    assert(_during_marking, "invariant");
+    return _bitmap.mark(addr);
+  }
+
+  inline void set_buf(HeapWord* buf) {
+    if (use_local_bitmaps && _during_marking)
+      _bitmap.set_buffer(buf);
+    ParGCAllocBuffer::set_buf(buf);
+    _retired = false;
+  }
+
+  inline void retire(bool end_of_gc, bool retain) {
+    if (_retired)
+      return;
+    if (use_local_bitmaps && _during_marking) {
+      _bitmap.retire();
+    }
+    ParGCAllocBuffer::retire(end_of_gc, retain);
+    _retired = true;
+  }
+};
+
+class G1ParScanThreadState : public StackObj {
+protected:
+  G1CollectedHeap* _g1h;
+  RefToScanQueue*  _refs;
+  DirtyCardQueue   _dcq;
+  CardTableModRefBS* _ct_bs;
+  G1RemSet* _g1_rem;
+
+  typedef GrowableArray<StarTask> OverflowQueue;
+  OverflowQueue* _overflowed_refs;
+
+  G1ParGCAllocBuffer _alloc_buffers[GCAllocPurposeCount];
+  ageTable           _age_table;
+
+  size_t           _alloc_buffer_waste;
+  size_t           _undo_waste;
+
+  OopsInHeapRegionClosure*      _evac_failure_cl;
+  G1ParScanHeapEvacClosure*     _evac_cl;
+  G1ParScanPartialArrayClosure* _partial_scan_cl;
+
+  int _hash_seed;
+  int _queue_num;
+
+  int _term_attempts;
+#if G1_DETAILED_STATS
+  int _pushes, _pops, _steals, _steal_attempts;
+  int _overflow_pushes;
+#endif
+
+  double _start;
+  double _start_strong_roots;
+  double _strong_roots_time;
+  double _start_term;
+  double _term_time;
+
+  // Map from young-age-index (0 == not young, 1 is youngest) to
+  // surviving words. base is what we get back from the malloc call
+  size_t* _surviving_young_words_base;
+  // this points into the array, as we use the first few entries for padding
+  size_t* _surviving_young_words;
+
+#define PADDING_ELEM_NUM (64 / sizeof(size_t))
+
+  void   add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; }
+
+  void   add_to_undo_waste(size_t waste)         { _undo_waste += waste; }
+
+  DirtyCardQueue& dirty_card_queue()             { return _dcq;  }
+  CardTableModRefBS* ctbs()                      { return _ct_bs; }
+
+  template <class T> void immediate_rs_update(HeapRegion* from, T* p, int tid) {
+    if (!from->is_survivor()) {
+      _g1_rem->par_write_ref(from, p, tid);
+    }
+  }
+
+  template <class T> void deferred_rs_update(HeapRegion* from, T* p, int tid) {
+    // If the new value of the field points to the same region or
+    // is the to-space, we don't need to include it in the Rset updates.
+    if (!from->is_in_reserved(oopDesc::load_decode_heap_oop(p)) && !from->is_survivor()) {
+      size_t card_index = ctbs()->index_for(p);
+      // If the card hasn't been added to the buffer, do it.
+      if (ctbs()->mark_card_deferred(card_index)) {
+        dirty_card_queue().enqueue((jbyte*)ctbs()->byte_for_index(card_index));
+      }
+    }
+  }
+
+public:
+  G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num);
+
+  ~G1ParScanThreadState() {
+    FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base);
+  }
+
+  RefToScanQueue*   refs()            { return _refs;             }
+  OverflowQueue*    overflowed_refs() { return _overflowed_refs;  }
+  ageTable*         age_table()       { return &_age_table;       }
+
+  G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) {
+    return &_alloc_buffers[purpose];
+  }
+
+  size_t alloc_buffer_waste()                    { return _alloc_buffer_waste; }
+  size_t undo_waste()                            { return _undo_waste; }
+
+  template <class T> void push_on_queue(T* ref) {
+    assert(ref != NULL, "invariant");
+    assert(has_partial_array_mask(ref) ||
+           _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(ref)), "invariant");
+#ifdef ASSERT
+    if (has_partial_array_mask(ref)) {
+      oop p = clear_partial_array_mask(ref);
+      // Verify that we point into the CS
+      assert(_g1h->obj_in_cs(p), "Should be in CS");
+    }
+#endif
+    if (!refs()->push(ref)) {
+      overflowed_refs()->push(ref);
+      IF_G1_DETAILED_STATS(note_overflow_push());
+    } else {
+      IF_G1_DETAILED_STATS(note_push());
+    }
+  }
+
+  void pop_from_queue(StarTask& ref) {
+    if (refs()->pop_local(ref)) {
+      assert((oop*)ref != NULL, "pop_local() returned true");
+      assert(UseCompressedOops || !ref.is_narrow(), "Error");
+      assert(has_partial_array_mask((oop*)ref) ||
+             _g1h->obj_in_cs(ref.is_narrow() ? oopDesc::load_decode_heap_oop((narrowOop*)ref)
+                                             : oopDesc::load_decode_heap_oop((oop*)ref)),
+             "invariant");
+      IF_G1_DETAILED_STATS(note_pop());
+    } else {
+      StarTask null_task;
+      ref = null_task;
+    }
+  }
+
+  void pop_from_overflow_queue(StarTask& ref) {
+    StarTask new_ref = overflowed_refs()->pop();
+    assert((oop*)new_ref != NULL, "pop() from a local non-empty stack");
+    assert(UseCompressedOops || !new_ref.is_narrow(), "Error");
+    assert(has_partial_array_mask((oop*)new_ref) ||
+           _g1h->obj_in_cs(new_ref.is_narrow() ? oopDesc::load_decode_heap_oop((narrowOop*)new_ref)
+                                               : oopDesc::load_decode_heap_oop((oop*)new_ref)),
+             "invariant");
+    ref = new_ref;
+  }
+
+  int refs_to_scan()                             { return refs()->size();                 }
+  int overflowed_refs_to_scan()                  { return overflowed_refs()->length();    }
+
+  template <class T> void update_rs(HeapRegion* from, T* p, int tid) {
+    if (G1DeferredRSUpdate) {
+      deferred_rs_update(from, p, tid);
+    } else {
+      immediate_rs_update(from, p, tid);
+    }
+  }
+
+  HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz) {
+
+    HeapWord* obj = NULL;
+    if (word_sz * 100 <
+        (size_t)(G1ParallelGCAllocBufferSize / HeapWordSize) *
+                                                  ParallelGCBufferWastePct) {
+      G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose);
+      add_to_alloc_buffer_waste(alloc_buf->words_remaining());
+      alloc_buf->retire(false, false);
+
+      HeapWord* buf =
+        _g1h->par_allocate_during_gc(purpose, G1ParallelGCAllocBufferSize / HeapWordSize);
+      if (buf == NULL) return NULL; // Let caller handle allocation failure.
+      // Otherwise.
+      alloc_buf->set_buf(buf);
+
+      obj = alloc_buf->allocate(word_sz);
+      assert(obj != NULL, "buffer was definitely big enough...");
+    } else {
+      obj = _g1h->par_allocate_during_gc(purpose, word_sz);
+    }
+    return obj;
+  }
+
+  HeapWord* allocate(GCAllocPurpose purpose, size_t word_sz) {
+    HeapWord* obj = alloc_buffer(purpose)->allocate(word_sz);
+    if (obj != NULL) return obj;
+    return allocate_slow(purpose, word_sz);
+  }
+
+  void undo_allocation(GCAllocPurpose purpose, HeapWord* obj, size_t word_sz) {
+    if (alloc_buffer(purpose)->contains(obj)) {
+      assert(alloc_buffer(purpose)->contains(obj + word_sz - 1),
+             "should contain whole object");
+      alloc_buffer(purpose)->undo_allocation(obj, word_sz);
+    } else {
+      CollectedHeap::fill_with_object(obj, word_sz);
+      add_to_undo_waste(word_sz);
+    }
+  }
+
+  void set_evac_failure_closure(OopsInHeapRegionClosure* evac_failure_cl) {
+    _evac_failure_cl = evac_failure_cl;
+  }
+  OopsInHeapRegionClosure* evac_failure_closure() {
+    return _evac_failure_cl;
+  }
+
+  void set_evac_closure(G1ParScanHeapEvacClosure* evac_cl) {
+    _evac_cl = evac_cl;
+  }
+
+  void set_partial_scan_closure(G1ParScanPartialArrayClosure* partial_scan_cl) {
+    _partial_scan_cl = partial_scan_cl;
+  }
+
+  int* hash_seed() { return &_hash_seed; }
+  int  queue_num() { return _queue_num; }
+
+  int term_attempts()   { return _term_attempts; }
+  void note_term_attempt()  { _term_attempts++; }
+
+#if G1_DETAILED_STATS
+  int pushes()          { return _pushes; }
+  int pops()            { return _pops; }
+  int steals()          { return _steals; }
+  int steal_attempts()  { return _steal_attempts; }
+  int overflow_pushes() { return _overflow_pushes; }
+
+  void note_push()          { _pushes++; }
+  void note_pop()           { _pops++; }
+  void note_steal()         { _steals++; }
+  void note_steal_attempt() { _steal_attempts++; }
+  void note_overflow_push() { _overflow_pushes++; }
+#endif
+
+  void start_strong_roots() {
+    _start_strong_roots = os::elapsedTime();
+  }
+  void end_strong_roots() {
+    _strong_roots_time += (os::elapsedTime() - _start_strong_roots);
+  }
+  double strong_roots_time() { return _strong_roots_time; }
+
+  void start_term_time() {
+    note_term_attempt();
+    _start_term = os::elapsedTime();
+  }
+  void end_term_time() {
+    _term_time += (os::elapsedTime() - _start_term);
+  }
+  double term_time() { return _term_time; }
+
+  double elapsed() {
+    return os::elapsedTime() - _start;
+  }
+
+  size_t* surviving_young_words() {
+    // We add on to hide entry 0 which accumulates surviving words for
+    // age -1 regions (i.e. non-young ones)
+    return _surviving_young_words;
+  }
+
+  void retire_alloc_buffers() {
+    for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+      size_t waste = _alloc_buffers[ap].words_remaining();
+      add_to_alloc_buffer_waste(waste);
+      _alloc_buffers[ap].retire(true, false);
+    }
+  }
+
+private:
+  template <class T> void deal_with_reference(T* ref_to_scan) {
+    if (has_partial_array_mask(ref_to_scan)) {
+      _partial_scan_cl->do_oop_nv(ref_to_scan);
+    } else {
+      // Note: we can use "raw" versions of "region_containing" because
+      // "obj_to_scan" is definitely in the heap, and is not in a
+      // humongous region.
+      HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan);
+      _evac_cl->set_region(r);
+      _evac_cl->do_oop_nv(ref_to_scan);
+    }
+  }
+
+public:
+  void trim_queue() {
+    // I've replicated the loop twice, first to drain the overflow
+    // queue, second to drain the task queue. This is better than
+    // having a single loop, which checks both conditions and, inside
+    // it, either pops the overflow queue or the task queue, as each
+    // loop is tighter. Also, the decision to drain the overflow queue
+    // first is not arbitrary, as the overflow queue is not visible
+    // to the other workers, whereas the task queue is. So, we want to
+    // drain the "invisible" entries first, while allowing the other
+    // workers to potentially steal the "visible" entries.
+
+    while (refs_to_scan() > 0 || overflowed_refs_to_scan() > 0) {
+      while (overflowed_refs_to_scan() > 0) {
+        StarTask ref_to_scan;
+        assert((oop*)ref_to_scan == NULL, "Constructed above");
+        pop_from_overflow_queue(ref_to_scan);
+        // We shouldn't have pushed it on the queue if it was not
+        // pointing into the CSet.
+        assert((oop*)ref_to_scan != NULL, "Follows from inner loop invariant");
+        if (ref_to_scan.is_narrow()) {
+          assert(UseCompressedOops, "Error");
+          narrowOop* p = (narrowOop*)ref_to_scan;
+          assert(!has_partial_array_mask(p) &&
+                 _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "sanity");
+          deal_with_reference(p);
+        } else {
+          oop* p = (oop*)ref_to_scan;
+          assert((has_partial_array_mask(p) && _g1h->obj_in_cs(clear_partial_array_mask(p))) ||
+                 _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "sanity");
+          deal_with_reference(p);
+        }
+      }
+
+      while (refs_to_scan() > 0) {
+        StarTask ref_to_scan;
+        assert((oop*)ref_to_scan == NULL, "Constructed above");
+        pop_from_queue(ref_to_scan);
+        if ((oop*)ref_to_scan != NULL) {
+          if (ref_to_scan.is_narrow()) {
+            assert(UseCompressedOops, "Error");
+            narrowOop* p = (narrowOop*)ref_to_scan;
+            assert(!has_partial_array_mask(p) &&
+                   _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "sanity");
+            deal_with_reference(p);
+          } else {
+            oop* p = (oop*)ref_to_scan;
+            assert((has_partial_array_mask(p) && _g1h->obj_in_cs(clear_partial_array_mask(p))) ||
+                  _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "sanity");
+            deal_with_reference(p);
+          }
+        }
+      }
+    }
+  }
+};
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -293,10 +293,6 @@
   if (G1SteadyStateUsed < 50) {
     vm_exit_during_initialization("G1SteadyStateUsed must be at least 50%.");
   }
-  if (UseConcMarkSweepGC) {
-    vm_exit_during_initialization("-XX:+UseG1GC is incompatible with "
-                                  "-XX:+UseConcMarkSweepGC.");
-  }
 
   initialize_gc_policy_counters();
 
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -1097,6 +1097,10 @@
     _recorded_survivor_tail    = tail;
   }
 
+  size_t recorded_survivor_regions() {
+    return _recorded_survivor_regions;
+  }
+
   void record_thread_age_table(ageTable* age_table)
   {
     _survivors_age_table.merge_par(age_table);
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -42,18 +42,6 @@
   virtual void set_region(HeapRegion* from) { _from = from; }
 };
 
-
-class G1ScanAndBalanceClosure : public OopClosure {
-  G1CollectedHeap* _g1;
-  static int _nq;
-public:
-  G1ScanAndBalanceClosure(G1CollectedHeap* g1) : _g1(g1) { }
-  inline  void do_oop_nv(oop* p);
-  inline  void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
-  virtual void do_oop(oop* p);
-  virtual void do_oop(narrowOop* p)    { guarantee(false, "NYI"); }
-};
-
 class G1ParClosureSuper : public OopsInHeapRegionClosure {
 protected:
   G1CollectedHeap* _g1;
@@ -69,34 +57,32 @@
 public:
   G1ParScanClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
     G1ParClosureSuper(g1, par_scan_state) { }
-  void do_oop_nv(oop* p);   // should be made inline
-  inline  void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  template <class T> void do_oop_nv(T* p);
   virtual void do_oop(oop* p)          { do_oop_nv(p); }
   virtual void do_oop(narrowOop* p)    { do_oop_nv(p); }
 };
 
-#define G1_PARTIAL_ARRAY_MASK 1
+#define G1_PARTIAL_ARRAY_MASK 0x2
 
-inline bool has_partial_array_mask(oop* ref) {
-  return (intptr_t) ref & G1_PARTIAL_ARRAY_MASK;
+template <class T> inline bool has_partial_array_mask(T* ref) {
+  return ((uintptr_t)ref & G1_PARTIAL_ARRAY_MASK) == G1_PARTIAL_ARRAY_MASK;
 }
 
-inline oop* set_partial_array_mask(oop obj) {
-  return (oop*) ((intptr_t) obj | G1_PARTIAL_ARRAY_MASK);
+template <class T> inline T* set_partial_array_mask(T obj) {
+  assert(((uintptr_t)obj & G1_PARTIAL_ARRAY_MASK) == 0, "Information loss!");
+  return (T*) ((uintptr_t)obj | G1_PARTIAL_ARRAY_MASK);
 }
 
-inline oop clear_partial_array_mask(oop* ref) {
-  return oop((intptr_t) ref & ~G1_PARTIAL_ARRAY_MASK);
+template <class T> inline oop clear_partial_array_mask(T* ref) {
+  return oop((intptr_t)ref & ~G1_PARTIAL_ARRAY_MASK);
 }
 
 class G1ParScanPartialArrayClosure : public G1ParClosureSuper {
   G1ParScanClosure _scanner;
-  template <class T> void process_array_chunk(oop obj, int start, int end);
 public:
   G1ParScanPartialArrayClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
     G1ParClosureSuper(g1, par_scan_state), _scanner(g1, par_scan_state) { }
-  void do_oop_nv(oop* p);
-  void do_oop_nv(narrowOop* p)      { guarantee(false, "NYI"); }
+  template <class T> void do_oop_nv(T* p);
   virtual void do_oop(oop* p)       { do_oop_nv(p); }
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
 };
@@ -105,7 +91,7 @@
 class G1ParCopyHelper : public G1ParClosureSuper {
   G1ParScanClosure *_scanner;
 protected:
-  void mark_forwardee(oop* p);
+  template <class T> void mark_forwardee(T* p);
   oop copy_to_survivor_space(oop obj);
 public:
   G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
@@ -117,36 +103,35 @@
          bool do_mark_forwardee, bool skip_cset_test>
 class G1ParCopyClosure : public G1ParCopyHelper {
   G1ParScanClosure _scanner;
-  void do_oop_work(oop* p);
-  void do_oop_work(narrowOop* p) { guarantee(false, "NYI"); }
+  template <class T> void do_oop_work(T* p);
 public:
   G1ParCopyClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
     _scanner(g1, par_scan_state), G1ParCopyHelper(g1, par_scan_state, &_scanner) { }
-  inline void do_oop_nv(oop* p) {
+  template <class T> void do_oop_nv(T* p) {
     do_oop_work(p);
     if (do_mark_forwardee)
       mark_forwardee(p);
   }
-  inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
   virtual void do_oop(oop* p)       { do_oop_nv(p); }
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
 };
 
 typedef G1ParCopyClosure<false, G1BarrierNone, false, false> G1ParScanExtRootClosure;
 typedef G1ParCopyClosure<true,  G1BarrierNone, false, false> G1ParScanPermClosure;
+typedef G1ParCopyClosure<false, G1BarrierRS,   false, false> G1ParScanHeapRSClosure;
 typedef G1ParCopyClosure<false, G1BarrierNone, true,  false> G1ParScanAndMarkExtRootClosure;
 typedef G1ParCopyClosure<true,  G1BarrierNone, true,  false> G1ParScanAndMarkPermClosure;
-typedef G1ParCopyClosure<false, G1BarrierRS,   false, false> G1ParScanHeapRSClosure;
 typedef G1ParCopyClosure<false, G1BarrierRS,   true,  false> G1ParScanAndMarkHeapRSClosure;
 // This is the only case when we set skip_cset_test. Basically, this
 // closure is (should?) only be called directly while we're draining
 // the overflow and task queues. In that case we know that the
 // reference in question points into the collection set, otherwise we
-// would not have pushed it on the queue.
-typedef G1ParCopyClosure<false, G1BarrierEvac, false, true> G1ParScanHeapEvacClosure;
+// would not have pushed it on the queue. The following is defined in
+// g1_specialized_oop_closures.hpp.
+// typedef G1ParCopyClosure<false, G1BarrierEvac, false, true> G1ParScanHeapEvacClosure;
 // We need a separate closure to handle references during evacuation
-// failure processing, as it cannot asume that the reference already
- // points to the collection set (like G1ParScanHeapEvacClosure does).
+// failure processing, as we cannot asume that the reference already
+// points into the collection set (like G1ParScanHeapEvacClosure does).
 typedef G1ParCopyClosure<false, G1BarrierEvac, false, false> G1ParScanHeapEvacFailureClosure;
 
 class FilterIntoCSClosure: public OopClosure {
@@ -158,10 +143,9 @@
                         G1CollectedHeap* g1, OopClosure* oc) :
     _dcto_cl(dcto_cl), _g1(g1), _oc(oc)
   {}
-  inline void do_oop_nv(oop* p);
-  inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
-  virtual void do_oop(oop* p);
-  virtual void do_oop(narrowOop* p)   { guarantee(false, "NYI"); }
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(oop* p)        { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
   bool apply_to_weak_ref_discovered_field() { return true; }
   bool do_header() { return false; }
 };
@@ -174,10 +158,9 @@
                                      OopsInHeapRegionClosure* oc) :
     _g1(g1), _oc(oc)
   {}
-  inline  void do_oop_nv(oop* p);
-  inline  void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
-  virtual void do_oop(oop* p);
-  virtual void do_oop(narrowOop* p)    { guarantee(false, "NYI"); }
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(oop* p) { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
   bool apply_to_weak_ref_discovered_field() { return true; }
   bool do_header() { return false; }
   void set_region(HeapRegion* from) {
@@ -195,10 +178,9 @@
                                             ConcurrentMark* cm)
   : _g1(g1), _oc(oc), _cm(cm) { }
 
-  inline  void do_oop_nv(oop* p);
-  inline  void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
-  virtual void do_oop(oop* p);
-  virtual void do_oop(narrowOop* p)    { guarantee(false, "NYI"); }
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(oop* p) { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
   bool apply_to_weak_ref_discovered_field() { return true; }
   bool do_header() { return false; }
   void set_region(HeapRegion* from) {
@@ -213,10 +195,9 @@
   int _out_of_region;
 public:
   FilterOutOfRegionClosure(HeapRegion* r, OopClosure* oc);
-  inline  void do_oop_nv(oop* p);
-  inline  void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
-  virtual void do_oop(oop* p);
-  virtual void do_oop(narrowOop* p)   { guarantee(false, "NYI"); }
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(oop* p) { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
   bool apply_to_weak_ref_discovered_field() { return true; }
   bool do_header() { return false; }
   int out_of_region() { return _out_of_region; }
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -31,9 +31,10 @@
 // perf-critical inner loop.
 #define FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT 0
 
-inline void FilterIntoCSClosure::do_oop_nv(oop* p) {
-  oop obj = *p;
-  if (obj != NULL && _g1->obj_in_cs(obj)) {
+template <class T> inline void FilterIntoCSClosure::do_oop_nv(T* p) {
+  T heap_oop = oopDesc::load_heap_oop(p);
+  if (!oopDesc::is_null(heap_oop) &&
+      _g1->obj_in_cs(oopDesc::decode_heap_oop_not_null(heap_oop))) {
     _oc->do_oop(p);
 #if FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT
     _dcto_cl->incr_count();
@@ -41,44 +42,32 @@
   }
 }
 
-inline void FilterIntoCSClosure::do_oop(oop* p)
-{
-  do_oop_nv(p);
-}
-
 #define FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT 0
 
-inline void FilterOutOfRegionClosure::do_oop_nv(oop* p) {
-  oop obj = *p;
-  HeapWord* obj_hw = (HeapWord*)obj;
-  if (obj_hw != NULL && (obj_hw < _r_bottom || obj_hw >= _r_end)) {
-    _oc->do_oop(p);
+template <class T> inline void FilterOutOfRegionClosure::do_oop_nv(T* p) {
+  T heap_oop = oopDesc::load_heap_oop(p);
+  if (!oopDesc::is_null(heap_oop)) {
+    HeapWord* obj_hw = (HeapWord*)oopDesc::decode_heap_oop_not_null(heap_oop);
+    if (obj_hw < _r_bottom || obj_hw >= _r_end) {
+      _oc->do_oop(p);
 #if FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT
-    _out_of_region++;
+      _out_of_region++;
 #endif
+    }
   }
 }
 
-inline void FilterOutOfRegionClosure::do_oop(oop* p)
-{
-  do_oop_nv(p);
-}
-
-inline void FilterInHeapRegionAndIntoCSClosure::do_oop_nv(oop* p) {
-  oop obj = *p;
-  if (obj != NULL && _g1->obj_in_cs(obj))
+template <class T> inline void FilterInHeapRegionAndIntoCSClosure::do_oop_nv(T* p) {
+  T heap_oop = oopDesc::load_heap_oop(p);
+  if (!oopDesc::is_null(heap_oop) &&
+      _g1->obj_in_cs(oopDesc::decode_heap_oop_not_null(heap_oop)))
     _oc->do_oop(p);
 }
 
-inline void FilterInHeapRegionAndIntoCSClosure::do_oop(oop* p)
-{
-  do_oop_nv(p);
-}
-
-
-inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop_nv(oop* p) {
-  oop obj = *p;
-  if (obj != NULL) {
+template <class T> inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop_nv(T* p) {
+  T heap_oop = oopDesc::load_heap_oop(p);
+  if (!oopDesc::is_null(heap_oop)) {
+    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
     HeapRegion* hr = _g1->heap_region_containing((HeapWord*) obj);
     if (hr != NULL) {
       if (hr->in_collection_set())
@@ -89,24 +78,29 @@
   }
 }
 
-inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop(oop* p)
-{
-  do_oop_nv(p);
+// This closure is applied to the fields of the objects that have just been copied.
+template <class T> inline void G1ParScanClosure::do_oop_nv(T* p) {
+  T heap_oop = oopDesc::load_heap_oop(p);
+
+  if (!oopDesc::is_null(heap_oop)) {
+    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+    if (_g1->in_cset_fast_test(obj)) {
+      // We're not going to even bother checking whether the object is
+      // already forwarded or not, as this usually causes an immediate
+      // stall. We'll try to prefetch the object (for write, given that
+      // we might need to install the forwarding reference) and we'll
+      // get back to it when pop it from the queue
+      Prefetch::write(obj->mark_addr(), 0);
+      Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
+
+      // slightly paranoid test; I'm trying to catch potential
+      // problems before we go into push_on_queue to know where the
+      // problem is coming from
+      assert(obj == oopDesc::load_decode_heap_oop(p),
+             "p should still be pointing to obj");
+      _par_scan_state->push_on_queue(p);
+    } else {
+      _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
+    }
+  }
 }
-
-inline void G1ScanAndBalanceClosure::do_oop_nv(oop* p) {
-  RefToScanQueue* q;
-  if (ParallelGCThreads > 0) {
-    // Deal the work out equally.
-    _nq = (_nq + 1) % ParallelGCThreads;
-    q = _g1->task_queue(_nq);
-  } else {
-    q = _g1->task_queue(0);
-  }
-  bool nooverflow = q->push(p);
-  guarantee(nooverflow, "Overflow during poplularity region processing");
-}
-
-inline void G1ScanAndBalanceClosure::do_oop(oop* p) {
-  do_oop_nv(p);
-}
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -65,11 +65,10 @@
   void set_region(HeapRegion* from) {
     _blk->set_region(from);
   }
-  virtual void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-  virtual void do_oop(oop* p) {
-    oop obj = *p;
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+  template <class T> void do_oop_work(T* p) {
+    oop obj = oopDesc::load_decode_heap_oop(p);
     if (_g1->obj_in_cs(obj)) _blk->do_oop(p);
   }
   bool apply_to_weak_ref_discovered_field() { return true; }
@@ -110,11 +109,10 @@
 public:
   VerifyRSCleanCardOopClosure(G1CollectedHeap* g1) : _g1(g1) {}
 
-  virtual void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-  virtual void do_oop(oop* p) {
-    oop obj = *p;
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+  template <class T> void do_oop_work(T* p) {
+    oop obj = oopDesc::load_decode_heap_oop(p);
     HeapRegion* to = _g1->heap_region_containing(obj);
     guarantee(to == NULL || !to->in_collection_set(),
               "Missed a rem set member.");
@@ -129,9 +127,9 @@
 {
   _seq_task = new SubTasksDone(NumSeqTasks);
   guarantee(n_workers() > 0, "There should be some workers");
-  _new_refs = NEW_C_HEAP_ARRAY(GrowableArray<oop*>*, n_workers());
+  _new_refs = NEW_C_HEAP_ARRAY(GrowableArray<OopOrNarrowOopStar>*, n_workers());
   for (uint i = 0; i < n_workers(); i++) {
-    _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray<oop*>(8192,true);
+    _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray<OopOrNarrowOopStar>(8192,true);
   }
 }
 
@@ -140,7 +138,7 @@
   for (uint i = 0; i < n_workers(); i++) {
     delete _new_refs[i];
   }
-  FREE_C_HEAP_ARRAY(GrowableArray<oop*>*, _new_refs);
+  FREE_C_HEAP_ARRAY(GrowableArray<OopOrNarrowOopStar>*, _new_refs);
 }
 
 void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) {
@@ -428,15 +426,15 @@
   }
 };
 
-void
-HRInto_G1RemSet::scanNewRefsRS(OopsInHeapRegionClosure* oc,
-                                             int worker_i) {
+template <class T> void
+HRInto_G1RemSet::scanNewRefsRS_work(OopsInHeapRegionClosure* oc,
+                                    int worker_i) {
   double scan_new_refs_start_sec = os::elapsedTime();
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   CardTableModRefBS* ct_bs = (CardTableModRefBS*) (g1h->barrier_set());
   for (int i = 0; i < _new_refs[worker_i]->length(); i++) {
-    oop* p = _new_refs[worker_i]->at(i);
-    oop obj = *p;
+    T* p = (T*) _new_refs[worker_i]->at(i);
+    oop obj = oopDesc::load_decode_heap_oop(p);
     // *p was in the collection set when p was pushed on "_new_refs", but
     // another thread may have processed this location from an RS, so it
     // might not point into the CS any longer.  If so, it's obviously been
@@ -549,11 +547,10 @@
   G1CollectedHeap* _g1;
 public:
   UpdateRSetOopsIntoCSImmediate(G1CollectedHeap* g1) : _g1(g1) { }
-  virtual void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-  virtual void do_oop(oop* p) {
-    HeapRegion* to = _g1->heap_region_containing(*p);
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+  template <class T> void do_oop_work(T* p) {
+    HeapRegion* to = _g1->heap_region_containing(oopDesc::load_decode_heap_oop(p));
     if (to->in_collection_set()) {
       to->rem_set()->add_reference(p, 0);
     }
@@ -567,11 +564,10 @@
 public:
   UpdateRSetOopsIntoCSDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
     _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) { }
-  virtual void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-  virtual void do_oop(oop* p) {
-    oop obj = *p;
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+  template <class T> void do_oop_work(T* p) {
+    oop obj = oopDesc::load_decode_heap_oop(p);
     if (_g1->obj_in_cs(obj)) {
       size_t card_index = _ct_bs->index_for(p);
       if (_ct_bs->mark_card_deferred(card_index)) {
@@ -581,10 +577,10 @@
   }
 };
 
-void HRInto_G1RemSet::new_refs_iterate(OopClosure* cl) {
+template <class T> void HRInto_G1RemSet::new_refs_iterate_work(OopClosure* cl) {
   for (size_t i = 0; i < n_workers(); i++) {
     for (int j = 0; j < _new_refs[i]->length(); j++) {
-      oop* p = _new_refs[i]->at(j);
+      T* p = (T*) _new_refs[i]->at(j);
       cl->do_oop(p);
     }
   }
--- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -62,10 +62,12 @@
   // If "this" is of the given subtype, return "this", else "NULL".
   virtual HRInto_G1RemSet* as_HRInto_G1RemSet() { return NULL; }
 
-  // Record, if necessary, the fact that *p (where "p" is in region "from")
-  // has changed to its new value.
+  // Record, if necessary, the fact that *p (where "p" is in region "from",
+  // and is, a fortiori, required to be non-NULL) has changed to its new value.
   virtual void write_ref(HeapRegion* from, oop* p) = 0;
+  virtual void write_ref(HeapRegion* from, narrowOop* p) = 0;
   virtual void par_write_ref(HeapRegion* from, oop* p, int tid) = 0;
+  virtual void par_write_ref(HeapRegion* from, narrowOop* p, int tid) = 0;
 
   // Requires "region_bm" and "card_bm" to be bitmaps with 1 bit per region
   // or card, respectively, such that a region or card with a corresponding
@@ -105,7 +107,9 @@
 
   // Nothing is necessary in the version below.
   void write_ref(HeapRegion* from, oop* p) {}
+  void write_ref(HeapRegion* from, narrowOop* p) {}
   void par_write_ref(HeapRegion* from, oop* p, int tid) {}
+  void par_write_ref(HeapRegion* from, narrowOop* p, int tid) {}
 
   void scrub(BitMap* region_bm, BitMap* card_bm) {}
   void scrub_par(BitMap* region_bm, BitMap* card_bm,
@@ -143,8 +147,19 @@
   // their references into the collection summarized in "_new_refs".
   bool _par_traversal_in_progress;
   void set_par_traversal(bool b) { _par_traversal_in_progress = b; }
-  GrowableArray<oop*>** _new_refs;
-  void new_refs_iterate(OopClosure* cl);
+  GrowableArray<OopOrNarrowOopStar>** _new_refs;
+  template <class T> void new_refs_iterate_work(OopClosure* cl);
+  void new_refs_iterate(OopClosure* cl) {
+    if (UseCompressedOops) {
+      new_refs_iterate_work<narrowOop>(cl);
+    } else {
+      new_refs_iterate_work<oop>(cl);
+    }
+  }
+
+protected:
+  template <class T> void write_ref_nv(HeapRegion* from, T* p);
+  template <class T> void par_write_ref_nv(HeapRegion* from, T* p, int tid);
 
 public:
   // This is called to reset dual hash tables after the gc pause
@@ -161,7 +176,14 @@
   void prepare_for_oops_into_collection_set_do();
   void cleanup_after_oops_into_collection_set_do();
   void scanRS(OopsInHeapRegionClosure* oc, int worker_i);
-  void scanNewRefsRS(OopsInHeapRegionClosure* oc, int worker_i);
+  template <class T> void scanNewRefsRS_work(OopsInHeapRegionClosure* oc, int worker_i);
+  void scanNewRefsRS(OopsInHeapRegionClosure* oc, int worker_i) {
+    if (UseCompressedOops) {
+      scanNewRefsRS_work<narrowOop>(oc, worker_i);
+    } else {
+      scanNewRefsRS_work<oop>(oc, worker_i);
+    }
+  }
   void updateRS(int worker_i);
   HeapRegion* calculateStartRegion(int i);
 
@@ -172,12 +194,22 @@
 
   // Record, if necessary, the fact that *p (where "p" is in region "from",
   // which is required to be non-NULL) has changed to a new non-NULL value.
-  inline void write_ref(HeapRegion* from, oop* p);
-  // The "_nv" version is the same; it exists just so that it is not virtual.
-  inline void write_ref_nv(HeapRegion* from, oop* p);
+  // [Below the virtual version calls a non-virtual protected
+  // workhorse that is templatified for narrow vs wide oop.]
+  inline void write_ref(HeapRegion* from, oop* p) {
+    write_ref_nv(from, p);
+  }
+  inline void write_ref(HeapRegion* from, narrowOop* p) {
+    write_ref_nv(from, p);
+  }
+  inline void par_write_ref(HeapRegion* from, oop* p, int tid) {
+    par_write_ref_nv(from, p, tid);
+  }
+  inline void par_write_ref(HeapRegion* from, narrowOop* p, int tid) {
+    par_write_ref_nv(from, p, tid);
+  }
 
-  inline bool self_forwarded(oop obj);
-  inline void par_write_ref(HeapRegion* from, oop* p, int tid);
+  bool self_forwarded(oop obj);
 
   void scrub(BitMap* region_bm, BitMap* card_bm);
   void scrub_par(BitMap* region_bm, BitMap* card_bm,
@@ -208,6 +240,9 @@
   HeapRegion* _from;
   HRInto_G1RemSet* _rs;
   int _worker_i;
+
+  template <class T> void do_oop_work(T* p);
+
 public:
   UpdateRSOopClosure(HRInto_G1RemSet* rs, int worker_i = 0) :
     _from(NULL), _rs(rs), _worker_i(worker_i) {
@@ -219,11 +254,10 @@
     _from = from;
   }
 
-  virtual void do_oop(narrowOop* p);
-  virtual void do_oop(oop* p);
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(oop* p)       { do_oop_work(p); }
 
   // Override: this closure is idempotent.
   //  bool idempotent() { return true; }
   bool apply_to_weak_ref_discovered_field() { return true; }
 };
-
--- a/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -30,12 +30,8 @@
   }
 }
 
-inline void HRInto_G1RemSet::write_ref_nv(HeapRegion* from, oop* p) {
-  par_write_ref(from, p, 0);
-}
-
-inline void HRInto_G1RemSet::write_ref(HeapRegion* from, oop* p) {
-  write_ref_nv(from, p);
+template <class T> inline void HRInto_G1RemSet::write_ref_nv(HeapRegion* from, T* p) {
+  par_write_ref_nv(from, p, 0);
 }
 
 inline bool HRInto_G1RemSet::self_forwarded(oop obj) {
@@ -43,8 +39,8 @@
   return result;
 }
 
-inline void HRInto_G1RemSet::par_write_ref(HeapRegion* from, oop* p, int tid) {
-  oop obj = *p;
+template <class T> inline void HRInto_G1RemSet::par_write_ref_nv(HeapRegion* from, T* p, int tid) {
+  oop obj = oopDesc::load_decode_heap_oop(p);
 #ifdef ASSERT
   // can't do because of races
   // assert(obj == NULL || obj->is_oop(), "expected an oop");
@@ -71,7 +67,7 @@
     // false during the evacuation failure handing.
     if (_par_traversal_in_progress &&
         to->in_collection_set() && !self_forwarded(obj)) {
-      _new_refs[tid]->push(p);
+      _new_refs[tid]->push((void*)p);
       // Deferred updates to the Cset are either discarded (in the normal case),
       // or processed (if an evacuation failure occurs) at the end
       // of the collection.
@@ -89,11 +85,7 @@
   }
 }
 
-inline void UpdateRSOopClosure::do_oop(narrowOop* p) {
-  guarantee(false, "NYI");
-}
-
-inline void UpdateRSOopClosure::do_oop(oop* p) {
+template <class T> inline void UpdateRSOopClosure::do_oop_work(T* p) {
   assert(_from != NULL, "from region must be non-NULL");
   _rs->par_write_ref(_from, p, _worker_i);
 }
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -34,6 +34,7 @@
 
 
 void G1SATBCardTableModRefBS::enqueue(oop pre_val) {
+  assert(pre_val->is_oop_or_null(true), "Error");
   if (!JavaThread::satb_mark_queue_set().active()) return;
   Thread* thr = Thread::current();
   if (thr->is_Java_thread()) {
@@ -46,32 +47,31 @@
 }
 
 // When we know the current java thread:
-void
-G1SATBCardTableModRefBS::write_ref_field_pre_static(void* field,
-                                                    oop newVal,
+template <class T> void
+G1SATBCardTableModRefBS::write_ref_field_pre_static(T* field,
+                                                    oop new_val,
                                                     JavaThread* jt) {
   if (!JavaThread::satb_mark_queue_set().active()) return;
-  assert(!UseCompressedOops, "Else will need to modify this to deal with narrowOop");
-  oop preVal = *(oop*)field;
-  if (preVal != NULL) {
-    jt->satb_mark_queue().enqueue(preVal);
+  T heap_oop = oopDesc::load_heap_oop(field);
+  if (!oopDesc::is_null(heap_oop)) {
+    oop pre_val = oopDesc::decode_heap_oop_not_null(heap_oop);
+    assert(pre_val->is_oop(true /* ignore mark word */), "Error");
+    jt->satb_mark_queue().enqueue(pre_val);
   }
 }
 
-void
-G1SATBCardTableModRefBS::write_ref_array_pre(MemRegion mr) {
+template <class T> void
+G1SATBCardTableModRefBS::write_ref_array_pre_work(T* dst, int count) {
   if (!JavaThread::satb_mark_queue_set().active()) return;
-  assert(!UseCompressedOops, "Else will need to modify this to deal with narrowOop");
-  oop* elem_ptr = (oop*)mr.start();
-  while ((HeapWord*)elem_ptr < mr.end()) {
-    oop elem = *elem_ptr;
-    if (elem != NULL) enqueue(elem);
-    elem_ptr++;
+  T* elem_ptr = dst;
+  for (int i = 0; i < count; i++, elem_ptr++) {
+    T heap_oop = oopDesc::load_heap_oop(elem_ptr);
+    if (!oopDesc::is_null(heap_oop)) {
+      enqueue(oopDesc::decode_heap_oop_not_null(heap_oop));
+    }
   }
 }
 
-
-
 G1SATBCardTableLoggingModRefBS::
 G1SATBCardTableLoggingModRefBS(MemRegion whole_heap,
                                int max_covered_regions) :
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -47,31 +47,41 @@
 
   // This notes that we don't need to access any BarrierSet data
   // structures, so this can be called from a static context.
-  static void write_ref_field_pre_static(void* field, oop newVal) {
-    assert(!UseCompressedOops, "Else needs to be templatized");
-    oop preVal = *((oop*)field);
-    if (preVal != NULL) {
-      enqueue(preVal);
+  template <class T> static void write_ref_field_pre_static(T* field, oop newVal) {
+    T heap_oop = oopDesc::load_heap_oop(field);
+    if (!oopDesc::is_null(heap_oop)) {
+      enqueue(oopDesc::decode_heap_oop(heap_oop));
     }
   }
 
   // When we know the current java thread:
-  static void write_ref_field_pre_static(void* field, oop newVal,
-                                         JavaThread* jt);
+  template <class T> static void write_ref_field_pre_static(T* field, oop newVal,
+                                                            JavaThread* jt);
 
   // We export this to make it available in cases where the static
   // type of the barrier set is known.  Note that it is non-virtual.
-  inline void inline_write_ref_field_pre(void* field, oop newVal) {
+  template <class T> inline void inline_write_ref_field_pre(T* field, oop newVal) {
     write_ref_field_pre_static(field, newVal);
   }
 
-  // This is the more general virtual version.
-  void write_ref_field_pre_work(void* field, oop new_val) {
+  // These are the more general virtual versions.
+  virtual void write_ref_field_pre_work(oop* field, oop new_val) {
     inline_write_ref_field_pre(field, new_val);
   }
+  virtual void write_ref_field_pre_work(narrowOop* field, oop new_val) {
+    inline_write_ref_field_pre(field, new_val);
+  }
+  virtual void write_ref_field_pre_work(void* field, oop new_val) {
+    guarantee(false, "Not needed");
+  }
 
-  virtual void write_ref_array_pre(MemRegion mr);
-
+  template <class T> void write_ref_array_pre_work(T* dst, int count);
+  virtual void write_ref_array_pre(oop* dst, int count) {
+    write_ref_array_pre_work(dst, count);
+  }
+  virtual void write_ref_array_pre(narrowOop* dst, int count) {
+    write_ref_array_pre_work(dst, count);
+  }
 };
 
 // Adds card-table logging to the post-barrier.
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -80,9 +80,6 @@
   develop(bool, G1TraceConcurrentRefinement, false,                         \
           "Trace G1 concurrent refinement")                                 \
                                                                             \
-  develop(bool, G1ConcMark, true,                                           \
-          "If true, run concurrent marking for G1")                         \
-                                                                            \
   product(intx, G1MarkStackSize, 2 * 1024 * 1024,                           \
           "Size of the mark stack for concurrent marking.")                 \
                                                                             \
--- a/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -37,14 +37,12 @@
 class G1ParCopyClosure;
 class G1ParScanClosure;
 
-typedef G1ParCopyClosure<false, G1BarrierEvac, false, true>
-                                                      G1ParScanHeapEvacClosure;
+typedef G1ParCopyClosure<false, G1BarrierEvac, false, true> G1ParScanHeapEvacClosure;
 
 class FilterIntoCSClosure;
 class FilterOutOfRegionClosure;
 class FilterInHeapRegionAndIntoCSClosure;
 class FilterAndMarkInHeapRegionAndIntoCSClosure;
-class G1ScanAndBalanceClosure;
 
 #ifdef FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES
 #error "FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES already defined."
@@ -56,8 +54,7 @@
       f(FilterIntoCSClosure,_nv)                        \
       f(FilterOutOfRegionClosure,_nv)                   \
       f(FilterInHeapRegionAndIntoCSClosure,_nv)         \
-      f(FilterAndMarkInHeapRegionAndIntoCSClosure,_nv)  \
-      f(G1ScanAndBalanceClosure,_nv)
+      f(FilterAndMarkInHeapRegionAndIntoCSClosure,_nv)
 
 #ifdef FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES
 #error "FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES already defined."
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -66,16 +66,16 @@
   bool failures() { return _failures; }
   int n_failures() { return _n_failures; }
 
-  virtual void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
 
-  void do_oop(oop* p) {
+  template <class T> void do_oop_work(T* p) {
     assert(_containing_obj != NULL, "Precondition");
     assert(!_g1h->is_obj_dead_cond(_containing_obj, _use_prev_marking),
            "Precondition");
-    oop obj = *p;
-    if (obj != NULL) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
       bool failed = false;
       if (!_g1h->is_in_closed_subset(obj) ||
           _g1h->is_obj_dead_cond(obj, _use_prev_marking)) {
@@ -106,8 +106,8 @@
       }
 
       if (!_g1h->full_collection()) {
-        HeapRegion* from = _g1h->heap_region_containing(p);
-        HeapRegion* to   = _g1h->heap_region_containing(*p);
+        HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p);
+        HeapRegion* to   = _g1h->heap_region_containing(obj);
         if (from != NULL && to != NULL &&
             from != to &&
             !to->isHumongous()) {
@@ -534,13 +534,13 @@
   // Otherwise, find the obj that extends onto mr.start().
 
   assert(cur <= mr.start()
-         && (oop(cur)->klass() == NULL ||
+         && (oop(cur)->klass_or_null() == NULL ||
              cur + oop(cur)->size() > mr.start()),
          "postcondition of block_start");
   oop obj;
   while (cur < mr.end()) {
     obj = oop(cur);
-    if (obj->klass() == NULL) {
+    if (obj->klass_or_null() == NULL) {
       // Ran into an unparseable point.
       return cur;
     } else if (!g1h->is_obj_dead(obj)) {
@@ -577,7 +577,7 @@
   assert(cur <= mr.start(), "Postcondition");
 
   while (cur <= mr.start()) {
-    if (oop(cur)->klass() == NULL) {
+    if (oop(cur)->klass_or_null() == NULL) {
       // Ran into an unparseable point.
       return cur;
     }
@@ -591,7 +591,7 @@
   obj = oop(cur);
   // If we finish this loop...
   assert(cur <= mr.start()
-         && obj->klass() != NULL
+         && obj->klass_or_null() != NULL
          && cur + obj->size() > mr.start(),
          "Loop postcondition");
   if (!g1h->is_obj_dead(obj)) {
@@ -601,7 +601,7 @@
   HeapWord* next;
   while (cur < mr.end()) {
     obj = oop(cur);
-    if (obj->klass() == NULL) {
+    if (obj->klass_or_null() == NULL) {
       // Ran into an unparseable point.
       return cur;
     };
@@ -703,7 +703,7 @@
   }
   if (vl_cl.failures()) {
     gclog_or_tty->print_cr("Heap:");
-    G1CollectedHeap::heap()->print();
+    G1CollectedHeap::heap()->print_on(gclog_or_tty, true /* extended */);
     gclog_or_tty->print_cr("");
   }
   if (VerifyDuringGC &&
@@ -781,8 +781,13 @@
     // will pick up the right saved_mark_word() as the high water mark
     // of the region. Either way, the behaviour will be correct.
     ContiguousSpace::set_saved_mark();
+    OrderAccess::storestore();
     _gc_time_stamp = curr_gc_time_stamp;
-    OrderAccess::fence();
+    // The following fence is to force a flush of the writes above, but
+    // is strictly not needed because when an allocating worker thread
+    // calls set_saved_mark() it does so under the ParGCRareEvent_lock;
+    // when the lock is released, the write will be flushed.
+    // OrderAccess::fence();
   }
 }
 
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -126,7 +126,7 @@
     }
   }
 
-  void add_reference_work(oop* from, bool par) {
+  void add_reference_work(OopOrNarrowOopStar from, bool par) {
     // Must make this robust in case "from" is not in "_hr", because of
     // concurrency.
 
@@ -173,11 +173,11 @@
     _bm.clear();
   }
 
-  void add_reference(oop* from) {
+  void add_reference(OopOrNarrowOopStar from) {
     add_reference_work(from, /*parallel*/ true);
   }
 
-  void seq_add_reference(oop* from) {
+  void seq_add_reference(OopOrNarrowOopStar from) {
     add_reference_work(from, /*parallel*/ false);
   }
 
@@ -220,7 +220,7 @@
   }
 
   // Requires "from" to be in "hr()".
-  bool contains_reference(oop* from) const {
+  bool contains_reference(OopOrNarrowOopStar from) const {
     assert(hr()->is_in_reserved(from), "Precondition.");
     size_t card_ind = pointer_delta(from, hr()->bottom(),
                                     CardTableModRefBS::card_size);
@@ -394,7 +394,7 @@
   void set_next(PosParPRT* nxt) { _next = nxt; }
   PosParPRT** next_addr() { return &_next; }
 
-  void add_reference(oop* from, int tid) {
+  void add_reference(OopOrNarrowOopStar from, int tid) {
     // Expand if necessary.
     PerRegionTable** pt = par_tables();
     if (par_tables() == NULL && tid > 0 && hr()->is_gc_alloc_region()) {
@@ -447,7 +447,7 @@
     return res;
   }
 
-  bool contains_reference(oop* from) const {
+  bool contains_reference(OopOrNarrowOopStar from) const {
     if (PerRegionTable::contains_reference(from)) return true;
     if (_par_tables != NULL) {
       for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
@@ -564,12 +564,15 @@
 }
 #endif
 
-void OtherRegionsTable::add_reference(oop* from, int tid) {
+void OtherRegionsTable::add_reference(OopOrNarrowOopStar from, int tid) {
   size_t cur_hrs_ind = hr()->hrs_index();
 
 #if HRRS_VERBOSE
   gclog_or_tty->print_cr("ORT::add_reference_work(" PTR_FORMAT "->" PTR_FORMAT ").",
-                                                  from, *from);
+                                                  from,
+                                                  UseCompressedOops
+                                                  ? oopDesc::load_decode_heap_oop((narrowOop*)from)
+                                                  : oopDesc::load_decode_heap_oop((oop*)from));
 #endif
 
   int from_card = (int)(uintptr_t(from) >> CardTableModRefBS::card_shift);
@@ -1021,13 +1024,13 @@
   }
 }
 
-bool OtherRegionsTable::contains_reference(oop* from) const {
+bool OtherRegionsTable::contains_reference(OopOrNarrowOopStar from) const {
   // Cast away const in this case.
   MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag);
   return contains_reference_locked(from);
 }
 
-bool OtherRegionsTable::contains_reference_locked(oop* from) const {
+bool OtherRegionsTable::contains_reference_locked(OopOrNarrowOopStar from) const {
   HeapRegion* hr = _g1h->heap_region_containing_raw(from);
   if (hr == NULL) return false;
   RegionIdx_t hr_ind = (RegionIdx_t) hr->hrs_index();
@@ -1288,24 +1291,24 @@
 
 
 
-oop**        HeapRegionRemSet::_recorded_oops = NULL;
-HeapWord**   HeapRegionRemSet::_recorded_cards = NULL;
-HeapRegion** HeapRegionRemSet::_recorded_regions = NULL;
-int          HeapRegionRemSet::_n_recorded = 0;
+OopOrNarrowOopStar* HeapRegionRemSet::_recorded_oops = NULL;
+HeapWord**          HeapRegionRemSet::_recorded_cards = NULL;
+HeapRegion**        HeapRegionRemSet::_recorded_regions = NULL;
+int                 HeapRegionRemSet::_n_recorded = 0;
 
 HeapRegionRemSet::Event* HeapRegionRemSet::_recorded_events = NULL;
 int*         HeapRegionRemSet::_recorded_event_index = NULL;
 int          HeapRegionRemSet::_n_recorded_events = 0;
 
-void HeapRegionRemSet::record(HeapRegion* hr, oop* f) {
+void HeapRegionRemSet::record(HeapRegion* hr, OopOrNarrowOopStar f) {
   if (_recorded_oops == NULL) {
     assert(_n_recorded == 0
            && _recorded_cards == NULL
            && _recorded_regions == NULL,
            "Inv");
-    _recorded_oops = NEW_C_HEAP_ARRAY(oop*, MaxRecorded);
-    _recorded_cards = NEW_C_HEAP_ARRAY(HeapWord*, MaxRecorded);
-    _recorded_regions = NEW_C_HEAP_ARRAY(HeapRegion*, MaxRecorded);
+    _recorded_oops    = NEW_C_HEAP_ARRAY(OopOrNarrowOopStar, MaxRecorded);
+    _recorded_cards   = NEW_C_HEAP_ARRAY(HeapWord*,          MaxRecorded);
+    _recorded_regions = NEW_C_HEAP_ARRAY(HeapRegion*,        MaxRecorded);
   }
   if (_n_recorded == MaxRecorded) {
     gclog_or_tty->print_cr("Filled up 'recorded' (%d).", MaxRecorded);
@@ -1408,21 +1411,21 @@
   HeapRegionRemSet* hrrs = hr0->rem_set();
 
   // Make three references from region 0x101...
-  hrrs->add_reference((oop*)hr1_start);
-  hrrs->add_reference((oop*)hr1_mid);
-  hrrs->add_reference((oop*)hr1_last);
+  hrrs->add_reference((OopOrNarrowOopStar)hr1_start);
+  hrrs->add_reference((OopOrNarrowOopStar)hr1_mid);
+  hrrs->add_reference((OopOrNarrowOopStar)hr1_last);
 
-  hrrs->add_reference((oop*)hr2_start);
-  hrrs->add_reference((oop*)hr2_mid);
-  hrrs->add_reference((oop*)hr2_last);
+  hrrs->add_reference((OopOrNarrowOopStar)hr2_start);
+  hrrs->add_reference((OopOrNarrowOopStar)hr2_mid);
+  hrrs->add_reference((OopOrNarrowOopStar)hr2_last);
 
-  hrrs->add_reference((oop*)hr3_start);
-  hrrs->add_reference((oop*)hr3_mid);
-  hrrs->add_reference((oop*)hr3_last);
+  hrrs->add_reference((OopOrNarrowOopStar)hr3_start);
+  hrrs->add_reference((OopOrNarrowOopStar)hr3_mid);
+  hrrs->add_reference((OopOrNarrowOopStar)hr3_last);
 
   // Now cause a coarsening.
-  hrrs->add_reference((oop*)hr4->bottom());
-  hrrs->add_reference((oop*)hr5->bottom());
+  hrrs->add_reference((OopOrNarrowOopStar)hr4->bottom());
+  hrrs->add_reference((OopOrNarrowOopStar)hr5->bottom());
 
   // Now, does iteration yield these three?
   HeapRegionRemSetIterator iter;
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -116,9 +116,9 @@
 
   // For now.  Could "expand" some tables in the future, so that this made
   // sense.
-  void add_reference(oop* from, int tid);
+  void add_reference(OopOrNarrowOopStar from, int tid);
 
-  void add_reference(oop* from) {
+  void add_reference(OopOrNarrowOopStar from) {
     return add_reference(from, 0);
   }
 
@@ -140,8 +140,8 @@
   static size_t static_mem_size();
   static size_t fl_mem_size();
 
-  bool contains_reference(oop* from) const;
-  bool contains_reference_locked(oop* from) const;
+  bool contains_reference(OopOrNarrowOopStar from) const;
+  bool contains_reference_locked(OopOrNarrowOopStar from) const;
 
   void clear();
 
@@ -192,10 +192,10 @@
   // Unused unless G1RecordHRRSOops is true.
 
   static const int MaxRecorded = 1000000;
-  static oop**        _recorded_oops;
-  static HeapWord**   _recorded_cards;
-  static HeapRegion** _recorded_regions;
-  static int          _n_recorded;
+  static OopOrNarrowOopStar* _recorded_oops;
+  static HeapWord**          _recorded_cards;
+  static HeapRegion**        _recorded_regions;
+  static int                 _n_recorded;
 
   static const int MaxRecordedEvents = 1000;
   static Event*       _recorded_events;
@@ -231,13 +231,13 @@
 
   /* Used in the sequential case.  Returns "true" iff this addition causes
      the size limit to be reached. */
-  void add_reference(oop* from) {
+  void add_reference(OopOrNarrowOopStar from) {
     _other_regions.add_reference(from);
   }
 
   /* Used in the parallel case.  Returns "true" iff this addition causes
      the size limit to be reached. */
-  void add_reference(oop* from, int tid) {
+  void add_reference(OopOrNarrowOopStar from, int tid) {
     _other_regions.add_reference(from, tid);
   }
 
@@ -301,7 +301,7 @@
     return OtherRegionsTable::fl_mem_size();
   }
 
-  bool contains_reference(oop* from) const {
+  bool contains_reference(OopOrNarrowOopStar from) const {
     return _other_regions.contains_reference(from);
   }
   void print() const;
@@ -329,7 +329,7 @@
   }
 #endif
 
-  static void record(HeapRegion* hr, oop* f);
+  static void record(HeapRegion* hr, OopOrNarrowOopStar f);
   static void print_recorded();
   static void record_event(Event evnt);
 
--- a/src/share/vm/gc_implementation/g1/satbQueue.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -43,6 +43,18 @@
     }
   }
 }
+
+#ifdef ASSERT
+void ObjPtrQueue::verify_oops_in_buffer() {
+  if (_buf == NULL) return;
+  for (size_t i = _index; i < _sz; i += oopSize) {
+    oop obj = (oop)_buf[byte_index_to_index((int)i)];
+    assert(obj != NULL && obj->is_oop(true /* ignore mark word */),
+           "Not an oop");
+  }
+}
+#endif
+
 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 #endif // _MSC_VER
@@ -66,6 +78,7 @@
 
 
 void SATBMarkQueueSet::handle_zero_index_for_thread(JavaThread* t) {
+  DEBUG_ONLY(t->satb_mark_queue().verify_oops_in_buffer();)
   t->satb_mark_queue().handle_zero_index();
 }
 
@@ -143,7 +156,7 @@
     }
     _completed_buffers_tail = NULL;
     _n_completed_buffers = 0;
-    debug_only(assert_completed_buffer_list_len_correct_locked());
+    DEBUG_ONLY(assert_completed_buffer_list_len_correct_locked());
   }
   while (buffers_to_delete != NULL) {
     CompletedBufferNode* nd = buffers_to_delete;
--- a/src/share/vm/gc_implementation/g1/satbQueue.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -39,6 +39,7 @@
   static void apply_closure_to_buffer(ObjectClosure* cl,
                                       void** buf, size_t index, size_t sz);
 
+  void verify_oops_in_buffer() NOT_DEBUG_RETURN;
 };
 
 
--- a/src/share/vm/gc_implementation/includeDB_gc_g1	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/includeDB_gc_g1	Mon Jul 27 17:23:52 2009 -0400
@@ -27,6 +27,7 @@
 bufferingOopClosure.hpp			genOopClosures.hpp
 bufferingOopClosure.hpp			generation.hpp
 bufferingOopClosure.hpp			os.hpp
+bufferingOopClosure.hpp			taskqueue.hpp
 
 cardTableRS.cpp				concurrentMark.hpp
 cardTableRS.cpp				g1SATBCardTableModRefBS.hpp
@@ -139,7 +140,7 @@
 g1CollectedHeap.cpp                     g1CollectedHeap.inline.hpp
 g1CollectedHeap.cpp                     g1CollectorPolicy.hpp
 g1CollectedHeap.cpp                     g1MarkSweep.hpp
-g1CollectedHeap.cpp                     g1RemSet.hpp
+g1CollectedHeap.cpp                     g1RemSet.inline.hpp
 g1CollectedHeap.cpp                     g1OopClosures.inline.hpp
 g1CollectedHeap.cpp                     genOopClosures.inline.hpp
 g1CollectedHeap.cpp                     gcLocker.inline.hpp
@@ -151,13 +152,14 @@
 g1CollectedHeap.cpp                     isGCActiveMark.hpp
 g1CollectedHeap.cpp			oop.inline.hpp
 g1CollectedHeap.cpp			oop.pcgc.inline.hpp
-g1CollectedHeap.cpp			parGCAllocBuffer.hpp
 g1CollectedHeap.cpp                     vm_operations_g1.hpp
 g1CollectedHeap.cpp                     vmThread.hpp
 
 g1CollectedHeap.hpp                     barrierSet.hpp
+g1CollectedHeap.hpp                     g1RemSet.hpp
 g1CollectedHeap.hpp                     heapRegion.hpp
 g1CollectedHeap.hpp                     memRegion.hpp
+g1CollectedHeap.hpp			parGCAllocBuffer.hpp
 g1CollectedHeap.hpp                     sharedHeap.hpp
 
 g1CollectedHeap.inline.hpp              concurrentMark.hpp
@@ -245,6 +247,7 @@
 g1RemSet.cpp				iterator.hpp
 g1RemSet.cpp				oop.inline.hpp
 
+g1RemSet.inline.hpp			oop.inline.hpp
 g1RemSet.inline.hpp			g1RemSet.hpp
 g1RemSet.inline.hpp			heapRegionRemSet.hpp
 
@@ -255,6 +258,7 @@
 g1SATBCardTableModRefBS.cpp		thread_<os_family>.inline.hpp
 g1SATBCardTableModRefBS.cpp		satbQueue.hpp
 
+g1SATBCardTableModRefBS.hpp		oop.inline.hpp
 g1SATBCardTableModRefBS.hpp		cardTableModRefBS.hpp
 g1SATBCardTableModRefBS.hpp		memRegion.hpp
 
--- a/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -31,9 +31,10 @@
                                                         bool clear,
                                                         int n_threads) {
   if (n_threads > 0) {
-    assert(n_threads == (int)ParallelGCThreads, "# worker threads != # requested!");
-
-      // Make sure the LNC array is valid for the space.
+    assert((n_threads == 1 && ParallelGCThreads == 0) ||
+           n_threads <= (int)ParallelGCThreads,
+           "# worker threads != # requested!");
+    // Make sure the LNC array is valid for the space.
     jbyte**   lowest_non_clean;
     uintptr_t lowest_non_clean_base_chunk_index;
     size_t    lowest_non_clean_chunk_size;
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -885,7 +885,7 @@
 }
 
 
-void ParallelScavengeHeap::verify(bool allow_dirty, bool silent) {
+void ParallelScavengeHeap::verify(bool allow_dirty, bool silent, bool option /* ignored */) {
   // Why do we need the total_collections()-filter below?
   if (total_collections() > 0) {
     if (!silent) {
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -217,7 +217,7 @@
   virtual void gc_threads_do(ThreadClosure* tc) const;
   virtual void print_tracing_info() const;
 
-  void verify(bool allow_dirty, bool silent);
+  void verify(bool allow_dirty, bool silent, bool /* option */);
 
   void print_heap_change(size_t prev_used);
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -117,6 +117,7 @@
     process_array_chunk(old);
   } else {
     if (p.is_narrow()) {
+      assert(UseCompressedOops, "Error");
       PSScavenge::copy_and_push_safe_barrier(this, (narrowOop*)p);
     } else {
       PSScavenge::copy_and_push_safe_barrier(this, (oop*)p);
--- a/src/share/vm/gc_interface/collectedHeap.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/gc_interface/collectedHeap.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -533,7 +533,7 @@
   virtual void print_tracing_info() const = 0;
 
   // Heap verification
-  virtual void verify(bool allow_dirty, bool silent) = 0;
+  virtual void verify(bool allow_dirty, bool silent, bool option) = 0;
 
   // Non product verification and debugging.
 #ifndef PRODUCT
--- a/src/share/vm/includeDB_core	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/includeDB_core	Mon Jul 27 17:23:52 2009 -0400
@@ -554,7 +554,6 @@
 ciEnv.cpp                               linkResolver.hpp
 ciEnv.cpp                               methodDataOop.hpp
 ciEnv.cpp                               objArrayKlass.hpp
-ciEnv.cpp                               oop.hpp
 ciEnv.cpp                               oop.inline.hpp
 ciEnv.cpp                               oop.inline2.hpp
 ciEnv.cpp                               oopFactory.hpp
@@ -785,7 +784,6 @@
 ciSignature.cpp                         allocation.inline.hpp
 ciSignature.cpp                         ciSignature.hpp
 ciSignature.cpp                         ciUtilities.hpp
-ciSignature.cpp                         oop.hpp
 ciSignature.cpp                         oop.inline.hpp
 ciSignature.cpp                         signature.hpp
 
@@ -952,7 +950,6 @@
 classify.cpp                            classify.hpp
 classify.cpp                            systemDictionary.hpp
 
-classify.hpp                            oop.hpp
 classify.hpp                            oop.inline.hpp
 
 codeBlob.cpp                            allocation.inline.hpp
@@ -1187,7 +1184,6 @@
 compilerOracle.cpp                      jniHandles.hpp
 compilerOracle.cpp                      klass.hpp
 compilerOracle.cpp                      methodOop.hpp
-compilerOracle.cpp                      oop.hpp
 compilerOracle.cpp                      oop.inline.hpp
 compilerOracle.cpp                      oopFactory.hpp
 compilerOracle.cpp                      resourceArea.hpp
@@ -1631,7 +1627,6 @@
 frame.cpp                               methodOop.hpp
 frame.cpp                               monitorChunk.hpp
 frame.cpp                               nativeInst_<arch>.hpp
-frame.cpp                               oop.hpp
 frame.cpp                               oop.inline.hpp
 frame.cpp                               oop.inline2.hpp
 frame.cpp                               oopMapCache.hpp
@@ -1799,7 +1794,6 @@
 generation.cpp                          generation.hpp
 generation.cpp                          generation.inline.hpp
 generation.cpp                          java.hpp
-generation.cpp                          oop.hpp
 generation.cpp                          oop.inline.hpp
 generation.cpp                          spaceDecorator.hpp
 generation.cpp                          space.inline.hpp
@@ -2272,7 +2266,6 @@
 java.cpp                                memprofiler.hpp
 java.cpp                                methodOop.hpp
 java.cpp                                objArrayOop.hpp
-java.cpp                                oop.hpp
 java.cpp                                oop.inline.hpp
 java.cpp                                oopFactory.hpp
 java.cpp                                sharedRuntime.hpp
@@ -2949,7 +2942,7 @@
 nativeInst_<arch>.cpp                   assembler_<arch>.inline.hpp
 nativeInst_<arch>.cpp                   handles.hpp
 nativeInst_<arch>.cpp                   nativeInst_<arch>.hpp
-nativeInst_<arch>.cpp                   oop.hpp
+nativeInst_<arch>.cpp                   oop.inline.hpp
 nativeInst_<arch>.cpp                   ostream.hpp
 nativeInst_<arch>.cpp                   resourceArea.hpp
 nativeInst_<arch>.cpp                   sharedRuntime.hpp
@@ -3844,7 +3837,7 @@
 stackValue.cpp                          debugInfo.hpp
 stackValue.cpp                          frame.inline.hpp
 stackValue.cpp                          handles.inline.hpp
-stackValue.cpp                          oop.hpp
+stackValue.cpp                          oop.inline.hpp
 stackValue.cpp                          stackValue.hpp
 
 stackValue.hpp                          handles.hpp
@@ -4332,7 +4325,6 @@
 unhandledOops.cpp                       collectedHeap.hpp
 unhandledOops.cpp                       gcLocker.inline.hpp
 unhandledOops.cpp                       globalDefinitions.hpp
-unhandledOops.cpp                       oop.hpp
 unhandledOops.cpp                       oop.inline.hpp
 unhandledOops.cpp                       thread.hpp
 unhandledOops.cpp                       unhandledOops.hpp
@@ -4468,7 +4460,6 @@
 vframe.cpp                              nmethod.hpp
 vframe.cpp                              objectMonitor.hpp
 vframe.cpp                              objectMonitor.inline.hpp
-vframe.cpp                              oop.hpp
 vframe.cpp                              oop.inline.hpp
 vframe.cpp                              oopMapCache.hpp
 vframe.cpp                              pcDesc.hpp
@@ -4580,7 +4571,6 @@
 vmThread.cpp                            interfaceSupport.hpp
 vmThread.cpp                            methodOop.hpp
 vmThread.cpp                            mutexLocker.hpp
-vmThread.cpp                            oop.hpp
 vmThread.cpp                            oop.inline.hpp
 vmThread.cpp                            os.hpp
 vmThread.cpp                            resourceArea.hpp
--- a/src/share/vm/includeDB_features	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/includeDB_features	Mon Jul 27 17:23:52 2009 -0400
@@ -47,7 +47,7 @@
 dump.cpp                                javaClasses.hpp
 dump.cpp                                loaderConstraints.hpp
 dump.cpp                                methodDataOop.hpp
-dump.cpp                                oop.hpp
+dump.cpp                                oop.inline.hpp
 dump.cpp                                oopFactory.hpp
 dump.cpp                                resourceArea.hpp
 dump.cpp                                signature.hpp
@@ -237,7 +237,7 @@
 serialize.cpp                           compiledICHolderOop.hpp
 serialize.cpp                           methodDataOop.hpp
 serialize.cpp                           objArrayOop.hpp
-serialize.cpp                           oop.hpp
+serialize.cpp                           oop.inline.hpp
 serialize.cpp                           symbolTable.hpp
 serialize.cpp                           systemDictionary.hpp
 
@@ -295,7 +295,7 @@
 vmStructs.cpp                           objArrayKlass.hpp
 vmStructs.cpp                           objArrayKlassKlass.hpp
 vmStructs.cpp                           objArrayOop.hpp
-vmStructs.cpp                           oop.hpp
+vmStructs.cpp                           oop.inline.hpp
 vmStructs.cpp                           oopMap.hpp
 vmStructs.cpp                           pcDesc.hpp
 vmStructs.cpp                           perfMemory.hpp
--- a/src/share/vm/interpreter/rewriter.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/interpreter/rewriter.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -273,6 +273,7 @@
   compute_index_maps();
 
   if (RegisterFinalizersAtInit && _klass->name() == vmSymbols::java_lang_Object()) {
+    bool did_rewrite = false;
     int i = _methods->length();
     while (i-- > 0) {
       methodOop method = (methodOop)_methods->obj_at(i);
@@ -281,9 +282,11 @@
         // object for finalization if needed.
         methodHandle m(THREAD, method);
         rewrite_Object_init(m, CHECK);
+        did_rewrite = true;
         break;
       }
     }
+    assert(did_rewrite, "must find Object::<init> to rewrite it");
   }
 
   // rewrite methods, in two passes
--- a/src/share/vm/memory/barrierSet.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/memory/barrierSet.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -25,12 +25,27 @@
 # include "incls/_precompiled.incl"
 # include "incls/_barrierSet.cpp.incl"
 
-// count is in HeapWord's
+// count is number of array elements being written
 void BarrierSet::static_write_ref_array_pre(HeapWord* start, size_t count) {
-   Universe::heap()->barrier_set()->write_ref_array_pre(MemRegion(start, start + count));
+  assert(count <= (size_t)max_intx, "count too large");
+#if 0
+  warning("Pre: \t" INTPTR_FORMAT "[" SIZE_FORMAT "]\t",
+                   start,            count);
+#endif
+  if (UseCompressedOops) {
+    Universe::heap()->barrier_set()->write_ref_array_pre((narrowOop*)start, (int)count);
+  } else {
+    Universe::heap()->barrier_set()->write_ref_array_pre(      (oop*)start, (int)count);
+  }
 }
 
-// count is in HeapWord's
+// count is number of array elements being written
 void BarrierSet::static_write_ref_array_post(HeapWord* start, size_t count) {
-   Universe::heap()->barrier_set()->write_ref_array_work(MemRegion(start, start + count));
+  assert(count <= (size_t)max_intx, "count too large");
+  HeapWord* end = start + objArrayOopDesc::array_size((int)count);
+#if 0
+  warning("Post:\t" INTPTR_FORMAT "[" SIZE_FORMAT "] : [" INTPTR_FORMAT","INTPTR_FORMAT")\t",
+                   start,            count,              start,          end);
+#endif
+  Universe::heap()->barrier_set()->write_ref_array_work(MemRegion(start, end));
 }
--- a/src/share/vm/memory/barrierSet.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/memory/barrierSet.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -81,9 +81,13 @@
   // barrier types.  Semantically, it should be thought of as a call to the
   // virtual "_work" function below, which must implement the barrier.)
   // First the pre-write versions...
-  inline void write_ref_field_pre(void* field, oop new_val);
+  template <class T> inline void write_ref_field_pre(T* field, oop new_val);
+private:
+  // Keep this private so as to catch violations at build time.
+  virtual void write_ref_field_pre_work(     void* field, oop new_val) { guarantee(false, "Not needed"); };
 protected:
-  virtual void write_ref_field_pre_work(void* field, oop new_val) {};
+  virtual void write_ref_field_pre_work(      oop* field, oop new_val) {};
+  virtual void write_ref_field_pre_work(narrowOop* field, oop new_val) {};
 public:
 
   // ...then the post-write version.
@@ -117,12 +121,17 @@
   virtual void read_ref_array(MemRegion mr) = 0;
   virtual void read_prim_array(MemRegion mr) = 0;
 
-  virtual void write_ref_array_pre(MemRegion mr) {}
+  virtual void write_ref_array_pre(      oop* dst, int length) {}
+  virtual void write_ref_array_pre(narrowOop* dst, int length) {}
   inline void write_ref_array(MemRegion mr);
 
   // Static versions, suitable for calling from generated code.
   static void static_write_ref_array_pre(HeapWord* start, size_t count);
   static void static_write_ref_array_post(HeapWord* start, size_t count);
+  // Narrow oop versions of the above; count is # of array elements being written,
+  // starting with "start", which is HeapWord-aligned.
+  static void static_write_ref_array_pre_narrow(HeapWord* start, size_t count);
+  static void static_write_ref_array_post_narrow(HeapWord* start, size_t count);
 
 protected:
   virtual void write_ref_array_work(MemRegion mr) = 0;
--- a/src/share/vm/memory/barrierSet.inline.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/memory/barrierSet.inline.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -23,10 +23,10 @@
  */
 
 // Inline functions of BarrierSet, which de-virtualize certain
-// performance-critical calls when when the barrier is the most common
+// performance-critical calls when the barrier is the most common
 // card-table kind.
 
-void BarrierSet::write_ref_field_pre(void* field, oop new_val) {
+template <class T> void BarrierSet::write_ref_field_pre(T* field, oop new_val) {
   if (kind() == CardTableModRef) {
     ((CardTableModRefBS*)this)->inline_write_ref_field_pre(field, new_val);
   } else {
--- a/src/share/vm/memory/cardTableModRefBS.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/memory/cardTableModRefBS.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -287,7 +287,7 @@
   // these functions here for performance.
 protected:
   void write_ref_field_work(oop obj, size_t offset, oop newVal);
-  void write_ref_field_work(void* field, oop newVal);
+  virtual void write_ref_field_work(void* field, oop newVal);
 public:
 
   bool has_write_ref_array_opt() { return true; }
@@ -317,10 +317,10 @@
 
   // *** Card-table-barrier-specific things.
 
-  inline void inline_write_ref_field_pre(void* field, oop newVal) {}
+  template <class T> inline void inline_write_ref_field_pre(T* field, oop newVal) {}
 
-  inline void inline_write_ref_field(void* field, oop newVal) {
-    jbyte* byte = byte_for(field);
+  template <class T> inline void inline_write_ref_field(T* field, oop newVal) {
+    jbyte* byte = byte_for((void*)field);
     *byte = dirty_card;
   }
 
--- a/src/share/vm/memory/genCollectedHeap.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/memory/genCollectedHeap.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -1194,7 +1194,7 @@
   return _gens[level]->gc_stats();
 }
 
-void GenCollectedHeap::verify(bool allow_dirty, bool silent) {
+void GenCollectedHeap::verify(bool allow_dirty, bool silent, bool option /* ignored */) {
   if (!silent) {
     gclog_or_tty->print("permgen ");
   }
--- a/src/share/vm/memory/genCollectedHeap.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/memory/genCollectedHeap.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -325,7 +325,7 @@
   void prepare_for_verify();
 
   // Override.
-  void verify(bool allow_dirty, bool silent);
+  void verify(bool allow_dirty, bool silent, bool /* option */);
 
   // Override.
   void print() const;
--- a/src/share/vm/memory/genOopClosures.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/memory/genOopClosures.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -57,7 +57,7 @@
   template <class T> void do_barrier(T* p);
 
   // Version for use by closures that may be called in parallel code.
-  void par_do_barrier(oop* p);
+  template <class T> void par_do_barrier(T* p);
 
  public:
   OopsInGenClosure() : OopClosure(NULL),
--- a/src/share/vm/memory/genOopClosures.inline.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/memory/genOopClosures.inline.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -40,18 +40,20 @@
 
 template <class T> inline void OopsInGenClosure::do_barrier(T* p) {
   assert(generation()->is_in_reserved(p), "expected ref in generation");
-  assert(!oopDesc::is_null(*p), "expected non-null object");
-  oop obj = oopDesc::load_decode_heap_oop_not_null(p);
+  T heap_oop = oopDesc::load_heap_oop(p);
+  assert(!oopDesc::is_null(heap_oop), "expected non-null oop");
+  oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
   // If p points to a younger generation, mark the card.
   if ((HeapWord*)obj < _gen_boundary) {
     _rs->inline_write_ref_field_gc(p, obj);
   }
 }
 
-inline void OopsInGenClosure::par_do_barrier(oop* p) {
+template <class T> inline void OopsInGenClosure::par_do_barrier(T* p) {
   assert(generation()->is_in_reserved(p), "expected ref in generation");
-  oop obj = *p;
-  assert(obj != NULL, "expected non-null object");
+  T heap_oop = oopDesc::load_heap_oop(p);
+  assert(!oopDesc::is_null(heap_oop), "expected non-null oop");
+  oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
   // If p points to a younger generation, mark the card.
   if ((HeapWord*)obj < gen_boundary()) {
     rs()->write_ref_field_gc_par(p, obj);
--- a/src/share/vm/memory/referenceProcessor.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/memory/referenceProcessor.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -1013,12 +1013,19 @@
   // discovered_addr.
   oop current_head = refs_list.head();
 
-  // Note: In the case of G1, this pre-barrier is strictly
+  // Note: In the case of G1, this specific pre-barrier is strictly
   // not necessary because the only case we are interested in
-  // here is when *discovered_addr is NULL, so this will expand to
-  // nothing. As a result, I am just manually eliding this out for G1.
+  // here is when *discovered_addr is NULL (see the CAS further below),
+  // so this will expand to nothing. As a result, we have manually
+  // elided this out for G1, but left in the test for some future
+  // collector that might have need for a pre-barrier here.
   if (_discovered_list_needs_barrier && !UseG1GC) {
-    _bs->write_ref_field_pre((void*)discovered_addr, current_head); guarantee(false, "Needs to be fixed: YSR");
+    if (UseCompressedOops) {
+      _bs->write_ref_field_pre((narrowOop*)discovered_addr, current_head);
+    } else {
+      _bs->write_ref_field_pre((oop*)discovered_addr, current_head);
+    }
+    guarantee(false, "Need to check non-G1 collector");
   }
   oop retest = oopDesc::atomic_compare_exchange_oop(current_head, discovered_addr,
                                                     NULL);
@@ -1029,9 +1036,8 @@
     refs_list.set_head(obj);
     refs_list.inc_length(1);
     if (_discovered_list_needs_barrier) {
-      _bs->write_ref_field((void*)discovered_addr, current_head); guarantee(false, "Needs to be fixed: YSR");
+      _bs->write_ref_field((void*)discovered_addr, current_head);
     }
-
   } else {
     // If retest was non NULL, another thread beat us to it:
     // The reference has already been discovered...
@@ -1177,11 +1183,16 @@
     // pre-value, we can safely elide the pre-barrier here for the case of G1.
     assert(discovered == NULL, "control point invariant");
     if (_discovered_list_needs_barrier && !UseG1GC) { // safe to elide for G1
-      _bs->write_ref_field_pre((oop*)discovered_addr, current_head);
+      if (UseCompressedOops) {
+        _bs->write_ref_field_pre((narrowOop*)discovered_addr, current_head);
+      } else {
+        _bs->write_ref_field_pre((oop*)discovered_addr, current_head);
+      }
+      guarantee(false, "Need to check non-G1 collector");
     }
     oop_store_raw(discovered_addr, current_head);
     if (_discovered_list_needs_barrier) {
-      _bs->write_ref_field((oop*)discovered_addr, current_head);
+      _bs->write_ref_field((void*)discovered_addr, current_head);
     }
     list->set_head(obj);
     list->inc_length(1);
--- a/src/share/vm/memory/space.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/memory/space.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -106,6 +106,7 @@
   virtual void set_end(HeapWord* value)    { _end = value; }
 
   virtual HeapWord* saved_mark_word() const  { return _saved_mark_word; }
+
   void set_saved_mark_word(HeapWord* p) { _saved_mark_word = p; }
 
   MemRegionClosure* preconsumptionDirtyCardClosure() const {
--- a/src/share/vm/memory/universe.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/memory/universe.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -1170,7 +1170,7 @@
   st->print_cr("}");
 }
 
-void Universe::verify(bool allow_dirty, bool silent) {
+void Universe::verify(bool allow_dirty, bool silent, bool option) {
   if (SharedSkipVerify) {
     return;
   }
@@ -1194,7 +1194,7 @@
   if (!silent) gclog_or_tty->print("[Verifying ");
   if (!silent) gclog_or_tty->print("threads ");
   Threads::verify();
-  heap()->verify(allow_dirty, silent);
+  heap()->verify(allow_dirty, silent, option);
 
   if (!silent) gclog_or_tty->print("syms ");
   SymbolTable::verify();
--- a/src/share/vm/memory/universe.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/memory/universe.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -343,6 +343,7 @@
   // For UseCompressedOops
   static address* narrow_oop_base_addr()              { return &_narrow_oop._base; }
   static address  narrow_oop_base()                   { return  _narrow_oop._base; }
+  static bool  is_narrow_oop_base(void* addr)         { return (narrow_oop_base() == (address)addr); }
   static int      narrow_oop_shift()                  { return  _narrow_oop._shift; }
   static void     set_narrow_oop_base(address base)   { _narrow_oop._base  = base; }
   static void     set_narrow_oop_shift(int shift)     { _narrow_oop._shift = shift; }
@@ -398,7 +399,7 @@
 
   // Debugging
   static bool verify_in_progress() { return _verify_in_progress; }
-  static void verify(bool allow_dirty = true, bool silent = false);
+  static void verify(bool allow_dirty = true, bool silent = false, bool option = true);
   static int  verify_count()                  { return _verify_count; }
   static void print();
   static void print_on(outputStream* st);
--- a/src/share/vm/oops/instanceRefKlass.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/oops/instanceRefKlass.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -28,13 +28,14 @@
 template <class T>
 static void specialized_oop_follow_contents(instanceRefKlass* ref, oop obj) {
   T* referent_addr = (T*)java_lang_ref_Reference::referent_addr(obj);
-  oop referent = oopDesc::load_decode_heap_oop(referent_addr);
+  T heap_oop = oopDesc::load_heap_oop(referent_addr);
   debug_only(
     if(TraceReferenceGC && PrintGCDetails) {
       gclog_or_tty->print_cr("instanceRefKlass::oop_follow_contents " INTPTR_FORMAT, obj);
     }
   )
-  if (referent != NULL) {
+  if (!oopDesc::is_null(heap_oop)) {
+    oop referent = oopDesc::decode_heap_oop_not_null(heap_oop);
     if (!referent->is_gc_marked() &&
         MarkSweep::ref_processor()->
           discover_reference(obj, ref->reference_type())) {
@@ -81,13 +82,14 @@
                                             ParCompactionManager* cm,
                                             oop obj) {
   T* referent_addr = (T*)java_lang_ref_Reference::referent_addr(obj);
-  oop referent = oopDesc::load_decode_heap_oop(referent_addr);
+  T heap_oop = oopDesc::load_heap_oop(referent_addr);
   debug_only(
     if(TraceReferenceGC && PrintGCDetails) {
       gclog_or_tty->print_cr("instanceRefKlass::oop_follow_contents " INTPTR_FORMAT, obj);
     }
   )
-  if (referent != NULL) {
+  if (!oopDesc::is_null(heap_oop)) {
+    oop referent = oopDesc::decode_heap_oop_not_null(heap_oop);
     if (PSParallelCompact::mark_bitmap()->is_unmarked(referent) &&
         PSParallelCompact::ref_processor()->
           discover_reference(obj, ref->reference_type())) {
@@ -182,9 +184,10 @@
   }                                                                             \
                                                                                 \
   T* referent_addr = (T*)java_lang_ref_Reference::referent_addr(obj);           \
-  oop referent = oopDesc::load_decode_heap_oop(referent_addr);                  \
-  if (referent != NULL && contains(referent_addr)) {                            \
+  T heap_oop = oopDesc::load_heap_oop(referent_addr);                           \
+  if (!oopDesc::is_null(heap_oop) && contains(referent_addr)) {                 \
     ReferenceProcessor* rp = closure->_ref_processor;                           \
+    oop referent = oopDesc::decode_heap_oop_not_null(heap_oop);                 \
     if (!referent->is_gc_marked() && (rp != NULL) &&                            \
         rp->discover_reference(obj, reference_type())) {                        \
       return size;                                                              \
--- a/src/share/vm/oops/methodKlass.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/oops/methodKlass.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -68,7 +68,7 @@
   m->set_constants(NULL);
   m->set_max_stack(0);
   m->set_max_locals(0);
-  m->clear_intrinsic_id_cache();
+  m->set_intrinsic_id(vmIntrinsics::_none);
   m->set_method_data(NULL);
   m->set_interpreter_throwout_count(0);
   m->set_vtable_index(methodOopDesc::garbage_vtable_index);
--- a/src/share/vm/oops/methodOop.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/oops/methodOop.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -962,26 +962,39 @@
   return newm;
 }
 
-vmIntrinsics::ID methodOopDesc::compute_intrinsic_id() const {
-  assert(vmIntrinsics::_none == 0, "correct coding of default case");
-  const uintptr_t max_cache_uint = right_n_bits((int)(sizeof(_intrinsic_id_cache) * BitsPerByte));
-  assert((uintptr_t)vmIntrinsics::ID_LIMIT <= max_cache_uint, "else fix cache size");
+vmSymbols::SID methodOopDesc::klass_id_for_intrinsics(klassOop holder) {
   // if loader is not the default loader (i.e., != NULL), we can't know the intrinsics
   // because we are not loading from core libraries
-  if (instanceKlass::cast(method_holder())->class_loader() != NULL) return vmIntrinsics::_none;
+  if (instanceKlass::cast(holder)->class_loader() != NULL)
+    return vmSymbols::NO_SID;   // regardless of name, no intrinsics here
 
   // see if the klass name is well-known:
-  symbolOop klass_name    = instanceKlass::cast(method_holder())->name();
-  vmSymbols::SID klass_id = vmSymbols::find_sid(klass_name);
-  if (klass_id == vmSymbols::NO_SID)  return vmIntrinsics::_none;
+  symbolOop klass_name = instanceKlass::cast(holder)->name();
+  return vmSymbols::find_sid(klass_name);
+}
+
+void methodOopDesc::init_intrinsic_id() {
+  assert(_intrinsic_id == vmIntrinsics::_none, "do this just once");
+  const uintptr_t max_id_uint = right_n_bits((int)(sizeof(_intrinsic_id) * BitsPerByte));
+  assert((uintptr_t)vmIntrinsics::ID_LIMIT <= max_id_uint, "else fix size");
+
+  // the klass name is well-known:
+  vmSymbols::SID klass_id = klass_id_for_intrinsics(method_holder());
+  assert(klass_id != vmSymbols::NO_SID, "caller responsibility");
 
   // ditto for method and signature:
   vmSymbols::SID  name_id = vmSymbols::find_sid(name());
-  if (name_id  == vmSymbols::NO_SID)  return vmIntrinsics::_none;
+  if (name_id  == vmSymbols::NO_SID)  return;
   vmSymbols::SID   sig_id = vmSymbols::find_sid(signature());
-  if (sig_id   == vmSymbols::NO_SID)  return vmIntrinsics::_none;
+  if (sig_id   == vmSymbols::NO_SID)  return;
   jshort flags = access_flags().as_short();
 
+  vmIntrinsics::ID id = vmIntrinsics::find_id(klass_id, name_id, sig_id, flags);
+  if (id != vmIntrinsics::_none) {
+    set_intrinsic_id(id);
+    return;
+  }
+
   // A few slightly irregular cases:
   switch (klass_id) {
   case vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_StrictMath):
@@ -992,15 +1005,18 @@
     case vmSymbols::VM_SYMBOL_ENUM_NAME(sqrt_name):
       // pretend it is the corresponding method in the non-strict class:
       klass_id = vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_Math);
+      id = vmIntrinsics::find_id(klass_id, name_id, sig_id, flags);
       break;
     }
   }
 
-  // return intrinsic id if any
-  return vmIntrinsics::find_id(klass_id, name_id, sig_id, flags);
+  if (id != vmIntrinsics::_none) {
+    // Set up its iid.  It is an alias method.
+    set_intrinsic_id(id);
+    return;
+  }
 }
 
-
 // These two methods are static since a GC may move the methodOopDesc
 bool methodOopDesc::load_signature_classes(methodHandle m, TRAPS) {
   bool sig_is_loaded = true;
--- a/src/share/vm/oops/methodOop.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/oops/methodOop.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -104,7 +104,7 @@
   u2                _max_stack;                  // Maximum number of entries on the expression stack
   u2                _max_locals;                 // Number of local variables used by this method
   u2                _size_of_parameters;         // size of the parameter block (receiver + arguments) in words
-  u1                _intrinsic_id_cache;         // Cache for intrinsic_id; 0 or 1+vmInt::ID
+  u1                _intrinsic_id;               // vmSymbols::intrinsic_id (0 == _none)
   u1                _highest_tier_compile;       // Highest compile level this method has ever seen.
   u2                _interpreter_throwout_count; // Count of times method was exited via exception while interpreting
   u2                _number_of_breakpoints;      // fullspeed debugging support
@@ -224,8 +224,6 @@
   int highest_tier_compile()                     { return _highest_tier_compile;}
   void set_highest_tier_compile(int level)      { _highest_tier_compile = level;}
 
-  void clear_intrinsic_id_cache() { _intrinsic_id_cache = 0; }
-
   // Count of times method was exited via exception while interpreting
   void interpreter_throwout_increment() {
     if (_interpreter_throwout_count < 65534) {
@@ -571,18 +569,12 @@
   void set_cached_itable_index(int index)           { instanceKlass::cast(method_holder())->set_cached_itable_index(method_idnum(), index); }
 
   // Support for inlining of intrinsic methods
-  vmIntrinsics::ID intrinsic_id() const { // returns zero if not an intrinsic
-    const u1& cache = _intrinsic_id_cache;
-    if (cache != 0) {
-      return (vmIntrinsics::ID)(cache - 1);
-    } else {
-      vmIntrinsics::ID id = compute_intrinsic_id();
-      *(u1*)&cache = ((u1) id) + 1;   // force the cache to be non-const
-      vmIntrinsics::verify_method(id, (methodOop) this);
-      assert((vmIntrinsics::ID)(cache - 1) == id, "proper conversion");
-      return id;
-    }
-  }
+  vmIntrinsics::ID intrinsic_id() const          { return (vmIntrinsics::ID) _intrinsic_id;           }
+  void     set_intrinsic_id(vmIntrinsics::ID id) {                           _intrinsic_id = (u1) id; }
+
+  // Helper routines for intrinsic_id() and vmIntrinsics::method().
+  void init_intrinsic_id();     // updates from _none if a match
+  static vmSymbols::SID klass_id_for_intrinsics(klassOop holder);
 
   // On-stack replacement support
   bool has_osr_nmethod()                         { return instanceKlass::cast(method_holder())->lookup_osr_nmethod(this, InvocationEntryBci) != NULL; }
@@ -635,9 +627,6 @@
   void set_size_of_parameters(int size)          { _size_of_parameters = size; }
  private:
 
-  // Helper routine for intrinsic_id().
-  vmIntrinsics::ID compute_intrinsic_id() const;
-
   // Inlined elements
   address* native_function_addr() const          { assert(is_native(), "must be native"); return (address*) (this+1); }
   address* signature_handler_addr() const        { return native_function_addr() + 1; }
--- a/src/share/vm/oops/objArrayKlass.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/oops/objArrayKlass.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -85,8 +85,6 @@
 template <class T> void objArrayKlass::do_copy(arrayOop s, T* src,
                                arrayOop d, T* dst, int length, TRAPS) {
 
-  const size_t word_len = objArrayOopDesc::array_size(length);
-
   BarrierSet* bs = Universe::heap()->barrier_set();
   // For performance reasons, we assume we are that the write barrier we
   // are using has optimized modes for arrays of references.  At least one
@@ -94,11 +92,10 @@
   assert(bs->has_write_ref_array_opt(), "Barrier set must have ref array opt");
   assert(bs->has_write_ref_array_pre_opt(), "For pre-barrier as well.");
 
-  MemRegion dst_mr = MemRegion((HeapWord*)dst, word_len);
   if (s == d) {
     // since source and destination are equal we do not need conversion checks.
     assert(length > 0, "sanity check");
-    bs->write_ref_array_pre(dst_mr);
+    bs->write_ref_array_pre(dst, length);
     Copy::conjoint_oops_atomic(src, dst, length);
   } else {
     // We have to make sure all elements conform to the destination array
@@ -106,7 +103,7 @@
     klassOop stype = objArrayKlass::cast(s->klass())->element_klass();
     if (stype == bound || Klass::cast(stype)->is_subtype_of(bound)) {
       // elements are guaranteed to be subtypes, so no check necessary
-      bs->write_ref_array_pre(dst_mr);
+      bs->write_ref_array_pre(dst, length);
       Copy::conjoint_oops_atomic(src, dst, length);
     } else {
       // slow case: need individual subtype checks
@@ -138,6 +135,7 @@
       }
     }
   }
+  const size_t word_len = objArrayOopDesc::array_size(length);
   bs->write_ref_array(MemRegion((HeapWord*)dst, word_len));
 }
 
--- a/src/share/vm/oops/oop.inline.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/oops/oop.inline.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -148,12 +148,14 @@
 
 inline narrowOop oopDesc::encode_heap_oop_not_null(oop v) {
   assert(!is_null(v), "oop value can never be zero");
+  assert(Universe::heap()->is_in_reserved(v), "Address not in heap");
   address base = Universe::narrow_oop_base();
   int    shift = Universe::narrow_oop_shift();
   uint64_t  pd = (uint64_t)(pointer_delta((void*)v, (void*)base, 1));
   assert(OopEncodingHeapMax > pd, "change encoding max if new encoding");
   uint64_t result = pd >> shift;
   assert((result & CONST64(0xffffffff00000000)) == 0, "narrow oop overflow");
+  assert(decode_heap_oop(result) == v, "reversibility");
   return (narrowOop)result;
 }
 
@@ -449,7 +451,7 @@
   oopDesc::bs()->write_ref_field(p, v);
 }
 
-inline void update_barrier_set_pre(void* p, oop v) {
+template <class T> inline void update_barrier_set_pre(T* p, oop v) {
   oopDesc::bs()->write_ref_field_pre(p, v);
 }
 
@@ -459,15 +461,15 @@
   } else {
     update_barrier_set_pre(p, v);
     oopDesc::encode_store_heap_oop(p, v);
-    update_barrier_set(p, v);
+    update_barrier_set((void*)p, v);  // cast away type
   }
 }
 
 template <class T> inline void oop_store(volatile T* p, oop v) {
-  update_barrier_set_pre((void*)p, v);
+  update_barrier_set_pre((T*)p, v);   // cast away volatile
   // Used by release_obj_field_put, so use release_store_ptr.
   oopDesc::release_encode_store_heap_oop(p, v);
-  update_barrier_set((void*)p, v);
+  update_barrier_set((void*)p, v);    // cast away type
 }
 
 template <class T> inline void oop_store_without_check(T* p, oop v) {
--- a/src/share/vm/oops/oopsHierarchy.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/oops/oopsHierarchy.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -29,6 +29,7 @@
 typedef juint narrowOop; // Offset instead of address for an oop within a java object
 typedef class klassOopDesc* wideKlassOop; // to keep SA happy and unhandled oop
                                           // detector happy.
+typedef void* OopOrNarrowOopStar;
 
 #ifndef CHECK_UNHANDLED_OOPS
 
--- a/src/share/vm/opto/block.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/block.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -357,6 +357,9 @@
 #ifndef PRODUCT
   , _trace_opto_pipelining(TraceOptoPipelining || C->method_has_option("TraceOptoPipelining"))
 #endif
+#ifdef ASSERT
+  , _raw_oops(a)
+#endif
 {
   ResourceMark rm;
   // I'll need a few machine-specific GotoNodes.  Make an Ideal GotoNode,
--- a/src/share/vm/opto/block.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/block.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -380,6 +380,10 @@
   bool _trace_opto_pipelining;  // tracing flag
 #endif
 
+#ifdef ASSERT
+  Unique_Node_List _raw_oops;
+#endif
+
   // Build dominators
   void Dominators();
 
--- a/src/share/vm/opto/buildOopMap.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/buildOopMap.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -74,9 +74,11 @@
   // this block.
   Block *_b;                    // Block for this struct
   OopFlow *_next;               // Next free OopFlow
+                                // or NULL if dead/conflict
+  Compile* C;
 
-  OopFlow( short *callees, Node **defs ) : _callees(callees), _defs(defs),
-    _b(NULL), _next(NULL) { }
+  OopFlow( short *callees, Node **defs, Compile* c ) : _callees(callees), _defs(defs),
+    _b(NULL), _next(NULL), C(c) { }
 
   // Given reaching-defs for this block start, compute it for this block end
   void compute_reach( PhaseRegAlloc *regalloc, int max_reg, Dict *safehash );
@@ -88,7 +90,7 @@
   void clone( OopFlow *flow, int max_size);
 
   // Make a new OopFlow from scratch
-  static OopFlow *make( Arena *A, int max_size );
+  static OopFlow *make( Arena *A, int max_size, Compile* C );
 
   // Build an oopmap from the current flow info
   OopMap *build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live );
@@ -180,11 +182,11 @@
 }
 
 //------------------------------make-------------------------------------------
-OopFlow *OopFlow::make( Arena *A, int max_size ) {
+OopFlow *OopFlow::make( Arena *A, int max_size, Compile* C ) {
   short *callees = NEW_ARENA_ARRAY(A,short,max_size+1);
   Node **defs    = NEW_ARENA_ARRAY(A,Node*,max_size+1);
   debug_only( memset(defs,0,(max_size+1)*sizeof(Node*)) );
-  OopFlow *flow = new (A) OopFlow(callees+1, defs+1);
+  OopFlow *flow = new (A) OopFlow(callees+1, defs+1, C);
   assert( &flow->_callees[OptoReg::Bad] == callees, "Ok to index at OptoReg::Bad" );
   assert( &flow->_defs   [OptoReg::Bad] == defs   , "Ok to index at OptoReg::Bad" );
   return flow;
@@ -288,7 +290,7 @@
               m = m->in(idx);
             }
           }
-         guarantee( 0, "must find derived/base pair" );
+          guarantee( 0, "must find derived/base pair" );
         }
       found: ;
         Node *base = n->in(i+1); // Base is other half of pair
@@ -347,6 +349,13 @@
     } else {
       // Other - some reaching non-oop value
       omap->set_value( r);
+#ifdef ASSERT
+      if( t->isa_rawptr() && C->cfg()->_raw_oops.member(def) ) {
+        def->dump();
+        n->dump();
+        assert(false, "there should be a oop in OopMap instead of a live raw oop at safepoint");
+      }
+#endif
     }
 
   }
@@ -562,7 +571,7 @@
 
   // Do the first block 'by hand' to prime the worklist
   Block *entry = _cfg->_blocks[1];
-  OopFlow *rootflow = OopFlow::make(A,max_reg);
+  OopFlow *rootflow = OopFlow::make(A,max_reg,this);
   // Initialize to 'bottom' (not 'top')
   memset( rootflow->_callees, OptoReg::Bad, max_reg*sizeof(short) );
   memset( rootflow->_defs   ,            0, max_reg*sizeof(Node*) );
@@ -628,7 +637,7 @@
       // Carry it forward.
     } else {                    // Draw a new OopFlow from the freelist
       if( !free_list )
-        free_list = OopFlow::make(A,max_reg);
+        free_list = OopFlow::make(A,max_reg,C);
       flow = free_list;
       assert( flow->_b == NULL, "oopFlow is not free" );
       free_list = flow->_next;
--- a/src/share/vm/opto/c2compiler.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/c2compiler.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -104,7 +104,9 @@
     initialize();
   }
   bool subsume_loads = true;
-  bool do_escape_analysis = DoEscapeAnalysis;
+  bool do_escape_analysis = DoEscapeAnalysis &&
+                            !(env->jvmti_can_hotswap_or_post_breakpoint() ||
+                              env->jvmti_can_examine_or_deopt_anywhere());
   while (!env->failing()) {
     // Attempt to compile while subsuming loads into machine instructions.
     Compile C(env, this, target, entry_bci, subsume_loads, do_escape_analysis);
--- a/src/share/vm/opto/cfgnode.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/cfgnode.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -1796,8 +1796,12 @@
     for (uint i=1; i<req(); ++i) {// For all paths in
       Node *ii = in(i);
       if (ii->is_DecodeN() && ii->bottom_type() == bottom_type()) {
-        has_decodeN = true;
-        in_decodeN = ii->in(1);
+        // Note: in_decodeN is used only to define the type of new phi.
+        // Find a non dead path otherwise phi type will be wrong.
+        if (ii->in(1)->bottom_type() != Type::TOP) {
+          has_decodeN = true;
+          in_decodeN = ii->in(1);
+        }
       } else if (!ii->is_Phi()) {
         may_push = false;
       }
@@ -1805,7 +1809,6 @@
 
     if (has_decodeN && may_push) {
       PhaseIterGVN *igvn = phase->is_IterGVN();
-      // Note: in_decodeN is used only to define the type of new phi here.
       PhiNode *new_phi = PhiNode::make_blank(in(0), in_decodeN);
       uint orig_cnt = req();
       for (uint i=1; i<req(); ++i) {// For all paths in
--- a/src/share/vm/opto/compile.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/compile.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -101,7 +101,8 @@
     }
   }
   // Lazily create intrinsics for intrinsic IDs well-known in the runtime.
-  if (m->intrinsic_id() != vmIntrinsics::_none) {
+  if (m->intrinsic_id() != vmIntrinsics::_none &&
+      m->intrinsic_id() <= vmIntrinsics::LAST_COMPILER_INLINE) {
     CallGenerator* cg = make_vm_intrinsic(m, is_virtual);
     if (cg != NULL) {
       // Save it for next time:
@@ -440,6 +441,8 @@
                   _orig_pc_slot_offset_in_bytes(0),
                   _node_bundling_limit(0),
                   _node_bundling_base(NULL),
+                  _java_calls(0),
+                  _inner_loops(0),
 #ifndef PRODUCT
                   _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")),
                   _printer(IdealGraphPrinter::printer()),
@@ -710,6 +713,8 @@
     _code_buffer("Compile::Fill_buffer"),
     _node_bundling_limit(0),
     _node_bundling_base(NULL),
+    _java_calls(0),
+    _inner_loops(0),
 #ifndef PRODUCT
     _trace_opto_output(TraceOptoOutput),
     _printer(NULL),
@@ -1850,22 +1855,26 @@
   int  _float_count;            // count float ops requiring 24-bit precision
   int  _double_count;           // count double ops requiring more precision
   int  _java_call_count;        // count non-inlined 'java' calls
+  int  _inner_loop_count;       // count loops which need alignment
   VectorSet _visited;           // Visitation flags
   Node_List _tests;             // Set of IfNodes & PCTableNodes
 
   Final_Reshape_Counts() :
-    _call_count(0), _float_count(0), _double_count(0), _java_call_count(0),
+    _call_count(0), _float_count(0), _double_count(0),
+    _java_call_count(0), _inner_loop_count(0),
     _visited( Thread::current()->resource_area() ) { }
 
   void inc_call_count  () { _call_count  ++; }
   void inc_float_count () { _float_count ++; }
   void inc_double_count() { _double_count++; }
   void inc_java_call_count() { _java_call_count++; }
+  void inc_inner_loop_count() { _inner_loop_count++; }
 
   int  get_call_count  () const { return _call_count  ; }
   int  get_float_count () const { return _float_count ; }
   int  get_double_count() const { return _double_count; }
   int  get_java_call_count() const { return _java_call_count; }
+  int  get_inner_loop_count() const { return _inner_loop_count; }
 };
 
 static bool oop_offset_is_sane(const TypeInstPtr* tp) {
@@ -1877,7 +1886,7 @@
 
 //------------------------------final_graph_reshaping_impl----------------------
 // Implement items 1-5 from final_graph_reshaping below.
-static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {
+static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) {
 
   if ( n->outcnt() == 0 ) return; // dead node
   uint nop = n->Opcode();
@@ -1919,13 +1928,13 @@
   case Op_CmpF:
   case Op_CmpF3:
   // case Op_ConvL2F: // longs are split into 32-bit halves
-    fpu.inc_float_count();
+    frc.inc_float_count();
     break;
 
   case Op_ConvF2D:
   case Op_ConvD2F:
-    fpu.inc_float_count();
-    fpu.inc_double_count();
+    frc.inc_float_count();
+    frc.inc_double_count();
     break;
 
   // Count all double operations that may use FPU
@@ -1942,7 +1951,7 @@
   case Op_ConD:
   case Op_CmpD:
   case Op_CmpD3:
-    fpu.inc_double_count();
+    frc.inc_double_count();
     break;
   case Op_Opaque1:              // Remove Opaque Nodes before matching
   case Op_Opaque2:              // Remove Opaque Nodes before matching
@@ -1951,7 +1960,7 @@
   case Op_CallStaticJava:
   case Op_CallJava:
   case Op_CallDynamicJava:
-    fpu.inc_java_call_count(); // Count java call site;
+    frc.inc_java_call_count(); // Count java call site;
   case Op_CallRuntime:
   case Op_CallLeaf:
   case Op_CallLeafNoFP: {
@@ -1962,7 +1971,7 @@
     // uncommon_trap, _complete_monitor_locking, _complete_monitor_unlocking,
     // _new_Java, _new_typeArray, _new_objArray, _rethrow_Java, ...
     if( !call->is_CallStaticJava() || !call->as_CallStaticJava()->_name ) {
-      fpu.inc_call_count();   // Count the call site
+      frc.inc_call_count();   // Count the call site
     } else {                  // See if uncommon argument is shared
       Node *n = call->in(TypeFunc::Parms);
       int nop = n->Opcode();
@@ -1983,11 +1992,11 @@
   case Op_StoreD:
   case Op_LoadD:
   case Op_LoadD_unaligned:
-    fpu.inc_double_count();
+    frc.inc_double_count();
     goto handle_mem;
   case Op_StoreF:
   case Op_LoadF:
-    fpu.inc_float_count();
+    frc.inc_float_count();
     goto handle_mem;
 
   case Op_StoreB:
@@ -2324,6 +2333,12 @@
       n->subsume_by(btp);
     }
     break;
+  case Op_Loop:
+  case Op_CountedLoop:
+    if (n->as_Loop()->is_inner_loop()) {
+      frc.inc_inner_loop_count();
+    }
+    break;
   default:
     assert( !n->is_Call(), "" );
     assert( !n->is_Mem(), "" );
@@ -2332,17 +2347,17 @@
 
   // Collect CFG split points
   if (n->is_MultiBranch())
-    fpu._tests.push(n);
+    frc._tests.push(n);
 }
 
 //------------------------------final_graph_reshaping_walk---------------------
 // Replacing Opaque nodes with their input in final_graph_reshaping_impl(),
 // requires that the walk visits a node's inputs before visiting the node.
-static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &fpu ) {
+static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ) {
   ResourceArea *area = Thread::current()->resource_area();
   Unique_Node_List sfpt(area);
 
-  fpu._visited.set(root->_idx); // first, mark node as visited
+  frc._visited.set(root->_idx); // first, mark node as visited
   uint cnt = root->req();
   Node *n = root;
   uint  i = 0;
@@ -2351,7 +2366,7 @@
       // Place all non-visited non-null inputs onto stack
       Node* m = n->in(i);
       ++i;
-      if (m != NULL && !fpu._visited.test_set(m->_idx)) {
+      if (m != NULL && !frc._visited.test_set(m->_idx)) {
         if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL)
           sfpt.push(m);
         cnt = m->req();
@@ -2361,7 +2376,7 @@
       }
     } else {
       // Now do post-visit work
-      final_graph_reshaping_impl( n, fpu );
+      final_graph_reshaping_impl( n, frc );
       if (nstack.is_empty())
         break;             // finished
       n = nstack.node();   // Get node from stack
@@ -2442,16 +2457,16 @@
     return true;
   }
 
-  Final_Reshape_Counts fpu;
+  Final_Reshape_Counts frc;
 
   // Visit everybody reachable!
   // Allocate stack of size C->unique()/2 to avoid frequent realloc
   Node_Stack nstack(unique() >> 1);
-  final_graph_reshaping_walk(nstack, root(), fpu);
+  final_graph_reshaping_walk(nstack, root(), frc);
 
   // Check for unreachable (from below) code (i.e., infinite loops).
-  for( uint i = 0; i < fpu._tests.size(); i++ ) {
-    MultiBranchNode *n = fpu._tests[i]->as_MultiBranch();
+  for( uint i = 0; i < frc._tests.size(); i++ ) {
+    MultiBranchNode *n = frc._tests[i]->as_MultiBranch();
     // Get number of CFG targets.
     // Note that PCTables include exception targets after calls.
     uint required_outcnt = n->required_outcnt();
@@ -2497,7 +2512,7 @@
     // Check that I actually visited all kids.  Unreached kids
     // must be infinite loops.
     for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++)
-      if (!fpu._visited.test(n->fast_out(j)->_idx)) {
+      if (!frc._visited.test(n->fast_out(j)->_idx)) {
         record_method_not_compilable("infinite loop");
         return true;            // Found unvisited kid; must be unreach
       }
@@ -2506,13 +2521,14 @@
   // If original bytecodes contained a mixture of floats and doubles
   // check if the optimizer has made it homogenous, item (3).
   if( Use24BitFPMode && Use24BitFP &&
-      fpu.get_float_count() > 32 &&
-      fpu.get_double_count() == 0 &&
-      (10 * fpu.get_call_count() < fpu.get_float_count()) ) {
+      frc.get_float_count() > 32 &&
+      frc.get_double_count() == 0 &&
+      (10 * frc.get_call_count() < frc.get_float_count()) ) {
     set_24_bit_selection_and_mode( false,  true );
   }
 
-  set_has_java_calls(fpu.get_java_call_count() > 0);
+  set_java_calls(frc.get_java_call_count());
+  set_inner_loops(frc.get_inner_loop_count());
 
   // No infinite loops, no reason to bail out.
   return false;
--- a/src/share/vm/opto/compile.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/compile.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -223,7 +223,8 @@
   PhaseCFG*             _cfg;                   // Results of CFG finding
   bool                  _select_24_bit_instr;   // We selected an instruction with a 24-bit result
   bool                  _in_24_bit_fp_mode;     // We are emitting instructions with 24-bit results
-  bool                  _has_java_calls;        // True if the method has java calls
+  int                   _java_calls;            // Number of java calls in the method
+  int                   _inner_loops;           // Number of inner loops in the method
   Matcher*              _matcher;               // Engine to map ideal to machine instructions
   PhaseRegAlloc*        _regalloc;              // Results of register allocation.
   int                   _frame_slots;           // Size of total frame in stack slots
@@ -505,7 +506,9 @@
   PhaseCFG*         cfg()                       { return _cfg; }
   bool              select_24_bit_instr() const { return _select_24_bit_instr; }
   bool              in_24_bit_fp_mode() const   { return _in_24_bit_fp_mode; }
-  bool              has_java_calls() const      { return _has_java_calls; }
+  bool              has_java_calls() const      { return _java_calls > 0; }
+  int               java_calls() const          { return _java_calls; }
+  int               inner_loops() const         { return _inner_loops; }
   Matcher*          matcher()                   { return _matcher; }
   PhaseRegAlloc*    regalloc()                  { return _regalloc; }
   int               frame_slots() const         { return _frame_slots; }
@@ -532,7 +535,8 @@
     _in_24_bit_fp_mode   = mode;
   }
 
-  void set_has_java_calls(bool z) { _has_java_calls = z; }
+  void  set_java_calls(int z) { _java_calls  = z; }
+  void set_inner_loops(int z) { _inner_loops = z; }
 
   // Instruction bits passed off to the VM
   int               code_size()                 { return _method_size; }
--- a/src/share/vm/opto/escape.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/escape.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -578,11 +578,24 @@
   if (phi_alias_idx == alias_idx) {
     return orig_phi;
   }
-  // have we already created a Phi for this alias index?
+  // Have we recently created a Phi for this alias index?
   PhiNode *result = get_map_phi(orig_phi->_idx);
   if (result != NULL && C->get_alias_index(result->adr_type()) == alias_idx) {
     return result;
   }
+  // Previous check may fail when the same wide memory Phi was split into Phis
+  // for different memory slices. Search all Phis for this region.
+  if (result != NULL) {
+    Node* region = orig_phi->in(0);
+    for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
+      Node* phi = region->fast_out(i);
+      if (phi->is_Phi() &&
+          C->get_alias_index(phi->as_Phi()->adr_type()) == alias_idx) {
+        assert(phi->_idx >= nodes_size(), "only new Phi per instance memory slice");
+        return phi->as_Phi();
+      }
+    }
+  }
   if ((int)C->unique() + 2*NodeLimitFudgeFactor > MaxNodeLimit) {
     if (C->do_escape_analysis() == true && !C->failing()) {
       // Retry compilation without escape analysis.
@@ -595,6 +608,7 @@
   orig_phi_worklist.append_if_missing(orig_phi);
   const TypePtr *atype = C->get_adr_type(alias_idx);
   result = PhiNode::make(orig_phi->in(0), NULL, Type::MEMORY, atype);
+  C->copy_node_notes_to(result, orig_phi);
   set_map_phi(orig_phi->_idx, result);
   igvn->set_type(result, result->bottom_type());
   record_for_optimizer(result);
--- a/src/share/vm/opto/gcm.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/gcm.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -1130,6 +1130,9 @@
         Node *def = self->in(1);
         if (def != NULL && def->bottom_type()->base() == Type::RawPtr) {
           early->add_inst(self);
+#ifdef ASSERT
+          _raw_oops.push(def);
+#endif
           continue;
         }
         break;
--- a/src/share/vm/opto/graphKit.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/graphKit.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -1373,19 +1373,20 @@
   return st;
 }
 
+
 void GraphKit::pre_barrier(Node* ctl,
                            Node* obj,
                            Node* adr,
-                           uint adr_idx,
-                           Node *val,
-                           const Type* val_type,
+                           uint  adr_idx,
+                           Node* val,
+                           const TypeOopPtr* val_type,
                            BasicType bt) {
   BarrierSet* bs = Universe::heap()->barrier_set();
   set_control(ctl);
   switch (bs->kind()) {
     case BarrierSet::G1SATBCT:
     case BarrierSet::G1SATBCTLogging:
-        g1_write_barrier_pre(obj, adr, adr_idx, val, val_type, bt);
+      g1_write_barrier_pre(obj, adr, adr_idx, val, val_type, bt);
       break;
 
     case BarrierSet::CardTableModRef:
@@ -1404,8 +1405,8 @@
                             Node* store,
                             Node* obj,
                             Node* adr,
-                            uint adr_idx,
-                            Node *val,
+                            uint  adr_idx,
+                            Node* val,
                             BasicType bt,
                             bool use_precise) {
   BarrierSet* bs = Universe::heap()->barrier_set();
@@ -1413,7 +1414,7 @@
   switch (bs->kind()) {
     case BarrierSet::G1SATBCT:
     case BarrierSet::G1SATBCTLogging:
-        g1_write_barrier_post(store, obj, adr, adr_idx, val, bt, use_precise);
+      g1_write_barrier_post(store, obj, adr, adr_idx, val, bt, use_precise);
       break;
 
     case BarrierSet::CardTableModRef:
@@ -1431,49 +1432,55 @@
   }
 }
 
-Node* GraphKit::store_oop_to_object(Node* ctl,
-                                    Node* obj,
-                                    Node* adr,
-                                    const TypePtr* adr_type,
-                                    Node *val,
-                                    const Type* val_type,
-                                    BasicType bt) {
+Node* GraphKit::store_oop(Node* ctl,
+                          Node* obj,
+                          Node* adr,
+                          const TypePtr* adr_type,
+                          Node* val,
+                          const TypeOopPtr* val_type,
+                          BasicType bt,
+                          bool use_precise) {
+
+  set_control(ctl);
+  if (stopped()) return top(); // Dead path ?
+
+  assert(bt == T_OBJECT, "sanity");
+  assert(val != NULL, "not dead path");
   uint adr_idx = C->get_alias_index(adr_type);
-  Node* store;
-  pre_barrier(ctl, obj, adr, adr_idx, val, val_type, bt);
-  store = store_to_memory(control(), adr, val, bt, adr_idx);
-  post_barrier(control(), store, obj, adr, adr_idx, val, bt, false);
+  assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
+
+  pre_barrier(control(), obj, adr, adr_idx, val, val_type, bt);
+  Node* store = store_to_memory(control(), adr, val, bt, adr_idx);
+  post_barrier(control(), store, obj, adr, adr_idx, val, bt, use_precise);
   return store;
 }
 
-Node* GraphKit::store_oop_to_array(Node* ctl,
-                                   Node* obj,
-                                   Node* adr,
-                                   const TypePtr* adr_type,
-                                   Node *val,
-                                   const Type* val_type,
-                                   BasicType bt) {
-  uint adr_idx = C->get_alias_index(adr_type);
-  Node* store;
-  pre_barrier(ctl, obj, adr, adr_idx, val, val_type, bt);
-  store = store_to_memory(control(), adr, val, bt, adr_idx);
-  post_barrier(control(), store, obj, adr, adr_idx, val, bt, true);
-  return store;
-}
-
+// Could be an array or object we don't know at compile time (unsafe ref.)
 Node* GraphKit::store_oop_to_unknown(Node* ctl,
-                                     Node* obj,
-                                     Node* adr,
-                                     const TypePtr* adr_type,
-                                     Node *val,
-                                     const Type* val_type,
-                                     BasicType bt) {
-  uint adr_idx = C->get_alias_index(adr_type);
-  Node* store;
-  pre_barrier(ctl, obj, adr, adr_idx, val, val_type, bt);
-  store = store_to_memory(control(), adr, val, bt, adr_idx);
-  post_barrier(control(), store, obj, adr, adr_idx, val, bt, true);
-  return store;
+                             Node* obj,   // containing obj
+                             Node* adr,  // actual adress to store val at
+                             const TypePtr* adr_type,
+                             Node* val,
+                             BasicType bt) {
+  Compile::AliasType* at = C->alias_type(adr_type);
+  const TypeOopPtr* val_type = NULL;
+  if (adr_type->isa_instptr()) {
+    if (at->field() != NULL) {
+      // known field.  This code is a copy of the do_put_xxx logic.
+      ciField* field = at->field();
+      if (!field->type()->is_loaded()) {
+        val_type = TypeInstPtr::BOTTOM;
+      } else {
+        val_type = TypeOopPtr::make_from_klass(field->type()->as_klass());
+      }
+    }
+  } else if (adr_type->isa_aryptr()) {
+    val_type = adr_type->is_aryptr()->elem()->make_oopptr();
+  }
+  if (val_type == NULL) {
+    val_type = TypeInstPtr::BOTTOM;
+  }
+  return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true);
 }
 
 
@@ -1787,93 +1794,6 @@
 }
 
 
-//------------------------------store_barrier----------------------------------
-// Insert a write-barrier store.  This is to let generational GC work; we have
-// to flag all oop-stores before the next GC point.
-void GraphKit::write_barrier_post(Node* oop_store, Node* obj, Node* adr,
-                                  Node* val, bool use_precise) {
-  // No store check needed if we're storing a NULL or an old object
-  // (latter case is probably a string constant). The concurrent
-  // mark sweep garbage collector, however, needs to have all nonNull
-  // oop updates flagged via card-marks.
-  if (val != NULL && val->is_Con()) {
-    // must be either an oop or NULL
-    const Type* t = val->bottom_type();
-    if (t == TypePtr::NULL_PTR || t == Type::TOP)
-      // stores of null never (?) need barriers
-      return;
-    ciObject* con = t->is_oopptr()->const_oop();
-    if (con != NULL
-        && con->is_perm()
-        && Universe::heap()->can_elide_permanent_oop_store_barriers())
-      // no store barrier needed, because no old-to-new ref created
-      return;
-  }
-
-  if (use_ReduceInitialCardMarks()
-      && obj == just_allocated_object(control())) {
-    // We can skip marks on a freshly-allocated object.
-    // Keep this code in sync with do_eager_card_mark in runtime.cpp.
-    // That routine eagerly marks the occasional object which is produced
-    // by the slow path, so that we don't have to do it here.
-    return;
-  }
-
-  if (!use_precise) {
-    // All card marks for a (non-array) instance are in one place:
-    adr = obj;
-  }
-  // (Else it's an array (or unknown), and we want more precise card marks.)
-  assert(adr != NULL, "");
-
-  // Get the alias_index for raw card-mark memory
-  int adr_type = Compile::AliasIdxRaw;
-  // Convert the pointer to an int prior to doing math on it
-  Node* cast = _gvn.transform(new (C, 2) CastP2XNode(control(), adr));
-  // Divide by card size
-  assert(Universe::heap()->barrier_set()->kind() == BarrierSet::CardTableModRef,
-         "Only one we handle so far.");
-  CardTableModRefBS* ct =
-    (CardTableModRefBS*)(Universe::heap()->barrier_set());
-  Node *b = _gvn.transform(new (C, 3) URShiftXNode( cast, _gvn.intcon(CardTableModRefBS::card_shift) ));
-  // We store into a byte array, so do not bother to left-shift by zero
-  Node *c = byte_map_base_node();
-  // Combine
-  Node *sb_ctl = control();
-  Node *sb_adr = _gvn.transform(new (C, 4) AddPNode( top()/*no base ptr*/, c, b ));
-  Node *sb_val = _gvn.intcon(0);
-  // Smash zero into card
-  if( !UseConcMarkSweepGC ) {
-    BasicType bt = T_BYTE;
-    store_to_memory(sb_ctl, sb_adr, sb_val, bt, adr_type);
-  } else {
-    // Specialized path for CM store barrier
-    cms_card_mark( sb_ctl, sb_adr, sb_val, oop_store);
-  }
-}
-
-// Specialized path for CMS store barrier
-void GraphKit::cms_card_mark(Node* ctl, Node* adr, Node* val, Node *oop_store) {
-  BasicType bt = T_BYTE;
-  int adr_idx = Compile::AliasIdxRaw;
-  Node* mem = memory(adr_idx);
-
-  // The type input is NULL in PRODUCT builds
-  const TypePtr* type = NULL;
-  debug_only(type = C->get_adr_type(adr_idx));
-
-  // Add required edge to oop_store, optimizer does not support precedence edges.
-  // Convert required edge to precedence edge before allocation.
-  Node *store = _gvn.transform( new (C, 5) StoreCMNode(ctl, mem, adr, type, val, oop_store) );
-  set_memory(store, adr_idx);
-
-  // For CMS, back-to-back card-marks can only remove the first one
-  // and this requires DU info.  Push on worklist for optimizer.
-  if (mem->req() > MemNode::Address && adr == mem->in(MemNode::Address))
-    record_for_igvn(store);
-}
-
-
 void GraphKit::round_double_arguments(ciMethod* dest_method) {
   // (Note:  TypeFunc::make has a cache that makes this fast.)
   const TypeFunc* tf    = TypeFunc::make(dest_method);
@@ -3198,17 +3118,88 @@
   return NULL;
 }
 
+//----------------------------- store barriers ----------------------------
+#define __ ideal.
+
+void GraphKit::sync_kit(IdealKit& ideal) {
+  // Final sync IdealKit and graphKit.
+  __ drain_delay_transform();
+  set_all_memory(__ merged_memory());
+  set_control(__ ctrl());
+}
+
+// vanilla/CMS post barrier
+// Insert a write-barrier store.  This is to let generational GC work; we have
+// to flag all oop-stores before the next GC point.
+void GraphKit::write_barrier_post(Node* oop_store,
+                                  Node* obj,
+                                  Node* adr,
+                                  Node* val,
+                                  bool use_precise) {
+  // No store check needed if we're storing a NULL or an old object
+  // (latter case is probably a string constant). The concurrent
+  // mark sweep garbage collector, however, needs to have all nonNull
+  // oop updates flagged via card-marks.
+  if (val != NULL && val->is_Con()) {
+    // must be either an oop or NULL
+    const Type* t = val->bottom_type();
+    if (t == TypePtr::NULL_PTR || t == Type::TOP)
+      // stores of null never (?) need barriers
+      return;
+    ciObject* con = t->is_oopptr()->const_oop();
+    if (con != NULL
+        && con->is_perm()
+        && Universe::heap()->can_elide_permanent_oop_store_barriers())
+      // no store barrier needed, because no old-to-new ref created
+      return;
+  }
+
+  if (!use_precise) {
+    // All card marks for a (non-array) instance are in one place:
+    adr = obj;
+  }
+  // (Else it's an array (or unknown), and we want more precise card marks.)
+  assert(adr != NULL, "");
+
+  IdealKit ideal(gvn(), control(), merged_memory(), true);
+
+  // Convert the pointer to an int prior to doing math on it
+  Node* cast = __ CastPX(__ ctrl(), adr);
+
+  // Divide by card size
+  assert(Universe::heap()->barrier_set()->kind() == BarrierSet::CardTableModRef,
+         "Only one we handle so far.");
+  Node* card_offset = __ URShiftX( cast, __ ConI(CardTableModRefBS::card_shift) );
+
+  // Combine card table base and card offset
+  Node* card_adr = __ AddP(__ top(), byte_map_base_node(), card_offset );
+
+  // Get the alias_index for raw card-mark memory
+  int adr_type = Compile::AliasIdxRaw;
+  // Smash zero into card
+  Node*   zero = __ ConI(0);
+  BasicType bt = T_BYTE;
+  if( !UseConcMarkSweepGC ) {
+    __ store(__ ctrl(), card_adr, zero, bt, adr_type);
+  } else {
+    // Specialized path for CM store barrier
+    __ storeCM(__ ctrl(), card_adr, zero, oop_store, bt, adr_type);
+  }
+
+  // Final sync IdealKit and GraphKit.
+  sync_kit(ideal);
+}
+
+// G1 pre/post barriers
 void GraphKit::g1_write_barrier_pre(Node* obj,
                                     Node* adr,
                                     uint alias_idx,
                                     Node* val,
-                                    const Type* val_type,
+                                    const TypeOopPtr* val_type,
                                     BasicType bt) {
   IdealKit ideal(gvn(), control(), merged_memory(), true);
-#define __ ideal.
-  __ declares_done();
-
-  Node* thread = __ thread();
+
+  Node* tls = __ thread(); // ThreadLocalStorage
 
   Node* no_ctrl = NULL;
   Node* no_base = __ top();
@@ -3231,9 +3222,9 @@
 
   // set_control( ctl);
 
-  Node* marking_adr = __ AddP(no_base, thread, __ ConX(marking_offset));
-  Node* buffer_adr  = __ AddP(no_base, thread, __ ConX(buffer_offset));
-  Node* index_adr   = __ AddP(no_base, thread, __ ConX(index_offset));
+  Node* marking_adr = __ AddP(no_base, tls, __ ConX(marking_offset));
+  Node* buffer_adr  = __ AddP(no_base, tls, __ ConX(buffer_offset));
+  Node* index_adr   = __ AddP(no_base, tls, __ ConX(index_offset));
 
   // Now some of the values
 
@@ -3261,55 +3252,52 @@
         Node* next_index = __ SubI(index,  __ ConI(sizeof(intptr_t)));
         Node* next_indexX = next_index;
 #ifdef _LP64
-          // We could refine the type for what it's worth
-          // const TypeLong* lidxtype = TypeLong::make(CONST64(0), get_size_from_queue);
-          next_indexX = _gvn.transform( new (C, 2) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) );
-#endif // _LP64
+        // We could refine the type for what it's worth
+        // const TypeLong* lidxtype = TypeLong::make(CONST64(0), get_size_from_queue);
+        next_indexX = _gvn.transform( new (C, 2) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) );
+#endif
 
         // Now get the buffer location we will log the original value into and store it
-
         Node *log_addr = __ AddP(no_base, buffer, next_indexX);
-        // __ store(__ ctrl(), log_addr, orig, T_OBJECT, C->get_alias_index(TypeOopPtr::BOTTOM));
         __ store(__ ctrl(), log_addr, orig, T_OBJECT, Compile::AliasIdxRaw);
 
-
         // update the index
-        // __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw);
-        // This is a hack to force this store to occur before the oop store that is coming up
-        __ store(__ ctrl(), index_adr, next_index, T_INT, C->get_alias_index(TypeOopPtr::BOTTOM));
+        __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw);
 
       } __ else_(); {
 
         // logging buffer is full, call the runtime
         const TypeFunc *tf = OptoRuntime::g1_wb_pre_Type();
-        // __ make_leaf_call(tf, OptoRuntime::g1_wb_pre_Java(), "g1_wb_pre", orig, thread);
-        __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", orig, thread);
-      } __ end_if();
-    } __ end_if();
-  } __ end_if();
-
-  __ drain_delay_transform();
-  set_control( __ ctrl());
-  set_all_memory( __ merged_memory());
-
-#undef __
+        __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", orig, tls);
+      } __ end_if();  // (!index)
+    } __ end_if();  // (orig != NULL)
+  } __ end_if();  // (!marking)
+
+  // Final sync IdealKit and GraphKit.
+  sync_kit(ideal);
 }
 
 //
 // Update the card table and add card address to the queue
 //
-void GraphKit::g1_mark_card(IdealKit* ideal, Node* card_adr, Node* store,  Node* index, Node* index_adr, Node* buffer, const TypeFunc* tf) {
-#define __ ideal->
+void GraphKit::g1_mark_card(IdealKit& ideal,
+                            Node* card_adr,
+                            Node* oop_store,
+                            Node* index,
+                            Node* index_adr,
+                            Node* buffer,
+                            const TypeFunc* tf) {
+
   Node* zero = __ ConI(0);
   Node* no_base = __ top();
   BasicType card_bt = T_BYTE;
   // Smash zero into card. MUST BE ORDERED WRT TO STORE
-  __ storeCM(__ ctrl(), card_adr, zero, store, card_bt, Compile::AliasIdxRaw);
+  __ storeCM(__ ctrl(), card_adr, zero, oop_store, card_bt, Compile::AliasIdxRaw);
 
   //  Now do the queue work
   __ if_then(index, BoolTest::ne, zero); {
 
-    Node* next_index = __ SubI(index,  __ ConI(sizeof(intptr_t)));
+    Node* next_index = __ SubI(index, __ ConI(sizeof(intptr_t)));
     Node* next_indexX = next_index;
 #ifdef _LP64
     // We could refine the type for what it's worth
@@ -3324,10 +3312,10 @@
   } __ else_(); {
     __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), "g1_wb_post", card_adr, __ thread());
   } __ end_if();
-#undef __
+
 }
 
-void GraphKit::g1_write_barrier_post(Node* store,
+void GraphKit::g1_write_barrier_post(Node* oop_store,
                                      Node* obj,
                                      Node* adr,
                                      uint alias_idx,
@@ -3352,10 +3340,8 @@
   assert(adr != NULL, "");
 
   IdealKit ideal(gvn(), control(), merged_memory(), true);
-#define __ ideal.
-  __ declares_done();
-
-  Node* thread = __ thread();
+
+  Node* tls = __ thread(); // ThreadLocalStorage
 
   Node* no_ctrl = NULL;
   Node* no_base = __ top();
@@ -3377,8 +3363,8 @@
 
   // Pointers into the thread
 
-  Node* buffer_adr = __ AddP(no_base, thread, __ ConX(buffer_offset));
-  Node* index_adr =  __ AddP(no_base, thread, __ ConX(index_offset));
+  Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset));
+  Node* index_adr =  __ AddP(no_base, tls, __ ConX(index_offset));
 
   // Now some values
 
@@ -3387,18 +3373,14 @@
 
 
   // Convert the store obj pointer to an int prior to doing math on it
-  // Use addr not obj gets accurate card marks
-
-  // Node* cast = __ CastPX(no_ctrl, adr /* obj */);
-
   // Must use ctrl to prevent "integerized oop" existing across safepoint
-  Node* cast =  __ CastPX(__ ctrl(), ( use_precise ? adr : obj ));
+  Node* cast =  __ CastPX(__ ctrl(), adr);
 
   // Divide pointer by card size
   Node* card_offset = __ URShiftX( cast, __ ConI(CardTableModRefBS::card_shift) );
 
   // Combine card table base and card offset
-  Node *card_adr = __ AddP(no_base, byte_map_base_node(), card_offset );
+  Node* card_adr = __ AddP(no_base, byte_map_base_node(), card_offset );
 
   // If we know the value being stored does it cross regions?
 
@@ -3422,18 +3404,17 @@
         Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
 
         __ if_then(card_val, BoolTest::ne, zero); {
-          g1_mark_card(&ideal, card_adr, store, index, index_adr, buffer, tf);
+          g1_mark_card(ideal, card_adr, oop_store, index, index_adr, buffer, tf);
         } __ end_if();
       } __ end_if();
     } __ end_if();
   } else {
-    g1_mark_card(&ideal, card_adr, store, index, index_adr, buffer, tf);
+    // Object.clone() instrinsic uses this path.
+    g1_mark_card(ideal, card_adr, oop_store, index, index_adr, buffer, tf);
   }
 
-
-  __ drain_delay_transform();
-  set_control( __ ctrl());
-  set_all_memory( __ merged_memory());
+  // Final sync IdealKit and GraphKit.
+  sync_kit(ideal);
+}
 #undef __
 
-}
--- a/src/share/vm/opto/graphKit.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/graphKit.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -449,21 +449,34 @@
   //
   // If val==NULL, it is taken to be a completely unknown value. QQQ
 
+  Node* store_oop(Node* ctl,
+                  Node* obj,   // containing obj
+                  Node* adr,  // actual adress to store val at
+                  const TypePtr* adr_type,
+                  Node* val,
+                  const TypeOopPtr* val_type,
+                  BasicType bt,
+                  bool use_precise);
+
   Node* store_oop_to_object(Node* ctl,
                             Node* obj,   // containing obj
                             Node* adr,  // actual adress to store val at
                             const TypePtr* adr_type,
                             Node* val,
-                            const Type* val_type,
-                            BasicType bt);
+                            const TypeOopPtr* val_type,
+                            BasicType bt) {
+    return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, false);
+  }
 
   Node* store_oop_to_array(Node* ctl,
                            Node* obj,   // containing obj
                            Node* adr,  // actual adress to store val at
                            const TypePtr* adr_type,
                            Node* val,
-                           const Type* val_type,
-                           BasicType bt);
+                           const TypeOopPtr* val_type,
+                           BasicType bt) {
+    return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true);
+  }
 
   // Could be an array or object we don't know at compile time (unsafe ref.)
   Node* store_oop_to_unknown(Node* ctl,
@@ -471,12 +484,11 @@
                              Node* adr,  // actual adress to store val at
                              const TypePtr* adr_type,
                              Node* val,
-                             const Type* val_type,
                              BasicType bt);
 
   // For the few case where the barriers need special help
   void pre_barrier(Node* ctl, Node* obj, Node* adr, uint adr_idx,
-                   Node* val, const Type* val_type, BasicType bt);
+                   Node* val, const TypeOopPtr* val_type, BasicType bt);
 
   void post_barrier(Node* ctl, Node* store, Node* obj, Node* adr, uint adr_idx,
                     Node* val, BasicType bt, bool use_precise);
@@ -489,9 +501,6 @@
   // Return a load of array element at idx.
   Node* load_array_element(Node* ctl, Node* ary, Node* idx, const TypeAryPtr* arytype);
 
-  // CMS card-marks have an input from the corresponding oop_store
-  void  cms_card_mark(Node* ctl, Node* adr, Node* val, Node* oop_store);
-
   //---------------- Dtrace support --------------------
   void make_dtrace_method_entry_exit(ciMethod* method, bool is_entry);
   void make_dtrace_method_entry(ciMethod* method) {
@@ -583,9 +592,6 @@
     return C->too_many_recompiles(method(), bci(), reason);
   }
 
-  // vanilla/CMS post barrier
-  void write_barrier_post(Node *store, Node* obj, Node* adr, Node* val, bool use_precise);
-
   // Returns the object (if any) which was created the moment before.
   Node* just_allocated_object(Node* current_control);
 
@@ -594,12 +600,17 @@
             && Universe::heap()->can_elide_tlab_store_barriers());
   }
 
+  void sync_kit(IdealKit& ideal);
+
+  // vanilla/CMS post barrier
+  void write_barrier_post(Node *store, Node* obj, Node* adr, Node* val, bool use_precise);
+
   // G1 pre/post barriers
   void g1_write_barrier_pre(Node* obj,
                             Node* adr,
                             uint alias_idx,
                             Node* val,
-                            const Type* val_type,
+                            const TypeOopPtr* val_type,
                             BasicType bt);
 
   void g1_write_barrier_post(Node* store,
@@ -611,7 +622,7 @@
                              bool use_precise);
   // Helper function for g1
   private:
-  void g1_mark_card(IdealKit* ideal, Node* card_adr, Node* store,  Node* index, Node* index_adr,
+  void g1_mark_card(IdealKit& ideal, Node* card_adr, Node* store,  Node* index, Node* index_adr,
                     Node* buffer, const TypeFunc* tf);
 
   public:
--- a/src/share/vm/opto/idealKit.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/idealKit.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -34,7 +34,7 @@
 const uint IdealKit::first_var = TypeFunc::Parms + 1;
 
 //----------------------------IdealKit-----------------------------------------
-IdealKit::IdealKit(PhaseGVN &gvn, Node* control, Node* mem, bool delay_all_transforms) :
+IdealKit::IdealKit(PhaseGVN &gvn, Node* control, Node* mem, bool delay_all_transforms, bool has_declarations) :
   _gvn(gvn), C(gvn.C) {
   _initial_ctrl = control;
   _initial_memory = mem;
@@ -47,6 +47,9 @@
   _pending_cvstates = new (C->node_arena()) GrowableArray<Node*>(C->node_arena(), init_size, 0, 0);
   _delay_transform  = new (C->node_arena()) GrowableArray<Node*>(C->node_arena(), init_size, 0, 0);
   DEBUG_ONLY(_state = new (C->node_arena()) GrowableArray<int>(C->node_arena(), init_size, 0, 0));
+  if (!has_declarations) {
+     declarations_done();
+  }
 }
 
 //-------------------------------if_then-------------------------------------
@@ -97,7 +100,7 @@
 //-------------------------------end_if-------------------------------------
 // Merge the "then" and "else" cvstates.
 //
-// The if_then() pushed the current state for later use
+// The if_then() pushed a copy of the current state for later use
 // as the initial state for a future "else" clause.  The
 // current state then became the initial state for the
 // then clause.  If an "else" clause was encountered, it will
@@ -258,8 +261,8 @@
   return delay_transform(PhiNode::make(reg, n, ct));
 }
 
-//-----------------------------declares_done-----------------------------------
-void IdealKit::declares_done() {
+//-----------------------------declarations_done-------------------------------
+void IdealKit::declarations_done() {
   _cvstate = new_cvstate();   // initialize current cvstate
   set_ctrl(_initial_ctrl);    // initialize control in current cvstate
   set_all_memory(_initial_memory);// initialize memory in current cvstate
@@ -277,7 +280,9 @@
 
 //-----------------------------delay_transform-----------------------------------
 Node* IdealKit::delay_transform(Node* n) {
-  gvn().set_type(n, n->bottom_type());
+  if (!gvn().is_IterGVN() || !gvn().is_IterGVN()->delay_transform()) {
+    gvn().set_type(n, n->bottom_type());
+  }
   _delay_transform->push(n);
   return n;
 }
@@ -321,7 +326,9 @@
 Node* IdealKit::memory(uint alias_idx) {
   MergeMemNode* mem = merged_memory();
   Node* p = mem->memory_at(alias_idx);
-  _gvn.set_type(p, Type::MEMORY);  // must be mapped
+  if (!gvn().is_IterGVN() || !gvn().is_IterGVN()->delay_transform()) {
+    _gvn.set_type(p, Type::MEMORY);  // must be mapped
+  }
   return p;
 }
 
@@ -462,9 +469,6 @@
   const TypePtr* adr_type = TypeRawPtr::BOTTOM;
   uint adr_idx = C->get_alias_index(adr_type);
 
-  // Clone initial memory
-  MergeMemNode* cloned_mem =  MergeMemNode::make(C, merged_memory());
-
   // Slow-path leaf call
   int size = slow_call_type->domain()->cnt();
   CallNode *call =  (CallNode*)new (C, size) CallLeafNode( slow_call_type, slow_call, leaf_name, adr_type);
@@ -489,9 +493,6 @@
 
   set_ctrl(transform( new (C, 1) ProjNode(call,TypeFunc::Control) ));
 
-  // Set the incoming clone of memory as current memory
-  set_all_memory(cloned_mem);
-
   // Make memory for the call
   Node* mem = _gvn.transform( new (C, 1) ProjNode(call, TypeFunc::Memory) );
 
--- a/src/share/vm/opto/idealKit.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/idealKit.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -49,7 +49,7 @@
 // Example:
 //    Node* limit = ??
 //    IdealVariable i(kit), j(kit);
-//    declares_done();
+//    declarations_done();
 //    Node* exit = make_label(1); // 1 goto
 //    set(j, ConI(0));
 //    loop(i, ConI(0), BoolTest::lt, limit); {
@@ -101,10 +101,7 @@
   Node* new_cvstate();                     // Create a new cvstate
   Node* cvstate() { return _cvstate; }     // current cvstate
   Node* copy_cvstate();                    // copy current cvstate
-  void set_ctrl(Node* ctrl) { _cvstate->set_req(TypeFunc::Control, ctrl); }
 
-  // Should this assert this is a MergeMem???
-  void set_all_memory(Node* mem){ _cvstate->set_req(TypeFunc::Memory, mem); }
   void set_memory(Node* mem, uint alias_idx );
   void do_memory_merge(Node* merging, Node* join);
   void clear(Node* m);                     // clear a cvstate
@@ -132,15 +129,17 @@
   Node* memory(uint alias_idx);
 
  public:
-  IdealKit(PhaseGVN &gvn, Node* control, Node* memory, bool delay_all_transforms = false);
+  IdealKit(PhaseGVN &gvn, Node* control, Node* memory, bool delay_all_transforms = false, bool has_declarations = false);
   ~IdealKit() {
     stop();
     drain_delay_transform();
   }
   // Control
   Node* ctrl()                          { return _cvstate->in(TypeFunc::Control); }
+  void set_ctrl(Node* ctrl)             { _cvstate->set_req(TypeFunc::Control, ctrl); }
   Node* top()                           { return C->top(); }
   MergeMemNode* merged_memory()         { return _cvstate->in(TypeFunc::Memory)->as_MergeMem(); }
+  void set_all_memory(Node* mem)        { _cvstate->set_req(TypeFunc::Memory, mem); }
   void set(IdealVariable& v, Node* rhs) { _cvstate->set_req(first_var + v.id(), rhs); }
   Node* value(IdealVariable& v)         { return _cvstate->in(first_var + v.id()); }
   void dead(IdealVariable& v)           { set(v, (Node*)NULL); }
@@ -155,7 +154,7 @@
   Node* make_label(int goto_ct);
   void bind(Node* lab);
   void goto_(Node* lab, bool bind = false);
-  void declares_done();
+  void declarations_done();
   void drain_delay_transform();
 
   Node* IfTrue(IfNode* iff)  { return transform(new (C,1) IfTrueNode(iff)); }
--- a/src/share/vm/opto/ifnode.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/ifnode.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -378,7 +378,18 @@
 
   // Force the original merge dead
   igvn->hash_delete(r);
-  r->set_req_X(0,NULL,igvn);
+  // First, remove region's dead users.
+  for (DUIterator_Last lmin, l = r->last_outs(lmin); l >= lmin;) {
+    Node* u = r->last_out(l);
+    if( u == r ) {
+      r->set_req(0, NULL);
+    } else {
+      assert(u->outcnt() == 0, "only dead users");
+      igvn->remove_dead_node(u);
+    }
+    l -= 1;
+  }
+  igvn->remove_dead_node(r);
 
   // Now remove the bogus extra edges used to keep things alive
   igvn->remove_dead_node( hook );
--- a/src/share/vm/opto/library_call.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/library_call.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -165,6 +165,7 @@
   bool inline_native_getLength();
   bool inline_array_copyOf(bool is_copyOfRange);
   bool inline_array_equals();
+  void copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array, bool card_mark);
   bool inline_native_clone(bool is_virtual);
   bool inline_native_Reflection_getCallerClass();
   bool inline_native_AtomicLong_get();
@@ -181,7 +182,6 @@
                           Node* src,  Node* src_offset,
                           Node* dest, Node* dest_offset,
                           Node* copy_length,
-                          int nargs,  // arguments on stack for debug info
                           bool disjoint_bases = false,
                           bool length_never_negative = false,
                           RegionNode* slow_region = NULL);
@@ -202,17 +202,16 @@
   void generate_slow_arraycopy(const TypePtr* adr_type,
                                Node* src,  Node* src_offset,
                                Node* dest, Node* dest_offset,
-                               Node* copy_length,
-                               int nargs);
+                               Node* copy_length);
   Node* generate_checkcast_arraycopy(const TypePtr* adr_type,
                                      Node* dest_elem_klass,
                                      Node* src,  Node* src_offset,
                                      Node* dest, Node* dest_offset,
-                                     Node* copy_length, int nargs);
+                                     Node* copy_length);
   Node* generate_generic_arraycopy(const TypePtr* adr_type,
                                    Node* src,  Node* src_offset,
                                    Node* dest, Node* dest_offset,
-                                   Node* copy_length, int nargs);
+                                   Node* copy_length);
   void generate_unchecked_arraycopy(const TypePtr* adr_type,
                                     BasicType basic_elem_type,
                                     bool disjoint_bases,
@@ -311,11 +310,6 @@
     if (!InlineAtomicLong)  return NULL;
     break;
 
-  case vmIntrinsics::_Object_init:
-  case vmIntrinsics::_invoke:
-    // We do not intrinsify these; they are marked for other purposes.
-    return NULL;
-
   case vmIntrinsics::_getCallerClass:
     if (!UseNewReflection)  return NULL;
     if (!InlineReflectionGetCallerClass)  return NULL;
@@ -328,6 +322,8 @@
     break;
 
  default:
+    assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility");
+    assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?");
     break;
   }
 
@@ -395,18 +391,11 @@
   }
 
   if (PrintIntrinsics) {
-    switch (intrinsic_id()) {
-    case vmIntrinsics::_invoke:
-    case vmIntrinsics::_Object_init:
-      // We do not expect to inline these, so do not produce any noise about them.
-      break;
-    default:
-      tty->print("Did not inline intrinsic %s%s at bci:%d in",
-                 vmIntrinsics::name_at(intrinsic_id()),
-                 (is_virtual() ? " (virtual)" : ""), kit.bci());
-      kit.caller()->print_short_name(tty);
-      tty->print_cr(" (%d bytes)", kit.caller()->code_size());
-    }
+    tty->print("Did not inline intrinsic %s%s at bci:%d in",
+               vmIntrinsics::name_at(intrinsic_id()),
+               (is_virtual() ? " (virtual)" : ""), kit.bci());
+    kit.caller()->print_short_name(tty);
+    tty->print_cr(" (%d bytes)", kit.caller()->code_size());
   }
   C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
   return NULL;
@@ -1031,7 +1020,7 @@
   const TypeAry* target_array_type = TypeAry::make(TypeInt::CHAR, TypeInt::make(0, target_length, Type::WidenMin));
   const TypeAryPtr* target_type = TypeAryPtr::make(TypePtr::BotPTR, target_array_type, target_array->klass(), true, Type::OffsetBot);
 
-  IdealKit kit(gvn(), control(), merged_memory());
+  IdealKit kit(gvn(), control(), merged_memory(), false, true);
 #define __ kit.
   Node* zero             = __ ConI(0);
   Node* one              = __ ConI(1);
@@ -1043,7 +1032,7 @@
   Node* targetOffset     = __ ConI(targetOffset_i);
   Node* sourceEnd        = __ SubI(__ AddI(sourceOffset, sourceCount), targetCountLess1);
 
-  IdealVariable rtn(kit), i(kit), j(kit); __ declares_done();
+  IdealVariable rtn(kit), i(kit), j(kit); __ declarations_done();
   Node* outer_loop = __ make_label(2 /* goto */);
   Node* return_    = __ make_label(1);
 
@@ -1080,9 +1069,9 @@
        __ bind(outer_loop);
   }__ end_loop(); __ dead(i);
   __ bind(return_);
-  __ drain_delay_transform();
-
-  set_control(__ ctrl());
+
+  // Final sync IdealKit and GraphKit.
+  sync_kit(kit);
   Node* result = __ value(rtn);
 #undef __
   C->set_has_loops(true);
@@ -2178,21 +2167,29 @@
       // Possibly an oop being stored to Java heap or native memory
       if (!TypePtr::NULL_PTR->higher_equal(_gvn.type(heap_base_oop))) {
         // oop to Java heap.
-        (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, val->bottom_type(), type);
+        (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type);
       } else {
-
         // We can't tell at compile time if we are storing in the Java heap or outside
         // of it. So we need to emit code to conditionally do the proper type of
         // store.
 
-        IdealKit kit(gvn(), control(),  merged_memory());
-        kit.declares_done();
+        IdealKit ideal(gvn(), control(),  merged_memory());
+#define __ ideal.
         // QQQ who knows what probability is here??
-        kit.if_then(heap_base_oop, BoolTest::ne, null(), PROB_UNLIKELY(0.999)); {
-          (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, val->bottom_type(), type);
-        } kit.else_(); {
-          (void) store_to_memory(control(), adr, val, type, adr_type, is_volatile);
-        } kit.end_if();
+        __ if_then(heap_base_oop, BoolTest::ne, null(), PROB_UNLIKELY(0.999)); {
+          // Sync IdealKit and graphKit.
+          set_all_memory( __ merged_memory());
+          set_control(__ ctrl());
+          Node* st = store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type);
+          // Update IdealKit memory.
+          __ set_all_memory(merged_memory());
+          __ set_ctrl(control());
+        } __ else_(); {
+          __ store(__ ctrl(), adr, val, type, alias_type->index(), is_volatile);
+        } __ end_if();
+        // Final sync IdealKit and GraphKit.
+        sync_kit(ideal);
+#undef __
       }
     }
   }
@@ -2394,7 +2391,7 @@
   case T_OBJECT:
      // reference stores need a store barrier.
     // (They don't if CAS fails, but it isn't worth checking.)
-    pre_barrier(control(), base, adr, alias_idx, newval, value_type, T_OBJECT);
+    pre_barrier(control(), base, adr, alias_idx, newval, value_type->make_oopptr(), T_OBJECT);
 #ifdef _LP64
     if (adr->bottom_type()->is_ptr_to_narrowoop()) {
       Node *newval_enc = _gvn.transform(new (C, 2) EncodePNode(newval, newval->bottom_type()->make_narrowoop()));
@@ -2489,7 +2486,7 @@
   bool require_atomic_access = true;
   Node* store;
   if (type == T_OBJECT) // reference stores need a store barrier.
-    store = store_oop_to_unknown(control(), base, adr, adr_type, val, value_type, type);
+    store = store_oop_to_unknown(control(), base, adr, adr_type, val, type);
   else {
     store = store_to_memory(control(), adr, val, type, adr_type, require_atomic_access);
   }
@@ -3230,7 +3227,8 @@
     Node* orig_tail = _gvn.transform( new(C, 3) SubINode(orig_length, start) );
     Node* moved = generate_min_max(vmIntrinsics::_min, orig_tail, length);
 
-    Node* newcopy = new_array(klass_node, length, nargs);
+    const bool raw_mem_only = true;
+    Node* newcopy = new_array(klass_node, length, nargs, raw_mem_only);
 
     // Generate a direct call to the right arraycopy function(s).
     // We know the copy is disjoint but we might not know if the
@@ -3241,7 +3239,7 @@
     bool length_never_negative = true;
     generate_arraycopy(TypeAryPtr::OOPS, T_OBJECT,
                        original, start, newcopy, intcon(0), moved,
-                       nargs, disjoint_bases, length_never_negative);
+                       disjoint_bases, length_never_negative);
 
     push(newcopy);
   }
@@ -3883,6 +3881,98 @@
   return true;
 }
 
+//------------------------clone_coping-----------------------------------
+// Helper function for inline_native_clone.
+void LibraryCallKit::copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array, bool card_mark) {
+  assert(obj_size != NULL, "");
+  Node* raw_obj = alloc_obj->in(1);
+  assert(alloc_obj->is_CheckCastPP() && raw_obj->is_Proj() && raw_obj->in(0)->is_Allocate(), "");
+
+  if (ReduceBulkZeroing) {
+    // We will be completely responsible for initializing this object -
+    // mark Initialize node as complete.
+    AllocateNode* alloc = AllocateNode::Ideal_allocation(alloc_obj, &_gvn);
+    // The object was just allocated - there should be no any stores!
+    guarantee(alloc != NULL && alloc->maybe_set_complete(&_gvn), "");
+  }
+
+  // Cast to Object for arraycopy.
+  // We can't use the original CheckCastPP since it should be moved
+  // after the arraycopy to prevent stores flowing above it.
+  Node* new_obj = new(C, 2) CheckCastPPNode(alloc_obj->in(0), raw_obj,
+                                            TypeInstPtr::NOTNULL);
+  new_obj = _gvn.transform(new_obj);
+  // Substitute in the locally valid dest_oop.
+  replace_in_map(alloc_obj, new_obj);
+
+  // Copy the fastest available way.
+  // TODO: generate fields copies for small objects instead.
+  Node* src  = obj;
+  Node* dest = new_obj;
+  Node* size = _gvn.transform(obj_size);
+
+  // Exclude the header but include array length to copy by 8 bytes words.
+  // Can't use base_offset_in_bytes(bt) since basic type is unknown.
+  int base_off = is_array ? arrayOopDesc::length_offset_in_bytes() :
+                            instanceOopDesc::base_offset_in_bytes();
+  // base_off:
+  // 8  - 32-bit VM
+  // 12 - 64-bit VM, compressed oops
+  // 16 - 64-bit VM, normal oops
+  if (base_off % BytesPerLong != 0) {
+    assert(UseCompressedOops, "");
+    if (is_array) {
+      // Exclude length to copy by 8 bytes words.
+      base_off += sizeof(int);
+    } else {
+      // Include klass to copy by 8 bytes words.
+      base_off = instanceOopDesc::klass_offset_in_bytes();
+    }
+    assert(base_off % BytesPerLong == 0, "expect 8 bytes alignment");
+  }
+  src  = basic_plus_adr(src,  base_off);
+  dest = basic_plus_adr(dest, base_off);
+
+  // Compute the length also, if needed:
+  Node* countx = size;
+  countx = _gvn.transform( new (C, 3) SubXNode(countx, MakeConX(base_off)) );
+  countx = _gvn.transform( new (C, 3) URShiftXNode(countx, intcon(LogBytesPerLong) ));
+
+  const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
+  bool disjoint_bases = true;
+  generate_unchecked_arraycopy(raw_adr_type, T_LONG, disjoint_bases,
+                               src, NULL, dest, NULL, countx);
+
+  // If necessary, emit some card marks afterwards.  (Non-arrays only.)
+  if (card_mark) {
+    assert(!is_array, "");
+    // Put in store barrier for any and all oops we are sticking
+    // into this object.  (We could avoid this if we could prove
+    // that the object type contains no oop fields at all.)
+    Node* no_particular_value = NULL;
+    Node* no_particular_field = NULL;
+    int raw_adr_idx = Compile::AliasIdxRaw;
+    post_barrier(control(),
+                 memory(raw_adr_type),
+                 new_obj,
+                 no_particular_field,
+                 raw_adr_idx,
+                 no_particular_value,
+                 T_OBJECT,
+                 false);
+  }
+
+  // Move the original CheckCastPP after arraycopy.
+  _gvn.hash_delete(alloc_obj);
+  alloc_obj->set_req(0, control());
+  // Replace raw memory edge with new CheckCastPP to have a live oop
+  // at safepoints instead of raw value.
+  assert(new_obj->is_CheckCastPP() && new_obj->in(1) == alloc_obj->in(1), "sanity");
+  alloc_obj->set_req(1, new_obj);    // cast to the original type
+  _gvn.hash_find_insert(alloc_obj);  // put back into GVN table
+  // Restore in the locally valid dest_oop.
+  replace_in_map(new_obj, alloc_obj);
+}
 
 //------------------------inline_native_clone----------------------------
 // Here are the simple edge cases:
@@ -3917,8 +4007,9 @@
   // paths into result_reg:
   enum {
     _slow_path = 1,     // out-of-line call to clone method (virtual or not)
-    _objArray_path,     // plain allocation, plus arrayof_oop_arraycopy
-    _fast_path,         // plain allocation, plus a CopyArray operation
+    _objArray_path,     // plain array allocation, plus arrayof_oop_arraycopy
+    _array_path,        // plain array allocation, plus arrayof_long_arraycopy
+    _instance_path,     // plain instance allocation, plus arrayof_long_arraycopy
     PATH_LIMIT
   };
   RegionNode* result_reg = new(C, PATH_LIMIT) RegionNode(PATH_LIMIT);
@@ -3933,18 +4024,6 @@
   int raw_adr_idx = Compile::AliasIdxRaw;
   const bool raw_mem_only = true;
 
-  // paths into alloc_reg (on the fast path, just before the CopyArray):
-  enum { _typeArray_alloc = 1, _instance_alloc, ALLOC_LIMIT };
-  RegionNode* alloc_reg = new(C, ALLOC_LIMIT) RegionNode(ALLOC_LIMIT);
-  PhiNode*    alloc_val = new(C, ALLOC_LIMIT) PhiNode(alloc_reg, raw_adr_type);
-  PhiNode*    alloc_siz = new(C, ALLOC_LIMIT) PhiNode(alloc_reg, TypeX_X);
-  PhiNode*    alloc_i_o = new(C, ALLOC_LIMIT) PhiNode(alloc_reg, Type::ABIO);
-  PhiNode*    alloc_mem = new(C, ALLOC_LIMIT) PhiNode(alloc_reg, Type::MEMORY,
-                                                      raw_adr_type);
-  record_for_igvn(alloc_reg);
-
-  bool card_mark = false;  // (see below)
-
   Node* array_ctl = generate_array_guard(obj_klass, (RegionNode*)NULL);
   if (array_ctl != NULL) {
     // It's an array.
@@ -3954,16 +4033,6 @@
     Node* obj_size = NULL;
     Node* alloc_obj = new_array(obj_klass, obj_length, nargs,
                                 raw_mem_only, &obj_size);
-    assert(obj_size != NULL, "");
-    Node* raw_obj = alloc_obj->in(1);
-    assert(raw_obj->is_Proj() && raw_obj->in(0)->is_Allocate(), "");
-    if (ReduceBulkZeroing) {
-      AllocateNode* alloc = AllocateNode::Ideal_allocation(alloc_obj, &_gvn);
-      if (alloc != NULL) {
-        // We will be completely responsible for initializing this object.
-        alloc->maybe_set_complete(&_gvn);
-      }
-    }
 
     if (!use_ReduceInitialCardMarks()) {
       // If it is an oop array, it requires very special treatment,
@@ -3977,7 +4046,7 @@
         bool length_never_negative = true;
         generate_arraycopy(TypeAryPtr::OOPS, T_OBJECT,
                            obj, intcon(0), alloc_obj, intcon(0),
-                           obj_length, nargs,
+                           obj_length,
                            disjoint_bases, length_never_negative);
         result_reg->init_req(_objArray_path, control());
         result_val->init_req(_objArray_path, alloc_obj);
@@ -3992,19 +4061,24 @@
     // the object.
 
     // Otherwise, there are no card marks to worry about.
-    alloc_val->init_req(_typeArray_alloc, raw_obj);
-    alloc_siz->init_req(_typeArray_alloc, obj_size);
-    alloc_reg->init_req(_typeArray_alloc, control());
-    alloc_i_o->init_req(_typeArray_alloc, i_o());
-    alloc_mem->init_req(_typeArray_alloc, memory(raw_adr_type));
+
+    if (!stopped()) {
+      copy_to_clone(obj, alloc_obj, obj_size, true, false);
+
+      // Present the results of the copy.
+      result_reg->init_req(_array_path, control());
+      result_val->init_req(_array_path, alloc_obj);
+      result_i_o ->set_req(_array_path, i_o());
+      result_mem ->set_req(_array_path, reset_memory());
+    }
   }
 
-  // We only go to the fast case code if we pass a number of guards.
+  // We only go to the instance fast case code if we pass a number of guards.
   // The paths which do not pass are accumulated in the slow_region.
   RegionNode* slow_region = new (C, 1) RegionNode(1);
   record_for_igvn(slow_region);
   if (!stopped()) {
-    // It's an instance.  Make the slow-path tests.
+    // It's an instance (we did array above).  Make the slow-path tests.
     // If this is a virtual call, we generate a funny guard.  We grab
     // the vtable entry corresponding to clone() from the target object.
     // If the target method which we are calling happens to be the
@@ -4031,25 +4105,14 @@
     PreserveJVMState pjvms(this);
     Node* obj_size = NULL;
     Node* alloc_obj = new_instance(obj_klass, NULL, raw_mem_only, &obj_size);
-    assert(obj_size != NULL, "");
-    Node* raw_obj = alloc_obj->in(1);
-    assert(raw_obj->is_Proj() && raw_obj->in(0)->is_Allocate(), "");
-    if (ReduceBulkZeroing) {
-      AllocateNode* alloc = AllocateNode::Ideal_allocation(alloc_obj, &_gvn);
-      if (alloc != NULL && !alloc->maybe_set_complete(&_gvn))
-        alloc = NULL;
-    }
-    if (!use_ReduceInitialCardMarks()) {
-      // Put in store barrier for any and all oops we are sticking
-      // into this object.  (We could avoid this if we could prove
-      // that the object type contains no oop fields at all.)
-      card_mark = true;
-    }
-    alloc_val->init_req(_instance_alloc, raw_obj);
-    alloc_siz->init_req(_instance_alloc, obj_size);
-    alloc_reg->init_req(_instance_alloc, control());
-    alloc_i_o->init_req(_instance_alloc, i_o());
-    alloc_mem->init_req(_instance_alloc, memory(raw_adr_type));
+
+    copy_to_clone(obj, alloc_obj, obj_size, false, !use_ReduceInitialCardMarks());
+
+    // Present the results of the slow call.
+    result_reg->init_req(_instance_path, control());
+    result_val->init_req(_instance_path, alloc_obj);
+    result_i_o ->set_req(_instance_path, i_o());
+    result_mem ->set_req(_instance_path, reset_memory());
   }
 
   // Generate code for the slow case.  We make a call to clone().
@@ -4065,82 +4128,12 @@
     result_mem ->set_req(_slow_path, reset_memory());
   }
 
-  // The object is allocated, as an array and/or an instance.  Now copy it.
-  set_control( _gvn.transform(alloc_reg) );
-  set_i_o(     _gvn.transform(alloc_i_o) );
-  set_memory(  _gvn.transform(alloc_mem), raw_adr_type );
-  Node* raw_obj  = _gvn.transform(alloc_val);
-
-  if (!stopped()) {
-    // Copy the fastest available way.
-    // (No need for PreserveJVMState, since we're using it all up now.)
-    // TODO: generate fields/elements copies for small objects instead.
-    Node* src  = obj;
-    Node* dest = raw_obj;
-    Node* size = _gvn.transform(alloc_siz);
-
-    // Exclude the header.
-    int base_off = instanceOopDesc::base_offset_in_bytes();
-    if (UseCompressedOops) {
-      assert(base_off % BytesPerLong != 0, "base with compressed oops");
-      // With compressed oops base_offset_in_bytes is 12 which creates
-      // the gap since countx is rounded by 8 bytes below.
-      // Copy klass and the gap.
-      base_off = instanceOopDesc::klass_offset_in_bytes();
-    }
-    src  = basic_plus_adr(src,  base_off);
-    dest = basic_plus_adr(dest, base_off);
-
-    // Compute the length also, if needed:
-    Node* countx = size;
-    countx = _gvn.transform( new (C, 3) SubXNode(countx, MakeConX(base_off)) );
-    countx = _gvn.transform( new (C, 3) URShiftXNode(countx, intcon(LogBytesPerLong) ));
-
-    // Select an appropriate instruction to initialize the range.
-    // The CopyArray instruction (if supported) can be optimized
-    // into a discrete set of scalar loads and stores.
-    bool disjoint_bases = true;
-    generate_unchecked_arraycopy(raw_adr_type, T_LONG, disjoint_bases,
-                                 src, NULL, dest, NULL, countx);
-
-    // Now that the object is properly initialized, type it as an oop.
-    // Use a secondary InitializeNode memory barrier.
-    InitializeNode* init = insert_mem_bar_volatile(Op_Initialize, raw_adr_idx,
-                                                   raw_obj)->as_Initialize();
-    init->set_complete(&_gvn);  // (there is no corresponding AllocateNode)
-    Node* new_obj = new(C, 2) CheckCastPPNode(control(), raw_obj,
-                                              TypeInstPtr::NOTNULL);
-    new_obj = _gvn.transform(new_obj);
-
-    // If necessary, emit some card marks afterwards.  (Non-arrays only.)
-    if (card_mark) {
-      Node* no_particular_value = NULL;
-      Node* no_particular_field = NULL;
-      post_barrier(control(),
-                   memory(raw_adr_type),
-                   new_obj,
-                   no_particular_field,
-                   raw_adr_idx,
-                   no_particular_value,
-                   T_OBJECT,
-                   false);
-    }
-    // Present the results of the slow call.
-    result_reg->init_req(_fast_path, control());
-    result_val->init_req(_fast_path, new_obj);
-    result_i_o ->set_req(_fast_path, i_o());
-    result_mem ->set_req(_fast_path, reset_memory());
-  }
-
   // Return the combined state.
   set_control(    _gvn.transform(result_reg) );
   set_i_o(        _gvn.transform(result_i_o) );
   set_all_memory( _gvn.transform(result_mem) );
 
-  // Cast the result to a sharper type, since we know what clone does.
-  Node* new_obj = _gvn.transform(result_val);
-  Node* cast    = new (C, 2) CheckCastPPNode(control(), new_obj, toop);
-  push(_gvn.transform(cast));
+  push(_gvn.transform(result_val));
 
   return true;
 }
@@ -4279,8 +4272,7 @@
 
     // Call StubRoutines::generic_arraycopy stub.
     generate_arraycopy(TypeRawPtr::BOTTOM, T_CONFLICT,
-                       src, src_offset, dest, dest_offset, length,
-                       nargs);
+                       src, src_offset, dest, dest_offset, length);
 
     // Do not let reads from the destination float above the arraycopy.
     // Since we cannot type the arrays, we don't know which slices
@@ -4303,8 +4295,7 @@
     // The component types are not the same or are not recognized.  Punt.
     // (But, avoid the native method wrapper to JVM_ArrayCopy.)
     generate_slow_arraycopy(TypePtr::BOTTOM,
-                            src, src_offset, dest, dest_offset, length,
-                            nargs);
+                            src, src_offset, dest, dest_offset, length);
     return true;
   }
 
@@ -4361,7 +4352,7 @@
   const TypePtr* adr_type = TypeAryPtr::get_array_body_type(dest_elem);
   generate_arraycopy(adr_type, dest_elem,
                      src, src_offset, dest, dest_offset, length,
-                     nargs, false, false, slow_region);
+                     false, false, slow_region);
 
   return true;
 }
@@ -4406,7 +4397,6 @@
                                    Node* src,  Node* src_offset,
                                    Node* dest, Node* dest_offset,
                                    Node* copy_length,
-                                   int nargs,
                                    bool disjoint_bases,
                                    bool length_never_negative,
                                    RegionNode* slow_region) {
@@ -4418,7 +4408,6 @@
 
   Node* original_dest      = dest;
   AllocateArrayNode* alloc = NULL;  // used for zeroing, if needed
-  Node* raw_dest           = NULL;  // used before zeroing, if needed
   bool  must_clear_dest    = false;
 
   // See if this is the initialization of a newly-allocated array.
@@ -4437,15 +4426,18 @@
     // "You break it, you buy it."
     InitializeNode* init = alloc->initialization();
     assert(init->is_complete(), "we just did this");
-    assert(dest->Opcode() == Op_CheckCastPP, "sanity");
+    assert(dest->is_CheckCastPP(), "sanity");
     assert(dest->in(0)->in(0) == init, "dest pinned");
-    raw_dest = dest->in(1);  // grab the raw pointer!
-    original_dest = dest;
-    dest = raw_dest;
+
+    // Cast to Object for arraycopy.
+    // We can't use the original CheckCastPP since it should be moved
+    // after the arraycopy to prevent stores flowing above it.
+    Node* new_obj = new(C, 2) CheckCastPPNode(dest->in(0), dest->in(1),
+                                              TypeInstPtr::NOTNULL);
+    dest = _gvn.transform(new_obj);
+    // Substitute in the locally valid dest_oop.
+    replace_in_map(original_dest, dest);
     adr_type = TypeRawPtr::BOTTOM;  // all initializations are into raw memory
-    // Decouple the original InitializeNode, turning it into a simple membar.
-    // We will build a new one at the end of this routine.
-    init->set_req(InitializeNode::RawAddress, top());
     // From this point on, every exit path is responsible for
     // initializing any non-copied parts of the object to zero.
     must_clear_dest = true;
@@ -4488,7 +4480,7 @@
     assert(!must_clear_dest, "");
     Node* cv = generate_generic_arraycopy(adr_type,
                                           src, src_offset, dest, dest_offset,
-                                          copy_length, nargs);
+                                          copy_length);
     if (cv == NULL)  cv = intcon(-1);  // failure (no stub available)
     checked_control = control();
     checked_i_o     = i_o();
@@ -4507,16 +4499,24 @@
       generate_negative_guard(copy_length, slow_region);
     }
 
+    // copy_length is 0.
     if (!stopped() && must_clear_dest) {
       Node* dest_length = alloc->in(AllocateNode::ALength);
       if (_gvn.eqv_uncast(copy_length, dest_length)
           || _gvn.find_int_con(dest_length, 1) <= 0) {
-        // There is no zeroing to do.
+        // There is no zeroing to do. No need for a secondary raw memory barrier.
       } else {
         // Clear the whole thing since there are no source elements to copy.
         generate_clear_array(adr_type, dest, basic_elem_type,
                              intcon(0), NULL,
                              alloc->in(AllocateNode::AllocSize));
+        // Use a secondary InitializeNode as raw memory barrier.
+        // Currently it is needed only on this path since other
+        // paths have stub or runtime calls as raw memory barriers.
+        InitializeNode* init = insert_mem_bar_volatile(Op_Initialize,
+                                                       Compile::AliasIdxRaw,
+                                                       top())->as_Initialize();
+        init->set_complete(&_gvn);  // (there is no corresponding AllocateNode)
       }
     }
 
@@ -4638,8 +4638,7 @@
       Node* cv = generate_checkcast_arraycopy(adr_type,
                                               dest_elem_klass,
                                               src, src_offset, dest, dest_offset,
-                                              copy_length,
-                                              nargs);
+                                              copy_length);
       if (cv == NULL)  cv = intcon(-1);  // failure (no stub available)
       checked_control = control();
       checked_i_o     = i_o();
@@ -4701,8 +4700,8 @@
     slow_i_o2  ->init_req(1, slow_i_o);
     slow_mem2  ->init_req(1, slow_mem);
     slow_reg2  ->init_req(2, control());
-    slow_i_o2  ->init_req(2, i_o());
-    slow_mem2  ->init_req(2, memory(adr_type));
+    slow_i_o2  ->init_req(2, checked_i_o);
+    slow_mem2  ->init_req(2, checked_mem);
 
     slow_control = _gvn.transform(slow_reg2);
     slow_i_o     = _gvn.transform(slow_i_o2);
@@ -4747,21 +4746,9 @@
                            alloc->in(AllocateNode::AllocSize));
     }
 
-    if (dest != original_dest) {
-      // Promote from rawptr to oop, so it looks right in the call's GC map.
-      dest = _gvn.transform( new(C,2) CheckCastPPNode(control(), dest,
-                                                      TypeInstPtr::NOTNULL) );
-
-      // Edit the call's debug-info to avoid referring to original_dest.
-      // (The problem with original_dest is that it isn't ready until
-      // after the InitializeNode completes, but this stuff is before.)
-      // Substitute in the locally valid dest_oop.
-      replace_in_map(original_dest, dest);
-    }
-
     generate_slow_arraycopy(adr_type,
                             src, src_offset, dest, dest_offset,
-                            copy_length, nargs);
+                            copy_length);
 
     result_region->init_req(slow_call_path, control());
     result_i_o   ->init_req(slow_call_path, i_o());
@@ -4781,16 +4768,16 @@
 
   if (dest != original_dest) {
     // Pin the "finished" array node after the arraycopy/zeroing operations.
-    // Use a secondary InitializeNode memory barrier.
-    InitializeNode* init = insert_mem_bar_volatile(Op_Initialize,
-                                                   Compile::AliasIdxRaw,
-                                                   raw_dest)->as_Initialize();
-    init->set_complete(&_gvn);  // (there is no corresponding AllocateNode)
     _gvn.hash_delete(original_dest);
     original_dest->set_req(0, control());
+    // Replace raw memory edge with new CheckCastPP to have a live oop
+    // at safepoints instead of raw value.
+    assert(dest->is_CheckCastPP() && dest->in(1) == original_dest->in(1), "sanity");
+    original_dest->set_req(1, dest);       // cast to the original type
     _gvn.hash_find_insert(original_dest);  // put back into GVN table
+    // Restore in the locally valid dest_oop.
+    replace_in_map(dest, original_dest);
   }
-
   // The memory edges above are precise in order to model effects around
   // array copies accurately to allow value numbering of field loads around
   // arraycopy.  Such field loads, both before and after, are common in Java
@@ -5074,16 +5061,13 @@
 LibraryCallKit::generate_slow_arraycopy(const TypePtr* adr_type,
                                         Node* src,  Node* src_offset,
                                         Node* dest, Node* dest_offset,
-                                        Node* copy_length,
-                                        int nargs) {
-  _sp += nargs; // any deopt will start just before call to enclosing method
+                                        Node* copy_length) {
   Node* call = make_runtime_call(RC_NO_LEAF | RC_UNCOMMON,
                                  OptoRuntime::slow_arraycopy_Type(),
                                  OptoRuntime::slow_arraycopy_Java(),
                                  "slow_arraycopy", adr_type,
                                  src, src_offset, dest, dest_offset,
                                  copy_length);
-  _sp -= nargs;
 
   // Handle exceptions thrown by this fellow:
   make_slow_call_ex(call, env()->Throwable_klass(), false);
@@ -5095,8 +5079,7 @@
                                              Node* dest_elem_klass,
                                              Node* src,  Node* src_offset,
                                              Node* dest, Node* dest_offset,
-                                             Node* copy_length,
-                                             int nargs) {
+                                             Node* copy_length) {
   if (stopped())  return NULL;
 
   address copyfunc_addr = StubRoutines::checkcast_arraycopy();
@@ -5137,8 +5120,7 @@
 LibraryCallKit::generate_generic_arraycopy(const TypePtr* adr_type,
                                            Node* src,  Node* src_offset,
                                            Node* dest, Node* dest_offset,
-                                           Node* copy_length,
-                                           int nargs) {
+                                           Node* copy_length) {
   if (stopped())  return NULL;
 
   address copyfunc_addr = StubRoutines::generic_arraycopy();
--- a/src/share/vm/opto/loopopts.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/loopopts.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -346,7 +346,10 @@
 
     // Yes!  Reshape address expression!
     Node *inv_scale = new (C, 3) LShiftINode( add_invar, scale );
-    register_new_node( inv_scale, add_invar_ctrl );
+    Node *inv_scale_ctrl =
+      dom_depth(add_invar_ctrl) > dom_depth(scale_ctrl) ?
+      add_invar_ctrl : scale_ctrl;
+    register_new_node( inv_scale, inv_scale_ctrl );
     Node *var_scale = new (C, 3) LShiftINode( add_var, scale );
     register_new_node( var_scale, n_ctrl );
     Node *var_add = new (C, 3) AddINode( var_scale, inv_scale );
@@ -667,7 +670,6 @@
   }
 }
 
-#ifdef _LP64
 static bool merge_point_safe(Node* region) {
   // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode
   // having a PhiNode input. This sidesteps the dangerous case where the split
@@ -676,20 +678,25 @@
   // uses.
   // A better fix for this problem can be found in the BugTraq entry, but
   // expediency for Mantis demands this hack.
+  // 6855164: If the merge point has a FastLockNode with a PhiNode input, we stop
+  // split_if_with_blocks from splitting a block because we could not move around
+  // the FastLockNode.
   for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
     Node* n = region->fast_out(i);
     if (n->is_Phi()) {
       for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
         Node* m = n->fast_out(j);
-        if (m->Opcode() == Op_ConvI2L) {
+        if (m->is_FastLock())
           return false;
-        }
+#ifdef _LP64
+        if (m->Opcode() == Op_ConvI2L)
+          return false;
+#endif
       }
     }
   }
   return true;
 }
-#endif
 
 
 //------------------------------place_near_use---------------------------------
@@ -771,12 +778,10 @@
       if( get_loop(n_ctrl->in(j)) != n_loop )
         return;
 
-#ifdef _LP64
     // Check for safety of the merge point.
     if( !merge_point_safe(n_ctrl) ) {
       return;
     }
-#endif
 
     // Split compare 'n' through the merge point if it is profitable
     Node *phi = split_thru_phi( n, n_ctrl, policy );
--- a/src/share/vm/opto/machnode.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/machnode.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -300,6 +300,12 @@
           }
         }
         adr_type = t_disp->add_offset(offset);
+      } else if( base == NULL && offset != 0 && offset != Type::OffsetBot ) {
+        // Use ideal type if it is oop ptr.
+        const TypePtr *tp = oper->type()->isa_ptr();
+        if( tp != NULL) {
+          adr_type = tp;
+        }
       }
     }
 
--- a/src/share/vm/opto/macro.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/macro.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -198,14 +198,79 @@
 }
 
 // Eliminate a card mark sequence.  p2x is a ConvP2XNode
-void PhaseMacroExpand::eliminate_card_mark(Node *p2x) {
+void PhaseMacroExpand::eliminate_card_mark(Node* p2x) {
   assert(p2x->Opcode() == Op_CastP2X, "ConvP2XNode required");
-  Node *shift = p2x->unique_out();
-  Node *addp = shift->unique_out();
-  for (DUIterator_Last jmin, j = addp->last_outs(jmin); j >= jmin; --j) {
-    Node *st = addp->last_out(j);
-    assert(st->is_Store(), "store required");
-    _igvn.replace_node(st, st->in(MemNode::Memory));
+  if (!UseG1GC) {
+    // vanilla/CMS post barrier
+    Node *shift = p2x->unique_out();
+    Node *addp = shift->unique_out();
+    for (DUIterator_Last jmin, j = addp->last_outs(jmin); j >= jmin; --j) {
+      Node *st = addp->last_out(j);
+      assert(st->is_Store(), "store required");
+      _igvn.replace_node(st, st->in(MemNode::Memory));
+    }
+  } else {
+    // G1 pre/post barriers
+    assert(p2x->outcnt() == 2, "expects 2 users: Xor and URShift nodes");
+    // It could be only one user, URShift node, in Object.clone() instrinsic
+    // but the new allocation is passed to arraycopy stub and it could not
+    // be scalar replaced. So we don't check the case.
+
+    // Remove G1 post barrier.
+
+    // Search for CastP2X->Xor->URShift->Cmp path which
+    // checks if the store done to a different from the value's region.
+    // And replace Cmp with #0 (false) to collapse G1 post barrier.
+    Node* xorx = NULL;
+    for (DUIterator_Fast imax, i = p2x->fast_outs(imax); i < imax; i++) {
+      Node* u = p2x->fast_out(i);
+      if (u->Opcode() == Op_XorX) {
+        xorx = u;
+        break;
+      }
+    }
+    assert(xorx != NULL, "missing G1 post barrier");
+    Node* shift = xorx->unique_out();
+    Node* cmpx = shift->unique_out();
+    assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() &&
+    cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne,
+    "missing region check in G1 post barrier");
+    _igvn.replace_node(cmpx, makecon(TypeInt::CC_EQ));
+
+    // Remove G1 pre barrier.
+
+    // Search "if (marking != 0)" check and set it to "false".
+    Node* this_region = p2x->in(0);
+    assert(this_region != NULL, "");
+    // There is no G1 pre barrier if previous stored value is NULL
+    // (for example, after initialization).
+    if (this_region->is_Region() && this_region->req() == 3) {
+      int ind = 1;
+      if (!this_region->in(ind)->is_IfFalse()) {
+        ind = 2;
+      }
+      if (this_region->in(ind)->is_IfFalse()) {
+        Node* bol = this_region->in(ind)->in(0)->in(1);
+        assert(bol->is_Bool(), "");
+        cmpx = bol->in(1);
+        if (bol->as_Bool()->_test._test == BoolTest::ne &&
+            cmpx->is_Cmp() && cmpx->in(2) == intcon(0) &&
+            cmpx->in(1)->is_Load()) {
+          Node* adr = cmpx->in(1)->as_Load()->in(MemNode::Address);
+          const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() +
+                                              PtrQueue::byte_offset_of_active());
+          if (adr->is_AddP() && adr->in(AddPNode::Base) == top() &&
+              adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal &&
+              adr->in(AddPNode::Offset) == MakeConX(marking_offset)) {
+            _igvn.replace_node(cmpx, makecon(TypeInt::CC_EQ));
+          }
+        }
+      }
+    }
+    // Now CastP2X can be removed since it is used only on dead path
+    // which currently still alive until igvn optimize it.
+    assert(p2x->unique_out()->Opcode() == Op_URShiftX, "");
+    _igvn.replace_node(p2x, top());
   }
 }
 
@@ -760,14 +825,11 @@
           if (n->is_Store()) {
             _igvn.replace_node(n, n->in(MemNode::Memory));
           } else {
-            assert( n->Opcode() == Op_CastP2X, "CastP2X required");
             eliminate_card_mark(n);
           }
           k -= (oc2 - use->outcnt());
         }
       } else {
-        assert( !use->is_SafePoint(), "safepoint uses must have been already elimiated");
-        assert( use->Opcode() == Op_CastP2X, "CastP2X required");
         eliminate_card_mark(use);
       }
       j -= (oc1 - res->outcnt());
--- a/src/share/vm/opto/matcher.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/matcher.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -141,6 +141,10 @@
 
 //---------------------------match---------------------------------------------
 void Matcher::match( ) {
+  if( MaxLabelRootDepth < 100 ) { // Too small?
+    assert(false, "invalid MaxLabelRootDepth, increase it to 100 minimum");
+    MaxLabelRootDepth = 100;
+  }
   // One-time initialization of some register masks.
   init_spill_mask( C->root()->in(1) );
   _return_addr_mask = return_addr();
@@ -1485,8 +1489,7 @@
 #ifdef ASSERT
     // Verify adr type after matching memory operation
     const MachOper* oper = mach->memory_operand();
-    if (oper != NULL && oper != (MachOper*)-1 &&
-        mach->adr_type() != TypeRawPtr::BOTTOM) { // non-direct addressing mode
+    if (oper != NULL && oper != (MachOper*)-1) {
       // It has a unique memory operand.  Find corresponding ideal mem node.
       Node* m = NULL;
       if (leaf->is_Mem()) {
--- a/src/share/vm/opto/mulnode.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/mulnode.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -430,31 +430,28 @@
   // x & x => x
   if (phase->eqv(in(1), in(2))) return in(1);
 
-  Node *load = in(1);
-  const TypeInt *t2 = phase->type( in(2) )->isa_int();
-  if( t2 && t2->is_con() ) {
+  Node* in1 = in(1);
+  uint op = in1->Opcode();
+  const TypeInt* t2 = phase->type(in(2))->isa_int();
+  if (t2 && t2->is_con()) {
     int con = t2->get_con();
     // Masking off high bits which are always zero is useless.
     const TypeInt* t1 = phase->type( in(1) )->isa_int();
     if (t1 != NULL && t1->_lo >= 0) {
-      jint t1_support = ((jint)1 << (1 + log2_intptr(t1->_hi))) - 1;
+      jint t1_support = right_n_bits(1 + log2_intptr(t1->_hi));
       if ((t1_support & con) == t1_support)
-        return load;
+        return in1;
     }
-    uint lop = load->Opcode();
-    if( lop == Op_LoadUS &&
-        con == 0x0000FFFF )     // Already zero-extended
-      return load;
     // Masking off the high bits of a unsigned-shift-right is not
     // needed either.
-    if( lop == Op_URShiftI ) {
-      const TypeInt *t12 = phase->type( load->in(2) )->isa_int();
-      if( t12 && t12->is_con() ) {  // Shift is by a constant
+    if (op == Op_URShiftI) {
+      const TypeInt* t12 = phase->type(in1->in(2))->isa_int();
+      if (t12 && t12->is_con()) {  // Shift is by a constant
         int shift = t12->get_con();
         shift &= BitsPerJavaInteger - 1;  // semantics of Java shifts
         int mask = max_juint >> shift;
-        if( (mask&con) == mask )  // If AND is useless, skip it
-          return load;
+        if ((mask & con) == mask)  // If AND is useless, skip it
+          return in1;
       }
     }
   }
@@ -476,26 +473,17 @@
     return new (phase->C, 3) AndINode(load,phase->intcon(mask&0xFFFF));
 
   // Masking bits off of a Short?  Loading a Character does some masking
-  if( lop == Op_LoadS &&
-      (mask & 0xFFFF0000) == 0 ) {
+  if (lop == Op_LoadS && (mask & 0xFFFF0000) == 0 ) {
     Node *ldus = new (phase->C, 3) LoadUSNode(load->in(MemNode::Control),
-                                  load->in(MemNode::Memory),
-                                  load->in(MemNode::Address),
-                                  load->adr_type());
+                                              load->in(MemNode::Memory),
+                                              load->in(MemNode::Address),
+                                              load->adr_type());
     ldus = phase->transform(ldus);
-    return new (phase->C, 3) AndINode(ldus, phase->intcon(mask&0xFFFF));
+    return new (phase->C, 3) AndINode(ldus, phase->intcon(mask & 0xFFFF));
   }
 
-  // Masking sign bits off of a Byte?  Do an unsigned byte load.
-  if (lop == Op_LoadB && mask == 0x000000FF) {
-    return new (phase->C, 3) LoadUBNode(load->in(MemNode::Control),
-                                        load->in(MemNode::Memory),
-                                        load->in(MemNode::Address),
-                                        load->adr_type());
-  }
-
-  // Masking sign bits off of a Byte plus additional lower bits?  Do
-  // an unsigned byte load plus an and.
+  // Masking sign bits off of a Byte?  Do an unsigned byte load plus
+  // an and.
   if (lop == Op_LoadB && (mask & 0xFFFFFF00) == 0) {
     Node* ldub = new (phase->C, 3) LoadUBNode(load->in(MemNode::Control),
                                               load->in(MemNode::Memory),
@@ -605,8 +593,13 @@
   Node* in1 = in(1);
   uint op = in1->Opcode();
 
-  // Masking sign bits off of an integer?  Do an unsigned integer to long load.
-  if (op == Op_ConvI2L && in1->in(1)->Opcode() == Op_LoadI && mask == 0x00000000FFFFFFFFL) {
+  // Masking sign bits off of an integer?  Do an unsigned integer to
+  // long load.
+  // NOTE: This check must be *before* we try to convert the AndLNode
+  // to an AndINode and commute it with ConvI2LNode because
+  // 0xFFFFFFFFL masks the whole integer and we get a sign extension,
+  // which is wrong.
+  if (op == Op_ConvI2L && in1->in(1)->Opcode() == Op_LoadI && mask == CONST64(0x00000000FFFFFFFF)) {
     Node* load = in1->in(1);
     return new (phase->C, 3) LoadUI2LNode(load->in(MemNode::Control),
                                           load->in(MemNode::Memory),
@@ -614,9 +607,22 @@
                                           load->adr_type());
   }
 
+  // Are we masking a long that was converted from an int with a mask
+  // that fits in 32-bits?  Commute them and use an AndINode.
+  if (op == Op_ConvI2L && (mask & CONST64(0xFFFFFFFF00000000)) == 0) {
+    // If we are doing an UI2L conversion (i.e. the mask is
+    // 0x00000000FFFFFFFF) we cannot convert the AndL to an AndI
+    // because the AndI would be optimized away later in Identity.
+    if (mask != CONST64(0x00000000FFFFFFFF)) {
+      Node* andi = new (phase->C, 3) AndINode(in1->in(1), phase->intcon(mask));
+      andi = phase->transform(andi);
+      return new (phase->C, 2) ConvI2LNode(andi);
+    }
+  }
+
   // Masking off sign bits?  Dont make them!
   if (op == Op_RShiftL) {
-    const TypeInt *t12 = phase->type(in1->in(2))->isa_int();
+    const TypeInt* t12 = phase->type(in1->in(2))->isa_int();
     if( t12 && t12->is_con() ) { // Shift is by a constant
       int shift = t12->get_con();
       shift &= BitsPerJavaLong - 1;  // semantics of Java shifts
@@ -626,7 +632,7 @@
       if( (sign_bits_mask & mask) == 0 ) {
         // Use zero-fill shift instead
         Node *zshift = phase->transform(new (phase->C, 3) URShiftLNode(in1->in(1), in1->in(2)));
-        return new (phase->C, 3) AndLNode( zshift, in(2) );
+        return new (phase->C, 3) AndLNode(zshift, in(2));
       }
     }
   }
--- a/src/share/vm/opto/output.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/output.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -50,6 +50,13 @@
   init_scratch_buffer_blob();
   if (failing())  return; // Out of memory
 
+  // The number of new nodes (mostly MachNop) is proportional to
+  // the number of java calls and inner loops which are aligned.
+  if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 +
+                            C->inner_loops()*(OptoLoopAlignment-1)),
+                           "out of nodes before code generation" ) ) {
+    return;
+  }
   // Make sure I can find the Start Node
   Block_Array& bbs = _cfg->_bbs;
   Block *entry = _cfg->_blocks[1];
@@ -1105,7 +1112,7 @@
   uint *call_returns = NEW_RESOURCE_ARRAY(uint, _cfg->_num_blocks+1);
 
   uint  return_offset = 0;
-  MachNode *nop = new (this) MachNopNode();
+  int nop_size = (new (this) MachNopNode())->size(_regalloc);
 
   int previous_offset = 0;
   int current_offset  = 0;
@@ -1188,7 +1195,6 @@
         }
 
         // align the instruction if necessary
-        int nop_size = nop->size(_regalloc);
         int padding = mach->compute_padding(current_offset);
         // Make sure safepoint node for polling is distinct from a call's
         // return by adding a nop if needed.
@@ -1372,7 +1378,6 @@
 
     // If the next block is the top of a loop, pad this block out to align
     // the loop top a little. Helps prevent pipe stalls at loop back branches.
-    int nop_size = (new (this) MachNopNode())->size(_regalloc);
     if( i<_cfg->_num_blocks-1 ) {
       Block *nb = _cfg->_blocks[i+1];
       uint padding = nb->alignment_padding(current_offset);
--- a/src/share/vm/opto/parse2.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/parse2.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -1565,7 +1565,7 @@
     c = pop();                  // Oop to store
     b = pop();                  // index (already used)
     a = pop();                  // the array itself
-    const Type* elemtype  = _gvn.type(a)->is_aryptr()->elem();
+    const TypeOopPtr* elemtype  = _gvn.type(a)->is_aryptr()->elem()->make_oopptr();
     const TypeAryPtr* adr_type = TypeAryPtr::OOPS;
     Node* store = store_oop_to_array(control(), a, d, adr_type, c, elemtype, T_OBJECT);
     break;
--- a/src/share/vm/opto/parse3.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/parse3.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -222,7 +222,7 @@
   // Store the value.
   Node* store;
   if (bt == T_OBJECT) {
-    const TypePtr* field_type;
+    const TypeOopPtr* field_type;
     if (!field->type()->is_loaded()) {
       field_type = TypeInstPtr::BOTTOM;
     } else {
@@ -361,7 +361,7 @@
     guarantee(length_con >= 0, "non-constant multianewarray");
     ciArrayKlass* array_klass_1 = array_klass->as_obj_array_klass()->element_klass()->as_array_klass();
     const TypePtr* adr_type = TypeAryPtr::OOPS;
-    const Type*    elemtype = _gvn.type(array)->is_aryptr()->elem();
+    const TypeOopPtr*    elemtype = _gvn.type(array)->is_aryptr()->elem()->make_oopptr();
     const intptr_t header   = arrayOopDesc::base_offset_in_bytes(T_OBJECT);
     for (jint i = 0; i < length_con; i++) {
       Node*    elem   = expand_multianewarray(array_klass_1, &lengths[1], ndimensions-1, nargs);
--- a/src/share/vm/opto/phaseX.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/phaseX.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -450,6 +450,8 @@
     subsume_node(old, nn);
   }
 
+  bool delay_transform() const { return _delay_transform; }
+
   void set_delay_transform(bool delay) {
     _delay_transform = delay;
   }
--- a/src/share/vm/opto/type.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/type.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -487,6 +487,23 @@
   return false;
 }
 
+//----------------------interface_vs_oop---------------------------------------
+#ifdef ASSERT
+bool Type::interface_vs_oop(const Type *t) const {
+  bool result = false;
+
+  const TypeInstPtr* this_inst = this->isa_instptr();
+  const TypeInstPtr*    t_inst =    t->isa_instptr();
+  if( this_inst && this_inst->is_loaded() && t_inst && t_inst->is_loaded() ) {
+    bool this_interface = this_inst->klass()->is_interface();
+    bool    t_interface =    t_inst->klass()->is_interface();
+    result = this_interface ^ t_interface;
+  }
+
+  return result;
+}
+#endif
+
 //------------------------------meet-------------------------------------------
 // Compute the MEET of two types.  NOT virtual.  It enforces that meet is
 // commutative and the lattice is symmetric.
@@ -507,16 +524,8 @@
   // Interface meet Oop is Not Symmetric:
   // Interface:AnyNull meet Oop:AnyNull == Interface:AnyNull
   // Interface:NotNull meet Oop:NotNull == java/lang/Object:NotNull
-  const TypeInstPtr* this_inst = this->isa_instptr();
-  const TypeInstPtr*    t_inst =    t->isa_instptr();
-  bool interface_vs_oop = false;
-  if( this_inst && this_inst->is_loaded() && t_inst && t_inst->is_loaded() ) {
-    bool this_interface = this_inst->klass()->is_interface();
-    bool    t_interface =    t_inst->klass()->is_interface();
-    interface_vs_oop = this_interface ^ t_interface;
-  }
-
-  if( !interface_vs_oop && (t2t != t->_dual || t2this != _dual) ) {
+
+  if( !interface_vs_oop(t) && (t2t != t->_dual || t2this != _dual) ) {
     tty->print_cr("=== Meet Not Symmetric ===");
     tty->print("t   =                   ");         t->dump(); tty->cr();
     tty->print("this=                   ");            dump(); tty->cr();
@@ -1800,6 +1809,17 @@
   return (intptr_t)_elem + (intptr_t)_size;
 }
 
+//----------------------interface_vs_oop---------------------------------------
+#ifdef ASSERT
+bool TypeAry::interface_vs_oop(const Type *t) const {
+  const TypeAry* t_ary = t->is_ary();
+  if (t_ary) {
+    return _elem->interface_vs_oop(t_ary->_elem);
+  }
+  return false;
+}
+#endif
+
 //------------------------------dump2------------------------------------------
 #ifndef PRODUCT
 void TypeAry::dump2( Dict &d, uint depth, outputStream *st ) const {
@@ -3389,6 +3409,17 @@
   return new TypeAryPtr( dual_ptr(), _const_oop, _ary->dual()->is_ary(),_klass, _klass_is_exact, dual_offset(), dual_instance_id() );
 }
 
+//----------------------interface_vs_oop---------------------------------------
+#ifdef ASSERT
+bool TypeAryPtr::interface_vs_oop(const Type *t) const {
+  const TypeAryPtr* t_aryptr = t->isa_aryptr();
+  if (t_aryptr) {
+    return _ary->interface_vs_oop(t_aryptr->_ary);
+  }
+  return false;
+}
+#endif
+
 //------------------------------dump2------------------------------------------
 #ifndef PRODUCT
 void TypeAryPtr::dump2( Dict &d, uint depth, outputStream *st ) const {
@@ -3453,27 +3484,27 @@
 //------------------------------hash-------------------------------------------
 // Type-specific hashing function.
 int TypeNarrowOop::hash(void) const {
-  return _ooptype->hash() + 7;
+  return _ptrtype->hash() + 7;
 }
 
 
 bool TypeNarrowOop::eq( const Type *t ) const {
   const TypeNarrowOop* tc = t->isa_narrowoop();
   if (tc != NULL) {
-    if (_ooptype->base() != tc->_ooptype->base()) {
+    if (_ptrtype->base() != tc->_ptrtype->base()) {
       return false;
     }
-    return tc->_ooptype->eq(_ooptype);
+    return tc->_ptrtype->eq(_ptrtype);
   }
   return false;
 }
 
 bool TypeNarrowOop::singleton(void) const {    // TRUE if type is a singleton
-  return _ooptype->singleton();
+  return _ptrtype->singleton();
 }
 
 bool TypeNarrowOop::empty(void) const {
-  return _ooptype->empty();
+  return _ptrtype->empty();
 }
 
 //------------------------------xmeet------------------------------------------
@@ -3507,7 +3538,7 @@
     return this;
 
   case NarrowOop: {
-    const Type* result = _ooptype->xmeet(t->make_ptr());
+    const Type* result = _ptrtype->xmeet(t->make_ptr());
     if (result->isa_ptr()) {
       return TypeNarrowOop::make(result->is_ptr());
     }
@@ -3523,13 +3554,13 @@
 }
 
 const Type *TypeNarrowOop::xdual() const {    // Compute dual right now.
-  const TypePtr* odual = _ooptype->dual()->is_ptr();
+  const TypePtr* odual = _ptrtype->dual()->is_ptr();
   return new TypeNarrowOop(odual);
 }
 
 const Type *TypeNarrowOop::filter( const Type *kills ) const {
   if (kills->isa_narrowoop()) {
-    const Type* ft =_ooptype->filter(kills->is_narrowoop()->_ooptype);
+    const Type* ft =_ptrtype->filter(kills->is_narrowoop()->_ptrtype);
     if (ft->empty())
       return Type::TOP;           // Canonical empty value
     if (ft->isa_ptr()) {
@@ -3537,7 +3568,7 @@
     }
     return ft;
   } else if (kills->isa_ptr()) {
-    const Type* ft = _ooptype->join(kills);
+    const Type* ft = _ptrtype->join(kills);
     if (ft->empty())
       return Type::TOP;           // Canonical empty value
     return ft;
@@ -3548,13 +3579,13 @@
 
 
 intptr_t TypeNarrowOop::get_con() const {
-  return _ooptype->get_con();
+  return _ptrtype->get_con();
 }
 
 #ifndef PRODUCT
 void TypeNarrowOop::dump2( Dict & d, uint depth, outputStream *st ) const {
   st->print("narrowoop: ");
-  _ooptype->dump2(d, depth, st);
+  _ptrtype->dump2(d, depth, st);
 }
 #endif
 
--- a/src/share/vm/opto/type.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/opto/type.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -190,6 +190,11 @@
   // Currently, it also works around limitations involving interface types.
   virtual const Type *filter( const Type *kills ) const;
 
+#ifdef ASSERT
+  // One type is interface, the other is oop
+  virtual bool interface_vs_oop(const Type *t) const;
+#endif
+
   // Returns true if this pointer points at memory which contains a
   // compressed oop references.
   bool is_ptr_to_narrowoop() const;
@@ -227,6 +232,11 @@
 
   // Returns this ptr type or the equivalent ptr type for this compressed pointer.
   const TypePtr* make_ptr() const;
+
+  // Returns this oopptr type or the equivalent oopptr type for this compressed pointer.
+  // Asserts if the underlying type is not an oopptr or narrowoop.
+  const TypeOopPtr* make_oopptr() const;
+
   // Returns this compressed pointer or the equivalent compressed version
   // of this pointer type.
   const TypeNarrowOop* make_narrowoop() const;
@@ -546,6 +556,10 @@
   virtual const Type *xmeet( const Type *t ) const;
   virtual const Type *xdual() const;    // Compute dual right now.
   bool ary_must_be_exact() const;  // true if arrays of such are never generic
+#ifdef ASSERT
+  // One type is interface, the other is oop
+  virtual bool interface_vs_oop(const Type *t) const;
+#endif
 #ifndef PRODUCT
   virtual void dump2( Dict &d, uint, outputStream *st  ) const; // Specialized per-Type dumping
 #endif
@@ -867,6 +881,10 @@
   }
   static const TypeAryPtr *_array_body_type[T_CONFLICT+1];
   // sharpen the type of an int which is used as an array size
+#ifdef ASSERT
+  // One type is interface, the other is oop
+  virtual bool interface_vs_oop(const Type *t) const;
+#endif
 #ifndef PRODUCT
   virtual void dump2( Dict &d, uint depth, outputStream *st ) const; // Specialized per-Type dumping
 #endif
@@ -919,13 +937,13 @@
 // between the normal and the compressed form.
 class TypeNarrowOop : public Type {
 protected:
-  const TypePtr* _ooptype; // Could be TypePtr::NULL_PTR
+  const TypePtr* _ptrtype; // Could be TypePtr::NULL_PTR
 
-  TypeNarrowOop( const TypePtr* ooptype): Type(NarrowOop),
-    _ooptype(ooptype) {
-    assert(ooptype->offset() == 0 ||
-           ooptype->offset() == OffsetBot ||
-           ooptype->offset() == OffsetTop, "no real offsets");
+  TypeNarrowOop( const TypePtr* ptrtype): Type(NarrowOop),
+    _ptrtype(ptrtype) {
+    assert(ptrtype->offset() == 0 ||
+           ptrtype->offset() == OffsetBot ||
+           ptrtype->offset() == OffsetTop, "no real offsets");
   }
 public:
   virtual bool eq( const Type *t ) const;
@@ -949,8 +967,8 @@
   }
 
   // returns the equivalent ptr type for this compressed pointer
-  const TypePtr *make_oopptr() const {
-    return _ooptype;
+  const TypePtr *get_ptrtype() const {
+    return _ptrtype;
   }
 
   static const TypeNarrowOop *BOTTOM;
@@ -1137,10 +1155,14 @@
 }
 
 inline const TypePtr* Type::make_ptr() const {
-  return (_base == NarrowOop) ? is_narrowoop()->make_oopptr() :
+  return (_base == NarrowOop) ? is_narrowoop()->get_ptrtype() :
                                 (isa_ptr() ? is_ptr() : NULL);
 }
 
+inline const TypeOopPtr* Type::make_oopptr() const {
+  return (_base == NarrowOop) ? is_narrowoop()->get_ptrtype()->is_oopptr() : is_oopptr();
+}
+
 inline const TypeNarrowOop* Type::make_narrowoop() const {
   return (_base == NarrowOop) ? is_narrowoop() :
                                 (isa_ptr() ? TypeNarrowOop::make(is_ptr()) : NULL);
@@ -1194,6 +1216,8 @@
 #define Op_AndX      Op_AndL
 #define Op_AddX      Op_AddL
 #define Op_SubX      Op_SubL
+#define Op_XorX      Op_XorL
+#define Op_URShiftX  Op_URShiftL
 // conversions
 #define ConvI2X(x)   ConvI2L(x)
 #define ConvL2X(x)   (x)
@@ -1236,6 +1260,8 @@
 #define Op_AndX      Op_AndI
 #define Op_AddX      Op_AddI
 #define Op_SubX      Op_SubI
+#define Op_XorX      Op_XorI
+#define Op_URShiftX  Op_URShiftI
 // conversions
 #define ConvI2X(x)   (x)
 #define ConvL2X(x)   ConvL2I(x)
--- a/src/share/vm/prims/jvmtiEnvBase.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/prims/jvmtiEnvBase.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -606,6 +606,7 @@
     if (!mons->is_empty()) {
       for (int i = 0; i < mons->length(); i++) {
         MonitorInfo *mi = mons->at(i);
+        if (mi->owner_is_scalar_replaced()) continue;
 
         // see if owner of the monitor is our object
         if (mi->owner() != NULL && mi->owner() == hobj()) {
@@ -726,6 +727,8 @@
   for (int i = 0; i < mons->length(); i++) {
     MonitorInfo *mi = mons->at(i);
 
+    if (mi->owner_is_scalar_replaced()) continue;
+
     oop obj = mi->owner();
     if (obj == NULL) {
       // this monitor doesn't have an owning object so skip it
--- a/src/share/vm/prims/unsafe.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/prims/unsafe.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -1048,7 +1048,11 @@
   oop e = JNIHandles::resolve(e_h);
   oop p = JNIHandles::resolve(obj);
   HeapWord* addr = (HeapWord *)index_oop_from_field_offset_long(p, offset);
-  update_barrier_set_pre((void*)addr, e);
+  if (UseCompressedOops) {
+    update_barrier_set_pre((narrowOop*)addr, e);
+  } else {
+    update_barrier_set_pre((oop*)addr, e);
+  }
   oop res = oopDesc::atomic_compare_exchange_oop(x, addr, e);
   jboolean success  = (res == e);
   if (success)
--- a/src/share/vm/runtime/arguments.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/runtime/arguments.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -1202,18 +1202,13 @@
   }
 
 #ifdef _LP64
-  // Compressed Headers do not work with CMS, which uses a bit in the klass
-  // field offset to determine free list chunk markers.
   // Check that UseCompressedOops can be set with the max heap size allocated
   // by ergonomics.
   if (MaxHeapSize <= max_heap_for_compressed_oops()) {
-    if (FLAG_IS_DEFAULT(UseCompressedOops) && !UseG1GC) {
+    if (FLAG_IS_DEFAULT(UseCompressedOops)) {
       // Turn off until bug is fixed.
       // the following line to return it to default status.
       // FLAG_SET_ERGO(bool, UseCompressedOops, true);
-    } else if (UseCompressedOops && UseG1GC) {
-      warning(" UseCompressedOops does not currently work with UseG1GC; switching off UseCompressedOops. ");
-      FLAG_SET_DEFAULT(UseCompressedOops, false);
     }
 #ifdef _WIN64
     if (UseLargePages && UseCompressedOops) {
@@ -1454,6 +1449,7 @@
   if (UseSerialGC)                       i++;
   if (UseConcMarkSweepGC || UseParNewGC) i++;
   if (UseParallelGC || UseParallelOldGC) i++;
+  if (UseG1GC)                           i++;
   if (i > 1) {
     jio_fprintf(defaultStream::error_stream(),
                 "Conflicting collector combinations in option list; "
@@ -2603,22 +2599,6 @@
     return result;
   }
 
-  // These are hacks until G1 is fully supported and tested
-  // but lets you force -XX:+UseG1GC in PRT and get it where it (mostly) works
-  if (UseG1GC) {
-    if (UseConcMarkSweepGC || UseParNewGC || UseParallelGC || UseParallelOldGC || UseSerialGC) {
-#ifndef PRODUCT
-      tty->print_cr("-XX:+UseG1GC is incompatible with other collectors, using UseG1GC");
-#endif // PRODUCT
-      UseConcMarkSweepGC = false;
-      UseParNewGC        = false;
-      UseParallelGC      = false;
-      UseParallelOldGC   = false;
-      UseSerialGC        = false;
-    }
-    no_shared_spaces();
-  }
-
 #ifndef PRODUCT
   if (TraceBytecodesAt != 0) {
     TraceBytecodes = true;
@@ -2676,10 +2656,7 @@
   } else if (UseParNewGC) {
     // Set some flags for ParNew
     set_parnew_gc_flags();
-  }
-  // Temporary; make the "if" an "else-if" before
-  // we integrate G1. XXX
-  if (UseG1GC) {
+  } else if (UseG1GC) {
     // Set some flags for garbage-first, if needed.
     set_g1_gc_flags();
   }
--- a/src/share/vm/runtime/biasedLocking.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/runtime/biasedLocking.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -121,6 +121,7 @@
         // Walk monitors youngest to oldest
         for (int i = len - 1; i >= 0; i--) {
           MonitorInfo* mon_info = monitors->at(i);
+          if (mon_info->owner_is_scalar_replaced()) continue;
           oop owner = mon_info->owner();
           if (owner != NULL) {
             info->append(mon_info);
@@ -694,6 +695,7 @@
           // Walk monitors youngest to oldest
           for (int i = len - 1; i >= 0; i--) {
             MonitorInfo* mon_info = monitors->at(i);
+            if (mon_info->owner_is_scalar_replaced()) continue;
             oop owner = mon_info->owner();
             if (owner != NULL) {
               markOop mark = owner->mark();
--- a/src/share/vm/runtime/deoptimization.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/runtime/deoptimization.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -933,7 +933,7 @@
   GrowableArray<MonitorInfo*>* monitors = cvf->monitors();
   for (int i = 0; i < monitors->length(); i++) {
     MonitorInfo* mon_info = monitors->at(i);
-    if (mon_info->owner() != NULL && !mon_info->eliminated()) {
+    if (!mon_info->eliminated() && mon_info->owner() != NULL) {
       objects_to_revoke->append(Handle(mon_info->owner()));
     }
   }
--- a/src/share/vm/runtime/globals.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/runtime/globals.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -1994,6 +1994,10 @@
   product_rw(bool, PrintHeapAtGC, false,                                    \
           "Print heap layout before and after each GC")                     \
                                                                             \
+  product_rw(bool, PrintHeapAtGCExtended, false,                            \
+          "Prints extended information about the layout of the heap "       \
+          "when -XX:+PrintHeapAtGC is set")                                 \
+                                                                            \
   product(bool, PrintHeapAtSIGBREAK, true,                                  \
           "Print heap layout in response to SIGBREAK")                      \
                                                                             \
--- a/src/share/vm/runtime/safepoint.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/runtime/safepoint.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -49,7 +49,7 @@
     // In the future we should investigate whether CMS can use the
     // more-general mechanism below.  DLD (01/05).
     ConcurrentMarkSweepThread::synchronize(false);
-  } else {
+  } else if (UseG1GC) {
     ConcurrentGCThread::safepoint_synchronize();
   }
 #endif // SERIALGC
@@ -400,7 +400,7 @@
   // If there are any concurrent GC threads resume them.
   if (UseConcMarkSweepGC) {
     ConcurrentMarkSweepThread::desynchronize(false);
-  } else {
+  } else if (UseG1GC) {
     ConcurrentGCThread::safepoint_desynchronize();
   }
 #endif // SERIALGC
--- a/src/share/vm/runtime/sharedRuntime.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/runtime/sharedRuntime.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -119,6 +119,7 @@
     assert(false, "should be optimized out");
     return;
   }
+  assert(orig->is_oop(true /* ignore mark word */), "Error");
   // store the original value that was in the field reference
   thread->satb_mark_queue().enqueue(orig);
 JRT_END
--- a/src/share/vm/runtime/stackValue.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/runtime/stackValue.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -104,7 +104,17 @@
     }
 #endif
     case Location::oop: {
-      Handle h(*(oop *)value_addr); // Wrap a handle around the oop
+      oop val = *(oop *)value_addr;
+#ifdef _LP64
+      if (Universe::is_narrow_oop_base(val)) {
+         // Compiled code may produce decoded oop = narrow_oop_base
+         // when a narrow oop implicit null check is used.
+         // The narrow_oop_base could be NULL or be the address
+         // of the page below heap. Use NULL value for both cases.
+         val = (oop)NULL;
+      }
+#endif
+      Handle h(val); // Wrap a handle around the oop
       return new StackValue(h);
     }
     case Location::addr: {
@@ -146,8 +156,9 @@
     value.jl = ((ConstantLongValue *)sv)->value();
     return new StackValue(value.p);
 #endif
-  } else if (sv->is_object()) {
-    return new StackValue(((ObjectValue *)sv)->value());
+  } else if (sv->is_object()) { // Scalar replaced object in compiled frame
+    Handle ov = ((ObjectValue *)sv)->value();
+    return new StackValue(ov, (ov.is_null()) ? 1 : 0);
   }
 
   // Unknown ScopeValue type
--- a/src/share/vm/runtime/stackValue.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/runtime/stackValue.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -34,9 +34,11 @@
     _i     = value;
   }
 
-  StackValue(Handle value) {
+  StackValue(Handle value, intptr_t scalar_replaced = 0) {
     _type    = T_OBJECT;
+    _i       = scalar_replaced;
     _o       = value;
+    assert(_i == 0 || _o.is_null(), "not null object should not be marked as scalar replaced");
   }
 
   StackValue() {
@@ -56,6 +58,11 @@
     return _o;
   }
 
+  bool obj_is_scalar_replaced() const {
+    assert(type() == T_OBJECT, "type check");
+    return _i != 0;
+  }
+
   void set_obj(Handle value) {
     assert(type() == T_OBJECT, "type check");
     _o = value;
--- a/src/share/vm/runtime/vframe.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/runtime/vframe.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -106,6 +106,7 @@
 
   for (int index = (mons->length()-1); index >= 0; index--) {
     MonitorInfo* monitor = mons->at(index);
+    if (monitor->eliminated() && is_compiled_frame()) continue; // skip eliminated monitor
     oop obj = monitor->owner();
     if (obj == NULL) continue; // skip unowned monitor
     //
@@ -162,6 +163,18 @@
     bool found_first_monitor = false;
     for (int index = (mons->length()-1); index >= 0; index--) {
       MonitorInfo* monitor = mons->at(index);
+      if (monitor->eliminated() && is_compiled_frame()) { // Eliminated in compiled code
+        if (monitor->owner_is_scalar_replaced()) {
+          Klass* k = Klass::cast(monitor->owner_klass());
+          st->print("\t- eliminated <owner is scalar replaced> (a %s)", k->external_name());
+        } else {
+          oop obj = monitor->owner();
+          if (obj != NULL) {
+            print_locked_object_class_name(st, obj, "eliminated");
+          }
+        }
+        continue;
+      }
       if (monitor->owner() != NULL) {
 
         // First, assume we have the monitor locked. If we haven't found an
@@ -171,11 +184,11 @@
 
         const char *lock_state = "locked"; // assume we have the monitor locked
         if (!found_first_monitor && frame_count == 0) {
-         markOop mark = monitor->owner()->mark();
-         if (mark->has_monitor() &&
-             mark->monitor() == thread()->current_pending_monitor()) {
+          markOop mark = monitor->owner()->mark();
+          if (mark->has_monitor() &&
+              mark->monitor() == thread()->current_pending_monitor()) {
             lock_state = "waiting to lock";
-         }
+          }
         }
 
         found_first_monitor = true;
@@ -206,7 +219,7 @@
   for (BasicObjectLock* current = (fr().previous_monitor_in_interpreter_frame(fr().interpreter_frame_monitor_begin()));
        current >= fr().interpreter_frame_monitor_end();
        current = fr().previous_monitor_in_interpreter_frame(current)) {
-    result->push(new MonitorInfo(current->obj(), current->lock(), false));
+    result->push(new MonitorInfo(current->obj(), current->lock(), false, false));
   }
   return result;
 }
@@ -531,8 +544,18 @@
   tty->print_cr("\tmonitor list:");
   for (int index = (list->length()-1); index >= 0; index--) {
     MonitorInfo* monitor = list->at(index);
-    tty->print("\t  obj\t"); monitor->owner()->print_value();
-    tty->print("(" INTPTR_FORMAT ")", (address)monitor->owner());
+    tty->print("\t  obj\t");
+    if (monitor->owner_is_scalar_replaced()) {
+      Klass* k = Klass::cast(monitor->owner_klass());
+      tty->print("( is scalar replaced %s)", k->external_name());
+    } else if (monitor->owner() == NULL) {
+      tty->print("( null )");
+    } else {
+      monitor->owner()->print_value();
+      tty->print("(" INTPTR_FORMAT ")", (address)monitor->owner());
+    }
+    if (monitor->eliminated() && is_compiled_frame())
+      tty->print(" ( lock is eliminated )");
     tty->cr();
     tty->print("\t  ");
     monitor->lock()->print_on(tty);
--- a/src/share/vm/runtime/vframe.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/runtime/vframe.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -230,18 +230,36 @@
  private:
   oop        _owner; // the object owning the monitor
   BasicLock* _lock;
+  oop        _owner_klass; // klass if owner was scalar replaced
   bool       _eliminated;
+  bool       _owner_is_scalar_replaced;
  public:
   // Constructor
-  MonitorInfo(oop owner, BasicLock* lock, bool eliminated) {
-    _owner = owner;
+  MonitorInfo(oop owner, BasicLock* lock, bool eliminated, bool owner_is_scalar_replaced) {
+    if (!owner_is_scalar_replaced) {
+      _owner = owner;
+      _owner_klass = NULL;
+    } else {
+      assert(eliminated, "monitor should be eliminated for scalar replaced object");
+      _owner = NULL;
+      _owner_klass = owner;
+    }
     _lock  = lock;
     _eliminated = eliminated;
+    _owner_is_scalar_replaced = owner_is_scalar_replaced;
   }
   // Accessors
-  oop        owner() const { return _owner; }
+  oop        owner() const {
+    assert(!_owner_is_scalar_replaced, "should not be called for scalar replaced object");
+    return _owner;
+  }
+  klassOop   owner_klass() const {
+    assert(_owner_is_scalar_replaced, "should not be called for not scalar replaced object");
+    return (klassOop)_owner_klass;
+  }
   BasicLock* lock()  const { return _lock;  }
   bool eliminated()  const { return _eliminated; }
+  bool owner_is_scalar_replaced()  const { return _owner_is_scalar_replaced; }
 };
 
 class vframeStreamCommon : StackObj {
--- a/src/share/vm/runtime/vframeArray.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/runtime/vframeArray.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -61,6 +61,7 @@
     // Migrate the BasicLocks from the stack to the monitor chunk
     for (index = 0; index < list->length(); index++) {
       MonitorInfo* monitor = list->at(index);
+      assert(!monitor->owner_is_scalar_replaced(), "object should be reallocated already");
       assert(monitor->owner() == NULL || (!monitor->owner()->is_unlocked() && !monitor->owner()->has_bias_pattern()), "object must be null or locked, and unbiased");
       BasicObjectLock* dest = _monitors->at(index);
       dest->set_obj(monitor->owner());
@@ -89,6 +90,7 @@
     StackValue* value = locs->at(index);
     switch(value->type()) {
       case T_OBJECT:
+        assert(!value->obj_is_scalar_replaced(), "object should be reallocated already");
         // preserve object type
         _locals->add( new StackValue((intptr_t) (value->get_obj()()), T_OBJECT ));
         break;
@@ -113,6 +115,7 @@
     StackValue* value = exprs->at(index);
     switch(value->type()) {
       case T_OBJECT:
+        assert(!value->obj_is_scalar_replaced(), "object should be reallocated already");
         // preserve object type
         _expressions->add( new StackValue((intptr_t) (value->get_obj()()), T_OBJECT ));
         break;
--- a/src/share/vm/runtime/vframe_hp.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/runtime/vframe_hp.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -190,7 +190,7 @@
     // Casting away const
     frame& fr = (frame&) _fr;
     MonitorInfo* info = new MonitorInfo(fr.compiled_synchronized_native_monitor_owner(nm),
-                                        fr.compiled_synchronized_native_monitor(nm), false);
+                                        fr.compiled_synchronized_native_monitor(nm), false, false);
     monitors->push(info);
     return monitors;
   }
@@ -201,8 +201,20 @@
   GrowableArray<MonitorInfo*>* result = new GrowableArray<MonitorInfo*>(monitors->length());
   for (int index = 0; index < monitors->length(); index++) {
     MonitorValue* mv = monitors->at(index);
-    StackValue *owner_sv = create_stack_value(mv->owner()); // it is an oop
-    result->push(new MonitorInfo(owner_sv->get_obj()(), resolve_monitor_lock(mv->basic_lock()), mv->eliminated()));
+    ScopeValue*   ov = mv->owner();
+    StackValue *owner_sv = create_stack_value(ov); // it is an oop
+    if (ov->is_object() && owner_sv->obj_is_scalar_replaced()) { // The owner object was scalar replaced
+      assert(mv->eliminated(), "monitor should be eliminated for scalar replaced object");
+      // Put klass for scalar replaced object.
+      ScopeValue* kv = ((ObjectValue *)ov)->klass();
+      assert(kv->is_constant_oop(), "klass should be oop constant for scalar replaced object");
+      KlassHandle k(((ConstantOopReadValue*)kv)->value()());
+      result->push(new MonitorInfo(k->as_klassOop(), resolve_monitor_lock(mv->basic_lock()),
+                                   mv->eliminated(), true));
+    } else {
+      result->push(new MonitorInfo(owner_sv->get_obj()(), resolve_monitor_lock(mv->basic_lock()),
+                                   mv->eliminated(), false));
+    }
   }
   return result;
 }
--- a/src/share/vm/utilities/taskqueue.cpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/utilities/taskqueue.cpp	Mon Jul 27 17:23:52 2009 -0400
@@ -64,15 +64,18 @@
 }
 
 void ParallelTaskTerminator::yield() {
+  assert(_offered_termination <= _n_threads, "Invariant");
   os::yield();
 }
 
 void ParallelTaskTerminator::sleep(uint millis) {
+  assert(_offered_termination <= _n_threads, "Invariant");
   os::sleep(Thread::current(), millis, false);
 }
 
 bool
 ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) {
+  assert(_offered_termination < _n_threads, "Invariant");
   Atomic::inc(&_offered_termination);
 
   uint yield_count = 0;
@@ -96,6 +99,7 @@
   // Loop waiting for all threads to offer termination or
   // more work.
   while (true) {
+    assert(_offered_termination <= _n_threads, "Invariant");
     // Are all threads offering termination?
     if (_offered_termination == _n_threads) {
       return true;
@@ -151,6 +155,7 @@
       if (peek_in_queue_set() ||
           (terminator != NULL && terminator->should_exit_termination())) {
         Atomic::dec(&_offered_termination);
+        assert(_offered_termination < _n_threads, "Invariant");
         return false;
       }
     }
--- a/src/share/vm/utilities/taskqueue.hpp	Mon Jul 27 09:06:22 2009 -0700
+++ b/src/share/vm/utilities/taskqueue.hpp	Mon Jul 27 17:23:52 2009 -0400
@@ -560,8 +560,14 @@
 class StarTask {
   void*  _holder;        // either union oop* or narrowOop*
  public:
-  StarTask(narrowOop *p) { _holder = (void *)((uintptr_t)p | COMPRESSED_OOP_MASK); }
-  StarTask(oop *p)       { _holder = (void*)p; }
+  StarTask(narrowOop* p) {
+    assert(((uintptr_t)p & COMPRESSED_OOP_MASK) == 0, "Information loss!");
+    _holder = (void *)((uintptr_t)p | COMPRESSED_OOP_MASK);
+  }
+  StarTask(oop* p)       {
+    assert(((uintptr_t)p & COMPRESSED_OOP_MASK) == 0, "Information loss!");
+    _holder = (void*)p;
+  }
   StarTask()             { _holder = NULL; }
   operator oop*()        { return (oop*)_holder; }
   operator narrowOop*()  {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/5057225/Test5057225.java	Mon Jul 27 17:23:52 2009 -0400
@@ -0,0 +1,140 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/**
+ * @test
+ * @bug 5057225
+ * @summary Remove useless I2L conversions
+ *
+ * @run main/othervm -Xcomp -XX:CompileOnly=Test5057225.doload Test5057225
+ */
+
+import java.net.URLClassLoader;
+
+public class Test5057225 {
+    static byte[]  ba = new byte[]  { -1 };
+    static short[] sa = new short[] { -1 };
+    static int[]   ia = new int[]   { -1 };
+
+    static final long[] BYTE_MASKS = {
+         0x0FL,
+         0x7FL,  // 7-bit
+         0xFFL,
+    };
+
+    static final long[] SHORT_MASKS = {
+        0x000FL,
+        0x007FL,  // 7-bit
+        0x00FFL,
+        0x0FFFL,
+        0x3FFFL,  // 14-bit
+        0x7FFFL,  // 15-bit
+        0xFFFFL,
+    };
+
+    static final long[] INT_MASKS = {
+        0x0000000FL,
+        0x0000007FL,  // 7-bit
+        0x000000FFL,
+        0x00000FFFL,
+        0x00003FFFL,  // 14-bit
+        0x00007FFFL,  // 15-bit
+        0x0000FFFFL,
+        0x00FFFFFFL,
+        0x7FFFFFFFL,  // 31-bit
+        0xFFFFFFFFL,
+    };
+
+    public static void main(String[] args) throws Exception {
+        for (int i = 0; i < BYTE_MASKS.length; i++) {
+            System.setProperty("value", "" + BYTE_MASKS[i]);
+            loadAndRunClass("Test5057225$loadUB2L");
+        }
+
+        for (int i = 0; i < SHORT_MASKS.length; i++) {
+            System.setProperty("value", "" + SHORT_MASKS[i]);
+            loadAndRunClass("Test5057225$loadUS2L");
+        }
+
+        for (int i = 0; i < INT_MASKS.length; i++) {
+            System.setProperty("value", "" + INT_MASKS[i]);
+            loadAndRunClass("Test5057225$loadUI2L");
+        }
+    }
+
+    static void check(long result, long expected) {
+        if (result != expected)
+            throw new InternalError(result + " != " + expected);
+    }
+
+    static void loadAndRunClass(String classname) throws Exception {
+        Class cl = Class.forName(classname);
+        URLClassLoader apploader = (URLClassLoader) cl.getClassLoader();
+        ClassLoader loader = new URLClassLoader(apploader.getURLs(), apploader.getParent());
+        Class c = loader.loadClass(classname);
+        Runnable r = (Runnable) c.newInstance();
+        r.run();
+    }
+
+    public static class loadUB2L implements Runnable {
+        static final long MASK;
+        static {
+            long value = 0;
+            try {
+                value = Long.decode(System.getProperty("value"));
+            } catch (Throwable e) {}
+            MASK = value;
+        }
+
+        public void run() { check(doload(ba), MASK); }
+        static long doload(byte[] ba) { return ba[0] & MASK; }
+    }
+
+    public static class loadUS2L implements Runnable {
+        static final long MASK;
+        static {
+            long value = 0;
+            try {
+                value = Long.decode(System.getProperty("value"));
+            } catch (Throwable e) {}
+            MASK = value;
+        }
+
+        public void run() { check(doload(sa), MASK); }
+        static long doload(short[] sa) { return sa[0] & MASK; }
+    }
+
+    public static class loadUI2L implements Runnable {
+        static final long MASK;
+        static {
+            long value = 0;
+            try {
+                value = Long.decode(System.getProperty("value"));
+            } catch (Throwable e) {}
+            MASK = value;
+        }
+
+        public void run() { check(doload(ia), MASK); }
+        static long doload(int[] ia) { return ia[0] & MASK; }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6826736/Test.java	Mon Jul 27 17:23:52 2009 -0400
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6826736
+ * @summary CMS: core dump with -XX:+UseCompressedOops
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -Xbatch -XX:+ScavengeALot -XX:+UseCompressedOops -XX:HeapBaseMinAddress=32g -XX:CompileThreshold=100 -XX:CompileOnly=Test.test -XX:-BlockLayoutRotateLoops -XX:LoopUnrollLimit=0 Test
+ */
+
+public class Test {
+    int[] arr;
+    int[] arr2;
+    int test(int r) {
+        for (int i = 0; i < 100; i++) {
+            for (int j = i; j < 100; j++) {
+               int a = 0;
+               for (long k = 0; k < 100; k++) {
+                  a += k;
+               }
+               if (arr != null)
+                   a = arr[j];
+               r += a;
+            }
+        }
+        return r;
+    }
+
+    public static void main(String[] args) {
+        int r = 0;
+        Test t = new Test();
+        for (int i = 0; i < 100; i++) {
+            t.arr = new int[100];
+            r = t.test(r);
+        }
+        System.out.println("Warmup 1 is done.");
+        for (int i = 0; i < 100; i++) {
+            t.arr = null;
+            r = t.test(r);
+        }
+        System.out.println("Warmup 2 is done.");
+        for (int i = 0; i < 100; i++) {
+            t.arr = new int[100];
+            r = t.test(r);
+        }
+        System.out.println("Warmup is done.");
+        for (int i = 0; i < 100; i++) {
+            t.arr = new int[1000000];
+            t.arr = null;
+            r = t.test(r);
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6837094/Test.java	Mon Jul 27 17:23:52 2009 -0400
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2009 Google Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6837094
+ * @summary False positive for "meet not symmetric" failure
+ *
+ * @run main/othervm -Xbatch -XX:CompileOnly=Test.collectIs,Test$Factory$1.getArray,Test$Factory$2.getArray Test
+ */
+
+import java.util.Set;
+import java.util.HashSet;
+
+public class Test {
+
+  private interface Factory<M extends Interface> {
+    Factory<Child0> Zero = new Factory<Child0>() {
+      public Child0[] getArray() { return new Child0[1]; }
+    };
+
+    Factory<Child1> One = new Factory<Child1>() {
+      public Child1[] getArray() { return new Child1[1]; }
+    };
+
+    M[] getArray();
+  }
+
+  /**
+   * C2 asserts when compiling this method. Bimorphic inlining happens at
+   * getArray call site. A Phi in the catch block tries to join the meet type
+   * from he inline site (Parent[]) with the type expected by CI (Interface[]).
+   *
+   * C2 throws an assert when it doesn't need to.
+   */
+  private static <I extends Interface> void collectIs(
+      Factory<I> factory, Set<Interface> s) {
+    for (I i : factory.getArray()) {
+      try {
+        s.add(i);
+      } catch (Exception e) {
+      }
+    }
+  }
+
+  static public void main(String argv[]) {
+    Set<Interface> s = new HashSet();
+
+    for (int i = 0; i < 25000; i++) {
+      collectIs(Factory.Zero, s);
+      collectIs(Factory.One, s);
+    }
+  }
+}
+
+/**
+ * Establish necessary class hierarchy
+ */
+
+interface Interface {
+}
+
+class Parent {
+}
+
+class Child0 extends Parent implements Interface {
+}
+
+class Child1 extends Parent implements Interface {
+}
+
+class Child2 extends Parent implements Interface {
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6849574/Test.java	Mon Jul 27 17:23:52 2009 -0400
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6849574
+ * @summary VM crash using NonBlockingHashMap (high_scale_lib)
+ *
+ * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+VerifyBeforeGC Test
+ */
+
+import java.util.concurrent.atomic.*;
+
+public class Test extends Thread {
+
+    public static void main(String[] args) {
+        AtomicReferenceArray a = new AtomicReferenceArray(10000);
+        for (int i = 0; i < 100000; i++) {
+            a.getAndSet(9999, new Object());
+            if (i > 99990) System.gc();
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6851282/Test.java	Mon Jul 27 17:23:52 2009 -0400
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
<