changeset 699:00bcc4b01dde

Merge
author trims
date Fri, 27 Mar 2009 16:54:56 -0700
parents 1b1e8f1a4fe8 0aeec7d15d30
children 9ab385cb0c42
files
diffstat 106 files changed, 3169 insertions(+), 1323 deletions(-) [+]
line wrap: on
line diff
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/Debugger.java	Thu Mar 19 13:25:23 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/Debugger.java	Fri Mar 27 16:54:56 2009 -0700
@@ -118,9 +118,9 @@
   public long getJIntSize();
   public long getJLongSize();
   public long getJShortSize();
-  public long getHeapBase();
   public long getHeapOopSize();
-  public long getLogMinObjAlignmentInBytes();
+  public long getNarrowOopBase();
+  public int  getNarrowOopShift();
 
   public ReadResult readBytesFromProcess(long address, long numBytes)
     throws DebuggerException;
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/DebuggerBase.java	Thu Mar 19 13:25:23 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/DebuggerBase.java	Fri Mar 27 16:54:56 2009 -0700
@@ -56,8 +56,8 @@
   // heap data.
   protected long oopSize;
   protected long heapOopSize;
-  protected long heapBase;                 // heap base for compressed oops.
-  protected long logMinObjAlignmentInBytes; // Used to decode compressed oops.
+  protected long narrowOopBase;  // heap base for compressed oops.
+  protected int  narrowOopShift; // shift to decode compressed oops.
   // Should be initialized if desired by calling initCache()
   private PageCache cache;
 
@@ -159,10 +159,10 @@
     javaPrimitiveTypesConfigured = true;
   }
 
-  public void putHeapConst(long heapBase, long heapOopSize, long logMinObjAlignmentInBytes) {
-    this.heapBase = heapBase;
+  public void putHeapConst(long heapOopSize, long narrowOopBase, int narrowOopShift) {
     this.heapOopSize = heapOopSize;
-    this.logMinObjAlignmentInBytes = logMinObjAlignmentInBytes;
+    this.narrowOopBase = narrowOopBase;
+    this.narrowOopShift = narrowOopShift;
   }
 
   /** May be called by subclasses if desired to initialize the page
@@ -459,7 +459,7 @@
     long value = readCInteger(address, getHeapOopSize(), true);
     if (value != 0) {
       // See oop.inline.hpp decode_heap_oop
-      value = (long)(heapBase + (long)(value << logMinObjAlignmentInBytes));
+      value = (long)(narrowOopBase + (long)(value << narrowOopShift));
     }
     return value;
   }
@@ -545,10 +545,10 @@
     return heapOopSize;
   }
 
-  public long getHeapBase() {
-    return heapBase;
+  public long getNarrowOopBase() {
+    return narrowOopBase;
   }
-  public long getLogMinObjAlignmentInBytes() {
-    return logMinObjAlignmentInBytes;
+  public int getNarrowOopShift() {
+    return narrowOopShift;
   }
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/JVMDebugger.java	Thu Mar 19 13:25:23 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/JVMDebugger.java	Fri Mar 27 16:54:56 2009 -0700
@@ -42,5 +42,5 @@
                                               long jintSize,
                                               long jlongSize,
                                               long jshortSize);
-  public void putHeapConst(long heapBase, long heapOopSize, long logMinObjAlignment);
+  public void putHeapConst(long heapOopSize, long narrowOopBase, int narrowOopShift);
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebugger.java	Thu Mar 19 13:25:23 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebugger.java	Fri Mar 27 16:54:56 2009 -0700
@@ -65,9 +65,10 @@
   public long      getJIntSize() throws RemoteException;
   public long      getJLongSize() throws RemoteException;
   public long      getJShortSize() throws RemoteException;
-  public long      getHeapBase() throws RemoteException;
   public long      getHeapOopSize() throws RemoteException;
-  public long      getLogMinObjAlignmentInBytes() throws RemoteException;
+  public long      getNarrowOopBase() throws RemoteException;
+  public int       getNarrowOopShift() throws RemoteException;
+
   public boolean   areThreadsEqual(long addrOrId1, boolean isAddress1,
                                    long addrOrId2, boolean isAddress2) throws RemoteException;
   public int       getThreadHashCode(long addrOrId, boolean isAddress) throws RemoteException;
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java	Thu Mar 19 13:25:23 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java	Fri Mar 27 16:54:56 2009 -0700
@@ -85,9 +85,9 @@
       jlongSize    = remoteDebugger.getJLongSize();
       jshortSize   = remoteDebugger.getJShortSize();
       javaPrimitiveTypesConfigured = true;
-      heapBase     = remoteDebugger.getHeapBase();
+      narrowOopBase  = remoteDebugger.getNarrowOopBase();
+      narrowOopShift = remoteDebugger.getNarrowOopShift();
       heapOopSize  = remoteDebugger.getHeapOopSize();
-      logMinObjAlignmentInBytes  = remoteDebugger.getLogMinObjAlignmentInBytes();
     }
     catch (RemoteException e) {
       throw new DebuggerException(e);
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerServer.java	Thu Mar 19 13:25:23 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerServer.java	Fri Mar 27 16:54:56 2009 -0700
@@ -114,17 +114,18 @@
     return debugger.getJShortSize();
   }
 
-  public long getHeapBase() throws RemoteException {
-    return debugger.getHeapBase();
-  }
-
   public long getHeapOopSize() throws RemoteException {
     return debugger.getHeapOopSize();
   }
 
-  public long getLogMinObjAlignmentInBytes() throws RemoteException {
-    return debugger.getLogMinObjAlignmentInBytes();
+  public long getNarrowOopBase() throws RemoteException {
+    return debugger.getNarrowOopBase();
   }
+
+  public int  getNarrowOopShift() throws RemoteException {
+    return debugger.getNarrowOopShift();
+  }
+
   public boolean   areThreadsEqual(long addrOrId1, boolean isAddress1,
                                    long addrOrId2, boolean isAddress2) throws RemoteException {
     ThreadProxy t1 = getThreadProxy(addrOrId1, isAddress1);
--- a/agent/src/share/classes/sun/jvm/hotspot/memory/Universe.java	Thu Mar 19 13:25:23 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/memory/Universe.java	Fri Mar 27 16:54:56 2009 -0700
@@ -53,7 +53,8 @@
   // system obj array klass object
   private static sun.jvm.hotspot.types.OopField systemObjArrayKlassObjField;
 
-  private static AddressField heapBaseField;
+  private static AddressField narrowOopBaseField;
+  private static CIntegerField narrowOopShiftField;
 
   static {
     VM.registerVMInitializedObserver(new Observer() {
@@ -86,7 +87,8 @@
 
     systemObjArrayKlassObjField = type.getOopField("_systemObjArrayKlassObj");
 
-    heapBaseField = type.getAddressField("_heap_base");
+    narrowOopBaseField = type.getAddressField("_narrow_oop._base");
+    narrowOopShiftField = type.getCIntegerField("_narrow_oop._shift");
   }
 
   public Universe() {
@@ -100,14 +102,18 @@
     }
   }
 
-  public static long getHeapBase() {
-    if (heapBaseField.getValue() == null) {
+  public static long getNarrowOopBase() {
+    if (narrowOopBaseField.getValue() == null) {
       return 0;
     } else {
-      return heapBaseField.getValue().minus(null);
+      return narrowOopBaseField.getValue().minus(null);
     }
   }
 
+  public static int getNarrowOopShift() {
+    return (int)narrowOopShiftField.getValue();
+  }
+
   /** Returns "TRUE" iff "p" points into the allocated area of the heap. */
   public boolean isIn(Address p) {
     return heap().isIn(p);
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Thu Mar 19 13:25:23 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Fri Mar 27 16:54:56 2009 -0700
@@ -342,11 +342,12 @@
       throw new RuntimeException("Attempt to initialize VM twice");
     }
     soleInstance = new VM(db, debugger, debugger.getMachineDescription().isBigEndian());
-    debugger.putHeapConst(Universe.getHeapBase(), soleInstance.getHeapOopSize(),
-                          soleInstance.logMinObjAlignmentInBytes);
+    debugger.putHeapConst(soleInstance.getHeapOopSize(), Universe.getNarrowOopBase(),
+                          Universe.getNarrowOopShift());
     for (Iterator iter = vmInitializedObservers.iterator(); iter.hasNext(); ) {
       ((Observer) iter.next()).update(null, null);
     }
+
   }
 
   /** This is used by the debugging system */
--- a/make/jprt.properties	Thu Mar 19 13:25:23 2009 -0700
+++ b/make/jprt.properties	Fri Mar 27 16:54:56 2009 -0700
@@ -19,12 +19,12 @@
 # Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 # CA 95054 USA or visit www.sun.com if you need additional information or
 # have any questions.
-#  
+#
 #
 
 # Properties for jprt
 
-# All build result bundles are full jdks, so the 64bit testing does not 
+# All build result bundles are full jdks, so the 64bit testing does not
 #    need the 32bit sibling bundle installed.
 #    Note: If the hotspot/make/Makefile changed to only bundle the 64bit files
 #          when bundling 64bit, and stripped out the 64bit files from any 32bit
@@ -89,60 +89,52 @@
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jvm98, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-scimark, \
     ${jprt.my.solaris.sparc}-product-{c1|c2}-runThese, \
-    ${jprt.my.solaris.sparc}-product-{c1|c2}-runThese_Xcomp, \
-    ${jprt.my.solaris.sparc}-product-{c1|c2}-runThese_Xcomp_2, \
-    ${jprt.my.solaris.sparc}-product-{c1|c2}-runThese_Xcomp_3, \
     ${jprt.my.solaris.sparc}-fastdebug-c1-runThese_Xshare, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_default, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_SerialGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_ParallelGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_ParNewGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_CMS, \
-    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_default_2, \
-    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_SerialGC_2, \
-    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_ParallelGC_2, \
-    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_ParNewGC_2, \
-    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_CMS_2, \
+    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_G1, \
+    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_ParOldGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_default, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_SerialGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_ParallelGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_ParNewGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_CMS, \
+    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_G1, \
+    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_ParOldGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_default, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_SerialGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_ParallelGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_CMS, \
-    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-scimark_2, \
-    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-scimark_3
+    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_G1, \
+    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_ParOldGC
 
 jprt.my.solaris.sparcv9.test.targets= \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jvm98, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-scimark, \
     ${jprt.my.solaris.sparcv9}-product-c2-runThese, \
-    ${jprt.my.solaris.sparcv9}-product-c2-runThese_Xcomp, \
-    ${jprt.my.solaris.sparcv9}-product-c2-runThese_Xcomp_2, \
-    ${jprt.my.solaris.sparcv9}-product-c2-runThese_Xcomp_3, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_default, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_SerialGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_ParallelGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_ParNewGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_CMS, \
-    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_default_2, \
-    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_SerialGC_2, \
-    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_ParallelGC_2, \
-    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_ParNewGC_2, \
-    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_CMS_2, \
+    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_G1, \
+    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_ParOldGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_default, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_SerialGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_ParallelGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_ParNewGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_CMS, \
+    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_G1, \
+    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_ParOldGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_default, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_SerialGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_ParallelGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_CMS, \
-    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-scimark_2, \
-    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-scimark_3
+    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_G1, \
+    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_ParOldGC
 
 jprt.my.solaris.x64.test.targets= \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jvm98, \
@@ -154,73 +146,80 @@
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_ParallelGC, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_ParNewGC, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_CMS, \
-    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_default_2, \
-    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_SerialGC_2, \
-    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_ParallelGC_2, \
-    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_ParNewGC_2, \
-    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_CMS_2, \
+    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_G1, \
+    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_ParOldGC, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_default, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_SerialGC, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_ParallelGC, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_ParNewGC, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_CMS, \
+    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_G1, \
+    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_ParOldGC, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jbb_default, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jbb_SerialGC, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jbb_ParallelGC, \
-    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jbb_CMS
+    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_CMS, \
+    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_G1, \
+    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_ParOldGC
 
 jprt.my.solaris.i586.test.targets= \
     ${jprt.my.solaris.i586}-{product|fastdebug}-{c1|c2}-jvm98, \
     ${jprt.my.solaris.i586}-{product|fastdebug}-{c1|c2}-scimark, \
     ${jprt.my.solaris.i586}-product-{c1|c2}-runThese_Xcomp, \
-    ${jprt.my.solaris.i586}-product-c2-runThese_Xcomp_2, \
-    ${jprt.my.solaris.i586}-fastdebug-c1-runThese_Xcomp_2, \
+    ${jprt.my.solaris.i586}-fastdebug-c1-runThese_Xcomp, \
     ${jprt.my.solaris.i586}-fastdebug-c1-runThese_Xshare, \
     ${jprt.my.solaris.i586}-product-c1-GCBasher_default, \
     ${jprt.my.solaris.i586}-product-c1-GCBasher_SerialGC, \
     ${jprt.my.solaris.i586}-product-c1-GCBasher_ParallelGC, \
     ${jprt.my.solaris.i586}-product-c1-GCBasher_ParNewGC, \
     ${jprt.my.solaris.i586}-product-c1-GCBasher_CMS, \
+    ${jprt.my.solaris.i586}-product-c1-GCBasher_G1, \
+    ${jprt.my.solaris.i586}-product-c1-GCBasher_ParOldGC, \
     ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_default, \
     ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_SerialGC, \
     ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_ParallelGC, \
     ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_ParNewGC, \
     ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_CMS, \
+    ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_G1, \
+    ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_ParOldGC, \
     ${jprt.my.solaris.i586}-product-c1-GCOld_default, \
     ${jprt.my.solaris.i586}-product-c1-GCOld_SerialGC, \
     ${jprt.my.solaris.i586}-product-c1-GCOld_ParallelGC, \
     ${jprt.my.solaris.i586}-product-c1-GCOld_ParNewGC, \
     ${jprt.my.solaris.i586}-product-c1-GCOld_CMS, \
+    ${jprt.my.solaris.i586}-product-c1-GCOld_G1, \
+    ${jprt.my.solaris.i586}-product-c1-GCOld_ParOldGC, \
     ${jprt.my.solaris.i586}-fastdebug-c2-jbb_default, \
     ${jprt.my.solaris.i586}-fastdebug-c2-jbb_ParallelGC, \
     ${jprt.my.solaris.i586}-fastdebug-c2-jbb_CMS, \
-    ${jprt.my.solaris.i586}-{product|fastdebug}-{c1|c2}-scimark_2, \
-    ${jprt.my.solaris.i586}-{product|fastdebug}-{c1|c2}-scimark_3
+    ${jprt.my.solaris.i586}-fastdebug-c2-jbb_G1, \
+    ${jprt.my.solaris.i586}-fastdebug-c2-jbb_ParOldGC
 
 jprt.my.linux.i586.test.targets = \
     ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-jvm98, \
     ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-scimark, \
     ${jprt.my.linux.i586}-product-c1-runThese_Xcomp, \
-    ${jprt.my.linux.i586}-product-c1-runThese_Xcomp_2, \
-    ${jprt.my.linux.i586}-product-c1-runThese_Xcomp_3, \
     ${jprt.my.linux.i586}-fastdebug-c1-runThese_Xshare, \
     ${jprt.my.linux.i586}-fastdebug-c2-runThese_Xcomp, \
-    ${jprt.my.linux.i586}-fastdebug-c2-runThese_Xcomp_2, \
     ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_default, \
     ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_SerialGC, \
     ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_ParallelGC, \
     ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_ParNewGC, \
     ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_CMS, \
+    ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_G1, \
+    ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_ParOldGC, \
     ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_default, \
     ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_SerialGC, \
     ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_ParallelGC, \
     ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_ParNewGC, \
     ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_CMS, \
+    ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_G1, \
+    ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_ParOldGC, \
     ${jprt.my.linux.i586}-{product|fastdebug}-c1-jbb_default, \
     ${jprt.my.linux.i586}-{product|fastdebug}-c1-jbb_ParallelGC, \
     ${jprt.my.linux.i586}-{product|fastdebug}-c1-jbb_CMS, \
-    ${jprt.my.linux.i586}-{product|fastdebug}-c2-scimark_2, \
-    ${jprt.my.linux.i586}-{product|fastdebug}-c2-scimark_3
+    ${jprt.my.linux.i586}-{product|fastdebug}-c1-jbb_G1, \
+    ${jprt.my.linux.i586}-{product|fastdebug}-c1-jbb_ParOldGC
 
 jprt.my.linux.x64.test.targets = \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-jvm98, \
@@ -230,15 +229,19 @@
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCBasher_ParallelGC, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCBasher_ParNewGC, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCBasher_CMS, \
+    ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCBasher_G1, \
+    ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCBasher_ParOldGC, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_default, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_SerialGC, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_ParallelGC, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_ParNewGC, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_CMS, \
+    ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_G1, \
+    ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_ParOldGC, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-jbb_default, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-jbb_ParallelGC, \
-    ${jprt.my.linux.x64}-{product|fastdebug}-c2-scimark_2, \
-    ${jprt.my.linux.x64}-{product|fastdebug}-c2-scimark_3
+    ${jprt.my.linux.x64}-{product|fastdebug}-c2-jbb_G1, \
+    ${jprt.my.linux.x64}-{product|fastdebug}-c2-jbb_ParOldGC
 
 jprt.my.windows.i586.test.targets = \
     ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-jvm98, \
@@ -251,16 +254,20 @@
     ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-GCBasher_ParallelGC, \
     ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-GCBasher_ParNewGC, \
     ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-GCBasher_CMS, \
+    ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-GCBasher_G1, \
+    ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-GCBasher_ParOldGC, \
     ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_default, \
     ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_SerialGC, \
     ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_ParallelGC, \
     ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_ParNewGC, \
     ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_CMS, \
+    ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_G1, \
+    ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_ParOldGC, \
     ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-jbb_default, \
     ${jprt.my.windows.i586}-product-{c1|c2}-jbb_ParallelGC, \
     ${jprt.my.windows.i586}-product-{c1|c2}-jbb_CMS, \
-    ${jprt.my.windows.i586}-product-{c1|c2}-scimark_2, \
-    ${jprt.my.windows.i586}-product-{c1|c2}-scimark_3
+    ${jprt.my.windows.i586}-product-{c1|c2}-jbb_G1, \
+    ${jprt.my.windows.i586}-product-{c1|c2}-jbb_ParOldGC
 
 jprt.my.windows.x64.test.targets = \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-jvm98, \
@@ -272,16 +279,20 @@
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_ParallelGC, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_ParNewGC, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_CMS, \
+    ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_G1, \
+    ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_ParOldGC, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_default, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_SerialGC, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_ParallelGC, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_ParNewGC, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_CMS, \
+    ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_G1, \
+    ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_ParOldGC, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-jbb_default, \
     ${jprt.my.windows.x64}-product-c2-jbb_CMS, \
     ${jprt.my.windows.x64}-product-c2-jbb_ParallelGC, \
-    ${jprt.my.windows.x64}-{product|fastdebug}-c2-scimark_2, \
-    ${jprt.my.windows.x64}-{product|fastdebug}-c2-scimark_3
+    ${jprt.my.windows.x64}-product-c2-jbb_G1, \
+    ${jprt.my.windows.x64}-product-c2-jbb_ParOldGC
 
 # The complete list of test targets for jprt
 
--- a/make/windows/build_vm_def.sh	Thu Mar 19 13:25:23 2009 -0700
+++ b/make/windows/build_vm_def.sh	Fri Mar 27 16:54:56 2009 -0700
@@ -52,6 +52,19 @@
 RM="$MKS_HOME/rm.exe"
 DUMPBIN="link.exe /dump"
 
+# When called from IDE the first param should contain the link version, otherwise may be nill
+if [ "x$1" != "x" ]; then
+LINK_VER="$1"
+fi
+
+if [ "x$LINK_VER" != "x800" -a  "x$LINK_VER" != "x900" ]; then
 $DUMPBIN /symbols *.obj | "$GREP" "??_7.*@@6B@" | "$AWK" '{print $7}' | "$SORT" | "$UNIQ" > vm2.def
+else
+# Can't use pipes when calling cl.exe or link.exe from IDE. Using transit file vm3.def
+$DUMPBIN /OUT:vm3.def /symbols *.obj 
+"$CAT" vm3.def | "$GREP" "??_7.*@@6B@" | "$AWK" '{print $7}' | "$SORT" | "$UNIQ" > vm2.def
+"$RM" -f vm3.def
+fi
+
 "$CAT" vm1.def vm2.def > vm.def
 "$RM" -f vm1.def vm2.def
--- a/make/windows/create.bat	Thu Mar 19 13:25:23 2009 -0700
+++ b/make/windows/create.bat	Fri Mar 27 16:54:56 2009 -0700
@@ -72,12 +72,20 @@
 for /F %%i in ('sh %HotSpotWorkSpace%/make/windows/get_msc_ver.sh') do set %%i
 
 echo **************************************************************
+set ProjectFile=vm.vcproj
 if "%MSC_VER%" == "1200" (
 set ProjectFile=vm.dsp
 echo Will generate VC6 project {unsupported}
 ) else (
-set ProjectFile=vm.vcproj
-echo Will generate VC7 project
+if "%MSC_VER%" == "1400" (
+echo Will generate VC8 {Visual Studio 2005}
+) else (
+if "%MSC_VER%" == "1500" (
+echo Will generate VC9 {Visual Studio 2008}
+) else (
+echo Will generate VC7 project {Visual Studio 2003 .NET}
+)
+)
 )
 echo                            %ProjectFile%
 echo **************************************************************
--- a/make/windows/get_msc_ver.sh	Thu Mar 19 13:25:23 2009 -0700
+++ b/make/windows/get_msc_ver.sh	Fri Mar 27 16:54:56 2009 -0700
@@ -29,6 +29,7 @@
 # cl version 13.10.3077 returns "MSC_VER=1310"
 # cl version 14.00.30701 returns "MSC_VER=1399" (OLD_MSSDK version)
 # cl version 14.00.40310.41 returns "MSC_VER=1400"
+# cl version 15.00.21022.8 returns "MSC_VER=1500"
 
 # Note that we currently do not have a way to set HotSpotMksHome in
 # the batch build, but so far this has not seemed to be a problem. The
--- a/make/windows/makefiles/adlc.make	Thu Mar 19 13:25:23 2009 -0700
+++ b/make/windows/makefiles/adlc.make	Fri Mar 27 16:54:56 2009 -0700
@@ -46,6 +46,7 @@
 ADLCFLAGS=-q -T -U_LP64
 !endif
 
+CPP_FLAGS=$(CPP_FLAGS) /D _CRT_SECURE_NO_WARNINGS /D _CRT_SECURE_NO_DEPRECATE  
 
 CPP_INCLUDE_DIRS=\
   /I "..\generated"                          \
--- a/make/windows/makefiles/compile.make	Thu Mar 19 13:25:23 2009 -0700
+++ b/make/windows/makefiles/compile.make	Fri Mar 27 16:54:56 2009 -0700
@@ -170,10 +170,6 @@
 # Manifest Tool - used in VS2005 and later to adjust manifests stored
 # as resources inside build artifacts.
 MT=mt.exe
-!if "$(BUILDARCH)" == "i486"
-# VS2005 on x86 restricts the use of certain libc functions without this
-CPP_FLAGS=$(CPP_FLAGS) /D _CRT_SECURE_NO_DEPRECATE
-!endif
 !endif
 
 !if "$(COMPILER_NAME)" == "VS2008"
@@ -185,10 +181,6 @@
 # Manifest Tool - used in VS2005 and later to adjust manifests stored
 # as resources inside build artifacts.
 MT=mt.exe
-!if "$(BUILDARCH)" == "i486"
-# VS2005 on x86 restricts the use of certain libc functions without this
-CPP_FLAGS=$(CPP_FLAGS) /D _CRT_SECURE_NO_DEPRECATE
-!endif
 !endif
 
 # Compile for space above time.
--- a/make/windows/makefiles/makedeps.make	Thu Mar 19 13:25:23 2009 -0700
+++ b/make/windows/makefiles/makedeps.make	Fri Mar 27 16:54:56 2009 -0700
@@ -48,6 +48,8 @@
         $(WorkSpace)\src\share\tools\MakeDeps\WinGammaPlatform.java \
         $(WorkSpace)\src\share\tools\MakeDeps\WinGammaPlatformVC6.java \
         $(WorkSpace)\src\share\tools\MakeDeps\WinGammaPlatformVC7.java \
+        $(WorkSpace)\src\share\tools\MakeDeps\WinGammaPlatformVC8.java \
+        $(WorkSpace)\src\share\tools\MakeDeps\WinGammaPlatformVC9.java \
         $(WorkSpace)\src\share\tools\MakeDeps\Util.java \
         $(WorkSpace)\src\share\tools\MakeDeps\BuildConfig.java \
         $(WorkSpace)\src\share\tools\MakeDeps\ArgsParser.java
@@ -121,7 +123,7 @@
         -additionalFile includeDB_gc_shared \
         -additionalFile includeDB_gc_serial \
         -additionalGeneratedFile $(HOTSPOTBUILDSPACE)\%f\%b vm.def \
-        -prelink  "" "Generating vm.def..." "cd $(HOTSPOTBUILDSPACE)\%f\%b	$(HOTSPOTMKSHOME)\sh $(HOTSPOTWORKSPACE)\make\windows\build_vm_def.sh" \
+        -prelink  "" "Generating vm.def..." "cd $(HOTSPOTBUILDSPACE)\%f\%b	set HOTSPOTMKSHOME=$(HOTSPOTMKSHOME)	$(HOTSPOTMKSHOME)\sh $(HOTSPOTWORKSPACE)\make\windows\build_vm_def.sh $(LINK_VER)" \
        $(MakeDepsIncludesPRIVATE)
 
 # Add in build-specific options
--- a/make/windows/makefiles/rules.make	Thu Mar 19 13:25:23 2009 -0700
+++ b/make/windows/makefiles/rules.make	Fri Mar 27 16:54:56 2009 -0700
@@ -42,10 +42,23 @@
 BOOT_JAVA_HOME=
 !endif
 
+ProjectFile=vm.vcproj
+
 !if "$(MSC_VER)" == "1200"
+
 VcVersion=VC6
 ProjectFile=vm.dsp
+
+!elseif "$(MSC_VER)" == "1400"
+
+VcVersion=VC8
+
+!elseif "$(MSC_VER)" == "1500"
+
+VcVersion=VC9
+
 !else
+
 VcVersion=VC7
-ProjectFile=vm.vcproj
+
 !endif
--- a/make/windows/makefiles/sa.make	Thu Mar 19 13:25:23 2009 -0700
+++ b/make/windows/makefiles/sa.make	Fri Mar 27 16:54:56 2009 -0700
@@ -89,9 +89,11 @@
 SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 $(GX_OPTION) /Od /D "WIN32" /D "WIN64" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
 !elseif "$(BUILDARCH)" == "amd64"
 SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 $(GX_OPTION) /Od /D "WIN32" /D "WIN64" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+!if "$(COMPILER_NAME)" == "VS2005"
 # On amd64, VS2005 compiler requires bufferoverflowU.lib on the link command line, 
 # otherwise we get missing __security_check_cookie externals at link time. 
 SA_LINK_FLAGS = bufferoverflowU.lib
+!endif
 !else
 SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 /Gm $(GX_OPTION) /ZI /Od /D "WIN32" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
 !endif
--- a/make/windows/makefiles/sanity.make	Thu Mar 19 13:25:23 2009 -0700
+++ b/make/windows/makefiles/sanity.make	Fri Mar 27 16:54:56 2009 -0700
@@ -27,9 +27,9 @@
 all: checkCL checkLink
 
 checkCL:
-	@ if "$(MSC_VER)" NEQ "1310" if "$(MSC_VER)" NEQ "1399" if "$(MSC_VER)" NEQ "1400" \
+	@ if "$(MSC_VER)" NEQ "1310" if "$(MSC_VER)" NEQ "1399" if "$(MSC_VER)" NEQ "1400" if "$(MSC_VER)" NEQ "1500" \
 	echo *** WARNING *** unrecognized cl.exe version $(MSC_VER) ($(RAW_MSC_VER)).  Use FORCE_MSC_VER to override automatic detection.
 
 checkLink:
-	@ if "$(LINK_VER)" NEQ "710" if "$(LINK_VER)" NEQ "800" \
+	@ if "$(LINK_VER)" NEQ "710" if "$(LINK_VER)" NEQ "800" if "$(LINK_VER)" NEQ "900" \
 	echo *** WARNING *** unrecognized link.exe version $(LINK_VER) ($(RAW_LINK_VER)).  Use FORCE_LINK_VER to override automatic detection.
--- a/src/cpu/sparc/vm/assembler_sparc.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -2767,6 +2767,268 @@
 }
 
 
+void MacroAssembler::check_klass_subtype(Register sub_klass,
+                                         Register super_klass,
+                                         Register temp_reg,
+                                         Register temp2_reg,
+                                         Label& L_success) {
+  Label L_failure, L_pop_to_failure;
+  check_klass_subtype_fast_path(sub_klass, super_klass,
+                                temp_reg, temp2_reg,
+                                &L_success, &L_failure, NULL);
+  Register sub_2 = sub_klass;
+  Register sup_2 = super_klass;
+  if (!sub_2->is_global())  sub_2 = L0;
+  if (!sup_2->is_global())  sup_2 = L1;
+
+  save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
+  check_klass_subtype_slow_path(sub_2, sup_2,
+                                L2, L3, L4, L5,
+                                NULL, &L_pop_to_failure);
+
+  // on success:
+  restore();
+  ba(false, L_success);
+  delayed()->nop();
+
+  // on failure:
+  bind(L_pop_to_failure);
+  restore();
+  bind(L_failure);
+}
+
+
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp_reg,
+                                                   Register temp2_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   Label* L_slow_path,
+                                        RegisterConstant super_check_offset,
+                                        Register instanceof_hack) {
+  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
+                   Klass::secondary_super_cache_offset_in_bytes());
+  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
+                    Klass::super_check_offset_offset_in_bytes());
+
+  bool must_load_sco  = (super_check_offset.constant_or_zero() == -1);
+  bool need_slow_path = (must_load_sco ||
+                         super_check_offset.constant_or_zero() == sco_offset);
+
+  assert_different_registers(sub_klass, super_klass, temp_reg);
+  if (super_check_offset.is_register()) {
+    assert_different_registers(sub_klass, super_klass,
+                               super_check_offset.as_register());
+  } else if (must_load_sco) {
+    assert(temp2_reg != noreg, "supply either a temp or a register offset");
+  }
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1 || instanceof_hack != noreg ||
+         (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
+         "at most one NULL in the batch, usually");
+
+  // Support for the instanceof hack, which uses delay slots to
+  // set a destination register to zero or one.
+  bool do_bool_sets = (instanceof_hack != noreg);
+#define BOOL_SET(bool_value)                            \
+  if (do_bool_sets && bool_value >= 0)                  \
+    set(bool_value, instanceof_hack)
+#define DELAYED_BOOL_SET(bool_value)                    \
+  if (do_bool_sets && bool_value >= 0)                  \
+    delayed()->set(bool_value, instanceof_hack);        \
+  else delayed()->nop()
+  // Hacked ba(), which may only be used just before L_fallthrough.
+#define FINAL_JUMP(label, bool_value)                   \
+  if (&(label) == &L_fallthrough) {                     \
+    BOOL_SET(bool_value);                               \
+  } else {                                              \
+    ba((do_bool_sets && bool_value >= 0), label);       \
+    DELAYED_BOOL_SET(bool_value);                       \
+  }
+
+  // If the pointers are equal, we are done (e.g., String[] elements).
+  // This self-check enables sharing of secondary supertype arrays among
+  // non-primary types such as array-of-interface.  Otherwise, each such
+  // type would need its own customized SSA.
+  // We move this check to the front of the fast path because many
+  // type checks are in fact trivially successful in this manner,
+  // so we get a nicely predicted branch right at the start of the check.
+  cmp(super_klass, sub_klass);
+  brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success);
+  DELAYED_BOOL_SET(1);
+
+  // Check the supertype display:
+  if (must_load_sco) {
+    // The super check offset is always positive...
+    lduw(super_klass, sco_offset, temp2_reg);
+    super_check_offset = RegisterConstant(temp2_reg);
+  }
+  ld_ptr(sub_klass, super_check_offset, temp_reg);
+  cmp(super_klass, temp_reg);
+
+  // This check has worked decisively for primary supers.
+  // Secondary supers are sought in the super_cache ('super_cache_addr').
+  // (Secondary supers are interfaces and very deeply nested subtypes.)
+  // This works in the same check above because of a tricky aliasing
+  // between the super_cache and the primary super display elements.
+  // (The 'super_check_addr' can address either, as the case requires.)
+  // Note that the cache is updated below if it does not help us find
+  // what we need immediately.
+  // So if it was a primary super, we can just fail immediately.
+  // Otherwise, it's the slow path for us (no success at this point).
+
+  if (super_check_offset.is_register()) {
+    brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success);
+    delayed(); if (do_bool_sets)  BOOL_SET(1);
+    // if !do_bool_sets, sneak the next cmp into the delay slot:
+    cmp(super_check_offset.as_register(), sc_offset);
+
+    if (L_failure == &L_fallthrough) {
+      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_slow_path);
+      delayed()->nop();
+      BOOL_SET(0);  // fallthrough on failure
+    } else {
+      brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure);
+      DELAYED_BOOL_SET(0);
+      FINAL_JUMP(*L_slow_path, -1);  // -1 => vanilla delay slot
+    }
+  } else if (super_check_offset.as_constant() == sc_offset) {
+    // Need a slow path; fast failure is impossible.
+    if (L_slow_path == &L_fallthrough) {
+      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success);
+      DELAYED_BOOL_SET(1);
+    } else {
+      brx(Assembler::notEqual, false, Assembler::pn, *L_slow_path);
+      delayed()->nop();
+      FINAL_JUMP(*L_success, 1);
+    }
+  } else {
+    // No slow path; it's a fast decision.
+    if (L_failure == &L_fallthrough) {
+      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success);
+      DELAYED_BOOL_SET(1);
+      BOOL_SET(0);
+    } else {
+      brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure);
+      DELAYED_BOOL_SET(0);
+      FINAL_JUMP(*L_success, 1);
+    }
+  }
+
+  bind(L_fallthrough);
+
+#undef final_jump
+#undef bool_set
+#undef DELAYED_BOOL_SET
+#undef final_jump
+}
+
+
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register count_temp,
+                                                   Register scan_temp,
+                                                   Register scratch_reg,
+                                                   Register coop_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure) {
+  assert_different_registers(sub_klass, super_klass,
+                             count_temp, scan_temp, scratch_reg, coop_reg);
+
+  Label L_fallthrough, L_loop;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  // a couple of useful fields in sub_klass:
+  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
+                   Klass::secondary_supers_offset_in_bytes());
+  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
+                   Klass::secondary_super_cache_offset_in_bytes());
+
+  // Do a linear scan of the secondary super-klass chain.
+  // This code is rarely used, so simplicity is a virtue here.
+
+#ifndef PRODUCT
+  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
+  inc_counter((address) pst_counter, count_temp, scan_temp);
+#endif
+
+  // We will consult the secondary-super array.
+  ld_ptr(sub_klass, ss_offset, scan_temp);
+
+  // Compress superclass if necessary.
+  Register search_key = super_klass;
+  bool decode_super_klass = false;
+  if (UseCompressedOops) {
+    if (coop_reg != noreg) {
+      encode_heap_oop_not_null(super_klass, coop_reg);
+      search_key = coop_reg;
+    } else {
+      encode_heap_oop_not_null(super_klass);
+      decode_super_klass = true; // scarce temps!
+    }
+    // The superclass is never null; it would be a basic system error if a null
+    // pointer were to sneak in here.  Note that we have already loaded the
+    // Klass::super_check_offset from the super_klass in the fast path,
+    // so if there is a null in that register, we are already in the afterlife.
+  }
+
+  // Load the array length.  (Positive movl does right thing on LP64.)
+  lduw(scan_temp, arrayOopDesc::length_offset_in_bytes(), count_temp);
+
+  // Check for empty secondary super list
+  tst(count_temp);
+
+  // Top of search loop
+  bind(L_loop);
+  br(Assembler::equal, false, Assembler::pn, *L_failure);
+  delayed()->add(scan_temp, heapOopSize, scan_temp);
+  assert(heapOopSize != 0, "heapOopSize should be initialized");
+
+  // Skip the array header in all array accesses.
+  int elem_offset = arrayOopDesc::base_offset_in_bytes(T_OBJECT);
+  elem_offset -= heapOopSize;   // the scan pointer was pre-incremented also
+
+  // Load next super to check
+  if (UseCompressedOops) {
+    // Don't use load_heap_oop; we don't want to decode the element.
+    lduw(   scan_temp, elem_offset, scratch_reg );
+  } else {
+    ld_ptr( scan_temp, elem_offset, scratch_reg );
+  }
+
+  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
+  cmp(scratch_reg, search_key);
+
+  // A miss means we are NOT a subtype and need to keep looping
+  brx(Assembler::notEqual, false, Assembler::pn, L_loop);
+  delayed()->deccc(count_temp); // decrement trip counter in delay slot
+
+  // Falling out the bottom means we found a hit; we ARE a subtype
+  if (decode_super_klass) decode_heap_oop(super_klass);
+
+  // Success.  Cache the super we found and proceed in triumph.
+  st_ptr(super_klass, sub_klass, sc_offset);
+
+  if (L_success != &L_fallthrough) {
+    ba(false, *L_success);
+    delayed()->nop();
+  }
+
+  bind(L_fallthrough);
+}
+
+
+
+
 void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg,
                                           Register temp_reg,
                                           Label& done, Label* slow_case,
@@ -4316,7 +4578,13 @@
 
 void MacroAssembler::encode_heap_oop(Register src, Register dst) {
   assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   verify_oop(src);
+  if (Universe::narrow_oop_base() == NULL) {
+    srlx(src, LogMinObjAlignmentInBytes, dst);
+    return;
+  }
   Label done;
   if (src == dst) {
     // optimize for frequent case src == dst
@@ -4338,26 +4606,39 @@
 
 void MacroAssembler::encode_heap_oop_not_null(Register r) {
   assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   verify_oop(r);
-  sub(r, G6_heapbase, r);
+  if (Universe::narrow_oop_base() != NULL)
+    sub(r, G6_heapbase, r);
   srlx(r, LogMinObjAlignmentInBytes, r);
 }
 
 void MacroAssembler::encode_heap_oop_not_null(Register src, Register dst) {
   assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   verify_oop(src);
-  sub(src, G6_heapbase, dst);
-  srlx(dst, LogMinObjAlignmentInBytes, dst);
+  if (Universe::narrow_oop_base() == NULL) {
+    srlx(src, LogMinObjAlignmentInBytes, dst);
+  } else {
+    sub(src, G6_heapbase, dst);
+    srlx(dst, LogMinObjAlignmentInBytes, dst);
+  }
 }
 
 // Same algorithm as oops.inline.hpp decode_heap_oop.
 void  MacroAssembler::decode_heap_oop(Register src, Register dst) {
   assert (UseCompressedOops, "must be compressed");
-  Label done;
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   sllx(src, LogMinObjAlignmentInBytes, dst);
-  bpr(rc_nz, true, Assembler::pt, dst, done);
-  delayed() -> add(dst, G6_heapbase, dst); // annuled if not taken
-  bind(done);
+  if (Universe::narrow_oop_base() != NULL) {
+    Label done;
+    bpr(rc_nz, true, Assembler::pt, dst, done);
+    delayed() -> add(dst, G6_heapbase, dst); // annuled if not taken
+    bind(done);
+  }
   verify_oop(dst);
 }
 
@@ -4366,8 +4647,11 @@
   // pd_code_size_limit.
   // Also do not verify_oop as this is called by verify_oop.
   assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   sllx(r, LogMinObjAlignmentInBytes, r);
-  add(r, G6_heapbase, r);
+  if (Universe::narrow_oop_base() != NULL)
+    add(r, G6_heapbase, r);
 }
 
 void  MacroAssembler::decode_heap_oop_not_null(Register src, Register dst) {
@@ -4375,14 +4659,17 @@
   // pd_code_size_limit.
   // Also do not verify_oop as this is called by verify_oop.
   assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   sllx(src, LogMinObjAlignmentInBytes, dst);
-  add(dst, G6_heapbase, dst);
+  if (Universe::narrow_oop_base() != NULL)
+    add(dst, G6_heapbase, dst);
 }
 
 void MacroAssembler::reinit_heapbase() {
   if (UseCompressedOops) {
     // call indirectly to solve generation ordering problem
-    Address base(G6_heapbase, (address)Universe::heap_base_addr());
+    Address base(G6_heapbase, (address)Universe::narrow_oop_base_addr());
     load_ptr_contents(base, G6_heapbase);
   }
 }
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -2327,6 +2327,46 @@
                                Register temp_reg, Register temp2_reg,
                                Label& no_such_interface);
 
+  // Test sub_klass against super_klass, with fast and slow paths.
+
+  // The fast path produces a tri-state answer: yes / no / maybe-slow.
+  // One of the three labels can be NULL, meaning take the fall-through.
+  // If super_check_offset is -1, the value is loaded up from super_klass.
+  // No registers are killed, except temp_reg and temp2_reg.
+  // If super_check_offset is not -1, temp2_reg is not used and can be noreg.
+  void check_klass_subtype_fast_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register temp_reg,
+                                     Register temp2_reg,
+                                     Label* L_success,
+                                     Label* L_failure,
+                                     Label* L_slow_path,
+                RegisterConstant super_check_offset = RegisterConstant(-1),
+                Register instanceof_hack = noreg);
+
+  // The rest of the type check; must be wired to a corresponding fast path.
+  // It does not repeat the fast path logic, so don't use it standalone.
+  // The temp_reg can be noreg, if no temps are available.
+  // It can also be sub_klass or super_klass, meaning it's OK to kill that one.
+  // Updates the sub's secondary super cache as necessary.
+  void check_klass_subtype_slow_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register temp_reg,
+                                     Register temp2_reg,
+                                     Register temp3_reg,
+                                     Register temp4_reg,
+                                     Label* L_success,
+                                     Label* L_failure);
+
+  // Simplified, combined version, good for typical uses.
+  // Falls through on failure.
+  void check_klass_subtype(Register sub_klass,
+                           Register super_klass,
+                           Register temp_reg,
+                           Register temp2_reg,
+                           Label& L_success);
+
+
   // Stack overflow checking
 
   // Note: this clobbers G3_scratch
--- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -2393,23 +2393,11 @@
 
     // get instance klass
     load(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc), k_RInfo, T_OBJECT, NULL);
-    // get super_check_offset
-    load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1, T_INT, NULL);
-    // See if we get an immediate positive hit
-    __ ld_ptr(klass_RInfo, Rtmp1, FrameMap::O7_oop_opr->as_register());
-    __ cmp(k_RInfo, O7);
-    __ br(Assembler::equal, false, Assembler::pn, done);
-    __ delayed()->nop();
-    // check for immediate negative hit
-    __ cmp(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
-    __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
-    __ delayed()->nop();
-    // check for self
-    __ cmp(klass_RInfo, k_RInfo);
-    __ br(Assembler::equal, false, Assembler::pn, done);
-    __ delayed()->nop();
-
-    // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
+    // perform the fast part of the checking logic
+    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, &done, stub->entry(), NULL);
+
+    // call out-of-line instance of __ check_klass_subtype_slow_path(...):
+    assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
     __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
     __ delayed()->nop();
     __ cmp(G3, 0);
@@ -2493,58 +2481,30 @@
       __ delayed()->nop();
       __ bind(done);
     } else {
+      bool need_slow_path = true;
       if (k->is_loaded()) {
-        load(klass_RInfo, k->super_check_offset(), Rtmp1, T_OBJECT, NULL);
-
-        if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
-          // See if we get an immediate positive hit
-          __ cmp(Rtmp1, k_RInfo );
-          __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
-          __ delayed()->nop();
-        } else {
-          // See if we get an immediate positive hit
-          assert_different_registers(Rtmp1, k_RInfo, klass_RInfo);
-          __ cmp(Rtmp1, k_RInfo );
-          __ br(Assembler::equal, false, Assembler::pn, done);
-          // check for self
-          __ delayed()->cmp(klass_RInfo, k_RInfo);
-          __ br(Assembler::equal, false, Assembler::pn, done);
-          __ delayed()->nop();
-
-          // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
-          __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
-          __ delayed()->nop();
-          __ cmp(G3, 0);
-          __ br(Assembler::equal, false, Assembler::pn, *stub->entry());
-          __ delayed()->nop();
-        }
-        __ bind(done);
+        if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())
+          need_slow_path = false;
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, noreg,
+                                         (need_slow_path ? &done : NULL),
+                                         stub->entry(), NULL,
+                                         RegisterConstant(k->super_check_offset()));
       } else {
-        assert_different_registers(Rtmp1, klass_RInfo, k_RInfo);
-
-        load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1, T_INT, NULL);
-        // See if we get an immediate positive hit
-        load(klass_RInfo, Rtmp1, FrameMap::O7_oop_opr, T_OBJECT);
-        __ cmp(k_RInfo, O7);
-        __ br(Assembler::equal, false, Assembler::pn, done);
-        __ delayed()->nop();
-        // check for immediate negative hit
-        __ cmp(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
-        __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
-        // check for self
-        __ delayed()->cmp(klass_RInfo, k_RInfo);
-        __ br(Assembler::equal, false, Assembler::pn, done);
-        __ delayed()->nop();
-
-        // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7,
+                                         &done, stub->entry(), NULL);
+      }
+      if (need_slow_path) {
+        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
+        assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
         __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
         __ delayed()->nop();
         __ cmp(G3, 0);
         __ br(Assembler::equal, false, Assembler::pn, *stub->entry());
         __ delayed()->nop();
-        __ bind(done);
       }
-
+      __ bind(done);
     }
     __ mov(obj, dst);
   } else if (code == lir_instanceof) {
@@ -2582,58 +2542,32 @@
       __ set(0, dst);
       __ bind(done);
     } else {
+      bool need_slow_path = true;
       if (k->is_loaded()) {
-        assert_different_registers(Rtmp1, klass_RInfo, k_RInfo);
-        load(klass_RInfo, k->super_check_offset(), Rtmp1, T_OBJECT, NULL);
-
-        if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
-          // See if we get an immediate positive hit
-          __ cmp(Rtmp1, k_RInfo );
-          __ br(Assembler::equal, true, Assembler::pt, done);
-          __ delayed()->set(1, dst);
-          __ set(0, dst);
-          __ bind(done);
-        } else {
-          // See if we get an immediate positive hit
-          assert_different_registers(Rtmp1, k_RInfo, klass_RInfo);
-          __ cmp(Rtmp1, k_RInfo );
-          __ br(Assembler::equal, true, Assembler::pt, done);
-          __ delayed()->set(1, dst);
-          // check for self
-          __ cmp(klass_RInfo, k_RInfo);
-          __ br(Assembler::equal, true, Assembler::pt, done);
-          __ delayed()->set(1, dst);
-
-          // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
-          __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
-          __ delayed()->nop();
-          __ mov(G3, dst);
-          __ bind(done);
-        }
+        if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())
+          need_slow_path = false;
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, O7, noreg,
+                                         (need_slow_path ? &done : NULL),
+                                         (need_slow_path ? &done : NULL), NULL,
+                                         RegisterConstant(k->super_check_offset()),
+                                         dst);
       } else {
         assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers");
-
-        load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), dst, T_INT, NULL);
-        // See if we get an immediate positive hit
-        load(klass_RInfo, dst, FrameMap::O7_oop_opr, T_OBJECT);
-        __ cmp(k_RInfo, O7);
-        __ br(Assembler::equal, true, Assembler::pt, done);
-        __ delayed()->set(1, dst);
-        // check for immediate negative hit
-        __ cmp(dst, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
-        __ br(Assembler::notEqual, true, Assembler::pt, done);
-        __ delayed()->set(0, dst);
-        // check for self
-        __ cmp(klass_RInfo, k_RInfo);
-        __ br(Assembler::equal, true, Assembler::pt, done);
-        __ delayed()->set(1, dst);
-
-        // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, O7, dst,
+                                         &done, &done, NULL,
+                                         RegisterConstant(-1),
+                                         dst);
+      }
+      if (need_slow_path) {
+        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
+        assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
         __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
         __ delayed()->nop();
         __ mov(G3, dst);
-        __ bind(done);
       }
+      __ bind(done);
     }
   } else {
     ShouldNotReachHere();
--- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -714,38 +714,19 @@
         //      sub  : G3, argument, destroyed
         //      super: G1, argument, not changed
         //      raddr: O7, blown by call
-        Label loop, miss;
+        Label miss;
 
         __ save_frame(0);               // Blow no registers!
 
-        __ ld_ptr( G3, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 );
-        __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0); // length in l0
-        __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1); // ptr into array
-        __ clr(L4);                     // Index
-        // Load a little early; will load 1 off the end of the array.
-        // Ok for now; revisit if we have other uses of this routine.
-        __ ld_ptr(L1,0,L2);             // Will load a little early
-
-        // The scan loop
-        __ bind(loop);
-        __ add(L1,wordSize,L1); // Bump by OOP size
-        __ cmp(L4,L0);
-        __ br(Assembler::equal,false,Assembler::pn,miss);
-        __ delayed()->inc(L4);  // Bump index
-        __ subcc(L2,G1,L3);             // Check for match; zero in L3 for a hit
-        __ brx( Assembler::notEqual, false, Assembler::pt, loop );
-        __ delayed()->ld_ptr(L1,0,L2); // Will load a little early
-
-        // Got a hit; report success; set cache
-        __ st_ptr( G1, G3, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
+        __ check_klass_subtype_slow_path(G3, G1, L0, L1, L2, L4, NULL, &miss);
 
         __ mov(1, G3);
-        __ ret();                       // Result in G5 is ok; flags set
+        __ ret();                       // Result in G5 is 'true'
         __ delayed()->restore();        // free copy or add can go here
 
         __ bind(miss);
         __ mov(0, G3);
-        __ ret();                       // Result in G5 is ok; flags set
+        __ ret();                       // Result in G5 is 'false'
         __ delayed()->restore();        // free copy or add can go here
       }
 
--- a/src/cpu/sparc/vm/interp_masm_sparc.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/sparc/vm/interp_masm_sparc.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -866,65 +866,18 @@
                                                   Register Rtmp2,
                                                   Register Rtmp3,
                                                   Label &ok_is_subtype ) {
-  Label not_subtype, loop;
+  Label not_subtype;
 
   // Profile the not-null value's klass.
   profile_typecheck(Rsub_klass, Rtmp1);
 
-  // Load the super-klass's check offset into Rtmp1
-  ld( Rsuper_klass, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1 );
-  // Load from the sub-klass's super-class display list, or a 1-word cache of
-  // the secondary superclass list, or a failing value with a sentinel offset
-  // if the super-klass is an interface or exceptionally deep in the Java
-  // hierarchy and we have to scan the secondary superclass list the hard way.
-  ld_ptr( Rsub_klass, Rtmp1, Rtmp2 );
-  // See if we get an immediate positive hit
-  cmp( Rtmp2, Rsuper_klass );
-  brx( Assembler::equal, false, Assembler::pt, ok_is_subtype );
-  // In the delay slot, check for immediate negative hit
-  delayed()->cmp( Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
-  br( Assembler::notEqual, false, Assembler::pt, not_subtype );
-  // In the delay slot, check for self
-  delayed()->cmp( Rsub_klass, Rsuper_klass );
-  brx( Assembler::equal, false, Assembler::pt, ok_is_subtype );
-
-  // Now do a linear scan of the secondary super-klass chain.
-  delayed()->ld_ptr( Rsub_klass, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), Rtmp2 );
-
-  // compress superclass
-  if (UseCompressedOops) encode_heap_oop(Rsuper_klass);
-
-  // Rtmp2 holds the objArrayOop of secondary supers.
-  ld( Rtmp2, arrayOopDesc::length_offset_in_bytes(), Rtmp1 );// Load the array length
-  // Check for empty secondary super list
-  tst(Rtmp1);
-
-  // Top of search loop
-  bind( loop );
-  br( Assembler::equal, false, Assembler::pn, not_subtype );
-  delayed()->nop();
-
-  // load next super to check
-  if (UseCompressedOops) {
-    lduw( Rtmp2, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Rtmp3);
-    // Bump array pointer forward one oop
-    add( Rtmp2, 4, Rtmp2 );
-  } else {
-    ld_ptr( Rtmp2, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Rtmp3);
-    // Bump array pointer forward one oop
-    add( Rtmp2, wordSize, Rtmp2);
-  }
-  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
-  cmp( Rtmp3, Rsuper_klass );
-  // A miss means we are NOT a subtype and need to keep looping
-  brx( Assembler::notEqual, false, Assembler::pt, loop );
-  delayed()->deccc( Rtmp1 );    // dec trip counter in delay slot
-  // Falling out the bottom means we found a hit; we ARE a subtype
-  if (UseCompressedOops) decode_heap_oop(Rsuper_klass);
-  br( Assembler::always, false, Assembler::pt, ok_is_subtype );
-  // Update the cache
-  delayed()->st_ptr( Rsuper_klass, Rsub_klass,
-                     sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
+  check_klass_subtype_fast_path(Rsub_klass, Rsuper_klass,
+                                Rtmp1, Rtmp2,
+                                &ok_is_subtype, &not_subtype, NULL);
+
+  check_klass_subtype_slow_path(Rsub_klass, Rsuper_klass,
+                                Rtmp1, Rtmp2, Rtmp3, /*hack:*/ noreg,
+                                &ok_is_subtype, NULL);
 
   bind(not_subtype);
   profile_typecheck_failed(Rtmp1);
--- a/src/cpu/sparc/vm/sparc.ad	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Fri Mar 27 16:54:56 2009 -0700
@@ -547,7 +547,11 @@
     int v_off = entry_offset*wordSize + vtableEntry::method_offset_in_bytes();
     int klass_load_size;
     if (UseCompressedOops) {
-      klass_load_size = 3*BytesPerInstWord; // see MacroAssembler::load_klass()
+      assert(Universe::heap() != NULL, "java heap should be initialized");
+      if (Universe::narrow_oop_base() == NULL)
+        klass_load_size = 2*BytesPerInstWord; // see MacroAssembler::load_klass()
+      else
+        klass_load_size = 3*BytesPerInstWord;
     } else {
       klass_load_size = 1*BytesPerInstWord;
     }
@@ -1601,9 +1605,11 @@
   st->print_cr("\nUEP:");
 #ifdef    _LP64
   if (UseCompressedOops) {
+    assert(Universe::heap() != NULL, "java heap should be initialized");
     st->print_cr("\tLDUW   [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check - compressed klass");
     st->print_cr("\tSLL    R_G5,3,R_G5");
-    st->print_cr("\tADD    R_G5,R_G6_heap_base,R_G5");
+    if (Universe::narrow_oop_base() != NULL)
+      st->print_cr("\tADD    R_G5,R_G6_heap_base,R_G5");
   } else {
     st->print_cr("\tLDX    [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check");
   }
@@ -2502,7 +2508,11 @@
       __ load_klass(O0, G3_scratch);
       int klass_load_size;
       if (UseCompressedOops) {
-        klass_load_size = 3*BytesPerInstWord;
+        assert(Universe::heap() != NULL, "java heap should be initialized");
+        if (Universe::narrow_oop_base() == NULL)
+          klass_load_size = 2*BytesPerInstWord;
+        else
+          klass_load_size = 3*BytesPerInstWord;
       } else {
         klass_load_size = 1*BytesPerInstWord;
       }
@@ -9005,6 +9015,33 @@
   ins_pipe(long_memory_op);
 %}
 
+
+//---------- Population Count Instructions -------------------------------------
+
+instruct popCountI(iRegI dst, iRegI src) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI src));
+
+  format %{ "POPC   $src, $dst" %}
+  ins_encode %{
+    __ popc($src$$Register, $dst$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Note: Long.bitCount(long) returns an int.
+instruct popCountL(iRegI dst, iRegL src) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL src));
+
+  format %{ "POPC   $src, $dst" %}
+  ins_encode %{
+    __ popc($src$$Register, $dst$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+
 // ============================================================================
 //------------Bytes reverse--------------------------------------------------
 
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -900,19 +900,7 @@
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
     address start = __ pc();
-    Label loop, miss;
-
-    // Compare super with sub directly, since super is not in its own SSA.
-    // The compiler used to emit this test, but we fold it in here,
-    // to increase overall code density, with no real loss of speed.
-    { Label L;
-      __ cmp(O1, O2);
-      __ brx(Assembler::notEqual, false, Assembler::pt, L);
-      __ delayed()->nop();
-      __ retl();
-      __ delayed()->addcc(G0,0,O0); // set Z flags, zero result
-      __ bind(L);
-    }
+    Label miss;
 
 #if defined(COMPILER2) && !defined(_LP64)
     // Do not use a 'save' because it blows the 64-bit O registers.
@@ -936,56 +924,12 @@
     Register L2_super   = L2;
     Register L3_index   = L3;
 
-#ifdef _LP64
-    Register L4_ooptmp  = L4;
-
-    if (UseCompressedOops) {
-      // this must be under UseCompressedOops check, as we rely upon fact
-      // that L4 not clobbered in C2 on 32-bit platforms, where we do explicit save
-      // on stack, see several lines above
-      __ encode_heap_oop(Rsuper, L4_ooptmp);
-    }
-#endif
-
-    inc_counter_np(SharedRuntime::_partial_subtype_ctr, L0, L1);
-
-    __ ld_ptr( Rsub, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 );
-    __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0_ary_len);
-    __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1_ary_ptr);
-    __ clr(L3_index);           // zero index
-    // Load a little early; will load 1 off the end of the array.
-    // Ok for now; revisit if we have other uses of this routine.
-    if (UseCompressedOops) {
-      __ lduw(L1_ary_ptr,0,L2_super);// Will load a little early
-    } else {
-      __ ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
-    }
-
-    assert(heapOopSize != 0, "heapOopSize should be initialized");
-    // The scan loop
-    __ BIND(loop);
-    __ add(L1_ary_ptr, heapOopSize, L1_ary_ptr); // Bump by OOP size
-    __ cmp(L3_index,L0_ary_len);
-    __ br(Assembler::equal,false,Assembler::pn,miss);
-    __ delayed()->inc(L3_index); // Bump index
-
-    if (UseCompressedOops) {
-#ifdef  _LP64
-      __ subcc(L2_super,L4_ooptmp,Rret);   // Check for match; zero in Rret for a hit
-      __ br( Assembler::notEqual, false, Assembler::pt, loop );
-      __ delayed()->lduw(L1_ary_ptr,0,L2_super);// Will load a little early
-#else
-      ShouldNotReachHere();
-#endif
-    } else {
-      __ subcc(L2_super,Rsuper,Rret);   // Check for match; zero in Rret for a hit
-      __ brx( Assembler::notEqual, false, Assembler::pt, loop );
-      __ delayed()->ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
-    }
-
-    // Got a hit; report success; set cache.  Cache load doesn't
-    // happen here; for speed it is directly emitted by the compiler.
-    __ st_ptr( Rsuper, Rsub, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
+    __ check_klass_subtype_slow_path(Rsub, Rsuper,
+                                     L0, L1, L2, L3,
+                                     NULL, &miss);
+
+    // Match falls through here.
+    __ addcc(G0,0,Rret);        // set Z flags, Z result
 
 #if defined(COMPILER2) && !defined(_LP64)
     __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
@@ -999,7 +943,6 @@
     __ delayed()->restore();
 #endif
 
-    // Hit or miss falls through here
     __ BIND(miss);
     __ addcc(G0,1,Rret);        // set NZ flags, NZ result
 
@@ -2330,51 +2273,31 @@
                            Register super_check_offset,
                            Register super_klass,
                            Register temp,
-                           Label& L_success,
-                           Register deccc_hack = noreg) {
+                           Label& L_success) {
     assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
 
     BLOCK_COMMENT("type_check:");
 
-    Label L_miss;
+    Label L_miss, L_pop_to_miss;
 
     assert_clean_int(super_check_offset, temp);
 
-    // maybe decrement caller's trip count:
-#define DELAY_SLOT delayed();   \
-    { if (deccc_hack == noreg) __ nop(); else __ deccc(deccc_hack); }
-
-    // if the pointers are equal, we are done (e.g., String[] elements)
-    __ cmp(sub_klass, super_klass);
-    __ brx(Assembler::equal, true, Assembler::pt, L_success);
-    __ DELAY_SLOT;
-
-    // check the supertype display:
-    __ ld_ptr(sub_klass, super_check_offset, temp); // query the super type
-    __ cmp(super_klass,                      temp); // test the super type
-    __ brx(Assembler::equal, true, Assembler::pt, L_success);
-    __ DELAY_SLOT;
-
-    int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
-                     Klass::secondary_super_cache_offset_in_bytes());
-    __ cmp(super_klass, sc_offset);
-    __ brx(Assembler::notEqual, true, Assembler::pt, L_miss);
-    __ delayed()->nop();
-
+    __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
+                                     &L_success, &L_miss, NULL,
+                                     super_check_offset);
+
+    BLOCK_COMMENT("type_check_slow_path:");
     __ save_frame(0);
-    __ mov(sub_klass->after_save(), O1);
-    // mov(super_klass->after_save(), O2); //fill delay slot
-    assert(StubRoutines::Sparc::_partial_subtype_check != NULL, "order of generation");
-    __ call(StubRoutines::Sparc::_partial_subtype_check);
-    __ delayed()->mov(super_klass->after_save(), O2);
+    __ check_klass_subtype_slow_path(sub_klass->after_save(),
+                                     super_klass->after_save(),
+                                     L0, L1, L2, L4,
+                                     NULL, &L_pop_to_miss);
+    __ ba(false, L_success);
+    __ delayed()->restore();
+
+    __ bind(L_pop_to_miss);
     __ restore();
 
-    // Upon return, the condition codes are already set.
-    __ brx(Assembler::equal, true, Assembler::pt, L_success);
-    __ DELAY_SLOT;
-
-#undef DELAY_SLOT
-
     // Fall through on failure!
     __ BIND(L_miss);
   }
@@ -2411,7 +2334,7 @@
     gen_write_ref_array_pre_barrier(O1, O2);
 
 #ifdef ASSERT
-    // We sometimes save a frame (see partial_subtype_check below).
+    // We sometimes save a frame (see generate_type_check below).
     // If this will cause trouble, let's fail now instead of later.
     __ save_frame(0);
     __ restore();
@@ -2455,41 +2378,39 @@
     //   G3, G4, G5 --- current oop, oop.klass, oop.klass.super
     __ align(16);
 
-    __ bind(store_element);
-    // deccc(G1_remain);                // decrement the count (hoisted)
+    __ BIND(store_element);
+    __ deccc(G1_remain);                // decrement the count
     __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
     __ inc(O5_offset, heapOopSize);     // step to next offset
     __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
     __ delayed()->set(0, O0);           // return -1 on success
 
     // ======== loop entry is here ========
-    __ bind(load_element);
+    __ BIND(load_element);
     __ load_heap_oop(O0_from, O5_offset, G3_oop);  // load the oop
     __ br_null(G3_oop, true, Assembler::pt, store_element);
-    __ delayed()->deccc(G1_remain);     // decrement the count
+    __ delayed()->nop();
 
     __ load_klass(G3_oop, G4_klass); // query the object klass
 
     generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
                         // branch to this on success:
-                        store_element,
-                        // decrement this on success:
-                        G1_remain);
+                        store_element);
     // ======== end loop ========
 
     // It was a real error; we must depend on the caller to finish the job.
     // Register G1 has number of *remaining* oops, O2 number of *total* oops.
     // Emit GC store barriers for the oops we have copied (O2 minus G1),
     // and report their number to the caller.
-    __ bind(fail);
+    __ BIND(fail);
     __ subcc(O2_count, G1_remain, O2_count);
     __ brx(Assembler::zero, false, Assembler::pt, done);
     __ delayed()->not1(O2_count, O0);   // report (-1^K) to caller
 
-    __ bind(do_card_marks);
+    __ BIND(do_card_marks);
     gen_write_ref_array_post_barrier(O1_to, O2_count, O3);   // store check on O1[0..O2]
 
-    __ bind(done);
+    __ BIND(done);
     inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
     __ retl();
     __ delayed()->nop();             // return value in 00
@@ -2942,14 +2863,15 @@
     StubRoutines::_atomic_add_ptr_entry      = StubRoutines::_atomic_add_entry;
     StubRoutines::_fence_entry               = generate_fence();
 #endif  // COMPILER2 !=> _LP64
-
-    StubRoutines::Sparc::_partial_subtype_check                = generate_partial_subtype_check();
   }
 
 
   void generate_all() {
     // Generates all stubs and initializes the entry points
 
+    // Generate partial_subtype_check first here since its code depends on
+    // UseZeroBaseCompressedOops which is defined after heap initialization.
+    StubRoutines::Sparc::_partial_subtype_check                = generate_partial_subtype_check();
     // These entry points require SharedInfo::stack0 to be set up in non-core builds
     StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),  false);
     StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError),  false);
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -72,6 +72,9 @@
         FLAG_SET_ERGO(bool, UseCompressedOops, false);
       }
     }
+    // 32-bit oops don't make sense for the 64-bit VM on sparc
+    // since the 32-bit VM has the same registers and smaller objects.
+    Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
 #endif // _LP64
 #ifdef COMPILER2
     // Indirect branch is the same cost as direct
@@ -89,16 +92,26 @@
 #endif
   }
 
+  // Use hardware population count instruction if available.
+  if (has_hardware_popc()) {
+    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
+      UsePopCountInstruction = true;
+    }
+  }
+
   char buf[512];
-  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s",
                (has_v8() ? ", has_v8" : ""),
                (has_v9() ? ", has_v9" : ""),
+               (has_hardware_popc() ? ", popc" : ""),
                (has_vis1() ? ", has_vis1" : ""),
                (has_vis2() ? ", has_vis2" : ""),
                (is_ultra3() ? ", is_ultra3" : ""),
                (is_sun4v() ? ", is_sun4v" : ""),
                (is_niagara1() ? ", is_niagara1" : ""),
-               (!has_hardware_int_muldiv() ? ", no-muldiv" : ""),
+               (is_niagara1_plus() ? ", is_niagara1_plus" : ""),
+               (!has_hardware_mul32() ? ", no-mul32" : ""),
+               (!has_hardware_div32() ? ", no-div32" : ""),
                (!has_hardware_fsmuld() ? ", no-fsmuld" : ""));
 
   // buf is started with ", " or is empty
--- a/src/cpu/sparc/vm/vm_version_sparc.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,34 +25,38 @@
 class VM_Version: public Abstract_VM_Version {
 protected:
   enum Feature_Flag {
-    v8_instructions     = 0,
-    hardware_int_muldiv = 1,
-    hardware_fsmuld     = 2,
-    v9_instructions     = 3,
-    vis1_instructions   = 4,
-    vis2_instructions   = 5,
-    sun4v_instructions  = 6
+    v8_instructions    = 0,
+    hardware_mul32     = 1,
+    hardware_div32     = 2,
+    hardware_fsmuld    = 3,
+    hardware_popc      = 4,
+    v9_instructions    = 5,
+    vis1_instructions  = 6,
+    vis2_instructions  = 7,
+    sun4v_instructions = 8
   };
 
   enum Feature_Flag_Set {
-    unknown_m             = 0,
-    all_features_m        = -1,
+    unknown_m           = 0,
+    all_features_m      = -1,
 
-    v8_instructions_m     = 1 << v8_instructions,
-    hardware_int_muldiv_m = 1 << hardware_int_muldiv,
-    hardware_fsmuld_m     = 1 << hardware_fsmuld,
-    v9_instructions_m     = 1 << v9_instructions,
-    vis1_instructions_m   = 1 << vis1_instructions,
-    vis2_instructions_m   = 1 << vis2_instructions,
-    sun4v_m               = 1 << sun4v_instructions,
+    v8_instructions_m   = 1 << v8_instructions,
+    hardware_mul32_m    = 1 << hardware_mul32,
+    hardware_div32_m    = 1 << hardware_div32,
+    hardware_fsmuld_m   = 1 << hardware_fsmuld,
+    hardware_popc_m     = 1 << hardware_popc,
+    v9_instructions_m   = 1 << v9_instructions,
+    vis1_instructions_m = 1 << vis1_instructions,
+    vis2_instructions_m = 1 << vis2_instructions,
+    sun4v_m             = 1 << sun4v_instructions,
 
-    generic_v8_m          = v8_instructions_m | hardware_int_muldiv_m | hardware_fsmuld_m,
-    generic_v9_m          = generic_v8_m | v9_instructions_m | vis1_instructions_m,
-    ultra3_m              = generic_v9_m | vis2_instructions_m,
+    generic_v8_m        = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
+    generic_v9_m        = generic_v8_m | v9_instructions_m,
+    ultra3_m            = generic_v9_m | vis1_instructions_m | vis2_instructions_m,
 
     // Temporary until we have something more accurate
-    niagara1_unique_m     = sun4v_m,
-    niagara1_m            = generic_v9_m | niagara1_unique_m
+    niagara1_unique_m   = sun4v_m,
+    niagara1_m          = generic_v9_m | niagara1_unique_m
   };
 
   static int  _features;
@@ -62,7 +66,7 @@
   static int  determine_features();
   static int  platform_features(int features);
 
-  static bool is_niagara1(int features) { return (features & niagara1_m) == niagara1_m; }
+  static bool is_niagara1(int features) { return (features & sun4v_m) != 0; }
 
   static int maximum_niagara1_processor_count() { return 32; }
   // Returns true if the platform is in the niagara line and
@@ -76,8 +80,10 @@
   // Instruction support
   static bool has_v8()                  { return (_features & v8_instructions_m) != 0; }
   static bool has_v9()                  { return (_features & v9_instructions_m) != 0; }
-  static bool has_hardware_int_muldiv() { return (_features & hardware_int_muldiv_m) != 0; }
+  static bool has_hardware_mul32()      { return (_features & hardware_mul32_m) != 0; }
+  static bool has_hardware_div32()      { return (_features & hardware_div32_m) != 0; }
   static bool has_hardware_fsmuld()     { return (_features & hardware_fsmuld_m) != 0; }
+  static bool has_hardware_popc()       { return (_features & hardware_popc_m) != 0; }
   static bool has_vis1()                { return (_features & vis1_instructions_m) != 0; }
   static bool has_vis2()                { return (_features & vis2_instructions_m) != 0; }
 
--- a/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -221,13 +221,15 @@
     if (is_vtable_stub) {
       // ld;ld;ld,jmp,nop
       const int basic = 5*BytesPerInstWord +
-                        // shift;add for load_klass
-                        (UseCompressedOops ? 2*BytesPerInstWord : 0);
+                        // shift;add for load_klass (only shift with zero heap based)
+                        (UseCompressedOops ?
+                         ((Universe::narrow_oop_base() == NULL) ? BytesPerInstWord : 2*BytesPerInstWord) : 0);
       return basic + slop;
     } else {
       const int basic = (28 LP64_ONLY(+ 6)) * BytesPerInstWord +
-                        // shift;add for load_klass
-                        (UseCompressedOops ? 2*BytesPerInstWord : 0);
+                        // shift;add for load_klass (only shift with zero heap based)
+                        (UseCompressedOops ?
+                         ((Universe::narrow_oop_base() == NULL) ? BytesPerInstWord : 2*BytesPerInstWord) : 0);
       return (basic + slop);
     }
   }
--- a/src/cpu/x86/vm/assembler_x86.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -727,7 +727,7 @@
   }
 
 #ifdef _LP64
-  assert(false, "fix locate_operand");
+  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
 #else
   assert(which == imm_operand, "instruction has only an imm field");
 #endif // LP64
@@ -2193,6 +2193,25 @@
   emit_byte(0x58 | encode);
 }
 
+void Assembler::popcntl(Register dst, Address src) {
+  assert(VM_Version::supports_popcnt(), "must support");
+  InstructionMark im(this);
+  emit_byte(0xF3);
+  prefix(src, dst);
+  emit_byte(0x0F);
+  emit_byte(0xB8);
+  emit_operand(dst, src);
+}
+
+void Assembler::popcntl(Register dst, Register src) {
+  assert(VM_Version::supports_popcnt(), "must support");
+  emit_byte(0xF3);
+  int encode = prefix_and_encode(dst->encoding(), src->encoding());
+  emit_byte(0x0F);
+  emit_byte(0xB8);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::popf() {
   emit_byte(0x9D);
 }
@@ -3224,12 +3243,6 @@
   emit_byte(0xF1);
 }
 
-void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec, int format) {
-  InstructionMark im(this);
-  int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xB8 | encode);
-  emit_data((int)imm32, rspec, format);
-}
 
 #ifndef _LP64
 
@@ -3249,6 +3262,12 @@
   emit_data((int)imm32, rspec, 0);
 }
 
+void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
+  InstructionMark im(this);
+  int encode = prefix_and_encode(dst->encoding());
+  emit_byte(0xB8 | encode);
+  emit_data((int)imm32, rspec, 0);
+}
 
 void Assembler::popa() { // 32bit
   emit_byte(0x61);
@@ -3857,6 +3876,37 @@
   emit_data64(imm64, rspec);
 }
 
+void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
+  InstructionMark im(this);
+  int encode = prefix_and_encode(dst->encoding());
+  emit_byte(0xB8 | encode);
+  emit_data((int)imm32, rspec, narrow_oop_operand);
+}
+
+void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
+  InstructionMark im(this);
+  prefix(dst);
+  emit_byte(0xC7);
+  emit_operand(rax, dst, 4);
+  emit_data((int)imm32, rspec, narrow_oop_operand);
+}
+
+void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
+  InstructionMark im(this);
+  int encode = prefix_and_encode(src1->encoding());
+  emit_byte(0x81);
+  emit_byte(0xF8 | encode);
+  emit_data((int)imm32, rspec, narrow_oop_operand);
+}
+
+void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
+  InstructionMark im(this);
+  prefix(src1);
+  emit_byte(0x81);
+  emit_operand(rax, src1, 4);
+  emit_data((int)imm32, rspec, narrow_oop_operand);
+}
+
 void Assembler::movdq(XMMRegister dst, Register src) {
   // table D-1 says MMX/SSE2
   NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
@@ -4049,6 +4099,25 @@
   addq(rsp, 16 * wordSize);
 }
 
+void Assembler::popcntq(Register dst, Address src) {
+  assert(VM_Version::supports_popcnt(), "must support");
+  InstructionMark im(this);
+  emit_byte(0xF3);
+  prefixq(src, dst);
+  emit_byte(0x0F);
+  emit_byte(0xB8);
+  emit_operand(dst, src);
+}
+
+void Assembler::popcntq(Register dst, Register src) {
+  assert(VM_Version::supports_popcnt(), "must support");
+  emit_byte(0xF3);
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
+  emit_byte(0x0F);
+  emit_byte(0xB8);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::popq(Address dst) {
   InstructionMark im(this);
   prefixq(dst);
@@ -7217,6 +7286,225 @@
 }
 
 
+void MacroAssembler::check_klass_subtype(Register sub_klass,
+                           Register super_klass,
+                           Register temp_reg,
+                           Label& L_success) {
+  Label L_failure;
+  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
+  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
+  bind(L_failure);
+}
+
+
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   Label* L_slow_path,
+                                        RegisterConstant super_check_offset) {
+  assert_different_registers(sub_klass, super_klass, temp_reg);
+  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
+  if (super_check_offset.is_register()) {
+    assert_different_registers(sub_klass, super_klass,
+                               super_check_offset.as_register());
+  } else if (must_load_sco) {
+    assert(temp_reg != noreg, "supply either a temp or a register offset");
+  }
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
+                   Klass::secondary_super_cache_offset_in_bytes());
+  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
+                    Klass::super_check_offset_offset_in_bytes());
+  Address super_check_offset_addr(super_klass, sco_offset);
+
+  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
+  // range of a jccb.  If this routine grows larger, reconsider at
+  // least some of these.
+#define local_jcc(assembler_cond, label)                                \
+  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
+  else                             jcc( assembler_cond, label) /*omit semi*/
+
+  // Hacked jmp, which may only be used just before L_fallthrough.
+#define final_jmp(label)                                                \
+  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
+  else                            jmp(label)                /*omit semi*/
+
+  // If the pointers are equal, we are done (e.g., String[] elements).
+  // This self-check enables sharing of secondary supertype arrays among
+  // non-primary types such as array-of-interface.  Otherwise, each such
+  // type would need its own customized SSA.
+  // We move this check to the front of the fast path because many
+  // type checks are in fact trivially successful in this manner,
+  // so we get a nicely predicted branch right at the start of the check.
+  cmpptr(sub_klass, super_klass);
+  local_jcc(Assembler::equal, *L_success);
+
+  // Check the supertype display:
+  if (must_load_sco) {
+    // Positive movl does right thing on LP64.
+    movl(temp_reg, super_check_offset_addr);
+    super_check_offset = RegisterConstant(temp_reg);
+  }
+  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
+  cmpptr(super_klass, super_check_addr); // load displayed supertype
+
+  // This check has worked decisively for primary supers.
+  // Secondary supers are sought in the super_cache ('super_cache_addr').
+  // (Secondary supers are interfaces and very deeply nested subtypes.)
+  // This works in the same check above because of a tricky aliasing
+  // between the super_cache and the primary super display elements.
+  // (The 'super_check_addr' can address either, as the case requires.)
+  // Note that the cache is updated below if it does not help us find
+  // what we need immediately.
+  // So if it was a primary super, we can just fail immediately.
+  // Otherwise, it's the slow path for us (no success at this point).
+
+  if (super_check_offset.is_register()) {
+    local_jcc(Assembler::equal, *L_success);
+    cmpl(super_check_offset.as_register(), sc_offset);
+    if (L_failure == &L_fallthrough) {
+      local_jcc(Assembler::equal, *L_slow_path);
+    } else {
+      local_jcc(Assembler::notEqual, *L_failure);
+      final_jmp(*L_slow_path);
+    }
+  } else if (super_check_offset.as_constant() == sc_offset) {
+    // Need a slow path; fast failure is impossible.
+    if (L_slow_path == &L_fallthrough) {
+      local_jcc(Assembler::equal, *L_success);
+    } else {
+      local_jcc(Assembler::notEqual, *L_slow_path);
+      final_jmp(*L_success);
+    }
+  } else {
+    // No slow path; it's a fast decision.
+    if (L_failure == &L_fallthrough) {
+      local_jcc(Assembler::equal, *L_success);
+    } else {
+      local_jcc(Assembler::notEqual, *L_failure);
+      final_jmp(*L_success);
+    }
+  }
+
+  bind(L_fallthrough);
+
+#undef local_jcc
+#undef final_jmp
+}
+
+
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp_reg,
+                                                   Register temp2_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   bool set_cond_codes) {
+  assert_different_registers(sub_klass, super_klass, temp_reg);
+  if (temp2_reg != noreg)
+    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
+#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  // a couple of useful fields in sub_klass:
+  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
+                   Klass::secondary_supers_offset_in_bytes());
+  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
+                   Klass::secondary_super_cache_offset_in_bytes());
+  Address secondary_supers_addr(sub_klass, ss_offset);
+  Address super_cache_addr(     sub_klass, sc_offset);
+
+  // Do a linear scan of the secondary super-klass chain.
+  // This code is rarely used, so simplicity is a virtue here.
+  // The repne_scan instruction uses fixed registers, which we must spill.
+  // Don't worry too much about pre-existing connections with the input regs.
+
+  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
+  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)
+
+  // Get super_klass value into rax (even if it was in rdi or rcx).
+  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
+  if (super_klass != rax || UseCompressedOops) {
+    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
+    mov(rax, super_klass);
+  }
+  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
+  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
+
+#ifndef PRODUCT
+  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
+  ExternalAddress pst_counter_addr((address) pst_counter);
+  NOT_LP64(  incrementl(pst_counter_addr) );
+  LP64_ONLY( lea(rcx, pst_counter_addr) );
+  LP64_ONLY( incrementl(Address(rcx, 0)) );
+#endif //PRODUCT
+
+  // We will consult the secondary-super array.
+  movptr(rdi, secondary_supers_addr);
+  // Load the array length.  (Positive movl does right thing on LP64.)
+  movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
+  // Skip to start of data.
+  addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+
+  // Scan RCX words at [RDI] for an occurrence of RAX.
+  // Set NZ/Z based on last compare.
+#ifdef _LP64
+  // This part is tricky, as values in supers array could be 32 or 64 bit wide
+  // and we store values in objArrays always encoded, thus we need to encode
+  // the value of rax before repne.  Note that rax is dead after the repne.
+  if (UseCompressedOops) {
+    encode_heap_oop_not_null(rax);
+    // The superclass is never null; it would be a basic system error if a null
+    // pointer were to sneak in here.  Note that we have already loaded the
+    // Klass::super_check_offset from the super_klass in the fast path,
+    // so if there is a null in that register, we are already in the afterlife.
+    repne_scanl();
+  } else
+#endif // _LP64
+    repne_scan();
+
+  // Unspill the temp. registers:
+  if (pushed_rdi)  pop(rdi);
+  if (pushed_rcx)  pop(rcx);
+  if (pushed_rax)  pop(rax);
+
+  if (set_cond_codes) {
+    // Special hack for the AD files:  rdi is guaranteed non-zero.
+    assert(!pushed_rdi, "rdi must be left non-NULL");
+    // Also, the condition codes are properly set Z/NZ on succeed/failure.
+  }
+
+  if (L_failure == &L_fallthrough)
+        jccb(Assembler::notEqual, *L_failure);
+  else  jcc(Assembler::notEqual, *L_failure);
+
+  // Success.  Cache the super we found and proceed in triumph.
+  movptr(super_cache_addr, super_klass);
+
+  if (L_success != &L_fallthrough) {
+    jmp(*L_success);
+  }
+
+#undef IS_A_TEMP
+
+  bind(L_fallthrough);
+}
+
+
 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
   ucomisd(dst, as_Address(src));
 }
@@ -7710,14 +7998,21 @@
 void MacroAssembler::load_prototype_header(Register dst, Register src) {
 #ifdef _LP64
   if (UseCompressedOops) {
+    assert (Universe::heap() != NULL, "java heap should be initialized");
     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
-    movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+    if (Universe::narrow_oop_shift() != 0) {
+      assert(Address::times_8 == LogMinObjAlignmentInBytes &&
+             Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
+      movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+    } else {
+      movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+    }
   } else
 #endif
-    {
-      movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
-      movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
-    }
+  {
+    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+    movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+  }
 }
 
 void MacroAssembler::store_klass(Register dst, Register src) {
@@ -7760,11 +8055,20 @@
 // Algorithm must match oop.inline.hpp encode_heap_oop.
 void MacroAssembler::encode_heap_oop(Register r) {
   assert (UseCompressedOops, "should be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  if (Universe::narrow_oop_base() == NULL) {
+    verify_oop(r, "broken oop in encode_heap_oop");
+    if (Universe::narrow_oop_shift() != 0) {
+      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+      shrq(r, LogMinObjAlignmentInBytes);
+    }
+    return;
+  }
 #ifdef ASSERT
   if (CheckCompressedOops) {
     Label ok;
     push(rscratch1); // cmpptr trashes rscratch1
-    cmpptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
+    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
     jcc(Assembler::equal, ok);
     stop("MacroAssembler::encode_heap_oop: heap base corrupted?");
     bind(ok);
@@ -7780,6 +8084,7 @@
 
 void MacroAssembler::encode_heap_oop_not_null(Register r) {
   assert (UseCompressedOops, "should be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
 #ifdef ASSERT
   if (CheckCompressedOops) {
     Label ok;
@@ -7790,12 +8095,18 @@
   }
 #endif
   verify_oop(r, "broken oop in encode_heap_oop_not_null");
-  subq(r, r12_heapbase);
-  shrq(r, LogMinObjAlignmentInBytes);
+  if (Universe::narrow_oop_base() != NULL) {
+    subq(r, r12_heapbase);
+  }
+  if (Universe::narrow_oop_shift() != 0) {
+    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    shrq(r, LogMinObjAlignmentInBytes);
+  }
 }
 
 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
   assert (UseCompressedOops, "should be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
 #ifdef ASSERT
   if (CheckCompressedOops) {
     Label ok;
@@ -7809,18 +8120,32 @@
   if (dst != src) {
     movq(dst, src);
   }
-  subq(dst, r12_heapbase);
-  shrq(dst, LogMinObjAlignmentInBytes);
+  if (Universe::narrow_oop_base() != NULL) {
+    subq(dst, r12_heapbase);
+  }
+  if (Universe::narrow_oop_shift() != 0) {
+    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    shrq(dst, LogMinObjAlignmentInBytes);
+  }
 }
 
 void  MacroAssembler::decode_heap_oop(Register r) {
   assert (UseCompressedOops, "should be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  if (Universe::narrow_oop_base() == NULL) {
+    if (Universe::narrow_oop_shift() != 0) {
+      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+      shlq(r, LogMinObjAlignmentInBytes);
+    }
+    verify_oop(r, "broken oop in decode_heap_oop");
+    return;
+  }
 #ifdef ASSERT
   if (CheckCompressedOops) {
     Label ok;
     push(rscratch1);
     cmpptr(r12_heapbase,
-           ExternalAddress((address)Universe::heap_base_addr()));
+           ExternalAddress((address)Universe::narrow_oop_base_addr()));
     jcc(Assembler::equal, ok);
     stop("MacroAssembler::decode_heap_oop: heap base corrupted?");
     bind(ok);
@@ -7844,32 +8169,76 @@
 
 void  MacroAssembler::decode_heap_oop_not_null(Register r) {
   assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
   // Cannot assert, unverified entry point counts instructions (see .ad file)
   // vtableStubs also counts instructions in pd_code_size_limit.
   // Also do not verify_oop as this is called by verify_oop.
-  assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong");
-  leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
+  if (Universe::narrow_oop_base() == NULL) {
+    if (Universe::narrow_oop_shift() != 0) {
+      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+      shlq(r, LogMinObjAlignmentInBytes);
+    }
+  } else {
+      assert (Address::times_8 == LogMinObjAlignmentInBytes &&
+              Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
+    leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
+  }
 }
 
 void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
   assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
   // Cannot assert, unverified entry point counts instructions (see .ad file)
   // vtableStubs also counts instructions in pd_code_size_limit.
   // Also do not verify_oop as this is called by verify_oop.
-  assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong");
-  leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
+  if (Universe::narrow_oop_shift() != 0) {
+    assert (Address::times_8 == LogMinObjAlignmentInBytes &&
+            Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
+    leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
+  } else if (dst != src) {
+    movq(dst, src);
+  }
 }
 
 void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
-  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
   int oop_index = oop_recorder()->find_index(obj);
   RelocationHolder rspec = oop_Relocation::spec(oop_index);
-  mov_literal32(dst, oop_index, rspec, narrow_oop_operand);
+  mov_narrow_oop(dst, oop_index, rspec);
+}
+
+void  MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int oop_index = oop_recorder()->find_index(obj);
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+  mov_narrow_oop(dst, oop_index, rspec);
+}
+
+void  MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int oop_index = oop_recorder()->find_index(obj);
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
+}
+
+void  MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int oop_index = oop_recorder()->find_index(obj);
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
 }
 
 void MacroAssembler::reinit_heapbase() {
   if (UseCompressedOops) {
-    movptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
+    movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
   }
 }
 #endif // _LP64
--- a/src/cpu/x86/vm/assembler_x86.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -578,20 +578,25 @@
 
   // These are all easily abused and hence protected
 
-  void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec, int format = 0);
-
   // 32BIT ONLY SECTION
 #ifndef _LP64
   // Make these disappear in 64bit mode since they would never be correct
   void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec);   // 32BIT ONLY
   void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY
 
+  void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY
   void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec);     // 32BIT ONLY
 
   void push_literal32(int32_t imm32, RelocationHolder const& rspec);                 // 32BIT ONLY
 #else
   // 64BIT ONLY SECTION
   void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec);   // 64BIT ONLY
+
+  void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec);
+  void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec);
+
+  void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec);
+  void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec);
 #endif // _LP64
 
   // These are unique in that we are ensured by the caller that the 32bit
@@ -1219,6 +1224,14 @@
   void popq(Address dst);
 #endif
 
+  void popcntl(Register dst, Address src);
+  void popcntl(Register dst, Register src);
+
+#ifdef _LP64
+  void popcntq(Register dst, Address src);
+  void popcntq(Register dst, Register src);
+#endif
+
   // Prefetches (SSE, SSE2, 3DNOW only)
 
   void prefetchnta(Address src);
@@ -1647,6 +1660,9 @@
   void decode_heap_oop_not_null(Register dst, Register src);
 
   void set_narrow_oop(Register dst, jobject obj);
+  void set_narrow_oop(Address dst, jobject obj);
+  void cmp_narrow_oop(Register dst, jobject obj);
+  void cmp_narrow_oop(Address dst, jobject obj);
 
   // if heap base register is used - reinit it with the correct value
   void reinit_heapbase();
@@ -1791,6 +1807,40 @@
                                Register scan_temp,
                                Label& no_such_interface);
 
+  // Test sub_klass against super_klass, with fast and slow paths.
+
+  // The fast path produces a tri-state answer: yes / no / maybe-slow.
+  // One of the three labels can be NULL, meaning take the fall-through.
+  // If super_check_offset is -1, the value is loaded up from super_klass.
+  // No registers are killed, except temp_reg.
+  void check_klass_subtype_fast_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register temp_reg,
+                                     Label* L_success,
+                                     Label* L_failure,
+                                     Label* L_slow_path,
+                RegisterConstant super_check_offset = RegisterConstant(-1));
+
+  // The rest of the type check; must be wired to a corresponding fast path.
+  // It does not repeat the fast path logic, so don't use it standalone.
+  // The temp_reg and temp2_reg can be noreg, if no temps are available.
+  // Updates the sub's secondary super cache as necessary.
+  // If set_cond_codes, condition codes will be Z on success, NZ on failure.
+  void check_klass_subtype_slow_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register temp_reg,
+                                     Register temp2_reg,
+                                     Label* L_success,
+                                     Label* L_failure,
+                                     bool set_cond_codes = false);
+
+  // Simplified, combined version, good for typical uses.
+  // Falls through on failure.
+  void check_klass_subtype(Register sub_klass,
+                           Register super_klass,
+                           Register temp_reg,
+                           Label& L_success);
+
   //----
   void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0
 
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -1598,18 +1598,9 @@
 
     // get instance klass
     __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
-    // get super_check_offset
-    __ movl(Rtmp1, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes()));
-    // See if we get an immediate positive hit
-    __ cmpptr(k_RInfo, Address(klass_RInfo, Rtmp1, Address::times_1));
-    __ jcc(Assembler::equal, done);
-    // check for immediate negative hit
-    __ cmpl(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
-    __ jcc(Assembler::notEqual, *stub->entry());
-    // check for self
-    __ cmpptr(klass_RInfo, k_RInfo);
-    __ jcc(Assembler::equal, done);
-
+    // perform the fast part of the checking logic
+    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, &done, stub->entry(), NULL);
+    // call out-of-line instance of __ check_klass_subtype_slow_path(...):
     __ push(klass_RInfo);
     __ push(k_RInfo);
     __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
@@ -1735,17 +1726,9 @@
         }
         __ bind(done);
       } else {
-        __ movl(Rtmp1, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes()));
-        // See if we get an immediate positive hit
-        __ cmpptr(k_RInfo, Address(klass_RInfo, Rtmp1, Address::times_1));
-        __ jcc(Assembler::equal, done);
-        // check for immediate negative hit
-        __ cmpl(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
-        __ jcc(Assembler::notEqual, *stub->entry());
-        // check for self
-        __ cmpptr(klass_RInfo, k_RInfo);
-        __ jcc(Assembler::equal, done);
-
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, &done, stub->entry(), NULL);
+        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
         __ push(klass_RInfo);
         __ push(k_RInfo);
         __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
@@ -1821,23 +1804,15 @@
           __ pop(dst);
           __ jmp(done);
         }
-      } else {
-#else
-      { // YUCK
+      }
+        else // next block is unconditional if LP64:
 #endif // LP64
+      {
         assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers");
 
-        __ movl(dst, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes()));
-        // See if we get an immediate positive hit
-        __ cmpptr(k_RInfo, Address(klass_RInfo, dst, Address::times_1));
-        __ jcc(Assembler::equal, one);
-        // check for immediate negative hit
-        __ cmpl(dst, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
-        __ jcc(Assembler::notEqual, zero);
-        // check for self
-        __ cmpptr(klass_RInfo, k_RInfo);
-        __ jcc(Assembler::equal, one);
-
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, dst, &one, &zero, NULL);
+        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
         __ push(klass_RInfo);
         __ push(k_RInfo);
         __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
--- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -1354,6 +1354,13 @@
 
     case slow_subtype_check_id:
       {
+        // Typical calling sequence:
+        // __ push(klass_RInfo);  // object klass or other subclass
+        // __ push(sup_k_RInfo);  // array element klass or other superclass
+        // __ call(slow_subtype_check);
+        // Note that the subclass is pushed first, and is therefore deepest.
+        // Previous versions of this code reversed the names 'sub' and 'super'.
+        // This was operationally harmless but made the code unreadable.
         enum layout {
           rax_off, SLOT2(raxH_off)
           rcx_off, SLOT2(rcxH_off)
@@ -1361,9 +1368,10 @@
           rdi_off, SLOT2(rdiH_off)
           // saved_rbp_off, SLOT2(saved_rbpH_off)
           return_off, SLOT2(returnH_off)
-          sub_off, SLOT2(subH_off)
-          super_off, SLOT2(superH_off)
-          framesize
+          sup_k_off, SLOT2(sup_kH_off)
+          klass_off, SLOT2(superH_off)
+          framesize,
+          result_off = klass_off  // deepest argument is also the return value
         };
 
         __ set_info("slow_subtype_check", dont_gc_arguments);
@@ -1373,19 +1381,14 @@
         __ push(rax);
 
         // This is called by pushing args and not with C abi
-        __ movptr(rsi, Address(rsp, (super_off) * VMRegImpl::stack_slot_size)); // super
-        __ movptr(rax, Address(rsp, (sub_off  ) * VMRegImpl::stack_slot_size)); // sub
-
-        __ movptr(rdi,Address(rsi,sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()));
-        // since size is postive movl does right thing on 64bit
-        __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
-        __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+        __ movptr(rsi, Address(rsp, (klass_off) * VMRegImpl::stack_slot_size)); // subclass
+        __ movptr(rax, Address(rsp, (sup_k_off) * VMRegImpl::stack_slot_size)); // superclass
 
         Label miss;
-        __ repne_scan();
-        __ jcc(Assembler::notEqual, miss);
-        __ movptr(Address(rsi,sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()), rax);
-        __ movptr(Address(rsp, (super_off) * VMRegImpl::stack_slot_size), 1); // result
+        __ check_klass_subtype_slow_path(rsi, rax, rcx, rdi, NULL, &miss);
+
+        // fallthrough on success:
+        __ movptr(Address(rsp, (result_off) * VMRegImpl::stack_slot_size), 1); // result
         __ pop(rax);
         __ pop(rcx);
         __ pop(rsi);
@@ -1393,7 +1396,7 @@
         __ ret(0);
 
         __ bind(miss);
-        __ movptr(Address(rsp, (super_off) * VMRegImpl::stack_slot_size), NULL_WORD); // result
+        __ movptr(Address(rsp, (result_off) * VMRegImpl::stack_slot_size), NULL_WORD); // result
         __ pop(rax);
         __ pop(rcx);
         __ pop(rsi);
--- a/src/cpu/x86/vm/interp_masm_x86_32.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -219,47 +219,16 @@
   // Resets EDI to locals.  Register sub_klass cannot be any of the above.
 void InterpreterMacroAssembler::gen_subtype_check( Register Rsub_klass, Label &ok_is_subtype ) {
   assert( Rsub_klass != rax, "rax, holds superklass" );
-  assert( Rsub_klass != rcx, "rcx holds 2ndary super array length" );
-  assert( Rsub_klass != rdi, "rdi holds 2ndary super array scan ptr" );
-  Label not_subtype, loop;
+  assert( Rsub_klass != rcx, "used as a temp" );
+  assert( Rsub_klass != rdi, "used as a temp, restored from locals" );
 
   // Profile the not-null value's klass.
-  profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, rdi
+  profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, reloads rdi
 
-  // Load the super-klass's check offset into ECX
-  movl( rcx, Address(rax, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes() ) );
-  // Load from the sub-klass's super-class display list, or a 1-word cache of
-  // the secondary superclass list, or a failing value with a sentinel offset
-  // if the super-klass is an interface or exceptionally deep in the Java
-  // hierarchy and we have to scan the secondary superclass list the hard way.
-  // See if we get an immediate positive hit
-  cmpptr( rax, Address(Rsub_klass,rcx,Address::times_1) );
-  jcc( Assembler::equal,ok_is_subtype );
+  // Do the check.
+  check_klass_subtype(Rsub_klass, rax, rcx, ok_is_subtype); // blows rcx
 
-  // Check for immediate negative hit
-  cmpl( rcx, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
-  jcc( Assembler::notEqual, not_subtype );
-  // Check for self
-  cmpptr( Rsub_klass, rax );
-  jcc( Assembler::equal, ok_is_subtype );
-
-  // Now do a linear scan of the secondary super-klass chain.
-  movptr( rdi, Address(Rsub_klass, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()) );
-  // EDI holds the objArrayOop of secondary supers.
-  movl( rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));// Load the array length
-  // Skip to start of data; also clear Z flag incase ECX is zero
-  addptr( rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT) );
-  // Scan ECX words at [EDI] for occurance of EAX
-  // Set NZ/Z based on last compare
-  repne_scan();
-  restore_locals();           // Restore EDI; Must not blow flags
-  // Not equal?
-  jcc( Assembler::notEqual, not_subtype );
-  // Must be equal but missed in cache.  Update cache.
-  movptr( Address(Rsub_klass, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()), rax );
-  jmp( ok_is_subtype );
-
-  bind(not_subtype);
+  // Profile the failure of the check.
   profile_typecheck_failed(rcx); // blows rcx
 }
 
--- a/src/cpu/x86/vm/interp_masm_x86_64.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -232,65 +232,13 @@
   assert(Rsub_klass != rcx, "rcx holds 2ndary super array length");
   assert(Rsub_klass != rdi, "rdi holds 2ndary super array scan ptr");
 
-  Label not_subtype, not_subtype_pop, loop;
+  // Profile the not-null value's klass.
+  profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, reloads rdi
 
-  // Profile the not-null value's klass.
-  profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, rdi
+  // Do the check.
+  check_klass_subtype(Rsub_klass, rax, rcx, ok_is_subtype); // blows rcx
 
-  // Load the super-klass's check offset into rcx
-  movl(rcx, Address(rax, sizeof(oopDesc) +
-                    Klass::super_check_offset_offset_in_bytes()));
-  // Load from the sub-klass's super-class display list, or a 1-word
-  // cache of the secondary superclass list, or a failing value with a
-  // sentinel offset if the super-klass is an interface or
-  // exceptionally deep in the Java hierarchy and we have to scan the
-  // secondary superclass list the hard way.  See if we get an
-  // immediate positive hit
-  cmpptr(rax, Address(Rsub_klass, rcx, Address::times_1));
-  jcc(Assembler::equal,ok_is_subtype);
-
-  // Check for immediate negative hit
-  cmpl(rcx, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
-  jcc( Assembler::notEqual, not_subtype );
-  // Check for self
-  cmpptr(Rsub_klass, rax);
-  jcc(Assembler::equal, ok_is_subtype);
-
-  // Now do a linear scan of the secondary super-klass chain.
-  movptr(rdi, Address(Rsub_klass, sizeof(oopDesc) +
-                      Klass::secondary_supers_offset_in_bytes()));
-  // rdi holds the objArrayOop of secondary supers.
-  // Load the array length
-  movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
-  // Skip to start of data; also clear Z flag incase rcx is zero
-  addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
-  // Scan rcx words at [rdi] for occurance of rax
-  // Set NZ/Z based on last compare
-
-  // this part is kind tricky, as values in supers array could be 32 or 64 bit wide
-  // and we store values in objArrays always encoded, thus we need to encode value
-  // before repne
-  if (UseCompressedOops) {
-    push(rax);
-    encode_heap_oop(rax);
-    repne_scanl();
-    // Not equal?
-    jcc(Assembler::notEqual, not_subtype_pop);
-    // restore heap oop here for movq
-    pop(rax);
-  } else {
-    repne_scan();
-    jcc(Assembler::notEqual, not_subtype);
-  }
-  // Must be equal but missed in cache.  Update cache.
-  movptr(Address(Rsub_klass, sizeof(oopDesc) +
-               Klass::secondary_super_cache_offset_in_bytes()), rax);
-  jmp(ok_is_subtype);
-
-  bind(not_subtype_pop);
-  // restore heap oop here for miss
-  if (UseCompressedOops) pop(rax);
-  bind(not_subtype);
+  // Profile the failure of the check.
   profile_typecheck_failed(rcx); // blows rcx
 }
 
--- a/src/cpu/x86/vm/interpreterRT_x86_64.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/x86/vm/interpreterRT_x86_64.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -349,7 +349,7 @@
 
     if (_num_args < Argument::n_float_register_parameters_c-1) {
       *_reg_args++ = from_obj;
-      *_fp_identifiers |= (0x01 << (_num_args*2)); // mark as float
+      *_fp_identifiers |= (intptr_t)(0x01 << (_num_args*2)); // mark as float
       _num_args++;
     } else {
       *_to++ = from_obj;
@@ -364,7 +364,7 @@
 
     if (_num_args < Argument::n_float_register_parameters_c-1) {
       *_reg_args++ = from_obj;
-      *_fp_identifiers |= (0x3 << (_num_args*2)); // mark as double
+      *_fp_identifiers |= (intptr_t)(0x3 << (_num_args*2)); // mark as double
       _num_args++;
     } else {
       *_to++ = from_obj;
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -1310,81 +1310,51 @@
                            Address& super_check_offset_addr,
                            Address& super_klass_addr,
                            Register temp,
-                           Label* L_success_ptr, Label* L_failure_ptr) {
+                           Label* L_success, Label* L_failure) {
     BLOCK_COMMENT("type_check:");
 
     Label L_fallthrough;
-    bool fall_through_on_success = (L_success_ptr == NULL);
-    if (fall_through_on_success) {
-      L_success_ptr = &L_fallthrough;
-    } else {
-      L_failure_ptr = &L_fallthrough;
-    }
-    Label& L_success = *L_success_ptr;
-    Label& L_failure = *L_failure_ptr;
+#define LOCAL_JCC(assembler_con, label_ptr)                             \
+    if (label_ptr != NULL)  __ jcc(assembler_con, *(label_ptr));        \
+    else                    __ jcc(assembler_con, L_fallthrough) /*omit semi*/
 
+    // The following is a strange variation of the fast path which requires
+    // one less register, because needed values are on the argument stack.
+    // __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp,
+    //                                  L_success, L_failure, NULL);
     assert_different_registers(sub_klass, temp);
 
-    // a couple of useful fields in sub_klass:
-    int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
-                     Klass::secondary_supers_offset_in_bytes());
     int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                      Klass::secondary_super_cache_offset_in_bytes());
-    Address secondary_supers_addr(sub_klass, ss_offset);
-    Address super_cache_addr(     sub_klass, sc_offset);
 
     // if the pointers are equal, we are done (e.g., String[] elements)
     __ cmpptr(sub_klass, super_klass_addr);
-    __ jcc(Assembler::equal, L_success);
+    LOCAL_JCC(Assembler::equal, L_success);
 
     // check the supertype display:
     __ movl2ptr(temp, super_check_offset_addr);
     Address super_check_addr(sub_klass, temp, Address::times_1, 0);
     __ movptr(temp, super_check_addr); // load displayed supertype
     __ cmpptr(temp, super_klass_addr); // test the super type
-    __ jcc(Assembler::equal, L_success);
+    LOCAL_JCC(Assembler::equal, L_success);
 
     // if it was a primary super, we can just fail immediately
     __ cmpl(super_check_offset_addr, sc_offset);
-    __ jcc(Assembler::notEqual, L_failure);
+    LOCAL_JCC(Assembler::notEqual, L_failure);
 
-    // Now do a linear scan of the secondary super-klass chain.
-    // This code is rarely used, so simplicity is a virtue here.
-    inc_counter_np(SharedRuntime::_partial_subtype_ctr);
-    {
-      // The repne_scan instruction uses fixed registers, which we must spill.
-      // (We need a couple more temps in any case.)
-      __ push(rax);
-      __ push(rcx);
-      __ push(rdi);
-      assert_different_registers(sub_klass, rax, rcx, rdi);
+    // The repne_scan instruction uses fixed registers, which will get spilled.
+    // We happen to know this works best when super_klass is in rax.
+    Register super_klass = temp;
+    __ movptr(super_klass, super_klass_addr);
+    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg,
+                                     L_success, L_failure);
 
-      __ movptr(rdi, secondary_supers_addr);
-      // Load the array length.
-      __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
-      // Skip to start of data.
-      __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
-      // Scan rcx words at [edi] for occurance of rax,
-      // Set NZ/Z based on last compare
-      __ movptr(rax, super_klass_addr);
-      __ repne_scan();
+    __ bind(L_fallthrough);
 
-      // Unspill the temp. registers:
-      __ pop(rdi);
-      __ pop(rcx);
-      __ pop(rax);
-    }
-    __ jcc(Assembler::notEqual, L_failure);
+    if (L_success == NULL) { BLOCK_COMMENT("L_success:"); }
+    if (L_failure == NULL) { BLOCK_COMMENT("L_failure:"); }
 
-    // Success.  Cache the super we found and proceed in triumph.
-    __ movptr(temp, super_klass_addr); // note: rax, is dead
-    __ movptr(super_cache_addr, temp);
-
-    if (!fall_through_on_success)
-      __ jmp(L_success);
-
-    // Fall through on failure!
-    __ bind(L_fallthrough);
+#undef LOCAL_JCC
   }
 
   //
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -2091,66 +2091,9 @@
 
     Label L_miss;
 
-    // a couple of useful fields in sub_klass:
-    int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
-                     Klass::secondary_supers_offset_in_bytes());
-    int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
-                     Klass::secondary_super_cache_offset_in_bytes());
-    Address secondary_supers_addr(sub_klass, ss_offset);
-    Address super_cache_addr(     sub_klass, sc_offset);
-
-    // if the pointers are equal, we are done (e.g., String[] elements)
-    __ cmpptr(super_klass, sub_klass);
-    __ jcc(Assembler::equal, L_success);
-
-    // check the supertype display:
-    Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
-    __ cmpptr(super_klass, super_check_addr); // test the super type
-    __ jcc(Assembler::equal, L_success);
-
-    // if it was a primary super, we can just fail immediately
-    __ cmpl(super_check_offset, sc_offset);
-    __ jcc(Assembler::notEqual, L_miss);
-
-    // Now do a linear scan of the secondary super-klass chain.
-    // The repne_scan instruction uses fixed registers, which we must spill.
-    // (We need a couple more temps in any case.)
-    // This code is rarely used, so simplicity is a virtue here.
-    inc_counter_np(SharedRuntime::_partial_subtype_ctr);
-    {
-      __ push(rax);
-      __ push(rcx);
-      __ push(rdi);
-      assert_different_registers(sub_klass, super_klass, rax, rcx, rdi);
-
-      __ movptr(rdi, secondary_supers_addr);
-      // Load the array length.
-      __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
-      // Skip to start of data.
-      __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
-      // Scan rcx words at [rdi] for occurance of rax
-      // Set NZ/Z based on last compare
-      __ movptr(rax, super_klass);
-      if (UseCompressedOops) {
-        // Compare against compressed form.  Don't need to uncompress because
-        // looks like orig rax is restored in popq below.
-        __ encode_heap_oop(rax);
-        __ repne_scanl();
-      } else {
-        __ repne_scan();
-      }
-
-      // Unspill the temp. registers:
-      __ pop(rdi);
-      __ pop(rcx);
-      __ pop(rax);
-
-      __ jcc(Assembler::notEqual, L_miss);
-    }
-
-    // Success.  Cache the super we found and proceed in triumph.
-    __ movptr(super_cache_addr, super_klass); // note: rax is dead
-    __ jmp(L_success);
+    __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg,        &L_success, &L_miss, NULL,
+                                     super_check_offset);
+    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);
 
     // Fall through on failure!
     __ BIND(L_miss);
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -284,7 +284,7 @@
   }
 
   char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                cores_per_cpu(), threads_per_core(),
                cpu_family(), _model, _stepping,
                (supports_cmov() ? ", cmov" : ""),
@@ -297,6 +297,7 @@
                (supports_ssse3()? ", ssse3": ""),
                (supports_sse4_1() ? ", sse4.1" : ""),
                (supports_sse4_2() ? ", sse4.2" : ""),
+               (supports_popcnt() ? ", popcnt" : ""),
                (supports_mmx_ext() ? ", mmxext" : ""),
                (supports_3dnow()   ? ", 3dnow"  : ""),
                (supports_3dnow2()  ? ", 3dnowext" : ""),
@@ -410,6 +411,13 @@
     }
   }
 
+  // Use population count instruction if available.
+  if (supports_popcnt()) {
+    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
+      UsePopCountInstruction = true;
+    }
+  }
+
   assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
   assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
 
--- a/src/cpu/x86/vm/vm_version_x86.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/x86/vm/vm_version_x86.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -70,7 +70,9 @@
                dca      : 1,
                sse4_1   : 1,
                sse4_2   : 1,
-                        : 11;
+                        : 2,
+               popcnt   : 1,
+                        : 8;
     } bits;
   };
 
@@ -179,7 +181,8 @@
      CPU_SSSE3  = (1 << 9),
      CPU_SSE4A  = (1 << 10),
      CPU_SSE4_1 = (1 << 11),
-     CPU_SSE4_2 = (1 << 12)
+     CPU_SSE4_2 = (1 << 12),
+     CPU_POPCNT = (1 << 13)
    } cpuFeatureFlags;
 
   // cpuid information block.  All info derived from executing cpuid with
@@ -290,6 +293,8 @@
       result |= CPU_SSE4_1;
     if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
       result |= CPU_SSE4_2;
+    if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
+      result |= CPU_POPCNT;
     return result;
   }
 
@@ -379,6 +384,7 @@
   static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
   static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
   static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
+  static bool supports_popcnt()   { return (_cpuFeatures & CPU_POPCNT) != 0; }
   //
   // AMD features
   //
--- a/src/cpu/x86/vm/x86_32.ad	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/x86/vm/x86_32.ad	Fri Mar 27 16:54:56 2009 -0700
@@ -1483,16 +1483,20 @@
   // main source block for now.  In future, we can generalize this by
   // adding a syntax that specifies the sizes of fields in an order,
   // so that the adlc can build the emit functions automagically
-  enc_class OpcP %{             // Emit opcode
-    emit_opcode(cbuf,$primary);
-  %}
-
-  enc_class OpcS %{             // Emit opcode
-    emit_opcode(cbuf,$secondary);
-  %}
-
-  enc_class Opcode(immI d8 ) %{ // Emit opcode
-    emit_opcode(cbuf,$d8$$constant);
+
+  // Emit primary opcode
+  enc_class OpcP %{
+    emit_opcode(cbuf, $primary);
+  %}
+
+  // Emit secondary opcode
+  enc_class OpcS %{
+    emit_opcode(cbuf, $secondary);
+  %}
+
+  // Emit opcode directly
+  enc_class Opcode(immI d8) %{
+    emit_opcode(cbuf, $d8$$constant);
   %}
 
   enc_class SizePrefix %{
@@ -1688,26 +1692,15 @@
     Register Reax = as_Register(EAX_enc); // super class
     Register Recx = as_Register(ECX_enc); // killed
     Register Resi = as_Register(ESI_enc); // sub class
-    Label hit, miss;
+    Label miss;
 
     MacroAssembler _masm(&cbuf);
-    // Compare super with sub directly, since super is not in its own SSA.
-    // The compiler used to emit this test, but we fold it in here,
-    // to allow platform-specific tweaking on sparc.
-    __ cmpptr(Reax, Resi);
-    __ jcc(Assembler::equal, hit);
-#ifndef PRODUCT
-    __ incrementl(ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr));
-#endif //PRODUCT
-    __ movptr(Redi,Address(Resi,sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()));
-    __ movl(Recx,Address(Redi,arrayOopDesc::length_offset_in_bytes()));
-    __ addptr(Redi,arrayOopDesc::base_offset_in_bytes(T_OBJECT));
-    __ repne_scan();
-    __ jcc(Assembler::notEqual, miss);
-    __ movptr(Address(Resi,sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()),Reax);
-    __ bind(hit);
-    if( $primary )
-      __ xorptr(Redi,Redi);
+    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
+                                     NULL, &miss,
+                                     /*set_cond_codes:*/ true);
+    if ($primary) {
+      __ xorptr(Redi, Redi);
+    }
     __ bind(miss);
   %}
 
@@ -6387,6 +6380,67 @@
 %}
 
 
+//---------- Population Count Instructions -------------------------------------
+
+instruct popCountI(eRegI dst, eRegI src) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI src));
+
+  format %{ "POPCNT $dst, $src" %}
+  ins_encode %{
+    __ popcntl($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct popCountI_mem(eRegI dst, memory mem) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI (LoadI mem)));
+
+  format %{ "POPCNT $dst, $mem" %}
+  ins_encode %{
+    __ popcntl($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Note: Long.bitCount(long) returns an int.
+instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL src));
+  effect(KILL cr, TEMP tmp, TEMP dst);
+
+  format %{ "POPCNT $dst, $src.lo\n\t"
+            "POPCNT $tmp, $src.hi\n\t"
+            "ADD    $dst, $tmp" %}
+  ins_encode %{
+    __ popcntl($dst$$Register, $src$$Register);
+    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
+    __ addl($dst$$Register, $tmp$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Note: Long.bitCount(long) returns an int.
+instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL (LoadL mem)));
+  effect(KILL cr, TEMP tmp, TEMP dst);
+
+  format %{ "POPCNT $dst, $mem\n\t"
+            "POPCNT $tmp, $mem+4\n\t"
+            "ADD    $dst, $tmp" %}
+  ins_encode %{
+    //__ popcntl($dst$$Register, $mem$$Address$$first);
+    //__ popcntl($tmp$$Register, $mem$$Address$$second);
+    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false));
+    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false));
+    __ addl($dst$$Register, $tmp$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+
 //----------Load/Store/Move Instructions---------------------------------------
 //----------Load Instructions--------------------------------------------------
 // Load Byte (8bit signed)
@@ -12501,15 +12555,12 @@
   effect( KILL rcx, KILL cr );
 
   ins_cost(1100);  // slightly larger than the next version
-  format %{ "CMPL   EAX,ESI\n\t"
-            "JEQ,s  hit\n\t"
-            "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
+  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
             "MOV    ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
             "ADD    EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
-     "hit:\n\t"
             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
      "miss:\t" %}
 
@@ -12523,9 +12574,7 @@
   effect( KILL rcx, KILL result );
 
   ins_cost(1000);
-  format %{ "CMPL   EAX,ESI\n\t"
-            "JEQ,s  miss\t# Actually a hit; we are done.\n\t"
-            "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
+  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
             "MOV    ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
             "ADD    EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
--- a/src/cpu/x86/vm/x86_64.ad	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/cpu/x86/vm/x86_64.ad	Fri Mar 27 16:54:56 2009 -0700
@@ -326,7 +326,6 @@
                          R9,  R9_H,
                          R10, R10_H,
                          R11, R11_H,
-                         R12, R12_H,
                          R13, R13_H,
                          R14, R14_H);
 
@@ -340,7 +339,6 @@
                          R9,  R9_H,
                          R10, R10_H,
                          R11, R11_H,
-                         R12, R12_H,
                          R13, R13_H,
                          R14, R14_H);
 
@@ -354,7 +352,6 @@
                              R9,  R9_H,
                              R10, R10_H,
                              R11, R11_H,
-                             R12, R12_H,
                              R13, R13_H,
                              R14, R14_H);
 
@@ -444,9 +441,6 @@
 // Singleton class for RDX long register
 reg_class long_rdx_reg(RDX, RDX_H);
 
-// Singleton class for R12 long register
-reg_class long_r12_reg(R12, R12_H);
-
 // Class for all int registers (except RSP)
 reg_class int_reg(RAX,
                   RDX,
@@ -1842,7 +1836,9 @@
 {
   if (UseCompressedOops) {
     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes());
-    st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
+    if (Universe::narrow_oop_shift() != 0) {
+      st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
+    }
     st->print_cr("cmpq    rax, rscratch1\t # Inline cache check");
   } else {
     st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
@@ -1891,7 +1887,11 @@
 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 {
   if (UseCompressedOops) {
-    return OptoBreakpoint ? 19 : 20;
+    if (Universe::narrow_oop_shift() == 0) {
+      return OptoBreakpoint ? 15 : 16;
+    } else {
+      return OptoBreakpoint ? 19 : 20;
+    }
   } else {
     return OptoBreakpoint ? 11 : 12;
   }
@@ -2575,45 +2575,13 @@
     Register Rrax = as_Register(RAX_enc); // super class
     Register Rrcx = as_Register(RCX_enc); // killed
     Register Rrsi = as_Register(RSI_enc); // sub class
-    Label hit, miss, cmiss;
+    Label miss;
+    const bool set_cond_codes = true;
 
     MacroAssembler _masm(&cbuf);
-    // Compare super with sub directly, since super is not in its own SSA.
-    // The compiler used to emit this test, but we fold it in here,
-    // to allow platform-specific tweaking on sparc.
-    __ cmpptr(Rrax, Rrsi);
-    __ jcc(Assembler::equal, hit);
-#ifndef PRODUCT
-    __ lea(Rrcx, ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr));
-    __ incrementl(Address(Rrcx, 0));
-#endif //PRODUCT
-    __ movptr(Rrdi, Address(Rrsi, 
-                          sizeof(oopDesc) + 
-                          Klass::secondary_supers_offset_in_bytes()));
-    __ movl(Rrcx, Address(Rrdi, arrayOopDesc::length_offset_in_bytes()));
-    __ addptr(Rrdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
-    if (UseCompressedOops) {
-      __ encode_heap_oop(Rrax);
-      __ repne_scanl();
-      __ jcc(Assembler::notEqual, cmiss);
-      __ decode_heap_oop(Rrax);
-      __ movptr(Address(Rrsi,
-                      sizeof(oopDesc) +
-                      Klass::secondary_super_cache_offset_in_bytes()),
-              Rrax);
-      __ jmp(hit);
-      __ bind(cmiss);
-      __ decode_heap_oop(Rrax);
-      __ jmp(miss);
-    } else {
-      __ repne_scan();
-      __ jcc(Assembler::notEqual, miss);
-      __ movptr(Address(Rrsi,
-                      sizeof(oopDesc) +
-                      Klass::secondary_super_cache_offset_in_bytes()),
-              Rrax);
-    }
-    __ bind(hit);
+    __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
+                                     NULL, &miss,
+                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ xorptr(Rrdi, Rrdi);
     }
@@ -4906,15 +4874,6 @@
   interface(REG_INTER);
 %}
 
-
-operand r12RegL() %{
-  constraint(ALLOC_IN_RC(long_r12_reg));
-  match(RegL);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
 operand rRegN() %{
   constraint(ALLOC_IN_RC(int_reg));
   match(RegN);
@@ -5289,21 +5248,6 @@
   %}
 %}
 
-// Indirect Narrow Oop Plus Offset Operand
-operand indNarrowOopOffset(rRegN src, immL32 off) %{
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeN src) off);
-
-  op_cost(10);
-  format %{"[R12 + $src << 3 + $off] (compressed oop addressing)" %}
-  interface(MEMORY_INTER) %{
-    base(0xc); // R12
-    index($src);
-    scale(0x3);
-    disp($off);
-  %}
-%}
-
 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 %{
@@ -5321,6 +5265,158 @@
   %}
 %}
 
+// Indirect Narrow Oop Plus Offset Operand
+// Note: x86 architecture doesn't support "scale * index + offset" without a base
+// we can't free r12 even with Universe::narrow_oop_base() == NULL.
+operand indCompressedOopOffset(rRegN reg, immL32 off) %{
+  predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+
+  op_cost(10);
+  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
+  interface(MEMORY_INTER) %{
+    base(0xc); // R12
+    index($reg);
+    scale(0x3);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Operand
+operand indirectNarrow(rRegN reg)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(DecodeN reg);
+
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x4);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+// Indirect Memory Plus Short Offset Operand
+operand indOffset8Narrow(rRegN reg, immL8 off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+
+  format %{ "[$reg + $off (8-bit)]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x4);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Plus Long Offset Operand
+operand indOffset32Narrow(rRegN reg, immL32 off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+
+  format %{ "[$reg + $off (32-bit)]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x4);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Plus Index Register Plus Offset Operand
+operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP (DecodeN reg) lreg) off);
+
+  op_cost(10);
+  format %{"[$reg + $off + $lreg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Plus Index Register Plus Offset Operand
+operand indIndexNarrow(rRegN reg, rRegL lreg)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) lreg);
+
+  op_cost(10);
+  format %{"[$reg + $lreg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+// Indirect Memory Times Scale Plus Index Register
+operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) (LShiftL lreg scale));
+
+  op_cost(10);
+  format %{"[$reg + $lreg << $scale]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp(0x0);
+  %}
+%}
+
+// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
+operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
+
+  op_cost(10);
+  format %{"[$reg + $off + $lreg << $scale]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
+operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
+  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
+
+  op_cost(10);
+  format %{"[$reg + $off + $idx << $scale]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($idx);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+
 //----------Special Memory Operands--------------------------------------------
 // Stack Slot Operand - This operand is used for loading and storing temporary
 //                      values on the stack where a match requires a value to
@@ -5488,7 +5584,10 @@
 
 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
-               indNarrowOopOffset);
+               indCompressedOopOffset,
+               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
+               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
+               indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
 
 //----------PIPELINE-----------------------------------------------------------
 // Rules which define the behavior of the target architectures pipeline.
@@ -6234,9 +6333,7 @@
    ins_cost(125); // XXX
    format %{ "movl    $dst, $mem\t# compressed ptr" %}
    ins_encode %{
-     Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-     Register dst = as_Register($dst$$reg);
-     __ movl(dst, addr);
+     __ movl($dst$$Register, $mem$$Address);
    %}
    ins_pipe(ialu_reg_mem); // XXX
 %}
@@ -6262,9 +6359,7 @@
   ins_cost(125); // XXX
   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
   ins_encode %{
-    Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-    Register dst = as_Register($dst$$reg);
-    __ movl(dst, addr);
+    __ movl($dst$$Register, $mem$$Address);
   %}
   ins_pipe(ialu_reg_mem); // XXX
 %}
@@ -6418,6 +6513,102 @@
   ins_pipe(ialu_reg_reg_fat);
 %}
 
+instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
+%{
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+// Load Effective Address which uses Narrow (32-bits) oop
+instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst mem);
+
+  ins_cost(110); // XXX
+  format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
 instruct loadConI(rRegI dst, immI src)
 %{
   match(Set dst src);
@@ -6528,8 +6719,7 @@
   effect(KILL cr);
   format %{ "xorq    $dst, $src\t# compressed NULL ptr" %}
   ins_encode %{
-    Register dst = $dst$$Register;
-    __ xorq(dst, dst);
+    __ xorq($dst$$Register, $dst$$Register);
   %}
   ins_pipe(ialu_reg);
 %}
@@ -6541,11 +6731,10 @@
   format %{ "movl    $dst, $src\t# compressed ptr" %}
   ins_encode %{
     address con = (address)$src$$constant;
-    Register dst = $dst$$Register;
     if (con == NULL) {
       ShouldNotReachHere();
     } else {
-      __ set_narrow_oop(dst, (jobject)$src$$constant);
+      __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
     }
   %}
   ins_pipe(ialu_reg_fat); // XXX
@@ -6794,12 +6983,25 @@
   ins_pipe(ialu_mem_reg);
 %}
 
+instruct storeImmP0(memory mem, immP0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreP mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movq($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 // Store NULL Pointer, mark word, or other simple pointer constant.
 instruct storeImmP(memory mem, immP31 src)
 %{
   match(Set mem (StoreP mem src));
 
-  ins_cost(125); // XXX
+  ins_cost(150); // XXX
   format %{ "movq    $mem, $src\t# ptr" %}
   opcode(0xC7); /* C7 /0 */
   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
@@ -6814,14 +7016,55 @@
   ins_cost(125); // XXX
   format %{ "movl    $mem, $src\t# compressed ptr" %}
   ins_encode %{
-    Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-    Register src = as_Register($src$$reg);
-    __ movl(addr, src);
+    __ movl($mem$$Address, $src$$Register);
   %}
   ins_pipe(ialu_mem_reg);
 %}
 
+instruct storeImmN0(memory mem, immN0 zero)
+%{
+  predicate(Universe::narrow_oop_base() == NULL);
+  match(Set mem (StoreN mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movl($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
+instruct storeImmN(memory mem, immN src)
+%{
+  match(Set mem (StoreN mem src));
+
+  ins_cost(150); // XXX
+  format %{ "movl    $mem, $src\t# compressed ptr" %}
+  ins_encode %{
+    address con = (address)$src$$constant;
+    if (con == NULL) {
+      __ movl($mem$$Address, (int32_t)0);
+    } else {
+      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
+    }
+  %}
+  ins_pipe(ialu_mem_imm);
+%}
+
 // Store Integer Immediate
+instruct storeImmI0(memory mem, immI0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreI mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movl($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeImmI(memory mem, immI src)
 %{
   match(Set mem (StoreI mem src));
@@ -6834,6 +7077,19 @@
 %}
 
 // Store Long Immediate
+instruct storeImmL0(memory mem, immL0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreL mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movq($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeImmL(memory mem, immL32 src)
 %{
   match(Set mem (StoreL mem src));
@@ -6846,6 +7102,19 @@
 %}
 
 // Store Short/Char Immediate
+instruct storeImmC0(memory mem, immI0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreC mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movw($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeImmI16(memory mem, immI16 src)
 %{
   predicate(UseStoreImmI16);
@@ -6859,6 +7128,19 @@
 %}
 
 // Store Byte Immediate
+instruct storeImmB0(memory mem, immI0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreB mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movb($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeImmB(memory mem, immI8 src)
 %{
   match(Set mem (StoreB mem src));
@@ -6898,6 +7180,19 @@
 %}
 
 // Store CMS card-mark Immediate
+instruct storeImmCM0_reg(memory mem, immI0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreCM mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movb($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeImmCM0(memory mem, immI0 src)
 %{
   match(Set mem (StoreCM mem src));
@@ -6931,6 +7226,19 @@
 %}
 
 // Store immediate Float value (it is faster than store from XMM register)
+instruct storeF0(memory mem, immF0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreF mem zero));
+
+  ins_cost(25); // XXX
+  format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movl($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeF_imm(memory mem, immF src)
 %{
   match(Set mem (StoreF mem src));
@@ -6957,6 +7265,7 @@
 // Store immediate double 0.0 (it is faster than store from XMM register)
 instruct storeD0_imm(memory mem, immD0 src)
 %{
+  predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
   match(Set mem (StoreD mem src));
 
   ins_cost(50);
@@ -6966,6 +7275,19 @@
   ins_pipe(ialu_mem_imm);
 %}
 
+instruct storeD0(memory mem, immD0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreD mem zero));
+
+  ins_cost(25); // XXX
+  format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movq($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeSSI(stackSlotI dst, rRegI src)
 %{
   match(Set dst src);
@@ -7077,6 +7399,56 @@
   ins_pipe( ialu_mem_reg );
 %}
 
+
+//---------- Population Count Instructions -------------------------------------
+
+instruct popCountI(rRegI dst, rRegI src) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI src));
+
+  format %{ "popcnt  $dst, $src" %}
+  ins_encode %{
+    __ popcntl($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct popCountI_mem(rRegI dst, memory mem) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI (LoadI mem)));
+
+  format %{ "popcnt  $dst, $mem" %}
+  ins_encode %{
+    __ popcntl($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Note: Long.bitCount(long) returns an int.
+instruct popCountL(rRegI dst, rRegL src) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL src));
+
+  format %{ "popcnt  $dst, $src" %}
+  ins_encode %{
+    __ popcntq($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Note: Long.bitCount(long) returns an int.
+instruct popCountL_mem(rRegI dst, memory mem) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL (LoadL mem)));
+
+  format %{ "popcnt  $dst, $mem" %}
+  ins_encode %{
+    __ popcntq($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+
 //----------MemBar Instructions-----------------------------------------------
 // Memory barrier flavors
 
@@ -7192,9 +7564,7 @@
   effect(KILL cr);
   format %{ "encode_heap_oop_not_null $dst,$src" %}
   ins_encode %{
-    Register s = $src$$Register;
-    Register d = $dst$$Register;
-    __ encode_heap_oop_not_null(d, s);
+    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
   %}
   ins_pipe(ialu_reg_long);
 %}
@@ -7224,7 +7594,11 @@
   ins_encode %{
     Register s = $src$$Register;
     Register d = $dst$$Register;
-    __ decode_heap_oop_not_null(d, s);
+    if (s != d) {
+      __ decode_heap_oop_not_null(d, s);
+    } else {
+      __ decode_heap_oop_not_null(d);
+    }
   %}
   ins_pipe(ialu_reg_long);
 %}
@@ -11389,8 +11763,9 @@
 
 // This will generate a signed flags result. This should be OK since
 // any compare to a zero should be eq/neq.
-instruct testP_reg_mem(rFlagsReg cr, memory op, immP0 zero)
-%{
+instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
+%{
+  predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
   match(Set cr (CmpP (LoadP op) zero));
 
   ins_cost(500); // XXX
@@ -11401,13 +11776,24 @@
   ins_pipe(ialu_cr_reg_imm);
 %}
 
+instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set cr (CmpP (LoadP mem) zero));
+
+  format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
+  ins_encode %{
+    __ cmpq(r12, $mem$$Address);
+  %}
+  ins_pipe(ialu_cr_reg_mem);
+%}
 
 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
 %{
   match(Set cr (CmpN op1 op2));
 
   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
-  ins_encode %{ __ cmpl(as_Register($op1$$reg), as_Register($op2$$reg)); %}
+  ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
   ins_pipe(ialu_cr_reg_reg);
 %}
 
@@ -11415,11 +11801,30 @@
 %{
   match(Set cr (CmpN src (LoadN mem)));
 
-  ins_cost(500); // XXX
-  format %{ "cmpl    $src, mem\t# compressed ptr" %}
+  format %{ "cmpl    $src, $mem\t# compressed ptr" %}
   ins_encode %{
-    Address adr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-    __ cmpl(as_Register($src$$reg), adr);
+    __ cmpl($src$$Register, $mem$$Address);
+  %}
+  ins_pipe(ialu_cr_reg_mem);
+%}
+
+instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
+  match(Set cr (CmpN op1 op2));
+
+  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
+  ins_encode %{
+    __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
+  %}
+  ins_pipe(ialu_cr_reg_imm);
+%}
+
+instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
+%{
+  match(Set cr (CmpN src (LoadN mem)));
+
+  format %{ "cmpl    $mem, $src\t# compressed ptr" %}
+  ins_encode %{
+    __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
   %}
   ins_pipe(ialu_cr_reg_mem);
 %}
@@ -11432,15 +11837,27 @@
   ins_pipe(ialu_cr_reg_imm);
 %}
 
-instruct testN_reg_mem(rFlagsReg cr, memory mem, immN0 zero)
-%{
+instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
+%{
+  predicate(Universe::narrow_oop_base() != NULL);
   match(Set cr (CmpN (LoadN mem) zero));
 
   ins_cost(500); // XXX
   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
   ins_encode %{
-    Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-    __ cmpl(addr, (int)0xFFFFFFFF);
+    __ cmpl($mem$$Address, (int)0xFFFFFFFF);
+  %}
+  ins_pipe(ialu_cr_reg_mem);
+%}
+
+instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
+%{
+  predicate(Universe::narrow_oop_base() == NULL);
+  match(Set cr (CmpN (LoadN mem) zero));
+
+  format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
+  ins_encode %{
+    __ cmpl(r12, $mem$$Address);
   %}
   ins_pipe(ialu_cr_reg_mem);
 %}
@@ -11472,7 +11889,6 @@
 %{
   match(Set cr (CmpL op1 (LoadL op2)));
 
-  ins_cost(500); // XXX
   format %{ "cmpq    $op1, $op2" %}
   opcode(0x3B); /* Opcode 3B /r */
   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
@@ -11733,15 +12149,12 @@
   effect(KILL rcx, KILL cr);
 
   ins_cost(1100);  // slightly larger than the next version
-  format %{ "cmpq    rax, rsi\n\t"
-            "jeq,s   hit\n\t"
-            "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
+  format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
             "addq    rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
-    "hit:\n\t"
             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
     "miss:\t" %}
 
@@ -11756,13 +12169,10 @@
                                      rdi_RegP result)
 %{
   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
-  predicate(!UseCompressedOops); // decoding oop kills condition codes
   effect(KILL rcx, KILL result);
 
   ins_cost(1000);
-  format %{ "cmpq    rax, rsi\n\t"
-            "jeq,s   miss\t# Actually a hit; we are done.\n\t"
-            "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
+  format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
             "addq    rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
--- a/src/os/linux/vm/os_linux.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/os/linux/vm/os_linux.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -2269,15 +2269,16 @@
 //       All it does is to check if there are enough free pages
 //       left at the time of mmap(). This could be a potential
 //       problem.
-bool os::commit_memory(char* addr, size_t size) {
-  uintptr_t res = (uintptr_t) ::mmap(addr, size,
-                                   PROT_READ|PROT_WRITE|PROT_EXEC,
+bool os::commit_memory(char* addr, size_t size, bool exec) {
+  int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
+  uintptr_t res = (uintptr_t) ::mmap(addr, size, prot,
                                    MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
   return res != (uintptr_t) MAP_FAILED;
 }
 
-bool os::commit_memory(char* addr, size_t size, size_t alignment_hint) {
-  return commit_memory(addr, size);
+bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
+                       bool exec) {
+  return commit_memory(addr, size, exec);
 }
 
 void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
@@ -2417,8 +2418,7 @@
 unsigned long* os::Linux::_numa_all_nodes;
 
 bool os::uncommit_memory(char* addr, size_t size) {
-  return ::mmap(addr, size,
-                PROT_READ|PROT_WRITE|PROT_EXEC,
+  return ::mmap(addr, size, PROT_NONE,
                 MAP_PRIVATE|MAP_FIXED|MAP_NORESERVE|MAP_ANONYMOUS, -1, 0)
     != MAP_FAILED;
 }
@@ -2441,7 +2441,9 @@
     flags |= MAP_FIXED;
   }
 
-  addr = (char*)::mmap(requested_addr, bytes, PROT_READ|PROT_WRITE|PROT_EXEC,
+  // Map uncommitted pages PROT_READ and PROT_WRITE, change access
+  // to PROT_EXEC if executable when we commit the page.
+  addr = (char*)::mmap(requested_addr, bytes, PROT_READ|PROT_WRITE,
                        flags, -1, 0);
 
   if (addr != MAP_FAILED) {
@@ -2582,7 +2584,9 @@
 #define SHM_HUGETLB 04000
 #endif
 
-char* os::reserve_memory_special(size_t bytes) {
+char* os::reserve_memory_special(size_t bytes, char* req_addr, bool exec) {
+  // "exec" is passed in but not used.  Creating the shared image for
+  // the code cache doesn't have an SHM_X executable permission to check.
   assert(UseLargePages, "only for large pages");
 
   key_t key = IPC_PRIVATE;
--- a/src/os/solaris/dtrace/generateJvmOffsets.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/os/solaris/dtrace/generateJvmOffsets.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -249,6 +249,10 @@
 
   printf("\n");
 
+  GEN_OFFS(NarrowOopStruct, _base);
+  GEN_OFFS(NarrowOopStruct, _shift);
+  printf("\n");
+
   GEN_VALUE(SIZE_HeapBlockHeader, sizeof(HeapBlock::Header));
   GEN_SIZE(oopDesc);
   GEN_SIZE(constantPoolOopDesc);
--- a/src/os/solaris/dtrace/jhelper.d	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/os/solaris/dtrace/jhelper.d	Fri Mar 27 16:54:56 2009 -0700
@@ -46,7 +46,10 @@
 extern pointer __1cJCodeCacheF_heap_;
 extern pointer __1cIUniverseP_methodKlassObj_;
 extern pointer __1cIUniverseO_collectedHeap_;
-extern pointer __1cIUniverseK_heap_base_;
+extern pointer __1cIUniverseL_narrow_oop_;
+#ifdef _LP64
+extern pointer UseCompressedOops;
+#endif
 
 extern pointer __1cHnmethodG__vtbl_;
 extern pointer __1cKBufferBlobG__vtbl_;
@@ -56,6 +59,7 @@
 #define copyin_uint16(ADDR) *(uint16_t*) copyin((pointer) (ADDR), sizeof(uint16_t))
 #define copyin_uint32(ADDR) *(uint32_t*) copyin((pointer) (ADDR), sizeof(uint32_t))
 #define copyin_int32(ADDR)  *(int32_t*)  copyin((pointer) (ADDR), sizeof(int32_t))
+#define copyin_uint8(ADDR)  *(uint8_t*)  copyin((pointer) (ADDR), sizeof(uint8_t))
 
 #define SAME(x) x
 #define copyin_offset(JVM_CONST)  JVM_CONST = \
@@ -132,6 +136,9 @@
   copyin_offset(SIZE_oopDesc);
   copyin_offset(SIZE_constantPoolOopDesc);
 
+  copyin_offset(OFFSET_NarrowOopStruct_base);
+  copyin_offset(OFFSET_NarrowOopStruct_shift);
+
   /*
    * The PC to translate is in arg0.
    */
@@ -151,9 +158,19 @@
 
   this->Universe_methodKlassOop = copyin_ptr(&``__1cIUniverseP_methodKlassObj_);
   this->CodeCache_heap_address = copyin_ptr(&``__1cJCodeCacheF_heap_);
-  this->Universe_heap_base = copyin_ptr(&``__1cIUniverseK_heap_base_);
 
   /* Reading volatile values */
+#ifdef _LP64
+  this->Use_Compressed_Oops  = copyin_uint8(&``UseCompressedOops);
+#else
+  this->Use_Compressed_Oops  = 0;
+#endif
+
+  this->Universe_narrow_oop_base  = copyin_ptr(&``__1cIUniverseL_narrow_oop_ +
+                                               OFFSET_NarrowOopStruct_base);
+  this->Universe_narrow_oop_shift = copyin_int32(&``__1cIUniverseL_narrow_oop_ +
+                                                 OFFSET_NarrowOopStruct_shift);
+
   this->CodeCache_low = copyin_ptr(this->CodeCache_heap_address + 
       OFFSET_CodeHeap_memory + OFFSET_VirtualSpace_low);
 
@@ -295,7 +312,7 @@
 
 dtrace:helper:ustack:
 /!this->done && this->vtbl == this->BufferBlob_vtbl &&
-this->Universe_heap_base == NULL &&
+this->Use_Compressed_Oops == 0 &&
 this->methodOopPtr > this->heap_start && this->methodOopPtr < this->heap_end/
 {
   MARK_LINE;
@@ -306,7 +323,7 @@
 
 dtrace:helper:ustack:
 /!this->done && this->vtbl == this->BufferBlob_vtbl &&
-this->Universe_heap_base != NULL &&
+this->Use_Compressed_Oops != 0 &&
 this->methodOopPtr > this->heap_start && this->methodOopPtr < this->heap_end/
 {
   MARK_LINE;
@@ -314,8 +331,8 @@
    * Read compressed pointer and  decode heap oop, same as oop.inline.hpp
    */
   this->cklass = copyin_uint32(this->methodOopPtr + OFFSET_oopDesc_metadata);
-  this->klass = (uint64_t)((uintptr_t)this->Universe_heap_base +
-                ((uintptr_t)this->cklass << 3));
+  this->klass = (uint64_t)((uintptr_t)this->Universe_narrow_oop_base +
+                ((uintptr_t)this->cklass << this->Universe_narrow_oop_shift));
   this->methodOop = this->klass == this->Universe_methodKlassOop;
   this->done = !this->methodOop;
 }
--- a/src/os/solaris/dtrace/libjvm_db.c	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/os/solaris/dtrace/libjvm_db.c	Fri Mar 27 16:54:56 2009 -0700
@@ -146,13 +146,17 @@
   uint64_t BufferBlob_vtbl;
   uint64_t RuntimeStub_vtbl;
 
+  uint64_t Use_Compressed_Oops_address;
   uint64_t Universe_methodKlassObj_address;
+  uint64_t Universe_narrow_oop_base_address;
+  uint64_t Universe_narrow_oop_shift_address;
   uint64_t CodeCache_heap_address;
-  uint64_t Universe_heap_base_address;
 
   /* Volatiles */
+  uint8_t  Use_Compressed_Oops;
   uint64_t Universe_methodKlassObj;
-  uint64_t Universe_heap_base;
+  uint64_t Universe_narrow_oop_base;
+  uint32_t Universe_narrow_oop_shift;
   uint64_t CodeCache_low;
   uint64_t CodeCache_high;
   uint64_t CodeCache_segmap_low;
@@ -279,8 +283,11 @@
       if (strcmp("_methodKlassObj", vmp->fieldName) == 0) {
         J->Universe_methodKlassObj_address = vmp->address;
       }
-      if (strcmp("_heap_base", vmp->fieldName) == 0) {
-        J->Universe_heap_base_address = vmp->address;
+      if (strcmp("_narrow_oop._base", vmp->fieldName) == 0) {
+        J->Universe_narrow_oop_base_address = vmp->address;
+      }
+      if (strcmp("_narrow_oop._shift", vmp->fieldName) == 0) {
+        J->Universe_narrow_oop_shift_address = vmp->address;
       }
     }
     CHECK_FAIL(err);
@@ -298,14 +305,39 @@
   return -1;
 }
 
+static int find_symbol(jvm_agent_t* J, const char *name, uint64_t* valuep) {
+  psaddr_t sym_addr;
+  int err;
+
+  err = ps_pglobal_lookup(J->P, LIBJVM_SO, name, &sym_addr);
+  if (err != PS_OK) goto fail;
+  *valuep = sym_addr;
+  return PS_OK;
+
+ fail:
+  return err;
+}
+
 static int read_volatiles(jvm_agent_t* J) {
   uint64_t ptr;
   int err;
 
+  err = find_symbol(J, "UseCompressedOops", &J->Use_Compressed_Oops_address);
+  if (err == PS_OK) {
+    err = ps_pread(J->P,  J->Use_Compressed_Oops_address, &J->Use_Compressed_Oops, sizeof(uint8_t));
+    CHECK_FAIL(err);
+  } else {
+    J->Use_Compressed_Oops = 0;
+  }
+
   err = read_pointer(J, J->Universe_methodKlassObj_address, &J->Universe_methodKlassObj);
   CHECK_FAIL(err);
-  err = read_pointer(J, J->Universe_heap_base_address, &J->Universe_heap_base);
+
+  err = read_pointer(J, J->Universe_narrow_oop_base_address, &J->Universe_narrow_oop_base);
   CHECK_FAIL(err);
+  err = ps_pread(J->P,  J->Universe_narrow_oop_shift_address, &J->Universe_narrow_oop_shift, sizeof(uint32_t));
+  CHECK_FAIL(err);
+
   err = read_pointer(J, J->CodeCache_heap_address + OFFSET_CodeHeap_memory +
                      OFFSET_VirtualSpace_low, &J->CodeCache_low);
   CHECK_FAIL(err);
@@ -374,19 +406,6 @@
   return -1;
 }
 
-static int find_symbol(jvm_agent_t* J, const char *name, uint64_t* valuep) {
-  psaddr_t sym_addr;
-  int err;
-
-  err = ps_pglobal_lookup(J->P, LIBJVM_SO, name, &sym_addr);
-  if (err != PS_OK) goto fail;
-  *valuep = sym_addr;
-  return PS_OK;
-
- fail:
-  return err;
-}
-
 static int find_jlong_constant(jvm_agent_t* J, const char *name, uint64_t* valuep) {
   psaddr_t sym_addr;
   int err = ps_pglobal_lookup(J->P, LIBJVM_SO, name, &sym_addr);
@@ -458,14 +477,14 @@
 static int is_methodOop(jvm_agent_t* J, uint64_t methodOopPtr) {
   uint64_t klass;
   int err;
-  // If heap_base is nonnull, this was a compressed oop.
-  if (J->Universe_heap_base != NULL) {
+  // If UseCompressedOops, this was a compressed oop.
+  if (J->Use_Compressed_Oops != 0) {
     uint32_t cklass;
     err = read_compressed_pointer(J, methodOopPtr + OFFSET_oopDesc_metadata,
           &cklass);
     // decode heap oop, same as oop.inline.hpp
-    klass = (uint64_t)((uintptr_t)J->Universe_heap_base +
-            ((uintptr_t)cklass << 3));
+    klass = (uint64_t)((uintptr_t)J->Universe_narrow_oop_base +
+            ((uintptr_t)cklass << J->Universe_narrow_oop_shift));
   } else {
     err = read_pointer(J, methodOopPtr + OFFSET_oopDesc_metadata, &klass);
   }
--- a/src/os/solaris/vm/os_solaris.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/os/solaris/vm/os_solaris.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -2623,15 +2623,16 @@
   return page_size;
 }
 
-bool os::commit_memory(char* addr, size_t bytes) {
+bool os::commit_memory(char* addr, size_t bytes, bool exec) {
+  int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
   size_t size = bytes;
   return
-     NULL != Solaris::mmap_chunk(addr, size, MAP_PRIVATE|MAP_FIXED,
-                                 PROT_READ | PROT_WRITE | PROT_EXEC);
-}
-
-bool os::commit_memory(char* addr, size_t bytes, size_t alignment_hint) {
-  if (commit_memory(addr, bytes)) {
+     NULL != Solaris::mmap_chunk(addr, size, MAP_PRIVATE|MAP_FIXED, prot);
+}
+
+bool os::commit_memory(char* addr, size_t bytes, size_t alignment_hint,
+                       bool exec) {
+  if (commit_memory(addr, bytes, exec)) {
     if (UseMPSS && alignment_hint > (size_t)vm_page_size()) {
       // If the large page size has been set and the VM
       // is using large pages, use the large page size
@@ -3220,7 +3221,9 @@
   return true;
 }
 
-char* os::reserve_memory_special(size_t bytes) {
+char* os::reserve_memory_special(size_t bytes, char* addr, bool exec) {
+  // "exec" is passed in but not used.  Creating the shared image for
+  // the code cache doesn't have an SHM_X executable permission to check.
   assert(UseLargePages && UseISM, "only for ISM large pages");
 
   size_t size = bytes;
@@ -4451,6 +4454,9 @@
 int_fnP_thread_t os::Solaris::_thr_suspend_mutator;
 int_fnP_thread_t os::Solaris::_thr_continue_mutator;
 
+// (Static) wrapper for getisax(2) call.
+os::Solaris::getisax_func_t os::Solaris::_getisax = 0;
+
 // (Static) wrappers for the liblgrp API
 os::Solaris::lgrp_home_func_t os::Solaris::_lgrp_home;
 os::Solaris::lgrp_init_func_t os::Solaris::_lgrp_init;
@@ -4465,16 +4471,19 @@
 // (Static) wrapper for meminfo() call.
 os::Solaris::meminfo_func_t os::Solaris::_meminfo = 0;
 
-static address resolve_symbol(const char *name) {
-  address addr;
-
-  addr = (address) dlsym(RTLD_DEFAULT, name);
+static address resolve_symbol_lazy(const char* name) {
+  address addr = (address) dlsym(RTLD_DEFAULT, name);
   if(addr == NULL) {
     // RTLD_DEFAULT was not defined on some early versions of 2.5.1
     addr = (address) dlsym(RTLD_NEXT, name);
-    if(addr == NULL) {
-      fatal(dlerror());
-    }
+  }
+  return addr;
+}
+
+static address resolve_symbol(const char* name) {
+  address addr = resolve_symbol_lazy(name);
+  if(addr == NULL) {
+    fatal(dlerror());
   }
   return addr;
 }
@@ -4673,15 +4682,26 @@
 }
 
 void os::Solaris::misc_sym_init() {
-  address func = (address)dlsym(RTLD_DEFAULT, "meminfo");
-  if(func == NULL) {
-    func = (address) dlsym(RTLD_NEXT, "meminfo");
-  }
+  address func;
+
+  // getisax
+  func = resolve_symbol_lazy("getisax");
+  if (func != NULL) {
+    os::Solaris::_getisax = CAST_TO_FN_PTR(getisax_func_t, func);
+  }
+
+  // meminfo
+  func = resolve_symbol_lazy("meminfo");
   if (func != NULL) {
     os::Solaris::set_meminfo(CAST_TO_FN_PTR(meminfo_func_t, func));
   }
 }
 
+uint_t os::Solaris::getisax(uint32_t* array, uint_t n) {
+  assert(_getisax != NULL, "_getisax not set");
+  return _getisax(array, n);
+}
+
 // Symbol doesn't exist in Solaris 8 pset.h
 #ifndef PS_MYID
 #define PS_MYID -3
@@ -4716,6 +4736,10 @@
 
   Solaris::initialize_system_info();
 
+  // Initialize misc. symbols as soon as possible, so we can use them
+  // if we need them.
+  Solaris::misc_sym_init();
+
   int fd = open("/dev/zero", O_RDWR);
   if (fd < 0) {
     fatal1("os::init: cannot open /dev/zero (%s)", strerror(errno));
@@ -4857,7 +4881,6 @@
     }
   }
 
-  Solaris::misc_sym_init();
   Solaris::signal_sets_init();
   Solaris::init_signal_mem();
   Solaris::install_signal_handlers();
--- a/src/os/solaris/vm/os_solaris.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/os/solaris/vm/os_solaris.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -72,6 +72,8 @@
         LGRP_VIEW_OS            /* what's available to operating system */
   } lgrp_view_t;
 
+  typedef uint_t (*getisax_func_t)(uint32_t* array, uint_t n);
+
   typedef lgrp_id_t (*lgrp_home_func_t)(idtype_t idtype, id_t id);
   typedef lgrp_cookie_t (*lgrp_init_func_t)(lgrp_view_t view);
   typedef int (*lgrp_fini_func_t)(lgrp_cookie_t cookie);
@@ -87,6 +89,8 @@
                                 const uint_t  info_req[],  int info_count,
                                 uint64_t  outdata[], uint_t validity[]);
 
+  static getisax_func_t _getisax;
+
   static lgrp_home_func_t _lgrp_home;
   static lgrp_init_func_t _lgrp_init;
   static lgrp_fini_func_t _lgrp_fini;
@@ -283,6 +287,9 @@
   }
   static lgrp_cookie_t lgrp_cookie()                 { return _lgrp_cookie; }
 
+  static bool supports_getisax()                     { return _getisax != NULL; }
+  static uint_t getisax(uint32_t* array, uint_t n);
+
   static void set_meminfo(meminfo_func_t func)       { _meminfo = func; }
   static int meminfo (const uint64_t inaddr[],   int addr_count,
                      const uint_t  info_req[],  int info_count,
--- a/src/os/windows/vm/os_windows.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/os/windows/vm/os_windows.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -2189,7 +2189,8 @@
           if (addr > thread->stack_yellow_zone_base() && addr < thread->stack_base() ) {
                   addr = (address)((uintptr_t)addr &
                          (~((uintptr_t)os::vm_page_size() - (uintptr_t)1)));
-                  os::commit_memory( (char *)addr, thread->stack_base() - addr );
+                  os::commit_memory((char *)addr, thread->stack_base() - addr,
+                                    false );
                   return EXCEPTION_CONTINUE_EXECUTION;
           }
           else
@@ -2565,8 +2566,7 @@
   assert((size_t)addr % os::vm_allocation_granularity() == 0,
          "reserve alignment");
   assert(bytes % os::vm_allocation_granularity() == 0, "reserve block size");
-  char* res = (char*)VirtualAlloc(addr, bytes, MEM_RESERVE,
-                                  PAGE_EXECUTE_READWRITE);
+  char* res = (char*)VirtualAlloc(addr, bytes, MEM_RESERVE, PAGE_READWRITE);
   assert(res == NULL || addr == NULL || addr == res,
          "Unexpected address from reserve.");
   return res;
@@ -2595,7 +2595,7 @@
   return true;
 }
 
-char* os::reserve_memory_special(size_t bytes) {
+char* os::reserve_memory_special(size_t bytes, char* addr, bool exec) {
 
   if (UseLargePagesIndividualAllocation) {
     if (TracePageSizes && Verbose) {
@@ -2615,10 +2615,10 @@
         "use -XX:-UseLargePagesIndividualAllocation to turn off");
       return NULL;
     }
-    p_buf = (char *) VirtualAlloc(NULL,
+    p_buf = (char *) VirtualAlloc(addr,
                                  size_of_reserve,  // size of Reserve
                                  MEM_RESERVE,
-                                 PAGE_EXECUTE_READWRITE);
+                                 PAGE_READWRITE);
     // If reservation failed, return NULL
     if (p_buf == NULL) return NULL;
 
@@ -2659,7 +2659,13 @@
         p_new = (char *) VirtualAlloc(next_alloc_addr,
                                     bytes_to_rq,
                                     MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
-                                    PAGE_EXECUTE_READWRITE);
+                                    PAGE_READWRITE);
+        if (p_new != NULL && exec) {
+          DWORD oldprot;
+          // Windows doc says to use VirtualProtect to get execute permissions
+          VirtualProtect(next_alloc_addr, bytes_to_rq,
+                         PAGE_EXECUTE_READWRITE, &oldprot);
+        }
       }
 
       if (p_new == NULL) {
@@ -2688,10 +2694,12 @@
   } else {
     // normal policy just allocate it all at once
     DWORD flag = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES;
-    char * res = (char *)VirtualAlloc(NULL,
-                                      bytes,
-                                      flag,
-                                      PAGE_EXECUTE_READWRITE);
+    char * res = (char *)VirtualAlloc(NULL, bytes, flag, PAGE_READWRITE);
+    if (res != NULL && exec) {
+      DWORD oldprot;
+      // Windows doc says to use VirtualProtect to get execute permissions
+      VirtualProtect(res, bytes, PAGE_EXECUTE_READWRITE, &oldprot);
+    }
     return res;
   }
 }
@@ -2703,7 +2711,7 @@
 void os::print_statistics() {
 }
 
-bool os::commit_memory(char* addr, size_t bytes) {
+bool os::commit_memory(char* addr, size_t bytes, bool exec) {
   if (bytes == 0) {
     // Don't bother the OS with noops.
     return true;
@@ -2712,11 +2720,19 @@
   assert(bytes % os::vm_page_size() == 0, "commit in page-sized chunks");
   // Don't attempt to print anything if the OS call fails. We're
   // probably low on resources, so the print itself may cause crashes.
-  return VirtualAlloc(addr, bytes, MEM_COMMIT, PAGE_EXECUTE_READWRITE) != NULL;
+  bool result = VirtualAlloc(addr, bytes, MEM_COMMIT, PAGE_READWRITE) != 0;
+  if (result != NULL && exec) {
+    DWORD oldprot;
+    // Windows doc says to use VirtualProtect to get execute permissions
+    return VirtualProtect(addr, bytes, PAGE_EXECUTE_READWRITE, &oldprot) != 0;
+  } else {
+    return result;
+  }
 }
 
-bool os::commit_memory(char* addr, size_t size, size_t alignment_hint) {
-  return commit_memory(addr, size);
+bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
+                       bool exec) {
+  return commit_memory(addr, size, exec);
 }
 
 bool os::uncommit_memory(char* addr, size_t bytes) {
@@ -2750,7 +2766,7 @@
 
   // Strange enough, but on Win32 one can change protection only for committed
   // memory, not a big deal anyway, as bytes less or equal than 64K
-  if (!is_committed && !commit_memory(addr, bytes)) {
+  if (!is_committed && !commit_memory(addr, bytes, prot == MEM_PROT_RWX)) {
     fatal("cannot commit protection page");
   }
   // One cannot use os::guard_memory() here, as on Win32 guard page
@@ -3248,10 +3264,10 @@
 #endif
 
   if (!UseMembar) {
-    address mem_serialize_page = (address)VirtualAlloc(NULL, os::vm_page_size(), MEM_RESERVE, PAGE_EXECUTE_READWRITE);
+    address mem_serialize_page = (address)VirtualAlloc(NULL, os::vm_page_size(), MEM_RESERVE, PAGE_READWRITE);
     guarantee( mem_serialize_page != NULL, "Reserve Failed for memory serialize page");
 
-    return_page  = (address)VirtualAlloc(mem_serialize_page, os::vm_page_size(), MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+    return_page  = (address)VirtualAlloc(mem_serialize_page, os::vm_page_size(), MEM_COMMIT, PAGE_READWRITE);
     guarantee( return_page != NULL, "Commit Failed for memory serialize page");
 
     os::set_memory_serialize_page( mem_serialize_page );
--- a/src/os_cpu/linux_sparc/vm/globals_linux_sparc.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/os_cpu/linux_sparc/vm/globals_linux_sparc.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -30,5 +30,7 @@
 define_pd_global(uintx, JVMInvokeMethodSlack,    12288);
 define_pd_global(intx, CompilerThreadStackSize,  0);
 
+// Only used on 64 bit platforms
+define_pd_global(uintx, HeapBaseMinAddress,      4*G);
 // Only used on 64 bit Windows platforms
 define_pd_global(bool, UseVectoredExceptions, false);
--- a/src/os_cpu/linux_x86/vm/globals_linux_x86.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/os_cpu/linux_x86/vm/globals_linux_x86.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -43,5 +43,7 @@
 
 define_pd_global(uintx, JVMInvokeMethodSlack,    8192);
 
+// Only used on 64 bit platforms
+define_pd_global(uintx, HeapBaseMinAddress,      2*G);
 // Only used on 64 bit Windows platforms
 define_pd_global(bool, UseVectoredExceptions,    false);
--- a/src/os_cpu/solaris_sparc/vm/globals_solaris_sparc.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/os_cpu/solaris_sparc/vm/globals_solaris_sparc.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -30,5 +30,9 @@
 define_pd_global(uintx, JVMInvokeMethodSlack,    12288);
 define_pd_global(intx, CompilerThreadStackSize,  0);
 
+// Only used on 64 bit platforms
+define_pd_global(uintx, HeapBaseMinAddress,      4*G);
 // Only used on 64 bit Windows platforms
 define_pd_global(bool, UseVectoredExceptions,    false);
+
+
--- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2006-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,58 +25,107 @@
 # include "incls/_precompiled.incl"
 # include "incls/_vm_version_solaris_sparc.cpp.incl"
 
+# include <sys/auxv.h>
+# include <sys/auxv_SPARC.h>
 # include <sys/systeminfo.h>
 
+// We need to keep these here as long as we have to build on Solaris
+// versions before 10.
+#ifndef SI_ARCHITECTURE_32
+#define SI_ARCHITECTURE_32      516     /* basic 32-bit SI_ARCHITECTURE */
+#endif
+
+#ifndef SI_ARCHITECTURE_64
+#define SI_ARCHITECTURE_64      517     /* basic 64-bit SI_ARCHITECTURE */
+#endif
+
+static void do_sysinfo(int si, const char* string, int* features, int mask) {
+  char   tmp;
+  size_t bufsize = sysinfo(si, &tmp, 1);
+
+  // All SI defines used below must be supported.
+  guarantee(bufsize != -1, "must be supported");
+
+  char* buf = (char*) malloc(bufsize);
+
+  if (buf == NULL)
+    return;
+
+  if (sysinfo(si, buf, bufsize) == bufsize) {
+    // Compare the string.
+    if (strcmp(buf, string) == 0) {
+      *features |= mask;
+    }
+  }
+
+  free(buf);
+}
+
 int VM_Version::platform_features(int features) {
-  // We determine what sort of hardware we have via sysinfo(SI_ISALIST, ...).
-  // This isn't the best of all possible ways because there's not enough
-  // detail in the isa list it returns, but it's a bit less arcane than
-  // generating assembly code and an illegal instruction handler.  We used
-  // to generate a getpsr trap, but that's even more arcane.
-  //
-  // Another possibility would be to use sysinfo(SI_PLATFORM, ...), but
-  // that would require more knowledge here than is wise.
+  // getisax(2), SI_ARCHITECTURE_32, and SI_ARCHITECTURE_64 are
+  // supported on Solaris 10 and later.
+  if (os::Solaris::supports_getisax()) {
+#ifndef PRODUCT
+    if (PrintMiscellaneous && Verbose)
+      tty->print_cr("getisax(2) supported.");
+#endif
 
-  // isalist spec via 'man isalist' as of 01-Aug-2001
+    // Check 32-bit architecture.
+    do_sysinfo(SI_ARCHITECTURE_32, "sparc", &features, v8_instructions_m);
 
-  char   tmp;
-  size_t bufsize  = sysinfo(SI_ISALIST, &tmp, 1);
-  char*  buf      = (char*)malloc(bufsize);
+    // Check 64-bit architecture.
+    do_sysinfo(SI_ARCHITECTURE_64, "sparcv9", &features, generic_v9_m);
 
-  if (buf != NULL) {
-    if (sysinfo(SI_ISALIST, buf, bufsize) == bufsize) {
-      // Figure out what kind of sparc we have
-      char *sparc_string = strstr(buf, "sparc");
-      if (sparc_string != NULL) {            features |= v8_instructions_m;
-        if (sparc_string[5] == 'v') {
-          if (sparc_string[6] == '8') {
-            if (sparc_string[7] == '-')      features |= hardware_int_muldiv_m;
-            else if (sparc_string[7] == 'p') features |= generic_v9_m;
-            else                      features |= generic_v8_m;
-          } else if (sparc_string[6] == '9') features |= generic_v9_m;
+    // Extract valid instruction set extensions.
+    uint_t av;
+    uint_t avn = os::Solaris::getisax(&av, 1);
+    assert(avn == 1, "should only return one av");
+
+    if (av & AV_SPARC_MUL32)  features |= hardware_mul32_m;
+    if (av & AV_SPARC_DIV32)  features |= hardware_div32_m;
+    if (av & AV_SPARC_FSMULD) features |= hardware_fsmuld_m;
+    if (av & AV_SPARC_V8PLUS) features |= v9_instructions_m;
+    if (av & AV_SPARC_POPC)   features |= hardware_popc_m;
+    if (av & AV_SPARC_VIS)    features |= vis1_instructions_m;
+    if (av & AV_SPARC_VIS2)   features |= vis2_instructions_m;
+  } else {
+    // getisax(2) failed, use the old legacy code.
+#ifndef PRODUCT
+    if (PrintMiscellaneous && Verbose)
+      tty->print_cr("getisax(2) not supported.");
+#endif
+
+    char   tmp;
+    size_t bufsize = sysinfo(SI_ISALIST, &tmp, 1);
+    char*  buf     = (char*) malloc(bufsize);
+
+    if (buf != NULL) {
+      if (sysinfo(SI_ISALIST, buf, bufsize) == bufsize) {
+        // Figure out what kind of sparc we have
+        char *sparc_string = strstr(buf, "sparc");
+        if (sparc_string != NULL) {              features |= v8_instructions_m;
+          if (sparc_string[5] == 'v') {
+            if (sparc_string[6] == '8') {
+              if (sparc_string[7] == '-') {      features |= hardware_mul32_m;
+                                                 features |= hardware_div32_m;
+              } else if (sparc_string[7] == 'p') features |= generic_v9_m;
+              else                               features |= generic_v8_m;
+            } else if (sparc_string[6] == '9')   features |= generic_v9_m;
+          }
+        }
+
+        // Check for visualization instructions
+        char *vis = strstr(buf, "vis");
+        if (vis != NULL) {                       features |= vis1_instructions_m;
+          if (vis[3] == '2')                     features |= vis2_instructions_m;
         }
       }
-
-      // Check for visualization instructions
-      char *vis = strstr(buf, "vis");
-      if (vis != NULL) {              features |= vis1_instructions_m;
-        if (vis[3] == '2')            features |= vis2_instructions_m;
-      }
+      free(buf);
     }
-    free(buf);
   }
 
-  bufsize = sysinfo(SI_MACHINE, &tmp, 1);
-  buf     = (char*)malloc(bufsize);
-
-  if (buf != NULL) {
-    if (sysinfo(SI_MACHINE, buf, bufsize) == bufsize) {
-      if (strstr(buf, "sun4v") != NULL) {
-        features |= sun4v_m;
-      }
-    }
-    free(buf);
-  }
+  // Determine the machine type.
+  do_sysinfo(SI_MACHINE, "sun4v", &features, sun4v_m);
 
   return features;
 }
--- a/src/os_cpu/solaris_x86/vm/globals_solaris_x86.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/os_cpu/solaris_x86/vm/globals_solaris_x86.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -46,5 +46,7 @@
 
 define_pd_global(intx, CompilerThreadStackSize,  0);
 
+// Only used on 64 bit platforms
+define_pd_global(uintx, HeapBaseMinAddress,      256*M);
 // Only used on 64 bit Windows platforms
 define_pd_global(bool, UseVectoredExceptions,    false);
--- a/src/os_cpu/windows_x86/vm/globals_windows_x86.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/os_cpu/windows_x86/vm/globals_windows_x86.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -45,5 +45,7 @@
 
 define_pd_global(uintx, JVMInvokeMethodSlack,    8192);
 
+// Only used on 64 bit platforms
+define_pd_global(uintx, HeapBaseMinAddress,      2*G);
 // Only used on 64 bit Windows platforms
 define_pd_global(bool, UseVectoredExceptions,    false);
--- a/src/os_cpu/windows_x86/vm/unwind_windows_x86.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/os_cpu/windows_x86/vm/unwind_windows_x86.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -68,6 +68,9 @@
     PVOID HandlerData;
 } DISPATCHER_CONTEXT, *PDISPATCHER_CONTEXT;
 
+#if MSC_VER < 1500
+
+/* Not needed for VS2008 compiler, comes from winnt.h. */
 typedef EXCEPTION_DISPOSITION (*PEXCEPTION_ROUTINE) (
     IN PEXCEPTION_RECORD ExceptionRecord,
     IN ULONG64 EstablisherFrame,
@@ -75,4 +78,6 @@
     IN OUT PDISPATCHER_CONTEXT DispatcherContext
 );
 
+#endif
+
 #endif // AMD64
--- a/src/share/tools/MakeDeps/WinGammaPlatformVC7.java	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/tools/MakeDeps/WinGammaPlatformVC7.java	Fri Mar 27 16:54:56 2009 -0700
@@ -27,6 +27,8 @@
 
 public class WinGammaPlatformVC7 extends WinGammaPlatform {
 
+    String projectVersion() {return "7.10";};
+
     public void writeProjectFile(String projectFileName, String projectName,
                                  Vector allConfigs) throws IOException {
         System.out.println();
@@ -40,7 +42,7 @@
             "VisualStudioProject",
             new String[] {
                 "ProjectType", "Visual C++",
-                "Version", "7.10",
+                "Version", projectVersion(),
                 "Name", projectName,
                 "ProjectGUID", "{8822CB5C-1C41-41C2-8493-9F6E1994338B}",
                 "SccProjectName", "",
@@ -417,7 +419,9 @@
             new String[] {
                 "Name", "VCPreLinkEventTool",
                 "Description", BuildConfig.getFieldString(null, "PrelinkDescription"),
-                "CommandLine", cfg.expandFormat(BuildConfig.getFieldString(null, "PrelinkCommand").replace('\t', '\n'))
+                //Caution: String.replace(String,String) is available from JDK5 onwards only
+                "CommandLine", cfg.expandFormat(BuildConfig.getFieldString(null, "PrelinkCommand").replace
+                   ("\t", "&#x0D;&#x0A;"))
             }
             );
 
@@ -542,25 +546,41 @@
 }
 
 class CompilerInterfaceVC7 extends CompilerInterface {
-    Vector getBaseCompilerFlags(Vector defines, Vector includes, String outDir) {
-        Vector rv = new Vector();
+    void getBaseCompilerFlags_common(Vector defines, Vector includes, String outDir,Vector rv) {
 
         // advanced M$ IDE (2003) can only recognize name if it's first or
         // second attribute in the tag - go guess
         addAttr(rv, "Name", "VCCLCompilerTool");
         addAttr(rv, "AdditionalIncludeDirectories", Util.join(",", includes));
-        addAttr(rv, "PreprocessorDefinitions", Util.join(";", defines).replace("\"","&quot;"));
-        addAttr(rv, "UsePrecompiledHeader", "3");
-        addAttr(rv, "PrecompiledHeaderThrough", "incls"+Util.sep+"_precompiled.incl");
+        addAttr(rv, "PreprocessorDefinitions",
+                                Util.join(";", defines).replace("\"","&quot;"));
+        addAttr(rv, "PrecompiledHeaderThrough",
+                                "incls"+Util.sep+"_precompiled.incl");
         addAttr(rv, "PrecompiledHeaderFile", outDir+Util.sep+"vm.pch");
         addAttr(rv, "AssemblerListingLocation", outDir);
         addAttr(rv, "ObjectFile", outDir+Util.sep);
         addAttr(rv, "ProgramDataBaseFileName", outDir+Util.sep+"vm.pdb");
+        // Set /nologo optin
         addAttr(rv, "SuppressStartupBanner", "TRUE");
+        // Surpass the default /Tc or /Tp. 0 is compileAsDefault
         addAttr(rv, "CompileAs", "0");
+        // Set /W3 option. 3 is warningLevel_3
         addAttr(rv, "WarningLevel", "3");
+        // Set /WX option,
         addAttr(rv, "WarnAsError", "TRUE");
+        // Set /GS option
         addAttr(rv, "BufferSecurityCheck", "FALSE");
+        // Set /Zi option. 3 is debugEnabled
+        addAttr(rv, "DebugInformationFormat", "3");
+    }
+    Vector getBaseCompilerFlags(Vector defines, Vector includes, String outDir) {
+        Vector rv = new Vector();
+
+        getBaseCompilerFlags_common(defines,includes, outDir, rv);
+        // Set /Yu option. 3 is pchUseUsingSpecific
+        // Note: Starting VC8 pchUseUsingSpecific is 2 !!!
+        addAttr(rv, "UsePrecompiledHeader", "3");
+        // Set /EHsc- option
         addAttr(rv, "ExceptionHandling", "FALSE");
 
         return rv;
@@ -579,27 +599,39 @@
                 "/export:jio_vsnprintf ");
         addAttr(rv, "AdditionalDependencies", "Wsock32.lib winmm.lib");
         addAttr(rv, "OutputFile", outDll);
+        // Set /INCREMENTAL option. 1 is linkIncrementalNo
         addAttr(rv, "LinkIncremental", "1");
         addAttr(rv, "SuppressStartupBanner", "TRUE");
         addAttr(rv, "ModuleDefinitionFile", outDir+Util.sep+"vm.def");
         addAttr(rv, "ProgramDatabaseFile", outDir+Util.sep+"vm.pdb");
+        // Set /SUBSYSTEM option. 2 is subSystemWindows
         addAttr(rv, "SubSystem", "2");
         addAttr(rv, "BaseAddress", "0x8000000");
         addAttr(rv, "ImportLibrary", outDir+Util.sep+"jvm.lib");
+        // Set /MACHINE option. 1 is machineX86
         addAttr(rv, "TargetMachine", "1");
 
         return rv;
     }
 
+    void  getDebugCompilerFlags_common(String opt,Vector rv) {
+
+        // Set /On option
+        addAttr(rv, "Optimization", opt);
+        // Set /FR option. 1 is brAllInfo
+        addAttr(rv, "BrowseInformation", "1");
+        addAttr(rv, "BrowseInformationFile", "$(IntDir)" + Util.sep);
+        // Set /MD option. 2 is rtMultiThreadedDLL
+        addAttr(rv, "RuntimeLibrary", "2");
+        // Set /Oy- option
+        addAttr(rv, "OmitFramePointers", "FALSE");
+
+    }
+
     Vector getDebugCompilerFlags(String opt) {
         Vector rv = new Vector();
 
-        addAttr(rv, "Optimization", opt);
-        addAttr(rv, "OptimizeForProcessor", "1");
-        addAttr(rv, "DebugInformationFormat", "3");
-        addAttr(rv, "RuntimeLibrary", "2");
-        addAttr(rv, "BrowseInformation", "1");
-        addAttr(rv, "BrowseInformationFile", "$(IntDir)" + Util.sep);
+        getDebugCompilerFlags_common(opt,rv);
 
         return rv;
     }
@@ -607,18 +639,29 @@
     Vector getDebugLinkerFlags() {
         Vector rv = new Vector();
 
-        addAttr(rv, "GenerateDebugInformation", "TRUE");
+        addAttr(rv, "GenerateDebugInformation", "TRUE"); // == /DEBUG option
 
         return rv;
     }
 
+    void getProductCompilerFlags_common(Vector rv) {
+        // Set /O2 option. 2 is optimizeMaxSpeed
+        addAttr(rv, "Optimization", "2");
+        // Set /Oy- option
+        addAttr(rv, "OmitFramePointers", "FALSE");
+    }
+
     Vector getProductCompilerFlags() {
         Vector rv = new Vector();
 
-        addAttr(rv, "Optimization", "2");
+        getProductCompilerFlags_common(rv);
+        // Set /Ob option.  1 is expandOnlyInline
         addAttr(rv, "InlineFunctionExpansion", "1");
+        // Set /GF option.
         addAttr(rv, "StringPooling", "TRUE");
+        // Set /MD option. 2 is rtMultiThreadedDLL
         addAttr(rv, "RuntimeLibrary", "2");
+        // Set /Gy option
         addAttr(rv, "EnableFunctionLevelLinking", "TRUE");
 
         return rv;
@@ -627,7 +670,9 @@
     Vector getProductLinkerFlags() {
         Vector rv = new Vector();
 
+        // Set /OPT:REF option. 2 is optReferences
         addAttr(rv, "OptimizeReferences", "2");
+        // Set /OPT:optFolding option. 2 is optFolding
         addAttr(rv, "EnableCOMDATFolding", "2");
 
         return rv;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/tools/MakeDeps/WinGammaPlatformVC8.java	Fri Mar 27 16:54:56 2009 -0700
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2005-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+import java.io.*;
+import java.util.*;
+
+public class WinGammaPlatformVC8 extends WinGammaPlatformVC7 {
+
+    String projectVersion() {return "8.00";};
+
+}
+
+class CompilerInterfaceVC8 extends CompilerInterfaceVC7 {
+
+    Vector getBaseCompilerFlags(Vector defines, Vector includes, String outDir) {
+        Vector rv = new Vector();
+
+        getBaseCompilerFlags_common(defines,includes, outDir, rv);
+        // Set /Yu option. 2 is pchUseUsingSpecific
+        addAttr(rv, "UsePrecompiledHeader", "2");
+        // Set /EHsc- option. 0 is cppExceptionHandlingNo
+        addAttr(rv, "ExceptionHandling", "0");
+
+        return rv;
+    }
+
+
+    Vector getDebugCompilerFlags(String opt) {
+        Vector rv = new Vector();
+
+        getDebugCompilerFlags_common(opt,rv);
+
+        return rv;
+    }
+
+    Vector getProductCompilerFlags() {
+        Vector rv = new Vector();
+
+        getProductCompilerFlags_common(rv);
+
+        return rv;
+    }
+
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/tools/MakeDeps/WinGammaPlatformVC9.java	Fri Mar 27 16:54:56 2009 -0700
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2005-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+import java.io.*;
+import java.util.*;
+
+public class WinGammaPlatformVC9 extends WinGammaPlatformVC8 {
+
+    String projectVersion() {return "9.00";};
+
+}
+
+class CompilerInterfaceVC9 extends CompilerInterfaceVC8 {
+}
--- a/src/share/vm/adlc/adlc.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/adlc/adlc.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -44,7 +44,7 @@
 #error "Something is wrong with the detection of MSC_VER in the makefiles"
 #endif
 
-#if _MSC_VER >= 1400 && !defined(_WIN64)
+#if _MSC_VER >= 1400
 #define strdup _strdup
 #endif
 
--- a/src/share/vm/asm/assembler.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/asm/assembler.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -321,16 +321,19 @@
 bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
   // Exception handler checks the nmethod's implicit null checks table
   // only when this method returns false.
-  if (UseCompressedOops) {
+#ifdef _LP64
+  if (UseCompressedOops && Universe::narrow_oop_base() != NULL) {
+    assert (Universe::heap() != NULL, "java heap should be initialized");
     // The first page after heap_base is unmapped and
     // the 'offset' is equal to [heap_base + offset] for
     // narrow oop implicit null checks.
-    uintptr_t heap_base = (uintptr_t)Universe::heap_base();
-    if ((uintptr_t)offset >= heap_base) {
+    uintptr_t base = (uintptr_t)Universe::narrow_oop_base();
+    if ((uintptr_t)offset >= base) {
       // Normalize offset for the next check.
-      offset = (intptr_t)(pointer_delta((void*)offset, (void*)heap_base, 1));
+      offset = (intptr_t)(pointer_delta((void*)offset, (void*)base, 1));
     }
   }
+#endif
   return offset < 0 || os::vm_page_size() <= offset;
 }
 
--- a/src/share/vm/ci/ciMethodBlocks.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/ci/ciMethodBlocks.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -284,6 +284,11 @@
       //
       int ex_start = handler->start();
       int ex_end = handler->limit();
+      // ensure a block at the start of exception range and start of following code
+      (void) make_block_at(ex_start);
+      if (ex_end < _code_size)
+        (void) make_block_at(ex_end);
+
       if (eb->is_handler()) {
         // Extend old handler exception range to cover additional range.
         int old_ex_start = eb->ex_start_bci();
@@ -295,10 +300,6 @@
         eb->clear_exception_handler(); // Reset exception information
       }
       eb->set_exception_range(ex_start, ex_end);
-      // ensure a block at the start of exception range and start of following code
-      (void) make_block_at(ex_start);
-      if (ex_end < _code_size)
-        (void) make_block_at(ex_end);
     }
   }
 
--- a/src/share/vm/classfile/classFileParser.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/classfile/classFileParser.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -2747,9 +2747,10 @@
                                                       super_klass(),
                                                       methods(),
                                                       access_flags,
-                                                      class_loader(),
-                                                      class_name(),
-                                                      local_interfaces());
+                                                      class_loader,
+                                                      class_name,
+                                                      local_interfaces(),
+                                                      CHECK_(nullHandle));
 
     // Size of Java itable (in words)
     itable_size = access_flags.is_interface() ? 0 : klassItable::compute_itable_size(transitive_interfaces);
@@ -3229,7 +3230,7 @@
       // print out the superclass.
       const char * from = Klass::cast(this_klass())->external_name();
       if (this_klass->java_super() != NULL) {
-        tty->print("RESOLVE %s %s\n", from, instanceKlass::cast(this_klass->java_super())->external_name());
+        tty->print("RESOLVE %s %s (super)\n", from, instanceKlass::cast(this_klass->java_super())->external_name());
       }
       // print out each of the interface classes referred to by this class.
       objArrayHandle local_interfaces(THREAD, this_klass->local_interfaces());
@@ -3239,7 +3240,7 @@
           klassOop k = klassOop(local_interfaces->obj_at(i));
           instanceKlass* to_class = instanceKlass::cast(k);
           const char * to = to_class->external_name();
-          tty->print("RESOLVE %s %s\n", from, to);
+          tty->print("RESOLVE %s %s (interface)\n", from, to);
         }
       }
     }
--- a/src/share/vm/classfile/vmSymbols.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/classfile/vmSymbols.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -284,6 +284,7 @@
   template(value_name,                                "value")                                    \
   template(frontCacheEnabled_name,                    "frontCacheEnabled")                        \
   template(stringCacheEnabled_name,                   "stringCacheEnabled")                       \
+  template(bitCount_name,                             "bitCount")                                 \
                                                                                                   \
   /* non-intrinsic name/signature pairs: */                                                       \
   template(register_method_name,                      "register")                                 \
@@ -304,6 +305,7 @@
   template(double_long_signature,                     "(D)J")                                     \
   template(double_double_signature,                   "(D)D")                                     \
   template(int_float_signature,                       "(I)F")                                     \
+  template(long_int_signature,                        "(J)I")                                     \
   template(long_long_signature,                       "(J)J")                                     \
   template(long_double_signature,                     "(J)D")                                     \
   template(byte_signature,                            "B")                                        \
@@ -376,7 +378,7 @@
   template(unknown_class_name,                        "<Unknown>")                                                \
                                                                                                                   \
   /* used to identify class loaders handling parallel class loading */                                            \
-  template(parallelCapable_name,                      "parallelLockMap;")                                         \
+  template(parallelCapable_name,                      "parallelLockMap")                                          \
                                                                                                                   \
   /* JVM monitoring and management support */                                                                     \
   template(java_lang_StackTraceElement_array,          "[Ljava/lang/StackTraceElement;")                          \
@@ -507,6 +509,10 @@
    do_name(     doubleToLongBits_name,                           "doubleToLongBits")                                    \
   do_intrinsic(_longBitsToDouble,         java_lang_Double,       longBitsToDouble_name,    long_double_signature, F_S) \
    do_name(     longBitsToDouble_name,                           "longBitsToDouble")                                    \
+                                                                                                                        \
+  do_intrinsic(_bitCount_i,               java_lang_Integer,      bitCount_name,            int_int_signature,   F_S)   \
+  do_intrinsic(_bitCount_l,               java_lang_Long,         bitCount_name,            long_int_signature,  F_S)   \
+                                                                                                                        \
   do_intrinsic(_reverseBytes_i,           java_lang_Integer,      reverseBytes_name,        int_int_signature,   F_S)   \
    do_name(     reverseBytes_name,                               "reverseBytes")                                        \
   do_intrinsic(_reverseBytes_l,           java_lang_Long,         reverseBytes_name,        long_long_signature, F_S)   \
@@ -696,7 +702,6 @@
   do_signature(putShort_raw_signature,    "(JS)V")                                                                      \
   do_signature(getChar_raw_signature,     "(J)C")                                                                       \
   do_signature(putChar_raw_signature,     "(JC)V")                                                                      \
-  do_signature(getInt_raw_signature,      "(J)I")                                                                       \
   do_signature(putInt_raw_signature,      "(JI)V")                                                                      \
       do_alias(getLong_raw_signature,    /*(J)J*/ long_long_signature)                                                  \
       do_alias(putLong_raw_signature,    /*(JJ)V*/ long_long_void_signature)                                            \
@@ -713,7 +718,7 @@
   do_intrinsic(_getByte_raw,              sun_misc_Unsafe,        getByte_name, getByte_raw_signature,           F_RN)  \
   do_intrinsic(_getShort_raw,             sun_misc_Unsafe,        getShort_name, getShort_raw_signature,         F_RN)  \
   do_intrinsic(_getChar_raw,              sun_misc_Unsafe,        getChar_name, getChar_raw_signature,           F_RN)  \
-  do_intrinsic(_getInt_raw,               sun_misc_Unsafe,        getInt_name, getInt_raw_signature,             F_RN)  \
+  do_intrinsic(_getInt_raw,               sun_misc_Unsafe,        getInt_name, long_int_signature,               F_RN)  \
   do_intrinsic(_getLong_raw,              sun_misc_Unsafe,        getLong_name, getLong_raw_signature,           F_RN)  \
   do_intrinsic(_getFloat_raw,             sun_misc_Unsafe,        getFloat_name, getFloat_raw_signature,         F_RN)  \
   do_intrinsic(_getDouble_raw,            sun_misc_Unsafe,        getDouble_name, getDouble_raw_signature,       F_RN)  \
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -145,14 +145,9 @@
   if (G1RSBarrierUseQueue) {
     DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
     dcqs.abandon_logs();
-    if (_cg1rThread->do_traversal()) {
-      _pya = PYA_restart;
-    } else {
-      _cg1rThread->set_do_traversal(true);
-      // Reset the post-yield actions.
-      _pya = PYA_continue;
-      _last_pya = PYA_continue;
-    }
+    // Reset the post-yield actions.
+    _pya = PYA_continue;
+    _last_pya = PYA_continue;
   } else {
     _pya = PYA_restart;
   }
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -107,7 +107,7 @@
 #ifndef PRODUCT
 bool CMBitMapRO::covers(ReservedSpace rs) const {
   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
-  assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize,
+  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
          "size inconsistency");
   return _bmStartWord == (HeapWord*)(rs.base()) &&
          _bmWordSize  == rs.size()>>LogHeapWordSize;
@@ -1232,7 +1232,16 @@
     if (!_final && _regions_done == 0)
       _start_vtime_sec = os::elapsedVTime();
 
-    if (hr->continuesHumongous()) return false;
+    if (hr->continuesHumongous()) {
+      HeapRegion* hum_start = hr->humongous_start_region();
+      // If the head region of the humongous region has been determined
+      // to be alive, then all the tail regions should be marked
+      // such as well.
+      if (_region_bm->at(hum_start->hrs_index())) {
+        _region_bm->par_at_put(hr->hrs_index(), 1);
+      }
+      return false;
+    }
 
     HeapWord* nextTop = hr->next_top_at_mark_start();
     HeapWord* start   = hr->top_at_conc_mark_count();
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -786,6 +786,12 @@
   }
 }
 
+void G1CollectedHeap::abandon_gc_alloc_regions() {
+  // first, make sure that the GC alloc region list is empty (it should!)
+  assert(_gc_alloc_region_list == NULL, "invariant");
+  release_gc_alloc_regions(true /* totally */);
+}
+
 class PostMCRemSetClearClosure: public HeapRegionClosure {
   ModRefBarrierSet* _mr_bs;
 public:
@@ -914,6 +920,7 @@
 
     // Make sure we'll choose a new allocation region afterwards.
     abandon_cur_alloc_region();
+    abandon_gc_alloc_regions();
     assert(_cur_alloc_region == NULL, "Invariant.");
     g1_rem_set()->as_HRInto_G1RemSet()->cleanupHRRS();
     tear_down_region_lists();
@@ -954,6 +961,7 @@
     if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) {
       HandleMark hm;  // Discard invalid handles created during verification
       gclog_or_tty->print(" VerifyAfterGC:");
+      prepare_for_verify();
       Universe::verify(false);
     }
     NOT_PRODUCT(ref_processor()->verify_no_references_recorded());
@@ -1306,7 +1314,7 @@
 }
 
 void G1CollectedHeap::shrink(size_t shrink_bytes) {
-  release_gc_alloc_regions();
+  release_gc_alloc_regions(true /* totally */);
   tear_down_region_lists();  // We will rebuild them in a moment.
   shrink_helper(shrink_bytes);
   rebuild_region_lists();
@@ -1345,8 +1353,7 @@
   _gc_time_stamp(0),
   _surviving_young_words(NULL),
   _in_cset_fast_test(NULL),
-  _in_cset_fast_test_base(NULL)
-{
+  _in_cset_fast_test_base(NULL) {
   _g1h = this; // To catch bugs.
   if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) {
     vm_exit_during_initialization("Failed necessary allocation.");
@@ -1371,9 +1378,19 @@
   }
 
   for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
-    _gc_alloc_regions[ap]       = NULL;
-    _gc_alloc_region_counts[ap] = 0;
-  }
+    _gc_alloc_regions[ap]          = NULL;
+    _gc_alloc_region_counts[ap]    = 0;
+    _retained_gc_alloc_regions[ap] = NULL;
+    // by default, we do not retain a GC alloc region for each ap;
+    // we'll override this, when appropriate, below
+    _retain_gc_alloc_region[ap]    = false;
+  }
+
+  // We will try to remember the last half-full tenured region we
+  // allocated to at the end of a collection so that we can re-use it
+  // during the next collection.
+  _retain_gc_alloc_region[GCAllocForTenured]  = true;
+
   guarantee(_task_queues != NULL, "task_queues allocation failure.");
 }
 
@@ -1405,9 +1422,34 @@
   // Reserve the maximum.
   PermanentGenerationSpec* pgs = collector_policy()->permanent_generation();
   // Includes the perm-gen.
+
+  const size_t total_reserved = max_byte_size + pgs->max_size();
+  char* addr = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop);
+
   ReservedSpace heap_rs(max_byte_size + pgs->max_size(),
                         HeapRegion::GrainBytes,
-                        false /*ism*/);
+                        false /*ism*/, addr);
+
+  if (UseCompressedOops) {
+    if (addr != NULL && !heap_rs.is_reserved()) {
+      // Failed to reserve at specified address - the requested memory
+      // region is taken already, for example, by 'java' launcher.
+      // Try again to reserver heap higher.
+      addr = Universe::preferred_heap_base(total_reserved, Universe::ZeroBasedNarrowOop);
+      ReservedSpace heap_rs0(total_reserved, HeapRegion::GrainBytes,
+                             false /*ism*/, addr);
+      if (addr != NULL && !heap_rs0.is_reserved()) {
+        // Failed to reserve at specified address again - give up.
+        addr = Universe::preferred_heap_base(total_reserved, Universe::HeapBasedNarrowOop);
+        assert(addr == NULL, "");
+        ReservedSpace heap_rs1(total_reserved, HeapRegion::GrainBytes,
+                               false /*ism*/, addr);
+        heap_rs = heap_rs1;
+      } else {
+        heap_rs = heap_rs0;
+      }
+    }
+  }
 
   if (!heap_rs.is_reserved()) {
     vm_exit_during_initialization("Could not reserve enough space for object heap");
@@ -2119,15 +2161,7 @@
   bool doHeapRegion(HeapRegion* r) {
     guarantee(_par || r->claim_value() == HeapRegion::InitialClaimValue,
               "Should be unclaimed at verify points.");
-    if (r->isHumongous()) {
-      if (r->startsHumongous()) {
-        // Verify the single H object.
-        oop(r->bottom())->verify();
-        size_t word_sz = oop(r->bottom())->size();
-        guarantee(r->top() == r->bottom() + word_sz,
-                  "Only one object in a humongous region");
-      }
-    } else {
+    if (!r->continuesHumongous()) {
       VerifyObjsInRegionClosure not_dead_yet_cl(r);
       r->verify(_allow_dirty);
       r->object_iterate(&not_dead_yet_cl);
@@ -2179,6 +2213,7 @@
     _g1h(g1h), _allow_dirty(allow_dirty) { }
 
   void work(int worker_i) {
+    HandleMark hm;
     VerifyRegionClosure blk(_allow_dirty, true);
     _g1h->heap_region_par_iterate_chunked(&blk, worker_i,
                                           HeapRegion::ParVerifyClaimValue);
@@ -2644,7 +2679,7 @@
         popular_region->set_popular_pending(false);
       }
 
-      release_gc_alloc_regions();
+      release_gc_alloc_regions(false /* totally */);
 
       cleanup_surviving_young_words();
 
@@ -2697,6 +2732,7 @@
     if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) {
       HandleMark hm;  // Discard invalid handles created during verification
       gclog_or_tty->print(" VerifyAfterGC:");
+      prepare_for_verify();
       Universe::verify(false);
     }
 
@@ -2735,6 +2771,10 @@
 
 void G1CollectedHeap::set_gc_alloc_region(int purpose, HeapRegion* r) {
   assert(purpose >= 0 && purpose < GCAllocPurposeCount, "invalid purpose");
+  // make sure we don't call set_gc_alloc_region() multiple times on
+  // the same region
+  assert(r == NULL || !r->is_gc_alloc_region(),
+         "shouldn't already be a GC alloc region");
   HeapWord* original_top = NULL;
   if (r != NULL)
     original_top = r->top();
@@ -2824,6 +2864,12 @@
   while (_gc_alloc_region_list != NULL) {
     HeapRegion* r = _gc_alloc_region_list;
     assert(r->is_gc_alloc_region(), "Invariant.");
+    // We need HeapRegion::oops_on_card_seq_iterate_careful() to work on
+    // newly allocated data in order to be able to apply deferred updates
+    // before the GC is done for verification purposes (i.e to allow
+    // G1HRRSFlushLogBuffersOnVerify). It's safe thing to do after the
+    // collection.
+    r->ContiguousSpace::set_saved_mark();
     _gc_alloc_region_list = r->next_gc_alloc_region();
     r->set_next_gc_alloc_region(NULL);
     r->set_is_gc_alloc_region(false);
@@ -2851,23 +2897,55 @@
 }
 
 void G1CollectedHeap::get_gc_alloc_regions() {
+  // First, let's check that the GC alloc region list is empty (it should)
+  assert(_gc_alloc_region_list == NULL, "invariant");
+
   for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+    assert(_gc_alloc_regions[ap] == NULL, "invariant");
+
     // Create new GC alloc regions.
-    HeapRegion* alloc_region = _gc_alloc_regions[ap];
-    // Clear this alloc region, so that in case it turns out to be
-    // unacceptable, we end up with no allocation region, rather than a bad
-    // one.
-    _gc_alloc_regions[ap] = NULL;
-    if (alloc_region == NULL || alloc_region->in_collection_set()) {
-      // Can't re-use old one.  Allocate a new one.
+    HeapRegion* alloc_region = _retained_gc_alloc_regions[ap];
+    _retained_gc_alloc_regions[ap] = NULL;
+
+    if (alloc_region != NULL) {
+      assert(_retain_gc_alloc_region[ap], "only way to retain a GC region");
+
+      // let's make sure that the GC alloc region is not tagged as such
+      // outside a GC operation
+      assert(!alloc_region->is_gc_alloc_region(), "sanity");
+
+      if (alloc_region->in_collection_set() ||
+          alloc_region->top() == alloc_region->end() ||
+          alloc_region->top() == alloc_region->bottom()) {
+        // we will discard the current GC alloc region if it's in the
+        // collection set (it can happen!), if it's already full (no
+        // point in using it), or if it's empty (this means that it
+        // was emptied during a cleanup and it should be on the free
+        // list now).
+
+        alloc_region = NULL;
+      }
+    }
+
+    if (alloc_region == NULL) {
+      // we will get a new GC alloc region
       alloc_region = newAllocRegionWithExpansion(ap, 0);
     }
+
     if (alloc_region != NULL) {
+      assert(_gc_alloc_regions[ap] == NULL, "pre-condition");
       set_gc_alloc_region(ap, alloc_region);
     }
+
+    assert(_gc_alloc_regions[ap] == NULL ||
+           _gc_alloc_regions[ap]->is_gc_alloc_region(),
+           "the GC alloc region should be tagged as such");
+    assert(_gc_alloc_regions[ap] == NULL ||
+           _gc_alloc_regions[ap] == _gc_alloc_region_list,
+           "the GC alloc region should be the same as the GC alloc list head");
   }
   // Set alternative regions for allocation purposes that have reached
-  // thier limit.
+  // their limit.
   for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
     GCAllocPurpose alt_purpose = g1_policy()->alternative_purpose(ap);
     if (_gc_alloc_regions[ap] == NULL && alt_purpose != ap) {
@@ -2877,27 +2955,55 @@
   assert(check_gc_alloc_regions(), "alloc regions messed up");
 }
 
-void G1CollectedHeap::release_gc_alloc_regions() {
+void G1CollectedHeap::release_gc_alloc_regions(bool totally) {
   // We keep a separate list of all regions that have been alloc regions in
-  // the current collection pause.  Forget that now.
+  // the current collection pause. Forget that now. This method will
+  // untag the GC alloc regions and tear down the GC alloc region
+  // list. It's desirable that no regions are tagged as GC alloc
+  // outside GCs.
   forget_alloc_region_list();
 
   // The current alloc regions contain objs that have survived
   // collection. Make them no longer GC alloc regions.
   for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
     HeapRegion* r = _gc_alloc_regions[ap];
-    if (r != NULL && r->is_empty()) {
-      {
+    _retained_gc_alloc_regions[ap] = NULL;
+
+    if (r != NULL) {
+      // we retain nothing on _gc_alloc_regions between GCs
+      set_gc_alloc_region(ap, NULL);
+      _gc_alloc_region_counts[ap] = 0;
+
+      if (r->is_empty()) {
+        // we didn't actually allocate anything in it; let's just put
+        // it on the free list
         MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
         r->set_zero_fill_complete();
         put_free_region_on_list_locked(r);
+      } else if (_retain_gc_alloc_region[ap] && !totally) {
+        // retain it so that we can use it at the beginning of the next GC
+        _retained_gc_alloc_regions[ap] = r;
       }
     }
-    // set_gc_alloc_region will also NULLify all aliases to the region
-    set_gc_alloc_region(ap, NULL);
-    _gc_alloc_region_counts[ap] = 0;
-  }
-}
+  }
+}
+
+#ifndef PRODUCT
+// Useful for debugging
+
+void G1CollectedHeap::print_gc_alloc_regions() {
+  gclog_or_tty->print_cr("GC alloc regions");
+  for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+    HeapRegion* r = _gc_alloc_regions[ap];
+    if (r == NULL) {
+      gclog_or_tty->print_cr("  %2d : "PTR_FORMAT, ap, NULL);
+    } else {
+      gclog_or_tty->print_cr("  %2d : "PTR_FORMAT" "SIZE_FORMAT,
+                             ap, r->bottom(), r->used());
+    }
+  }
+}
+#endif // PRODUCT
 
 void G1CollectedHeap::init_for_evac_failure(OopsInHeapRegionClosure* cl) {
   _drain_in_progress = false;
@@ -3658,7 +3764,9 @@
   CardTableModRefBS* ctbs()                      { return _ct_bs; }
 
   void immediate_rs_update(HeapRegion* from, oop* p, int tid) {
-    _g1_rem->par_write_ref(from, p, tid);
+    if (!from->is_survivor()) {
+      _g1_rem->par_write_ref(from, p, tid);
+    }
   }
 
   void deferred_rs_update(HeapRegion* from, oop* p, int tid) {
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -172,7 +172,6 @@
     NumAPIs = HeapRegion::MaxAge
   };
 
-
   // The one and only G1CollectedHeap, so static functions can find it.
   static G1CollectedHeap* _g1h;
 
@@ -217,11 +216,20 @@
 
   // Postcondition: cur_alloc_region == NULL.
   void abandon_cur_alloc_region();
+  void abandon_gc_alloc_regions();
 
   // The to-space memory regions into which objects are being copied during
   // a GC.
   HeapRegion* _gc_alloc_regions[GCAllocPurposeCount];
   size_t _gc_alloc_region_counts[GCAllocPurposeCount];
+  // These are the regions, one per GCAllocPurpose, that are half-full
+  // at the end of a collection and that we want to reuse during the
+  // next collection.
+  HeapRegion* _retained_gc_alloc_regions[GCAllocPurposeCount];
+  // This specifies whether we will keep the last half-full region at
+  // the end of a collection so that it can be reused during the next
+  // collection (this is specified per GCAllocPurpose)
+  bool _retain_gc_alloc_region[GCAllocPurposeCount];
 
   // A list of the regions that have been set to be alloc regions in the
   // current collection.
@@ -589,8 +597,21 @@
 
   // Ensure that the relevant gc_alloc regions are set.
   void get_gc_alloc_regions();
-  // We're done with GC alloc regions; release them, as appropriate.
-  void release_gc_alloc_regions();
+  // We're done with GC alloc regions. We are going to tear down the
+  // gc alloc list and remove the gc alloc tag from all the regions on
+  // that list. However, we will also retain the last (i.e., the one
+  // that is half-full) GC alloc region, per GCAllocPurpose, for
+  // possible reuse during the next collection, provided
+  // _retain_gc_alloc_region[] indicates that it should be the
+  // case. Said regions are kept in the _retained_gc_alloc_regions[]
+  // array. If the parameter totally is set, we will not retain any
+  // regions, irrespective of what _retain_gc_alloc_region[]
+  // indicates.
+  void release_gc_alloc_regions(bool totally);
+#ifndef PRODUCT
+  // Useful for debugging.
+  void print_gc_alloc_regions();
+#endif // !PRODUCT
 
   // ("Weak") Reference processing support
   ReferenceProcessor* _ref_processor;
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -1087,6 +1087,7 @@
 
   assert(_g1->used_regions() == _g1->recalculate_used_regions(),
          "sanity");
+  assert(_g1->used() == _g1->recalculate_used(), "sanity");
 
   double s_w_t_ms = (start_time_sec - _stop_world_start) * 1000.0;
   _all_stop_world_times_ms->add(s_w_t_ms);
@@ -1636,7 +1637,9 @@
   double obj_copy_time = avg_value(_par_last_obj_copy_times_ms);
   double termination_time = avg_value(_par_last_termination_times_ms);
 
-  double parallel_other_time;
+  double parallel_other_time = _cur_collection_par_time_ms -
+    (update_rs_time + ext_root_scan_time + mark_stack_scan_time +
+     scan_only_time + scan_rs_time + obj_copy_time + termination_time);
   if (update_stats) {
     MainBodySummary* body_summary = summary->main_body_summary();
     guarantee(body_summary != NULL, "should not be null!");
@@ -1655,9 +1658,6 @@
       body_summary->record_parallel_time_ms(_cur_collection_par_time_ms);
       body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms);
       body_summary->record_termination_time_ms(termination_time);
-      parallel_other_time = _cur_collection_par_time_ms -
-        (update_rs_time + ext_root_scan_time + mark_stack_scan_time +
-         scan_only_time + scan_rs_time + obj_copy_time + termination_time);
       body_summary->record_parallel_other_time_ms(parallel_other_time);
     }
     body_summary->record_mark_closure_time_ms(_mark_closure_time_ms);
@@ -1802,8 +1802,10 @@
     gclog_or_tty->print_cr("]");
 
   _all_pause_times_ms->add(elapsed_ms);
-  summary->record_total_time_ms(elapsed_ms);
-  summary->record_other_time_ms(other_time_ms);
+  if (update_stats) {
+    summary->record_total_time_ms(elapsed_ms);
+    summary->record_other_time_ms(other_time_ms);
+  }
   for (int i = 0; i < _aux_num; ++i)
     if (_cur_aux_times_set[i])
       _all_aux_times_ms[i].add(_cur_aux_times_ms[i]);
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -502,15 +502,26 @@
   }
 
   if (ParallelGCThreads > 0) {
-    // This is a temporary change to serialize the update and scanning
-    // of remembered sets. There are some race conditions when this is
-    // done in parallel and they are causing failures. When we resolve
-    // said race conditions, we'll revert back to parallel remembered
-    // set updating and scanning. See CRs 6677707 and 6677708.
-    if (worker_i == 0) {
+    // The two flags below were introduced temporarily to serialize
+    // the updating and scanning of remembered sets. There are some
+    // race conditions when these two operations are done in parallel
+    // and they are causing failures. When we resolve said race
+    // conditions, we'll revert back to parallel remembered set
+    // updating and scanning. See CRs 6677707 and 6677708.
+    if (G1EnableParallelRSetUpdating || (worker_i == 0)) {
       updateRS(worker_i);
       scanNewRefsRS(oc, worker_i);
+    } else {
+      _g1p->record_update_rs_start_time(worker_i, os::elapsedTime());
+      _g1p->record_update_rs_processed_buffers(worker_i, 0.0);
+      _g1p->record_update_rs_time(worker_i, 0.0);
+      _g1p->record_scan_new_refs_time(worker_i, 0.0);
+    }
+    if (G1EnableParallelRSetScanning || (worker_i == 0)) {
       scanRS(oc, worker_i);
+    } else {
+      _g1p->record_scan_rs_start_time(worker_i, os::elapsedTime());
+      _g1p->record_scan_rs_time(worker_i, 0.0);
     }
   } else {
     assert(worker_i == 0, "invariant");
@@ -716,8 +727,7 @@
   bool doHeapRegion(HeapRegion* r) {
     if (!r->in_collection_set() &&
         !r->continuesHumongous() &&
-        !r->is_young() &&
-        !r->is_survivor()) {
+        !r->is_young()) {
       _update_rs_oop_cl.set_from(r);
       UpdateRSObjectClosure update_rs_obj_cl(&_update_rs_oop_cl);
 
@@ -854,7 +864,7 @@
   // before all the cards on the region are dirtied. This is unlikely,
   // and it doesn't happen often, but it can happen. So, the extra
   // check below filters out those cards.
-  if (r->is_young() || r->is_survivor()) {
+  if (r->is_young()) {
     return;
   }
   // While we are processing RSet buffers during the collection, we
@@ -1025,7 +1035,9 @@
   }
 }
 void HRInto_G1RemSet::prepare_for_verify() {
-  if (G1HRRSFlushLogBuffersOnVerify && VerifyBeforeGC && !_g1->full_collection()) {
+  if (G1HRRSFlushLogBuffersOnVerify &&
+      (VerifyBeforeGC || VerifyAfterGC)
+      &&  !_g1->full_collection()) {
     cleanupHRRS();
     _g1->set_refine_cte_cl_concurrency(false);
     if (SafepointSynchronize::is_at_safepoint()) {
@@ -1036,5 +1048,7 @@
     _cg1r->set_use_cache(false);
     updateRS(0);
     _cg1r->set_use_cache(cg1r_use_cache);
+
+    assert(JavaThread::dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
   }
 }
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -295,6 +295,14 @@
                                                                             \
   product(uintx, G1FixedSurvivorSpaceSize, 0,                               \
           "If non-0 is the size of the G1 survivor space, "                 \
-          "otherwise SurvivorRatio is used to determine the size")
+          "otherwise SurvivorRatio is used to determine the size")          \
+                                                                            \
+  experimental(bool, G1EnableParallelRSetUpdating, false,                   \
+          "Enables the parallelization of remembered set updating "         \
+          "during evacuation pauses")                                       \
+                                                                            \
+  experimental(bool, G1EnableParallelRSetScanning, false,                   \
+          "Enables the parallelization of remembered set scanning "         \
+          "during evacuation pauses")
 
 G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -508,7 +508,7 @@
   typedef PosParPRT* PosParPRTPtr;
   if (_max_fine_entries == 0) {
     assert(_mod_max_fine_entries_mask == 0, "Both or none.");
-    _max_fine_entries = (1 << G1LogRSRegionEntries);
+    _max_fine_entries = (size_t)(1 << G1LogRSRegionEntries);
     _mod_max_fine_entries_mask = _max_fine_entries - 1;
 #if SAMPLE_FOR_EVICTION
     assert(_fine_eviction_sample_size == 0
--- a/src/share/vm/gc_implementation/parNew/parGCAllocBuffer.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/gc_implementation/parNew/parGCAllocBuffer.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -63,9 +63,8 @@
   // return NULL.
   HeapWord* allocate(size_t word_sz) {
     HeapWord* res = _top;
-    HeapWord* new_top = _top + word_sz;
-    if (new_top <= _end) {
-      _top = new_top;
+    if (pointer_delta(_end, _top) >= word_sz) {
+      _top = _top + word_sz;
       return res;
     } else {
       return NULL;
@@ -75,10 +74,9 @@
   // Undo the last allocation in the buffer, which is required to be of the
   // "obj" of the given "word_sz".
   void undo_allocation(HeapWord* obj, size_t word_sz) {
-    assert(_top - word_sz >= _bottom
-           && _top - word_sz == obj,
-           "Bad undo_allocation");
-    _top = _top - word_sz;
+    assert(pointer_delta(_top, _bottom) >= word_sz, "Bad undo");
+    assert(pointer_delta(_top, obj)     == word_sz, "Bad undo");
+    _top = obj;
   }
 
   // The total (word) size of the buffer, including both allocated and
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -104,12 +104,38 @@
                   og_min_size, og_max_size,
                   yg_min_size, yg_max_size);
 
+  const size_t total_reserved = pg_max_size + og_max_size + yg_max_size;
+  char* addr = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop);
+
   // The main part of the heap (old gen + young gen) can often use a larger page
   // size than is needed or wanted for the perm gen.  Use the "compound
   // alignment" ReservedSpace ctor to avoid having to use the same page size for
   // all gens.
+
   ReservedHeapSpace heap_rs(pg_max_size, pg_align, og_max_size + yg_max_size,
-                            og_align);
+                            og_align, addr);
+
+  if (UseCompressedOops) {
+    if (addr != NULL && !heap_rs.is_reserved()) {
+      // Failed to reserve at specified address - the requested memory
+      // region is taken already, for example, by 'java' launcher.
+      // Try again to reserver heap higher.
+      addr = Universe::preferred_heap_base(total_reserved, Universe::ZeroBasedNarrowOop);
+      ReservedHeapSpace heap_rs0(pg_max_size, pg_align, og_max_size + yg_max_size,
+                                 og_align, addr);
+      if (addr != NULL && !heap_rs0.is_reserved()) {
+        // Failed to reserve at specified address again - give up.
+        addr = Universe::preferred_heap_base(total_reserved, Universe::HeapBasedNarrowOop);
+        assert(addr == NULL, "");
+        ReservedHeapSpace heap_rs1(pg_max_size, pg_align, og_max_size + yg_max_size,
+                                   og_align, addr);
+        heap_rs = heap_rs1;
+      } else {
+        heap_rs = heap_rs0;
+      }
+    }
+  }
+
   os::trace_page_sizes("ps perm", pg_min_size, pg_max_size, pg_page_sz,
                        heap_rs.base(), pg_max_size);
   os::trace_page_sizes("ps main", og_min_size + yg_min_size,
--- a/src/share/vm/includeDB_core	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/includeDB_core	Fri Mar 27 16:54:56 2009 -0700
@@ -4598,6 +4598,7 @@
 vm_version_<arch>.hpp                   globals_extension.hpp
 vm_version_<arch>.hpp                   vm_version.hpp
 
+vm_version_<os_arch>.cpp                os.hpp
 vm_version_<os_arch>.cpp                vm_version_<arch>.hpp
 
 vmreg.cpp                               assembler.hpp
--- a/src/share/vm/interpreter/invocationCounter.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/interpreter/invocationCounter.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -47,6 +47,8 @@
   // executed many more times before re-entering the VM.
   int old_count = count();
   int new_count = MIN2(old_count, (int) (CompileThreshold / 2));
+  // prevent from going to zero, to distinguish from never-executed methods
+  if (new_count == 0)  new_count = 1;
   if (old_count != new_count)  set(state(), new_count);
 }
 
--- a/src/share/vm/memory/blockOffsetTable.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/memory/blockOffsetTable.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -235,7 +235,7 @@
   };
 
   static size_t power_to_cards_back(uint i) {
-    return 1 << (LogBase * i);
+    return (size_t)(1 << (LogBase * i));
   }
   static size_t power_to_words_back(uint i) {
     return power_to_cards_back(i) * N_words;
--- a/src/share/vm/memory/genCollectedHeap.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/memory/genCollectedHeap.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -218,6 +218,31 @@
     heap_address -= total_reserved;
   } else {
     heap_address = NULL;  // any address will do.
+    if (UseCompressedOops) {
+      heap_address = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop);
+      *_total_reserved = total_reserved;
+      *_n_covered_regions = n_covered_regions;
+      *heap_rs = ReservedHeapSpace(total_reserved, alignment,
+                                   UseLargePages, heap_address);
+
+      if (heap_address != NULL && !heap_rs->is_reserved()) {
+        // Failed to reserve at specified address - the requested memory
+        // region is taken already, for example, by 'java' launcher.
+        // Try again to reserver heap higher.
+        heap_address = Universe::preferred_heap_base(total_reserved, Universe::ZeroBasedNarrowOop);
+        *heap_rs = ReservedHeapSpace(total_reserved, alignment,
+                                     UseLargePages, heap_address);
+
+        if (heap_address != NULL && !heap_rs->is_reserved()) {
+          // Failed to reserve at specified address again - give up.
+          heap_address = Universe::preferred_heap_base(total_reserved, Universe::HeapBasedNarrowOop);
+          assert(heap_address == NULL, "");
+          *heap_rs = ReservedHeapSpace(total_reserved, alignment,
+                                       UseLargePages, heap_address);
+        }
+      }
+      return heap_address;
+    }
   }
 
   *_total_reserved = total_reserved;
--- a/src/share/vm/memory/heap.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/memory/heap.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -112,7 +112,7 @@
 
   const size_t rs_align = page_size == (size_t) os::vm_page_size() ? 0 :
     MAX2(page_size, granularity);
-  ReservedSpace rs(r_size, rs_align, rs_align > 0);
+  ReservedCodeSpace rs(r_size, rs_align, rs_align > 0);
   os::trace_page_sizes("code heap", committed_size, reserved_size, page_size,
                        rs.base(), rs.size());
   if (!_memory.initialize(rs, c_size)) {
--- a/src/share/vm/memory/universe.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/memory/universe.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -99,7 +99,8 @@
 size_t          Universe::_heap_used_at_last_gc = 0;
 
 CollectedHeap*  Universe::_collectedHeap = NULL;
-address         Universe::_heap_base = NULL;
+
+NarrowOopStruct Universe::_narrow_oop = { NULL, 0, true };
 
 
 void Universe::basic_type_classes_do(void f(klassOop)) {
@@ -729,6 +730,53 @@
   return JNI_OK;
 }
 
+// Choose the heap base address and oop encoding mode
+// when compressed oops are used:
+// Unscaled  - Use 32-bits oops without encoding when
+//     NarrowOopHeapBaseMin + heap_size < 4Gb
+// ZeroBased - Use zero based compressed oops with encoding when
+//     NarrowOopHeapBaseMin + heap_size < 32Gb
+// HeapBased - Use compressed oops with heap base + encoding.
+
+// 4Gb
+static const uint64_t NarrowOopHeapMax = (uint64_t(max_juint) + 1);
+// 32Gb
+static const uint64_t OopEncodingHeapMax = NarrowOopHeapMax << LogMinObjAlignmentInBytes;
+
+char* Universe::preferred_heap_base(size_t heap_size, NARROW_OOP_MODE mode) {
+#ifdef _LP64
+  if (UseCompressedOops) {
+    assert(mode == UnscaledNarrowOop  ||
+           mode == ZeroBasedNarrowOop ||
+           mode == HeapBasedNarrowOop, "mode is invalid");
+
+    const size_t total_size = heap_size + HeapBaseMinAddress;
+    if (total_size <= OopEncodingHeapMax && (mode != HeapBasedNarrowOop)) {
+      if (total_size <= NarrowOopHeapMax && (mode == UnscaledNarrowOop) &&
+          (Universe::narrow_oop_shift() == 0)) {
+        // Use 32-bits oops without encoding and
+        // place heap's top on the 4Gb boundary
+        return (char*)(NarrowOopHeapMax - heap_size);
+      } else {
+        // Can't reserve with NarrowOopShift == 0
+        Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
+        if (mode == UnscaledNarrowOop ||
+            mode == ZeroBasedNarrowOop && total_size <= NarrowOopHeapMax) {
+          // Use zero based compressed oops with encoding and
+          // place heap's top on the 32Gb boundary in case
+          // total_size > 4Gb or failed to reserve below 4Gb.
+          return (char*)(OopEncodingHeapMax - heap_size);
+        }
+      }
+    } else {
+      // Can't reserve below 32Gb.
+      Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
+    }
+  }
+#endif
+  return NULL; // also return NULL (don't care) for 32-bit VM
+}
+
 jint Universe::initialize_heap() {
 
   if (UseParallelGC) {
@@ -773,6 +821,8 @@
   if (status != JNI_OK) {
     return status;
   }
+
+#ifdef _LP64
   if (UseCompressedOops) {
     // Subtract a page because something can get allocated at heap base.
     // This also makes implicit null checking work, because the
@@ -780,8 +830,49 @@
     // See needs_explicit_null_check.
     // Only set the heap base for compressed oops because it indicates
     // compressed oops for pstack code.
-    Universe::_heap_base = Universe::heap()->base() - os::vm_page_size();
+    if (PrintCompressedOopsMode) {
+      tty->cr();
+      tty->print("heap address: "PTR_FORMAT, Universe::heap()->base());
+    }
+    if ((uint64_t)Universe::heap()->reserved_region().end() > OopEncodingHeapMax) {
+      // Can't reserve heap below 32Gb.
+      Universe::set_narrow_oop_base(Universe::heap()->base() - os::vm_page_size());
+      Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
+      if (PrintCompressedOopsMode) {
+        tty->print(", Compressed Oops with base: "PTR_FORMAT, Universe::narrow_oop_base());
+      }
+    } else {
+      Universe::set_narrow_oop_base(0);
+      if (PrintCompressedOopsMode) {
+        tty->print(", zero based Compressed Oops");
+      }
+#ifdef _WIN64
+      if (!Universe::narrow_oop_use_implicit_null_checks()) {
+        // Don't need guard page for implicit checks in indexed addressing
+        // mode with zero based Compressed Oops.
+        Universe::set_narrow_oop_use_implicit_null_checks(true);
+      }
+#endif //  _WIN64
+      if((uint64_t)Universe::heap()->reserved_region().end() > NarrowOopHeapMax) {
+        // Can't reserve heap below 4Gb.
+        Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
+      } else {
+        assert(Universe::narrow_oop_shift() == 0, "use unscaled narrow oop");
+        if (PrintCompressedOopsMode) {
+          tty->print(", 32-bits Oops");
+        }
+      }
+    }
+    if (PrintCompressedOopsMode) {
+      tty->cr();
+      tty->cr();
+    }
   }
+  assert(Universe::narrow_oop_base() == (Universe::heap()->base() - os::vm_page_size()) ||
+         Universe::narrow_oop_base() == NULL, "invalid value");
+  assert(Universe::narrow_oop_shift() == LogMinObjAlignmentInBytes ||
+         Universe::narrow_oop_shift() == 0, "invalid value");
+#endif
 
   // We will never reach the CATCH below since Exceptions::_throw will cause
   // the VM to exit if an exception is thrown during initialization
--- a/src/share/vm/memory/universe.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/memory/universe.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -90,6 +90,19 @@
   methodOop get_methodOop();
 };
 
+// For UseCompressedOops.
+struct NarrowOopStruct {
+  // Base address for oop-within-java-object materialization.
+  // NULL if using wide oops or zero based narrow oops.
+  address _base;
+  // Number of shift bits for encoding/decoding narrow oops.
+  // 0 if using wide oops or zero based unscaled narrow oops,
+  // LogMinObjAlignmentInBytes otherwise.
+  int     _shift;
+  // Generate code with implicit null checks for narrow oops.
+  bool    _use_implicit_null_checks;
+};
+
 
 class Universe: AllStatic {
   // Ugh.  Universe is much too friendly.
@@ -181,9 +194,9 @@
 
   // The particular choice of collected heap.
   static CollectedHeap* _collectedHeap;
-  // Base address for oop-within-java-object materialization.
-  // NULL if using wide oops.  Doubles as heap oop null value.
-  static address        _heap_base;
+
+  // For UseCompressedOops.
+  static struct NarrowOopStruct _narrow_oop;
 
   // array of dummy objects used with +FullGCAlot
   debug_only(static objArrayOop _fullgc_alot_dummy_array;)
@@ -328,8 +341,25 @@
   static CollectedHeap* heap() { return _collectedHeap; }
 
   // For UseCompressedOops
-  static address heap_base()       { return _heap_base; }
-  static address* heap_base_addr() { return &_heap_base; }
+  static address* narrow_oop_base_addr()              { return &_narrow_oop._base; }
+  static address  narrow_oop_base()                   { return  _narrow_oop._base; }
+  static int      narrow_oop_shift()                  { return  _narrow_oop._shift; }
+  static void     set_narrow_oop_base(address base)   { _narrow_oop._base  = base; }
+  static void     set_narrow_oop_shift(int shift)     { _narrow_oop._shift = shift; }
+  static bool     narrow_oop_use_implicit_null_checks()             { return  _narrow_oop._use_implicit_null_checks; }
+  static void     set_narrow_oop_use_implicit_null_checks(bool use) { _narrow_oop._use_implicit_null_checks = use; }
+  // Narrow Oop encoding mode:
+  // 0 - Use 32-bits oops without encoding when
+  //     NarrowOopHeapBaseMin + heap_size < 4Gb
+  // 1 - Use zero based compressed oops with encoding when
+  //     NarrowOopHeapBaseMin + heap_size < 32Gb
+  // 2 - Use compressed oops with heap base + encoding.
+  enum NARROW_OOP_MODE {
+    UnscaledNarrowOop  = 0,
+    ZeroBasedNarrowOop = 1,
+    HeapBasedNarrowOop = 2
+  };
+  static char* preferred_heap_base(size_t heap_size, NARROW_OOP_MODE mode);
 
   // Historic gc information
   static size_t get_heap_capacity_at_last_gc()         { return _heap_capacity_at_last_gc; }
--- a/src/share/vm/oops/instanceKlass.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/oops/instanceKlass.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -1859,6 +1859,25 @@
   }
 }
 
+// Returns true iff super_method can be overridden by a method in targetclassname
+// See JSL 3rd edition 8.4.6.1
+// Assumes name-signature match
+// "this" is instanceKlass of super_method which must exist
+// note that the instanceKlass of the method in the targetclassname has not always been created yet
+bool instanceKlass::is_override(methodHandle super_method, Handle targetclassloader, symbolHandle targetclassname, TRAPS) {
+   // Private methods can not be overridden
+   if (super_method->is_private()) {
+     return false;
+   }
+   // If super method is accessible, then override
+   if ((super_method->is_protected()) ||
+       (super_method->is_public())) {
+     return true;
+   }
+   // Package-private methods are not inherited outside of package
+   assert(super_method->is_package_private(), "must be package private");
+   return(is_same_class_package(targetclassloader(), targetclassname()));
+}
 
 jint instanceKlass::compute_modifier_flags(TRAPS) const {
   klassOop k = as_klassOop();
--- a/src/share/vm/oops/instanceKlass.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/oops/instanceKlass.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -303,6 +303,9 @@
     inner_class_next_offset = 4
   };
 
+  // method override check
+  bool is_override(methodHandle super_method, Handle targetclassloader, symbolHandle targetclassname, TRAPS);
+
   // package
   bool is_same_class_package(klassOop class2);
   bool is_same_class_package(oop classloader2, symbolOop classname2);
--- a/src/share/vm/oops/klassVtable.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/oops/klassVtable.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -45,9 +45,10 @@
                                                        klassOop super,
                                                        objArrayOop methods,
                                                        AccessFlags class_flags,
-                                                       oop classloader,
-                                                       symbolOop classname,
-                                                       objArrayOop local_interfaces
+                                                       Handle classloader,
+                                                       symbolHandle classname,
+                                                       objArrayOop local_interfaces,
+                                                       TRAPS
                                                        ) {
 
   No_Safepoint_Verifier nsv;
@@ -64,9 +65,9 @@
   int len = methods->length();
   for (int i = 0; i < len; i++) {
     assert(methods->obj_at(i)->is_method(), "must be a methodOop");
-    methodOop m = methodOop(methods->obj_at(i));
+    methodHandle mh(THREAD, methodOop(methods->obj_at(i)));
 
-    if (needs_new_vtable_entry(m, super, classloader, classname, class_flags)) {
+    if (needs_new_vtable_entry(mh, super, classloader, classname, class_flags, THREAD)) {
       vtable_length += vtableEntry::size(); // we need a new entry
     }
   }
@@ -117,6 +118,7 @@
     superVtable->copy_vtable_to(table());
 #ifndef PRODUCT
     if (PrintVtables && Verbose) {
+      ResourceMark rm;
       tty->print_cr("copy vtable from %s to %s size %d", sk->internal_name(), klass()->internal_name(), _length);
     }
 #endif
@@ -159,13 +161,13 @@
     int len = methods()->length();
     int initialized = super_vtable_len;
 
-    // update_super_vtable can stop for gc - ensure using handles
+    // update_inherited_vtable can stop for gc - ensure using handles
     for (int i = 0; i < len; i++) {
       HandleMark hm(THREAD);
       assert(methods()->obj_at(i)->is_method(), "must be a methodOop");
       methodHandle mh(THREAD, (methodOop)methods()->obj_at(i));
 
-      bool needs_new_entry = update_super_vtable(ik(), mh, super_vtable_len, checkconstraints, CHECK);
+      bool needs_new_entry = update_inherited_vtable(ik(), mh, super_vtable_len, checkconstraints, CHECK);
 
       if (needs_new_entry) {
         put_method_at(mh(), initialized);
@@ -177,7 +179,7 @@
     // add miranda methods; it will also update the value of initialized
     fill_in_mirandas(initialized);
 
-    // In class hierachieswhere the accesibility is not increasing (i.e., going from private ->
+    // In class hierarchies where the accessibility is not increasing (i.e., going from private ->
     // package_private -> publicprotected), the vtable might actually be smaller than our initial
     // calculation.
     assert(initialized <= _length, "vtable initialization failed");
@@ -188,26 +190,49 @@
   }
 }
 
-// Interates through the vtables to find the broadest access level. This
-// will always be monotomic for valid Java programs - but not neccesarily
-// for incompatible class files.
-klassVtable::AccessType klassVtable::vtable_accessibility_at(int i) {
-  // This vtable is not implementing the specific method
-  if (i >= length()) return acc_private;
+// Called for cases where a method does not override its superclass' vtable entry
+// For bytecodes not produced by javac together it is possible that a method does not override
+// the superclass's method, but might indirectly override a super-super class's vtable entry
+// If none found, return a null superk, else return the superk of the method this does override
+instanceKlass* klassVtable::find_transitive_override(instanceKlass* initialsuper, methodHandle target_method,
+                            int vtable_index, Handle target_loader, symbolHandle target_classname, Thread * THREAD) {
+  instanceKlass* superk = initialsuper;
+  while (superk != NULL && superk->super() != NULL) {
+    instanceKlass* supersuperklass = instanceKlass::cast(superk->super());
+    klassVtable* ssVtable = supersuperklass->vtable();
+    if (vtable_index < ssVtable->length()) {
+      methodOop super_method = ssVtable->method_at(vtable_index);
+#ifndef PRODUCT
+      symbolHandle name(THREAD,target_method()->name());
+      symbolHandle signature(THREAD,target_method()->signature());
+      assert(super_method->name() == name() && super_method->signature() == signature(), "vtable entry name/sig mismatch");
+#endif
+      if (supersuperklass->is_override(super_method, target_loader, target_classname, THREAD)) {
+#ifndef PRODUCT
+        if (PrintVtables && Verbose) {
+          ResourceMark rm(THREAD);
+          tty->print("transitive overriding superclass %s with %s::%s index %d, original flags: ",
+           supersuperklass->internal_name(),
+           _klass->internal_name(), (target_method() != NULL) ?
+           target_method()->name()->as_C_string() : "<NULL>", vtable_index);
+           super_method->access_flags().print_on(tty);
+           tty->print("overriders flags: ");
+           target_method->access_flags().print_on(tty);
+           tty->cr();
+        }
+#endif /*PRODUCT*/
+        break; // return found superk
+      }
+    } else  {
+      // super class has no vtable entry here, stop transitive search
+      superk = (instanceKlass*)NULL;
+      break;
+    }
+    // if no override found yet, continue to search up
+    superk = instanceKlass::cast(superk->super());
+  }
 
-  // Compute AccessType for current method. public or protected we are done.
-  methodOop m = method_at(i);
-  if (m->is_protected() || m->is_public()) return acc_publicprotected;
-
-  AccessType acc = m->is_package_private() ? acc_package_private : acc_private;
-
-  // Compute AccessType for method in super classes
-  klassOop super = klass()->super();
-  AccessType super_acc = (super != NULL) ? instanceKlass::cast(klass()->super())->vtable()->vtable_accessibility_at(i)
-                                         : acc_private;
-
-  // Merge
-  return (AccessType)MAX2((int)acc, (int)super_acc);
+  return superk;
 }
 
 
@@ -215,7 +240,8 @@
 // OR return true if a new vtable entry is required
 // Only called for instanceKlass's, i.e. not for arrays
 // If that changed, could not use _klass as handle for klass
-bool klassVtable::update_super_vtable(instanceKlass* klass, methodHandle target_method, int super_vtable_len, bool checkconstraints, TRAPS) {
+bool klassVtable::update_inherited_vtable(instanceKlass* klass, methodHandle target_method, int super_vtable_len,
+                  bool checkconstraints, TRAPS) {
   ResourceMark rm;
   bool allocate_new = true;
   assert(klass->oop_is_instance(), "must be instanceKlass");
@@ -242,58 +268,35 @@
   }
 
   // private methods always have a new entry in the vtable
+  // specification interpretation since classic has
+  // private methods not overriding
   if (target_method()->is_private()) {
     return allocate_new;
   }
 
   // search through the vtable and update overridden entries
   // Since check_signature_loaders acquires SystemDictionary_lock
-  // which can block for gc, once we are in this loop, use handles, not
-  // unhandled oops unless they are reinitialized for each loop
-  // handles for name, signature, klass, target_method
-  // not for match_method, holder
+  // which can block for gc, once we are in this loop, use handles
+  // For classfiles built with >= jdk7, we now look for transitive overrides
 
   symbolHandle name(THREAD,target_method()->name());
   symbolHandle signature(THREAD,target_method()->signature());
+  Handle target_loader(THREAD, _klass->class_loader());
+  symbolHandle target_classname(THREAD, _klass->name());
   for(int i = 0; i < super_vtable_len; i++) {
-    methodOop match_method = method_at(i);
+    methodOop super_method = method_at(i);
     // Check if method name matches
-    if (match_method->name() == name() && match_method->signature() == signature()) {
+    if (super_method->name() == name() && super_method->signature() == signature()) {
 
-      instanceKlass* holder = (THREAD, instanceKlass::cast(match_method->method_holder()));
+      // get super_klass for method_holder for the found method
+      instanceKlass* super_klass =  instanceKlass::cast(super_method->method_holder());
 
-      // Check if the match_method is accessable from current class
-
-      bool same_package_init = false;
-      bool same_package_flag = false;
-      bool simple_match = match_method->is_public()  || match_method->is_protected();
-      if (!simple_match) {
-        same_package_init = true;
-        same_package_flag = holder->is_same_class_package(_klass->class_loader(), _klass->name());
-
-        simple_match = match_method->is_package_private() && same_package_flag;
-      }
-      // match_method is the superclass' method. Note we can't override
-      // and shouldn't access superclass' ACC_PRIVATE methods
-      // (although they have been copied into our vtable)
-      // A simple form of this statement is:
-      // if ( (match_method->is_public()  || match_method->is_protected()) ||
-      //    (match_method->is_package_private() && holder->is_same_class_package(klass->class_loader(), klass->name()))) {
-      //
-      // The complexity is introduced it avoid recomputing 'is_same_class_package' which is expensive.
-      if (simple_match) {
-        // Check if target_method and match_method has same level of accessibility. The accesibility of the
-        // match method is the "most-general" visibility of all entries at it's particular vtable index for
-        // all superclasses. This check must be done before we override the current entry in the vtable.
-        AccessType at = vtable_accessibility_at(i);
-        bool same_access = false;
-
-        if (  (at == acc_publicprotected && (target_method()->is_public() || target_method()->is_protected())
-           || (at == acc_package_private && (target_method()->is_package_private() &&
-                                            (( same_package_init && same_package_flag) ||
-                                             (!same_package_init && holder->is_same_class_package(_klass->class_loader(), _klass->name()))))))) {
-           same_access = true;
-        }
+      if ((super_klass->is_override(super_method, target_loader, target_classname, THREAD)) ||
+      ((klass->major_version() >= VTABLE_TRANSITIVE_OVERRIDE_VERSION)
+        && ((super_klass = find_transitive_override(super_klass, target_method, i, target_loader,
+             target_classname, THREAD)) != (instanceKlass*)NULL))) {
+        // overriding, so no new entry
+        allocate_new = false;
 
         if (checkconstraints) {
         // Override vtable entry if passes loader constraint check
@@ -302,15 +305,12 @@
         // have already made any needed loader constraints.
         // Since loader constraints are transitive, it is enough
         // to link to the first super, and we get all the others.
-          symbolHandle signature(THREAD, target_method()->signature());
-          Handle this_loader(THREAD, _klass->class_loader());
-          instanceKlassHandle super_klass(THREAD, _klass->super());
           Handle super_loader(THREAD, super_klass->class_loader());
 
-          if (this_loader() != super_loader()) {
+          if (target_loader() != super_loader()) {
             ResourceMark rm(THREAD);
             char* failed_type_name =
-              SystemDictionary::check_signature_loaders(signature, this_loader,
+              SystemDictionary::check_signature_loaders(signature, target_loader,
                                                         super_loader, true,
                                                         CHECK_(false));
             if (failed_type_name != NULL) {
@@ -320,7 +320,7 @@
                 "(instance of %s), have different Class objects for the type "
                 "%s used in the signature";
               char* sig = target_method()->name_and_sig_as_C_string();
-              const char* loader1 = SystemDictionary::loader_name(this_loader());
+              const char* loader1 = SystemDictionary::loader_name(target_loader());
               char* current = _klass->name()->as_C_string();
               const char* loader2 = SystemDictionary::loader_name(super_loader());
               size_t buflen = strlen(msg) + strlen(sig) + strlen(loader1) +
@@ -331,59 +331,46 @@
               THROW_MSG_(vmSymbols::java_lang_LinkageError(), buf, false);
             }
           }
+       }
+
+        put_method_at(target_method(), i);
+        target_method()->set_vtable_index(i);
+#ifndef PRODUCT
+        if (PrintVtables && Verbose) {
+          tty->print("overriding with %s::%s index %d, original flags: ",
+           _klass->internal_name(), (target_method() != NULL) ?
+           target_method()->name()->as_C_string() : "<NULL>", i);
+           super_method->access_flags().print_on(tty);
+           tty->print("overriders flags: ");
+           target_method->access_flags().print_on(tty);
+           tty->cr();
         }
-        put_method_at(target_method(), i);
-
-
-        if (same_access) {
-          // target and match has same accessiblity - share entry
-          allocate_new = false;
-          target_method()->set_vtable_index(i);
+#endif /*PRODUCT*/
+      } else {
+        // allocate_new = true; default. We might override one entry,
+        // but not override another. Once we override one, not need new
 #ifndef PRODUCT
-          if (PrintVtables && Verbose) {
-            AccessType targetacc;
-            if (target_method()->is_protected() ||
-                 target_method()->is_public()) {
-               targetacc =  acc_publicprotected;
-            } else {
-              targetacc = target_method()->is_package_private() ? acc_package_private : acc_private;
-            }
-            tty->print_cr("overriding with %s::%s index %d, original flags: %x overriders flags: %x",
-             _klass->internal_name(), (target_method() != NULL) ?
-             target_method()->name()->as_C_string() : "<NULL>", i,
-             at, targetacc);
-          }
+        if (PrintVtables && Verbose) {
+          tty->print("NOT overriding with %s::%s index %d, original flags: ",
+           _klass->internal_name(), (target_method() != NULL) ?
+           target_method()->name()->as_C_string() : "<NULL>", i);
+           super_method->access_flags().print_on(tty);
+           tty->print("overriders flags: ");
+           target_method->access_flags().print_on(tty);
+           tty->cr();
+        }
 #endif /*PRODUCT*/
-        } else {
-#ifndef PRODUCT
-          if (PrintVtables && Verbose) {
-            AccessType targetacc;
-            if (target_method()->is_protected() ||
-                 target_method()->is_public()) {
-               targetacc =  acc_publicprotected;
-            } else {
-              targetacc = target_method()->is_package_private() ? acc_package_private : acc_private;
-            }
-            tty->print_cr("override %s %s::%s at index %d, original flags: %x overriders flags: %x",
-            allocate_new ? "+ new" : "only",
-            _klass->internal_name(), (target_method() != NULL) ?
-            target_method()->name()->as_C_string() : "<NULL>", i,
-            at, targetacc);
-           }
-#endif /*PRODUCT*/
-        }
       }
     }
   }
   return allocate_new;
 }
 
-
-
 void klassVtable::put_method_at(methodOop m, int index) {
   assert(m->is_oop_or_null(), "Not an oop or null");
 #ifndef PRODUCT
   if (PrintVtables && Verbose) {
+    ResourceMark rm;
     tty->print_cr("adding %s::%s at index %d", _klass->internal_name(),
       (m != NULL) ? m->name()->as_C_string() : "<NULL>", index);
   }
@@ -397,19 +384,23 @@
 // by "classloader" and "classname".
 // NOTE: The logic used here is very similar to the one used for computing
 // the vtables indices for a method. We cannot directly use that function because,
-// when the Universe is boostrapping, a super's vtable might not be initialized.
-bool klassVtable::needs_new_vtable_entry(methodOop target_method,
+// we allocate the instanceKlass at load time, and that requires that the
+// superclass has been loaded.
+// However, the vtable entries are filled in at link time, and therefore
+// the superclass' vtable may not yet have been filled in.
+bool klassVtable::needs_new_vtable_entry(methodHandle target_method,
                                          klassOop super,
-                                         oop classloader,
-                                         symbolOop classname,
-                                         AccessFlags class_flags) {
-  if ((class_flags.is_final() || target_method->is_final()) ||
+                                         Handle classloader,
+                                         symbolHandle classname,
+                                         AccessFlags class_flags,
+                                         TRAPS) {
+  if ((class_flags.is_final() || target_method()->is_final()) ||
       // a final method never needs a new entry; final methods can be statically
       // resolved and they have to be present in the vtable only if they override
       // a super's method, in which case they re-use its entry
-      (target_method->is_static()) ||
+      (target_method()->is_static()) ||
       // static methods don't need to be in vtable
-      (target_method->name() ==  vmSymbols::object_initializer_name())
+      (target_method()->name() ==  vmSymbols::object_initializer_name())
       // <init> is never called dynamically-bound
       ) {
     return false;
@@ -421,55 +412,58 @@
   }
 
   // private methods always have a new entry in the vtable
-  if (target_method->is_private()) {
+  // specification interpretation since classic has
+  // private methods not overriding
+  if (target_method()->is_private()) {
     return true;
   }
 
   // search through the super class hierarchy to see if we need
   // a new entry
-  symbolOop name = target_method->name();
-  symbolOop signature = target_method->signature();
+  ResourceMark rm;
+  symbolOop name = target_method()->name();
+  symbolOop signature = target_method()->signature();
   klassOop k = super;
-  methodOop match_method = NULL;
+  methodOop super_method = NULL;
   instanceKlass *holder = NULL;
+  methodOop recheck_method =  NULL;
   while (k != NULL) {
     // lookup through the hierarchy for a method with matching name and sign.
-    match_method = instanceKlass::cast(k)->lookup_method(name, signature);
-    if (match_method == NULL) {
+    super_method = instanceKlass::cast(k)->lookup_method(name, signature);
+    if (super_method == NULL) {
       break; // we still have to search for a matching miranda method
     }
     // get the class holding the matching method
-    holder = instanceKlass::cast(match_method->method_holder());
-
-    if (!match_method->is_static()) { // we want only instance method matches
-      if ((target_method->is_public() || target_method->is_protected()) &&
-          (match_method->is_public()  || match_method->is_protected())) {
-        // target and match are public/protected; we do not need a new entry
+    // make sure you use that class for is_override
+    instanceKlass* superk = instanceKlass::cast(super_method->method_holder());
+    // we want only instance method matches
+    // pretend private methods are not in the super vtable
+    // since we do override around them: e.g. a.m pub/b.m private/c.m pub,
+    // ignore private, c.m pub does override a.m pub
+    // For classes that were not javac'd together, we also do transitive overriding around
+    // methods that have less accessibility
+    if ((!super_method->is_static()) &&
+       (!super_method->is_private())) {
+      if (superk->is_override(super_method, classloader, classname, THREAD)) {
         return false;
-      }
-
-      if (target_method->is_package_private() &&
-          match_method->is_package_private() &&
-          holder->is_same_class_package(classloader, classname)) {
-        // target and match are P private; we do not need a new entry
-        return false;
+      // else keep looking for transitive overrides
       }
     }
 
-    k = holder->super(); // haven't found a match yet; continue to look
+    // Start with lookup result and continue to search up
+    k = superk->super(); // haven't found an override match yet; continue to look
   }
 
   // if the target method is public or protected it may have a matching
   // miranda method in the super, whose entry it should re-use.
-  if (target_method->is_public() || target_method->is_protected()) {
-    instanceKlass *sk = instanceKlass::cast(super);
-    if (sk->has_miranda_methods()) {
-      if (sk->lookup_method_in_all_interfaces(name, signature) != NULL) {
-        return false;  // found a matching miranda; we do not need a new entry
-      }
+  // Actually, to handle cases that javac would not generate, we need
+  // this check for all access permissions.
+  instanceKlass *sk = instanceKlass::cast(super);
+  if (sk->has_miranda_methods()) {
+    if (sk->lookup_method_in_all_interfaces(name, signature) != NULL) {
+      return false;  // found a matching miranda; we do not need a new entry
     }
   }
-
   return true; // found no match; we need a new entry
 }
 
@@ -884,7 +878,7 @@
                                     _klass->name()->as_C_string());
 
 
-    // Interate through all interfaces
+    // Iterate through all interfaces
     int i;
     for(i = 0; i < num_interfaces; i++) {
       itableOffsetEntry* ioe = offset_entry(i);
@@ -1012,6 +1006,7 @@
             new_method->name()->as_C_string(),
             new_method->signature()->as_C_string()));
         }
+        break;
       }
       ime++;
     }
--- a/src/share/vm/oops/klassVtable.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/oops/klassVtable.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -70,8 +70,9 @@
   // conputes vtable length (in words) and the number of miranda methods
   static void compute_vtable_size_and_num_mirandas(int &vtable_length, int &num_miranda_methods,
                                                    klassOop super, objArrayOop methods,
-                                                   AccessFlags class_flags, oop classloader,
-                                                   symbolOop classname, objArrayOop local_interfaces);
+                                                   AccessFlags class_flags, Handle classloader,
+                                                   symbolHandle classname, objArrayOop local_interfaces,
+                                                   TRAPS);
 
   // RedefineClasses() API support:
   // If any entry of this vtable points to any of old_methods,
@@ -111,14 +112,16 @@
  protected:
   friend class vtableEntry;
  private:
+  enum { VTABLE_TRANSITIVE_OVERRIDE_VERSION = 51 } ;
   void copy_vtable_to(vtableEntry* start);
   int  initialize_from_super(KlassHandle super);
   int  index_of(methodOop m, int len) const; // same as index_of, but search only up to len
   void put_method_at(methodOop m, int index);
-  static bool needs_new_vtable_entry(methodOop m, klassOop super, oop classloader, symbolOop classname, AccessFlags access_flags);
-  AccessType vtable_accessibility_at(int i);
+  static bool needs_new_vtable_entry(methodHandle m, klassOop super, Handle classloader, symbolHandle classname, AccessFlags access_flags, TRAPS);
 
-  bool update_super_vtable(instanceKlass* klass, methodHandle target_method, int super_vtable_len, bool checkconstraints, TRAPS);
+  bool update_inherited_vtable(instanceKlass* klass, methodHandle target_method, int super_vtable_len, bool checkconstraints, TRAPS);
+ instanceKlass* find_transitive_override(instanceKlass* initialsuper, methodHandle target_method, int vtable_index,
+                                         Handle target_loader, symbolHandle target_classname, Thread* THREAD);
 
   // support for miranda methods
   bool is_miranda_entry_at(int i);
--- a/src/share/vm/oops/oop.inline.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/oops/oop.inline.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -148,10 +148,11 @@
 
 inline narrowOop oopDesc::encode_heap_oop_not_null(oop v) {
   assert(!is_null(v), "oop value can never be zero");
-  address heap_base = Universe::heap_base();
-  uint64_t pd = (uint64_t)(pointer_delta((void*)v, (void*)heap_base, 1));
+  address base = Universe::narrow_oop_base();
+  int    shift = Universe::narrow_oop_shift();
+  uint64_t  pd = (uint64_t)(pointer_delta((void*)v, (void*)base, 1));
   assert(OopEncodingHeapMax > pd, "change encoding max if new encoding");
-  uint64_t result = pd >> LogMinObjAlignmentInBytes;
+  uint64_t result = pd >> shift;
   assert((result & CONST64(0xffffffff00000000)) == 0, "narrow oop overflow");
   return (narrowOop)result;
 }
@@ -162,8 +163,9 @@
 
 inline oop oopDesc::decode_heap_oop_not_null(narrowOop v) {
   assert(!is_null(v), "narrow oop value can never be zero");
-  address heap_base = Universe::heap_base();
-  return (oop)(void*)((uintptr_t)heap_base + ((uintptr_t)v << LogMinObjAlignmentInBytes));
+  address base = Universe::narrow_oop_base();
+  int    shift = Universe::narrow_oop_shift();
+  return (oop)(void*)((uintptr_t)base + ((uintptr_t)v << shift));
 }
 
 inline oop oopDesc::decode_heap_oop(narrowOop v) {
--- a/src/share/vm/opto/addnode.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/opto/addnode.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -756,7 +756,13 @@
       if ( eti == NULL ) {
         // there must be one pointer among the operands
         guarantee(tptr == NULL, "must be only one pointer operand");
-        tptr = et->isa_oopptr();
+        if (UseCompressedOops && Universe::narrow_oop_shift() == 0) {
+          // 32-bits narrow oop can be the base of address expressions
+          tptr = et->make_ptr()->isa_oopptr();
+        } else {
+          // only regular oops are expected here
+          tptr = et->isa_oopptr();
+        }
         guarantee(tptr != NULL, "non-int operand must be pointer");
         if (tptr->higher_equal(tp->add_offset(tptr->offset())))
           tp = tptr; // Set more precise type for bailout
--- a/src/share/vm/opto/classes.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/opto/classes.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -184,6 +184,8 @@
 macro(Parm)
 macro(PartialSubtypeCheck)
 macro(Phi)
+macro(PopCountI)
+macro(PopCountL)
 macro(PowD)
 macro(PrefetchRead)
 macro(PrefetchWrite)
--- a/src/share/vm/opto/compile.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/opto/compile.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -2081,7 +2081,7 @@
 
 #ifdef _LP64
   case Op_CastPP:
-    if (n->in(1)->is_DecodeN() && UseImplicitNullCheckForNarrowOop) {
+    if (n->in(1)->is_DecodeN() && Universe::narrow_oop_use_implicit_null_checks()) {
       Compile* C = Compile::current();
       Node* in1 = n->in(1);
       const Type* t = n->bottom_type();
@@ -2136,7 +2136,7 @@
         new_in2 = in2->in(1);
       } else if (in2->Opcode() == Op_ConP) {
         const Type* t = in2->bottom_type();
-        if (t == TypePtr::NULL_PTR && UseImplicitNullCheckForNarrowOop) {
+        if (t == TypePtr::NULL_PTR && Universe::narrow_oop_use_implicit_null_checks()) {
           new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
           //
           // This transformation together with CastPP transformation above
--- a/src/share/vm/opto/connode.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/opto/connode.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -433,7 +433,7 @@
 // If not converting int->oop, throw away cast after constant propagation
 Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
   const Type *t = ccp->type(in(1));
-  if (!t->isa_oop_ptr() || in(1)->is_DecodeN()) {
+  if (!t->isa_oop_ptr() || (in(1)->is_DecodeN() && Universe::narrow_oop_use_implicit_null_checks())) {
     return NULL; // do not transform raw pointers or narrow oops
   }
   return ConstraintCastNode::Ideal_DU_postCCP(ccp);
--- a/src/share/vm/opto/connode.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/opto/connode.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -635,3 +635,23 @@
   virtual uint ideal_reg() const { return Op_RegL; }
   virtual const Type* Value( PhaseTransform *phase ) const;
 };
+
+//---------- PopCountINode -----------------------------------------------------
+// Population count (bit count) of an integer.
+class PopCountINode : public Node {
+public:
+  PopCountINode(Node* in1) : Node(0, in1) {}
+  virtual int Opcode() const;
+  const Type* bottom_type() const { return TypeInt::INT; }
+  virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//---------- PopCountLNode -----------------------------------------------------
+// Population count (bit count) of a long.
+class PopCountLNode : public Node {
+public:
+  PopCountLNode(Node* in1) : Node(0, in1) {}
+  virtual int Opcode() const;
+  const Type* bottom_type() const { return TypeInt::INT; }
+  virtual uint ideal_reg() const { return Op_RegI; }
+};
--- a/src/share/vm/opto/graphKit.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/opto/graphKit.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -2277,7 +2277,7 @@
   r_not_subtype->init_req(1, _gvn.transform( new (C, 1) IfTrueNode (iff2) ) );
   set_control(                _gvn.transform( new (C, 1) IfFalseNode(iff2) ) );
 
-  // Check for self.  Very rare to get here, but its taken 1/3 the time.
+  // Check for self.  Very rare to get here, but it is taken 1/3 the time.
   // No performance impact (too rare) but allows sharing of secondary arrays
   // which has some footprint reduction.
   Node *cmp3 = _gvn.transform( new (C, 3) CmpPNode( subklass, superklass ) );
@@ -2286,11 +2286,27 @@
   r_ok_subtype->init_req(2, _gvn.transform( new (C, 1) IfTrueNode ( iff3 ) ) );
   set_control(               _gvn.transform( new (C, 1) IfFalseNode( iff3 ) ) );
 
+  // -- Roads not taken here: --
+  // We could also have chosen to perform the self-check at the beginning
+  // of this code sequence, as the assembler does.  This would not pay off
+  // the same way, since the optimizer, unlike the assembler, can perform
+  // static type analysis to fold away many successful self-checks.
+  // Non-foldable self checks work better here in second position, because
+  // the initial primary superclass check subsumes a self-check for most
+  // types.  An exception would be a secondary type like array-of-interface,
+  // which does not appear in its own primary supertype display.
+  // Finally, we could have chosen to move the self-check into the
+  // PartialSubtypeCheckNode, and from there out-of-line in a platform
+  // dependent manner.  But it is worthwhile to have the check here,
+  // where it can be perhaps be optimized.  The cost in code space is
+  // small (register compare, branch).
+
   // Now do a linear scan of the secondary super-klass array.  Again, no real
   // performance impact (too rare) but it's gotta be done.
-  // (The stub also contains the self-check of subklass == superklass.
   // Since the code is rarely used, there is no penalty for moving it
-  // out of line, and it can only improve I-cache density.)
+  // out of line, and it can only improve I-cache density.
+  // The decision to inline or out-of-line this final check is platform
+  // dependent, and is found in the AD file definition of PartialSubtypeCheck.
   Node* psc = _gvn.transform(
     new (C, 3) PartialSubtypeCheckNode(control(), subklass, superklass) );
 
--- a/src/share/vm/opto/lcm.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/opto/lcm.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -158,7 +158,14 @@
           continue;             // Give up if offset is beyond page size
         // cannot reason about it; is probably not implicit null exception
       } else {
-        const TypePtr* tptr = base->bottom_type()->is_ptr();
+        const TypePtr* tptr;
+        if (UseCompressedOops && Universe::narrow_oop_shift() == 0) {
+          // 32-bits narrow oop can be the base of address expressions
+          tptr = base->bottom_type()->make_ptr();
+        } else {
+          // only regular oops are expected here
+          tptr = base->bottom_type()->is_ptr();
+        }
         // Give up if offset is not a compile-time constant
         if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot )
           continue;
--- a/src/share/vm/opto/library_call.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/opto/library_call.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1999-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -221,6 +221,7 @@
   bool inline_unsafe_CAS(BasicType type);
   bool inline_unsafe_ordered_store(BasicType type);
   bool inline_fp_conversions(vmIntrinsics::ID id);
+  bool inline_bitCount(vmIntrinsics::ID id);
   bool inline_reverseBytes(vmIntrinsics::ID id);
 };
 
@@ -314,6 +315,11 @@
     if (!JDK_Version::is_gte_jdk14x_version())  return NULL;
     break;
 
+  case vmIntrinsics::_bitCount_i:
+  case vmIntrinsics::_bitCount_l:
+    if (!UsePopCountInstruction)  return NULL;
+    break;
+
  default:
     break;
   }
@@ -617,6 +623,10 @@
   case vmIntrinsics::_longBitsToDouble:
     return inline_fp_conversions(intrinsic_id());
 
+  case vmIntrinsics::_bitCount_i:
+  case vmIntrinsics::_bitCount_l:
+    return inline_bitCount(intrinsic_id());
+
   case vmIntrinsics::_reverseBytes_i:
   case vmIntrinsics::_reverseBytes_l:
     return inline_reverseBytes((vmIntrinsics::ID) intrinsic_id());
@@ -1714,6 +1724,27 @@
   }
 }
 
+//----------------------------inline_bitCount_int/long-----------------------
+// inline int Integer.bitCount(int)
+// inline int Long.bitCount(long)
+bool LibraryCallKit::inline_bitCount(vmIntrinsics::ID id) {
+  assert(id == vmIntrinsics::_bitCount_i || id == vmIntrinsics::_bitCount_l, "not bitCount");
+  if (id == vmIntrinsics::_bitCount_i && !Matcher::has_match_rule(Op_PopCountI)) return false;
+  if (id == vmIntrinsics::_bitCount_l && !Matcher::has_match_rule(Op_PopCountL)) return false;
+  _sp += arg_size();  // restore stack pointer
+  switch (id) {
+  case vmIntrinsics::_bitCount_i:
+    push(_gvn.transform(new (C, 2) PopCountINode(pop())));
+    break;
+  case vmIntrinsics::_bitCount_l:
+    push(_gvn.transform(new (C, 2) PopCountLNode(pop_pair())));
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+  return true;
+}
+
 //----------------------------inline_reverseBytes_int/long-------------------
 // inline Integer.reverseBytes(int)
 // inline Long.reverseBytes(long)
--- a/src/share/vm/opto/matcher.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/opto/matcher.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -1481,8 +1481,13 @@
       const Type* mach_at = mach->adr_type();
       // DecodeN node consumed by an address may have different type
       // then its input. Don't compare types for such case.
-      if (m->adr_type() != mach_at && m->in(MemNode::Address)->is_AddP() &&
-          m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN()) {
+      if (m->adr_type() != mach_at &&
+          (m->in(MemNode::Address)->is_DecodeN() ||
+           m->in(MemNode::Address)->is_AddP() &&
+           m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN() ||
+           m->in(MemNode::Address)->is_AddP() &&
+           m->in(MemNode::Address)->in(AddPNode::Address)->is_AddP() &&
+           m->in(MemNode::Address)->in(AddPNode::Address)->in(AddPNode::Address)->is_DecodeN())) {
         mach_at = m->adr_type();
       }
       if (m->adr_type() != mach_at) {
--- a/src/share/vm/prims/jni.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/prims/jni.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -301,6 +301,10 @@
   klassOop k = SystemDictionary::resolve_from_stream(class_name, class_loader,
                                                      Handle(), &st, CHECK_NULL);
 
+  if (TraceClassResolution && k != NULL) {
+    trace_class_resolution(k);
+  }
+
   cls = (jclass)JNIHandles::make_local(
     env, Klass::cast(k)->java_mirror());
   return cls;
@@ -365,6 +369,10 @@
   result = find_class_from_class_loader(env, sym, true, loader,
                                         protection_domain, true, thread);
 
+  if (TraceClassResolution && result != NULL) {
+    trace_class_resolution(java_lang_Class::as_klassOop(JNIHandles::resolve_non_null(result)));
+  }
+
   // If we were the first invocation of jni_FindClass, we enable compilation again
   // rather than just allowing invocation counter to overflow and decay.
   // Controlled by flag DelayCompilationDuringStartup.
@@ -2646,7 +2654,12 @@
   Handle protection_domain; // null protection domain
 
   symbolHandle sym = oopFactory::new_symbol_handle(name, CHECK_NULL);
-  return find_class_from_class_loader(env, sym, true, loader, protection_domain, true, CHECK_NULL);
+  jclass result =  find_class_from_class_loader(env, sym, true, loader, protection_domain, true, CHECK_NULL);
+
+  if (TraceClassResolution && result != NULL) {
+    trace_class_resolution(java_lang_Class::as_klassOop(JNIHandles::resolve_non_null(result)));
+  }
+  return result;
 }
 
 // These lookups are done with the NULL (bootstrap) ClassLoader to
--- a/src/share/vm/prims/jvm.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/prims/jvm.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -64,6 +64,7 @@
   ResourceMark rm;
   int line_number = -1;
   const char * source_file = NULL;
+  const char * trace = "explicit";
   klassOop caller = NULL;
   JavaThread* jthread = JavaThread::current();
   if (jthread->has_last_Java_frame()) {
@@ -107,12 +108,21 @@
                (last_caller->name() == vmSymbols::loadClassInternal_name() ||
                 last_caller->name() == vmSymbols::loadClass_name())) {
       found_it = true;
+    } else if (!vfst.at_end()) {
+      if (vfst.method()->is_native()) {
+        // JNI call
+        found_it = true;
+      }
     }
     if (found_it && !vfst.at_end()) {
       // found the caller
       caller = vfst.method()->method_holder();
       line_number = vfst.method()->line_number_from_bci(vfst.bci());
-      symbolOop s = instanceKlass::cast(vfst.method()->method_holder())->source_file_name();
+      if (line_number == -1) {
+        // show method name if it's a native method
+        trace = vfst.method()->name_and_sig_as_C_string();
+      }
+      symbolOop s = instanceKlass::cast(caller)->source_file_name();
       if (s != NULL) {
         source_file = s->as_C_string();
       }
@@ -124,15 +134,15 @@
       const char * to = Klass::cast(to_class)->external_name();
       // print in a single call to reduce interleaving between threads
       if (source_file != NULL) {
-        tty->print("RESOLVE %s %s %s:%d (explicit)\n", from, to, source_file, line_number);
+        tty->print("RESOLVE %s %s %s:%d (%s)\n", from, to, source_file, line_number, trace);
       } else {
-        tty->print("RESOLVE %s %s (explicit)\n", from, to);
+        tty->print("RESOLVE %s %s (%s)\n", from, to, trace);
       }
     }
   }
 }
 
-static void trace_class_resolution(klassOop to_class) {
+void trace_class_resolution(klassOop to_class) {
   EXCEPTION_MARK;
   trace_class_resolution_impl(to_class, THREAD);
   if (HAS_PENDING_EXCEPTION) {
@@ -3213,8 +3223,12 @@
   }
   Handle h_loader(THREAD, loader);
   Handle h_prot  (THREAD, protection_domain);
-  return find_class_from_class_loader(env, name, true, h_loader, h_prot,
-                                      false, thread);
+  jclass result =  find_class_from_class_loader(env, name, true, h_loader, h_prot,
+                                                false, thread);
+  if (TraceClassResolution && result != NULL) {
+    trace_class_resolution(java_lang_Class::as_klassOop(JNIHandles::resolve_non_null(result)));
+  }
+  return result;
 JVM_END
 
 
--- a/src/share/vm/prims/jvm_misc.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/prims/jvm_misc.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -27,6 +27,7 @@
 
 jclass find_class_from_class_loader(JNIEnv* env, symbolHandle name, jboolean init, Handle loader, Handle protection_domain, jboolean throwError, TRAPS);
 
+void trace_class_resolution(klassOop to_class);
 
 /*
  * Support for Serialization and RMI. Currently used by HotSpot only.
--- a/src/share/vm/runtime/arguments.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/runtime/arguments.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -1211,7 +1211,9 @@
     if (UseLargePages && UseCompressedOops) {
       // Cannot allocate guard pages for implicit checks in indexed addressing
       // mode, when large pages are specified on windows.
-      FLAG_SET_DEFAULT(UseImplicitNullCheckForNarrowOop, false);
+      // This flag could be switched ON if narrow oop base address is set to 0,
+      // see code in Universe::initialize_heap().
+      Universe::set_narrow_oop_use_implicit_null_checks(false);
     }
 #endif //  _WIN64
   } else {
--- a/src/share/vm/runtime/globals.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/runtime/globals.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -303,11 +303,14 @@
             "Use 32-bit object references in 64-bit VM. "                   \
             "lp64_product means flag is always constant in 32 bit VM")      \
                                                                             \
-  lp64_product(bool, CheckCompressedOops, trueInDebug,                      \
-            "generate checks in encoding/decoding code")                    \
-                                                                            \
-  product(bool, UseImplicitNullCheckForNarrowOop, true,                     \
-            "generate implicit null check in indexed addressing mode.")     \
+  notproduct(bool, CheckCompressedOops, true,                               \
+            "generate checks in encoding/decoding code in debug VM")        \
+                                                                            \
+  product_pd(uintx, HeapBaseMinAddress,                                     \
+            "OS specific low limit for heap base address")                  \
+                                                                            \
+  diagnostic(bool, PrintCompressedOopsMode, false,                          \
+            "Print compressed oops base address and encoding mode")         \
                                                                             \
   /* UseMembar is theoretically a temp flag used for memory barrier         \
    * removal testing.  It was supposed to be removed before FCS but has     \
@@ -2169,6 +2172,9 @@
   diagnostic(bool, PrintIntrinsics, false,                                  \
           "prints attempted and successful inlining of intrinsics")         \
                                                                             \
+  product(bool, UsePopCountInstruction, false,                              \
+          "Use population count instruction")                               \
+                                                                            \
   diagnostic(ccstrlist, DisableIntrinsic, "",                               \
           "do not expand intrinsics whose (internal) names appear here")    \
                                                                             \
--- a/src/share/vm/runtime/os.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/runtime/os.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -202,8 +202,10 @@
   static char*  attempt_reserve_memory_at(size_t bytes, char* addr);
   static void   split_reserved_memory(char *base, size_t size,
                                       size_t split, bool realloc);
-  static bool   commit_memory(char* addr, size_t bytes);
-  static bool   commit_memory(char* addr, size_t size, size_t alignment_hint);
+  static bool   commit_memory(char* addr, size_t bytes,
+                              bool executable = false);
+  static bool   commit_memory(char* addr, size_t size, size_t alignment_hint,
+                              bool executable = false);
   static bool   uncommit_memory(char* addr, size_t bytes);
   static bool   release_memory(char* addr, size_t bytes);
 
@@ -243,7 +245,8 @@
 
   static char*  non_memory_address_word();
   // reserve, commit and pin the entire memory region
-  static char*  reserve_memory_special(size_t size);
+  static char*  reserve_memory_special(size_t size, char* addr = NULL,
+                bool executable = false);
   static bool   release_memory_special(char* addr, size_t bytes);
   static bool   large_page_init();
   static size_t large_page_size();
--- a/src/share/vm/runtime/virtualspace.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/runtime/virtualspace.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -28,7 +28,7 @@
 
 // ReservedSpace
 ReservedSpace::ReservedSpace(size_t size) {
-  initialize(size, 0, false, NULL, 0);
+  initialize(size, 0, false, NULL, 0, false);
 }
 
 ReservedSpace::ReservedSpace(size_t size, size_t alignment,
@@ -36,7 +36,13 @@
                              char* requested_address,
                              const size_t noaccess_prefix) {
   initialize(size+noaccess_prefix, alignment, large, requested_address,
-             noaccess_prefix);
+             noaccess_prefix, false);
+}
+
+ReservedSpace::ReservedSpace(size_t size, size_t alignment,
+                             bool large,
+                             bool executable) {
+  initialize(size, alignment, large, NULL, 0, executable);
 }
 
 char *
@@ -109,6 +115,7 @@
                              const size_t prefix_align,
                              const size_t suffix_size,
                              const size_t suffix_align,
+                             char* requested_address,
                              const size_t noaccess_prefix)
 {
   assert(prefix_size != 0, "sanity");
@@ -131,7 +138,8 @@
   const bool try_reserve_special = UseLargePages &&
     prefix_align == os::large_page_size();
   if (!os::can_commit_large_page_memory() && try_reserve_special) {
-    initialize(size, prefix_align, true, NULL, noaccess_prefix);
+    initialize(size, prefix_align, true, requested_address, noaccess_prefix,
+               false);
     return;
   }
 
@@ -140,13 +148,20 @@
   _alignment = 0;
   _special = false;
   _noaccess_prefix = 0;
+  _executable = false;
 
   // Assert that if noaccess_prefix is used, it is the same as prefix_align.
   assert(noaccess_prefix == 0 ||
          noaccess_prefix == prefix_align, "noaccess prefix wrong");
 
   // Optimistically try to reserve the exact size needed.
-  char* addr = os::reserve_memory(size, NULL, prefix_align);
+  char* addr;
+  if (requested_address != 0) {
+    addr = os::attempt_reserve_memory_at(size,
+                                         requested_address-noaccess_prefix);
+  } else {
+    addr = os::reserve_memory(size, NULL, prefix_align);
+  }
   if (addr == NULL) return;
 
   // Check whether the result has the needed alignment (unlikely unless
@@ -182,7 +197,8 @@
 
 void ReservedSpace::initialize(size_t size, size_t alignment, bool large,
                                char* requested_address,
-                               const size_t noaccess_prefix) {
+                               const size_t noaccess_prefix,
+                               bool executable) {
   const size_t granularity = os::vm_allocation_granularity();
   assert((size & granularity - 1) == 0,
          "size not aligned to os::vm_allocation_granularity()");
@@ -194,6 +210,7 @@
   _base = NULL;
   _size = 0;
   _special = false;
+  _executable = executable;
   _alignment = 0;
   _noaccess_prefix = 0;
   if (size == 0) {
@@ -206,12 +223,8 @@
   char* base = NULL;
 
   if (special) {
-    // It's not hard to implement reserve_memory_special() such that it can
-    // allocate at fixed address, but there seems no use of this feature
-    // for now, so it's not implemented.
-    assert(requested_address == NULL, "not implemented");
 
-    base = os::reserve_memory_special(size);
+    base = os::reserve_memory_special(size, requested_address, executable);
 
     if (base != NULL) {
       // Check alignment constraints
@@ -281,7 +294,7 @@
 
 
 ReservedSpace::ReservedSpace(char* base, size_t size, size_t alignment,
-                             bool special) {
+                             bool special, bool executable) {
   assert((size % os::vm_allocation_granularity()) == 0,
          "size not allocation aligned");
   _base = base;
@@ -289,6 +302,7 @@
   _alignment = alignment;
   _noaccess_prefix = 0;
   _special = special;
+  _executable = executable;
 }
 
 
@@ -296,9 +310,10 @@
                                         bool split, bool realloc) {
   assert(partition_size <= size(), "partition failed");
   if (split) {
-    os::split_reserved_memory(_base, _size, partition_size, realloc);
+    os::split_reserved_memory(base(), size(), partition_size, realloc);
   }
-  ReservedSpace result(base(), partition_size, alignment, special());
+  ReservedSpace result(base(), partition_size, alignment, special(),
+                       executable());
   return result;
 }
 
@@ -307,7 +322,7 @@
 ReservedSpace::last_part(size_t partition_size, size_t alignment) {
   assert(partition_size <= size(), "partition failed");
   ReservedSpace result(base() + partition_size, size() - partition_size,
-                       alignment, special());
+                       alignment, special(), executable());
   return result;
 }
 
@@ -345,6 +360,7 @@
     _size = 0;
     _noaccess_prefix = 0;
     _special = false;
+    _executable = false;
   }
 }
 
@@ -372,7 +388,8 @@
                                      bool large, char* requested_address) :
   ReservedSpace(size, alignment, large,
                 requested_address,
-                UseCompressedOops && UseImplicitNullCheckForNarrowOop ?
+                (UseCompressedOops && (Universe::narrow_oop_base() != NULL) &&
+                 Universe::narrow_oop_use_implicit_null_checks()) ?
                   lcm(os::vm_page_size(), alignment) : 0) {
   // Only reserved space for the java heap should have a noaccess_prefix
   // if using compressed oops.
@@ -382,13 +399,24 @@
 ReservedHeapSpace::ReservedHeapSpace(const size_t prefix_size,
                                      const size_t prefix_align,
                                      const size_t suffix_size,
-                                     const size_t suffix_align) :
+                                     const size_t suffix_align,
+                                     char* requested_address) :
   ReservedSpace(prefix_size, prefix_align, suffix_size, suffix_align,
-                UseCompressedOops && UseImplicitNullCheckForNarrowOop ?
+                requested_address,
+                (UseCompressedOops && (Universe::narrow_oop_base() != NULL) &&
+                 Universe::narrow_oop_use_implicit_null_checks()) ?
                   lcm(os::vm_page_size(), prefix_align) : 0) {
   protect_noaccess_prefix(prefix_size+suffix_size);
 }
 
+// Reserve space for code segment.  Same as Java heap only we mark this as
+// executable.
+ReservedCodeSpace::ReservedCodeSpace(size_t r_size,
+                                     size_t rs_align,
+                                     bool large) :
+  ReservedSpace(r_size, rs_align, large, /*executable*/ true) {
+}
+
 // VirtualSpace
 
 VirtualSpace::VirtualSpace() {
@@ -406,6 +434,7 @@
   _middle_alignment       = 0;
   _upper_alignment        = 0;
   _special                = false;
+  _executable             = false;
 }
 
 
@@ -419,6 +448,7 @@
   _high = low();
 
   _special = rs.special();
+  _executable = rs.executable();
 
   // When a VirtualSpace begins life at a large size, make all future expansion
   // and shrinking occur aligned to a granularity of large pages.  This avoids
@@ -476,6 +506,7 @@
   _middle_alignment       = 0;
   _upper_alignment        = 0;
   _special                = false;
+  _executable             = false;
 }
 
 
@@ -585,7 +616,7 @@
     assert(low_boundary() <= lower_high() &&
            lower_high() + lower_needs <= lower_high_boundary(),
            "must not expand beyond region");
-    if (!os::commit_memory(lower_high(), lower_needs)) {
+    if (!os::commit_memory(lower_high(), lower_needs, _executable)) {
       debug_only(warning("os::commit_memory failed"));
       return false;
     } else {
@@ -596,7 +627,8 @@
     assert(lower_high_boundary() <= middle_high() &&
            middle_high() + middle_needs <= middle_high_boundary(),
            "must not expand beyond region");
-    if (!os::commit_memory(middle_high(), middle_needs, middle_alignment())) {
+    if (!os::commit_memory(middle_high(), middle_needs, middle_alignment(),
+                           _executable)) {
       debug_only(warning("os::commit_memory failed"));
       return false;
     }
@@ -606,7 +638,7 @@
     assert(middle_high_boundary() <= upper_high() &&
            upper_high() + upper_needs <= upper_high_boundary(),
            "must not expand beyond region");
-    if (!os::commit_memory(upper_high(), upper_needs)) {
+    if (!os::commit_memory(upper_high(), upper_needs, _executable)) {
       debug_only(warning("os::commit_memory failed"));
       return false;
     } else {
--- a/src/share/vm/runtime/virtualspace.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/runtime/virtualspace.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -32,12 +32,15 @@
   size_t _noaccess_prefix;
   size_t _alignment;
   bool   _special;
+  bool   _executable;
 
   // ReservedSpace
-  ReservedSpace(char* base, size_t size, size_t alignment, bool special);
+  ReservedSpace(char* base, size_t size, size_t alignment, bool special,
+                bool executable);
   void initialize(size_t size, size_t alignment, bool large,
                   char* requested_address,
-                  const size_t noaccess_prefix);
+                  const size_t noaccess_prefix,
+                  bool executable);
 
   // Release parts of an already-reserved memory region [addr, addr + len) to
   // get a new region that has "compound alignment."  Return the start of the
@@ -73,17 +76,18 @@
                 const size_t noaccess_prefix = 0);
   ReservedSpace(const size_t prefix_size, const size_t prefix_align,
                 const size_t suffix_size, const size_t suffix_align,
-                const size_t noaccess_prefix);
+                char* requested_address,
+                const size_t noaccess_prefix = 0);
+  ReservedSpace(size_t size, size_t alignment, bool large, bool executable);
 
   // Accessors
-  char*  base()      const { return _base;      }
-  size_t size()      const { return _size;      }
-  size_t alignment() const { return _alignment; }
-  bool   special()   const { return _special;   }
-
-  size_t noaccess_prefix()   const { return _noaccess_prefix;   }
-
-  bool is_reserved() const { return _base != NULL; }
+  char*  base()            const { return _base;      }
+  size_t size()            const { return _size;      }
+  size_t alignment()       const { return _alignment; }
+  bool   special()         const { return _special;   }
+  bool   executable()      const { return _executable;   }
+  size_t noaccess_prefix() const { return _noaccess_prefix;   }
+  bool is_reserved()       const { return _base != NULL; }
   void release();
 
   // Splitting
@@ -121,7 +125,15 @@
   ReservedHeapSpace(size_t size, size_t forced_base_alignment,
                     bool large, char* requested_address);
   ReservedHeapSpace(const size_t prefix_size, const size_t prefix_align,
-                    const size_t suffix_size, const size_t suffix_align);
+                    const size_t suffix_size, const size_t suffix_align,
+                    char* requested_address);
+};
+
+// Class encapsulating behavior specific memory space for Code
+class ReservedCodeSpace : public ReservedSpace {
+ public:
+  // Constructor
+  ReservedCodeSpace(size_t r_size, size_t rs_align, bool large);
 };
 
 // VirtualSpace is data structure for committing a previously reserved address range in smaller chunks.
@@ -141,6 +153,9 @@
   // os::commit_memory() or os::uncommit_memory().
   bool _special;
 
+  // Need to know if commit should be executable.
+  bool   _executable;
+
   // MPSS Support
   // Each virtualspace region has a lower, middle, and upper region.
   // Each region has an end boundary and a high pointer which is the
--- a/src/share/vm/runtime/vmStructs.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/runtime/vmStructs.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -263,7 +263,9 @@
      static_field(Universe,                    _bootstrapping,                                bool)                                  \
      static_field(Universe,                    _fully_initialized,                            bool)                                  \
      static_field(Universe,                    _verify_count,                                 int)                                   \
-     static_field(Universe,                    _heap_base,                                    address)                                   \
+     static_field(Universe,                    _narrow_oop._base,                             address)                               \
+     static_field(Universe,                    _narrow_oop._shift,                            int)                                   \
+     static_field(Universe,                    _narrow_oop._use_implicit_null_checks,         bool)                                  \
                                                                                                                                      \
   /**********************************************************************************/                                               \
   /* Generation and Space hierarchies                                               */                                               \
--- a/src/share/vm/runtime/vm_version.cpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/runtime/vm_version.cpp	Fri Mar 27 16:54:56 2009 -0700
@@ -163,9 +163,11 @@
       #elif _MSC_VER == 1200
         #define HOTSPOT_BUILD_COMPILER "MS VC++ 6.0"
       #elif _MSC_VER == 1310
-        #define HOTSPOT_BUILD_COMPILER "MS VC++ 7.1"
+        #define HOTSPOT_BUILD_COMPILER "MS VC++ 7.1 (VS2003)"
       #elif _MSC_VER == 1400
-        #define HOTSPOT_BUILD_COMPILER "MS VC++ 8.0"
+        #define HOTSPOT_BUILD_COMPILER "MS VC++ 8.0 (VS2005)"
+      #elif _MSC_VER == 1500
+        #define HOTSPOT_BUILD_COMPILER "MS VC++ 9.0 (VS2008)"
       #else
         #define HOTSPOT_BUILD_COMPILER "unknown MS VC++:" XSTR(_MSC_VER)
       #endif
--- a/src/share/vm/utilities/globalDefinitions_visCPP.hpp	Thu Mar 19 13:25:23 2009 -0700
+++ b/src/share/vm/utilities/globalDefinitions_visCPP.hpp	Fri Mar 27 16:54:56 2009 -0700
@@ -153,16 +153,8 @@
 //----------------------------------------------------------------------------------------------------
 // Miscellaneous
 
-inline int vsnprintf(char* buf, size_t count, const char* fmt, va_list argptr) {
-  // If number of characters written == count, Windows doesn't write a
-  // terminating NULL, so we do it ourselves.
-  int ret = _vsnprintf(buf, count, fmt, argptr);
-  if (count > 0) buf[count-1] = '\0';
-  return ret;
-}
-
 // Visual Studio 2005 deprecates POSIX names - use ISO C++ names instead
-#if _MSC_VER >= 1400 && !defined(_WIN64)
+#if _MSC_VER >= 1400
 #define open _open
 #define close _close
 #define read  _read
@@ -180,6 +172,17 @@
 #pragma warning( disable : 4201 ) // nonstandard extension used : nameless struct/union (needed in windows.h)
 #pragma warning( disable : 4511 ) // copy constructor could not be generated
 #pragma warning( disable : 4291 ) // no matching operator delete found; memory will not be freed if initialization thows an exception
+#if _MSC_VER >= 1400
+#pragma warning( disable : 4996 ) // unsafe string functions. Same as define _CRT_SECURE_NO_WARNINGS/_CRT_SECURE_NO_DEPRICATE
+#endif
+
+inline int vsnprintf(char* buf, size_t count, const char* fmt, va_list argptr) {
+  // If number of characters written == count, Windows doesn't write a
+  // terminating NULL, so we do it ourselves.
+  int ret = _vsnprintf(buf, count, fmt, argptr);
+  if (count > 0) buf[count-1] = '\0';
+  return ret;
+}
 
 // Portability macros
 #define PRAGMA_INTERFACE
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6378821/Test6378821.java	Fri Mar 27 16:54:56 2009 -0700
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/**
+ * @test
+ * @bug 6378821
+ * @summary where available, bitCount() should use POPC on SPARC processors and AMD+10h
+ *
+ * @run main/othervm -Xcomp -XX:CompileOnly=Test6378821.fcomp Test6378821
+ */
+
+public class Test6378821 {
+    static final int[]  ia = new int[]  { 0x12345678 };
+    static final long[] la = new long[] { 0x12345678abcdefL };
+
+    public static void main(String [] args) {
+        // Resolve the class and the method.
+        Integer.bitCount(1);
+        Long.bitCount(1);
+
+        sub(ia[0]);
+        sub(la[0]);
+        sub(ia);
+        sub(la);
+    }
+
+    static void check(int i, int expected, int result) {
+        if (result != expected) {
+            throw new InternalError("Wrong population count for " + i + ": " + result + " != " + expected);
+        }
+    }
+
+    static void check(long l, int expected, int result) {
+        if (result != expected) {
+            throw new InternalError("Wrong population count for " + l + ": " + result + " != " + expected);
+        }
+    }
+
+    static void sub(int i)     { check(i,     fint(i),  fcomp(i) ); }
+    static void sub(int[] ia)  { check(ia[0], fint(ia), fcomp(ia)); }
+    static void sub(long l)    { check(l,     fint(l),  fcomp(l) ); }
+    static void sub(long[] la) { check(la[0], fint(la), fcomp(la)); }
+
+    static int fint (int i)     { return Integer.bitCount(i); }
+    static int fcomp(int i)     { return Integer.bitCount(i); }
+
+    static int fint (int[] ia)  { return Integer.bitCount(ia[0]); }
+    static int fcomp(int[] ia)  { return Integer.bitCount(ia[0]); }
+
+    static int fint (long l)    { return Long.bitCount(l); }
+    static int fcomp(long l)    { return Long.bitCount(l); }
+
+    static int fint (long[] la) { return Long.bitCount(la[0]); }
+    static int fcomp(long[] la) { return Long.bitCount(la[0]); }
+}