changeset 3110:ea527c5cde03

Merge
author zgu
date Thu, 09 Feb 2012 07:35:48 -0800
parents db006a85bf91 1b0e0f8be510
children 54d3535a6dd3
files
diffstat 166 files changed, 5497 insertions(+), 2002 deletions(-) [+]
line wrap: on
line diff
--- a/.hgtags	Thu Feb 09 10:16:26 2012 -0500
+++ b/.hgtags	Thu Feb 09 07:35:48 2012 -0800
@@ -213,3 +213,8 @@
 513351373923f74a7c91755748b95c9771e59f96 hs23-b10
 24727fb37561779077fdfa5a33342246f20e5c0f jdk8-b22
 dcc292399a39113957eebbd3e487b7e05e2c79fc hs23-b11
+e850d8e7ea54b91c7aa656e297f0f9f38dd4c296 jdk8-b23
+9e177d44b10fe92ecffa965fef9c5ac5433c1b46 hs23-b12
+a80fd4f45d7aaa154ed2f86a129f3c9c4035ec7a jdk8-b24
+b22de824749922986ce4d442bed029916b832807 hs23-b13
+64b46f975ab82948c1e021e17775ff4fab8bc40e hs23-b14
--- a/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1CollectedHeap.java	Thu Feb 09 10:16:26 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1CollectedHeap.java	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -49,8 +49,12 @@
     static private long g1CommittedFieldOffset;
     // size_t _summary_bytes_used;
     static private CIntegerField summaryBytesUsedField;
-    // G1MonitoringSupport* _g1mm
+    // G1MonitoringSupport* _g1mm;
     static private AddressField g1mmField;
+    // MasterOldRegionSet _old_set;
+    static private long oldSetFieldOffset;
+    // MasterHumongousRegionSet _humongous_set;
+    static private long humongousSetFieldOffset;
 
     static {
         VM.registerVMInitializedObserver(new Observer() {
@@ -67,12 +71,14 @@
         g1CommittedFieldOffset = type.getField("_g1_committed").getOffset();
         summaryBytesUsedField = type.getCIntegerField("_summary_bytes_used");
         g1mmField = type.getAddressField("_g1mm");
+        oldSetFieldOffset = type.getField("_old_set").getOffset();
+        humongousSetFieldOffset = type.getField("_humongous_set").getOffset();
     }
 
     public long capacity() {
         Address g1CommittedAddr = addr.addOffsetTo(g1CommittedFieldOffset);
-        MemRegion g1_committed = new MemRegion(g1CommittedAddr);
-        return g1_committed.byteSize();
+        MemRegion g1Committed = new MemRegion(g1CommittedAddr);
+        return g1Committed.byteSize();
     }
 
     public long used() {
@@ -94,6 +100,18 @@
         return (G1MonitoringSupport) VMObjectFactory.newObject(G1MonitoringSupport.class, g1mmAddr);
     }
 
+    // Mirror of the _old_set field, found at oldSetFieldOffset from this heap's address.
+    public HeapRegionSetBase oldSet() {
+        return (HeapRegionSetBase) VMObjectFactory.newObject(
+            HeapRegionSetBase.class, addr.addOffsetTo(oldSetFieldOffset));
+    }
+
+    // Mirror of the _humongous_set field, found at humongousSetFieldOffset from this heap's address.
+    public HeapRegionSetBase humongousSet() {
+        return (HeapRegionSetBase) VMObjectFactory.newObject(
+            HeapRegionSetBase.class, addr.addOffsetTo(humongousSetFieldOffset));
+    }
+
     private Iterator<HeapRegion> heapRegionIterator() {
         return hrs().heapRegionIterator();
     }
--- a/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1MonitoringSupport.java	Thu Feb 09 10:16:26 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1MonitoringSupport.java	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -77,6 +77,10 @@
         return edenUsedField.getValue(addr);
     }
 
+    public long edenRegionNum() { // edenUsed() expressed in HeapRegion.grainBytes()-sized units
+        return edenUsed() / HeapRegion.grainBytes(); // integral division: any remainder is dropped
+    }
+
     public long survivorCommitted() {
         return survivorCommittedField.getValue(addr);
     }
@@ -85,6 +89,10 @@
         return survivorUsedField.getValue(addr);
     }
 
+    public long survivorRegionNum() { // survivorUsed() expressed in HeapRegion.grainBytes()-sized units
+        return survivorUsed() / HeapRegion.grainBytes(); // integral division: any remainder is dropped
+    }
+
     public long oldCommitted() {
         return oldCommittedField.getValue(addr);
     }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetBase.java	Thu Feb 09 07:35:48 2012 -0800
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.gc_implementation.g1;
+
+import java.util.Iterator;
+import java.util.Observable;
+import java.util.Observer;
+
+import sun.jvm.hotspot.debugger.Address;
+import sun.jvm.hotspot.runtime.VM;
+import sun.jvm.hotspot.runtime.VMObject;
+import sun.jvm.hotspot.runtime.VMObjectFactory;
+import sun.jvm.hotspot.types.AddressField;
+import sun.jvm.hotspot.types.CIntegerField;
+import sun.jvm.hotspot.types.Type;
+import sun.jvm.hotspot.types.TypeDataBase;
+
+// Mirror class for the VM type "HeapRegionSetBase". Represents a group of regions.
+
+public class HeapRegionSetBase extends VMObject {
+    // size_t _length; (bound in initialize(), read by length())
+    static private CIntegerField lengthField;
+    // size_t _region_num; (bound in initialize(), read by regionNum())
+    static private CIntegerField regionNumField;
+    // size_t _total_used_bytes; (bound in initialize(), read by totalUsedBytes())
+    static private CIntegerField totalUsedBytesField;
+
+    static { // registers an observer whose update() calls initialize() with the VM's type database
+        VM.registerVMInitializedObserver(new Observer() {
+                public void update(Observable o, Object data) {
+                    initialize(VM.getVM().getTypeDataBase());
+                }
+            });
+    }
+
+    static private synchronized void initialize(TypeDataBase db) { // looks up the three fields by name
+        Type type = db.lookupType("HeapRegionSetBase");
+
+        lengthField         = type.getCIntegerField("_length");
+        regionNumField      = type.getCIntegerField("_region_num");
+        totalUsedBytesField = type.getCIntegerField("_total_used_bytes");
+    }
+
+    public long length() { // value of _length read at this object's address
+        return lengthField.getValue(addr);
+    }
+
+    public long regionNum() { // value of _region_num read at this object's address
+        return regionNumField.getValue(addr);
+    }
+
+    public long totalUsedBytes() { // value of _total_used_bytes read at this object's address
+        return totalUsedBytesField.getValue(addr);
+    }
+
+    public HeapRegionSetBase(Address addr) { // addr is passed straight through to VMObject
+        super(addr);
+    }
+}
--- a/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java	Thu Feb 09 10:16:26 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -67,6 +67,7 @@
       printValue("SurvivorRatio    = ", getFlagValue("SurvivorRatio", flagMap));
       printValMB("PermSize         = ", getFlagValue("PermSize", flagMap));
       printValMB("MaxPermSize      = ", getFlagValue("MaxPermSize", flagMap));
+      printValMB("G1HeapRegionSize = ", HeapRegion.grainBytes());
 
       System.out.println();
       System.out.println("Heap Usage:");
@@ -100,11 +101,20 @@
          } else if (sharedHeap instanceof G1CollectedHeap) {
              G1CollectedHeap g1h = (G1CollectedHeap) sharedHeap;
              G1MonitoringSupport g1mm = g1h.g1mm();
-             System.out.println("G1 Young Generation");
-             printG1Space("Eden Space:", g1mm.edenUsed(), g1mm.edenCommitted());
-             printG1Space("From Space:", g1mm.survivorUsed(), g1mm.survivorCommitted());
-             printG1Space("To Space:", 0, 0);
-             printG1Space("G1 Old Generation", g1mm.oldUsed(), g1mm.oldCommitted());
+             long edenRegionNum = g1mm.edenRegionNum();
+             long survivorRegionNum = g1mm.survivorRegionNum();
+             HeapRegionSetBase oldSet = g1h.oldSet();
+             HeapRegionSetBase humongousSet = g1h.humongousSet();
+             long oldRegionNum = oldSet.regionNum() + humongousSet.regionNum();
+             printG1Space("G1 Heap:", g1h.n_regions(),
+                          g1h.used(), g1h.capacity());
+             System.out.println("G1 Young Generation:");
+             printG1Space("Eden Space:", edenRegionNum,
+                          g1mm.edenUsed(), g1mm.edenCommitted());
+             printG1Space("Survivor Space:", survivorRegionNum,
+                          g1mm.survivorUsed(), g1mm.survivorCommitted());
+             printG1Space("G1 Old Generation:", oldRegionNum,
+                          g1mm.oldUsed(), g1mm.oldCommitted());
          } else {
              throw new RuntimeException("unknown SharedHeap type : " + heap.getClass());
          }
@@ -216,9 +226,11 @@
       System.out.println(alignment +  (double)space.used() * 100.0 / space.capacity() + "% used");
    }
 
-   private void printG1Space(String spaceName, long used, long capacity) {
+   private void printG1Space(String spaceName, long regionNum,
+                             long used, long capacity) {
       long free = capacity - used;
       System.out.println(spaceName);
+      printValue("regions  = ", regionNum);
       printValMB("capacity = ", capacity);
       printValMB("used     = ", used);
       printValMB("free     = ", free);
--- a/make/Makefile	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/Makefile	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -89,19 +89,31 @@
 ZERO_VM_TARGETS=productzero fastdebugzero optimizedzero jvmgzero
 SHARK_VM_TARGETS=productshark fastdebugshark optimizedshark jvmgshark
 
+COMMON_VM_PRODUCT_TARGETS=product product1 productkernel docs export_product
+COMMON_VM_FASTDEBUG_TARGETS=fastdebug fastdebug1 fastdebugkernel docs export_fastdebug
+COMMON_VM_DEBUG_TARGETS=jvmg jvmg1 jvmgkernel docs export_debug
+
 # JDK directory list
 JDK_DIRS=bin include jre lib demo
 
 all:           all_product all_fastdebug
-ifndef BUILD_CLIENT_ONLY
-all_product:   product product1 productkernel docs export_product
-all_fastdebug: fastdebug fastdebug1 fastdebugkernel docs export_fastdebug
-all_debug:     jvmg jvmg1 jvmgkernel docs export_debug
-else
+
+ifdef BUILD_CLIENT_ONLY
 all_product:   product1 docs export_product
 all_fastdebug: fastdebug1 docs export_fastdebug
 all_debug:     jvmg1 docs export_debug
+else
+ifeq ($(MACOSX_UNIVERSAL),true)
+all_product:   universal_product
+all_fastdebug: universal_fastdebug
+all_debug:     universal_debug
+else
+all_product:   $(COMMON_VM_PRODUCT_TARGETS)
+all_fastdebug: $(COMMON_VM_FASTDEBUG_TARGETS)
+all_debug:     $(COMMON_VM_DEBUG_TARGETS)
 endif
+endif
+
 all_optimized: optimized optimized1 optimizedkernel docs export_optimized
 
 allzero:           all_productzero all_fastdebugzero
@@ -232,20 +244,19 @@
 	$(MAKE) VM_SUBDIR=${VM_DEBUG} EXPORT_SUBDIR=/debug   generic_export
 export_optimized:
 	$(MAKE) VM_SUBDIR=optimized EXPORT_SUBDIR=/optimized generic_export
-export_product_jdk:
+export_product_jdk::
 	$(MAKE) ALT_EXPORT_PATH=$(JDK_IMAGE_DIR) \
 		VM_SUBDIR=product                            generic_export
-export_optimized_jdk:
+export_optimized_jdk::
 	$(MAKE) ALT_EXPORT_PATH=$(JDK_IMAGE_DIR) \
 		VM_SUBDIR=optimized                          generic_export
-export_fastdebug_jdk:
+export_fastdebug_jdk::
 	$(MAKE) ALT_EXPORT_PATH=$(JDK_IMAGE_DIR)/fastdebug \
 		VM_SUBDIR=fastdebug                          generic_export
-export_debug_jdk:
+export_debug_jdk::
 	$(MAKE) ALT_EXPORT_PATH=$(JDK_IMAGE_DIR)/debug \
 		VM_SUBDIR=${VM_DEBUG}                        generic_export
 
-
 # Export file copy rules
 XUSAGE=$(HS_SRC_DIR)/share/vm/Xusage.txt
 DOCS_DIR=$(OUTPUTDIR)/$(VM_PLATFORM)_docs
@@ -444,14 +455,14 @@
   endif
 	$(JDK_IMAGE_DIR)/bin/java -server -version
 
-copy_product_jdk:
+copy_product_jdk::
 	$(RM) -r $(JDK_IMAGE_DIR)
 	$(MKDIR) -p $(JDK_IMAGE_DIR)
 	($(CD) $(JDK_IMPORT_PATH) && \
 	 $(TAR) -cf - $(JDK_DIRS)) | \
 	 ($(CD) $(JDK_IMAGE_DIR) && $(TAR) -xf -)
 
-copy_fastdebug_jdk:
+copy_fastdebug_jdk::
 	$(RM) -r $(JDK_IMAGE_DIR)/fastdebug
 	$(MKDIR) -p $(JDK_IMAGE_DIR)/fastdebug
 	if [ -d $(JDK_IMPORT_PATH)/fastdebug ] ; then \
@@ -464,7 +475,7 @@
 	   ($(CD) $(JDK_IMAGE_DIR)/fastdebug && $(TAR) -xf -) ; \
 	fi
 
-copy_debug_jdk:
+copy_debug_jdk::
 	$(RM) -r $(JDK_IMAGE_DIR)/debug
 	$(MKDIR) -p $(JDK_IMAGE_DIR)/debug
 	if [ -d $(JDK_IMPORT_PATH)/debug ] ; then \
@@ -481,36 +492,6 @@
 	   ($(CD) $(JDK_IMAGE_DIR)/debug && $(TAR) -xf -) ; \
 	fi
 
-# macosx universal builds
-
-ifeq ($(MACOSX_UNIVERSAL), true)
-$(UNIVERSAL_LIPO_LIST):
-	lipo -create -output $@ $(EXPORT_JRE_LIB_DIR)/{i386,amd64}/$(subst $(EXPORT_JRE_LIB_DIR)/,,$@)
-
-$(UNIVERSAL_COPY_LIST):
-	$(CP) $(EXPORT_JRE_LIB_DIR)/i386/$(subst $(EXPORT_JRE_LIB_DIR)/,,$@) $@
-
-universalize: $(UNIVERSAL_LIPO_LIST) $(UNIVERSAL_COPY_LIST)
-endif
-
-universal_product:
-	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=32 MACOSX_UNIVERSAL=true all_product
-	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=64 MACOSX_UNIVERSAL=true all_product
-	$(MKDIR) -p $(EXPORT_JRE_LIB_DIR)/{client,server}
-	$(QUIETLY) $(MAKE) MACOSX_UNIVERSAL=true universalize
-
-universal_fastdebug:
-	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=32 MACOSX_UNIVERSAL=true all_fastdebug
-	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=64 MACOSX_UNIVERSAL=true all_fastdebug
-	$(MKDIR) -p $(EXPORT_JRE_LIB_DIR)/{client,server}
-	$(QUIETLY) $(MAKE) MACOSX_UNIVERSAL=true universalize
-
-universal_debug:
-	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=32 MACOSX_UNIVERSAL=true all_debug
-	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=64 MACOSX_UNIVERSAL=true all_debug
-	$(MKDIR) -p $(EXPORT_JRE_LIB_DIR)/{client,server}
-	$(QUIETLY) $(MAKE) MACOSX_UNIVERSAL=true universalize
-
 #
 # Check target
 #
@@ -630,6 +611,13 @@
 	@$(ECHO) \
 "  $(MAKE) ALT_JDK_IMPORT_PATH=/opt/java/jdk$(JDK_VERSION)"
 
+# Universal build support
+ifeq ($(OS_VENDOR), Darwin)
+ifeq ($(MACOSX_UNIVERSAL),true)
+include $(GAMMADIR)/make/$(OSNAME)/makefiles/universal.gmk
+endif
+endif
+
 # JPRT rule to build this workspace
 include $(GAMMADIR)/make/jprt.gmk
 
@@ -639,6 +627,4 @@
 	export_product export_fastdebug export_debug export_optimized \
 	export_jdk_product export_jdk_fastdebug export_jdk_debug \
 	create_jdk copy_jdk update_jdk test_jdk \
-	copy_product_jdk copy_fastdebug_jdk copy_debug_jdk universalize \
-	universal_product
-
+	copy_product_jdk copy_fastdebug_jdk copy_debug_jdk 
--- a/make/bsd/makefiles/adlc.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/bsd/makefiles/adlc.make	Thu Feb 09 07:35:48 2012 -0800
@@ -61,10 +61,10 @@
 INCLUDES += $(Src_Dirs_I:%=-I%)
 
 # set flags for adlc compilation
-CPPFLAGS = $(SYSDEFS) $(INCLUDES)
+CXXFLAGS = $(SYSDEFS) $(INCLUDES)
 
 # Force assertions on.
-CPPFLAGS += -DASSERT
+CXXFLAGS += -DASSERT
 
 # CFLAGS_WARN holds compiler options to suppress/enable warnings.
 # Compiler warnings are treated as errors
@@ -111,7 +111,7 @@
 
 $(EXEC) : $(OBJECTS)
 	@echo Making adlc
-	$(QUIETLY) $(HOST.LINK_NOPROF.CC) -o $(EXEC) $(OBJECTS)
+	$(QUIETLY) $(HOST.LINK_NOPROF.CXX) -o $(EXEC) $(OBJECTS)
 
 # Random dependencies:
 $(OBJECTS): opcodes.hpp classes.hpp adlc.hpp adlcVMDeps.hpp adlparse.hpp archDesc.hpp arena.hpp dict2.hpp filebuff.hpp forms.hpp formsopt.hpp formssel.hpp
@@ -213,14 +213,14 @@
 $(OUTDIR)/%.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
-	$(QUIETLY) $(HOST.COMPILE.CC) -o $@ $< $(COMPILE_DONE)
+	$(QUIETLY) $(HOST.COMPILE.CXX) -o $@ $< $(COMPILE_DONE)
 
 # Some object files are given a prefix, to disambiguate
 # them from objects of the same name built for the VM.
 $(OUTDIR)/adlc-%.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
-	$(QUIETLY) $(HOST.COMPILE.CC) -o $@ $< $(COMPILE_DONE)
+	$(QUIETLY) $(HOST.COMPILE.CXX) -o $@ $< $(COMPILE_DONE)
 
 # #########################################################################
 
--- a/make/bsd/makefiles/defs.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/bsd/makefiles/defs.make	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -171,10 +171,36 @@
 
 EXPORT_LIST += $(ADD_SA_BINARIES/$(HS_ARCH))
 
-UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/libjsig.$(LIBRARY_SUFFIX)
-UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/libsaproc.$(LIBRARY_SUFFIX)
-UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/server/libjvm.$(LIBRARY_SUFFIX)
+# Universal build settings
+ifeq ($(OS_VENDOR), Darwin)
+  # Build universal binaries by default on Mac OS X
+  MACOSX_UNIVERSAL = true
+  ifneq ($(ALT_MACOSX_UNIVERSAL),)
+    MACOSX_UNIVERSAL = $(ALT_MACOSX_UNIVERSAL)
+  endif
+  MAKE_ARGS += MACOSX_UNIVERSAL=$(MACOSX_UNIVERSAL)
 
-UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/server/Xusage.txt
-UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/client/Xusage.txt
-UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/client/libjvm.$(LIBRARY_SUFFIX)
+  # Universal settings
+  ifeq ($(MACOSX_UNIVERSAL), true)
+
+    # Set universal export path but avoid using ARCH or PLATFORM subdirs
+    EXPORT_PATH=$(OUTPUTDIR)/export-universal$(EXPORT_SUBDIR)
+    ifneq ($(ALT_EXPORT_PATH),)
+      EXPORT_PATH=$(ALT_EXPORT_PATH)
+    endif
+
+    # Set universal image dir
+    JDK_IMAGE_DIR=$(OUTPUTDIR)/jdk-universal$(EXPORT_SUBDIR)
+
+    # Binaries to 'universalize' if built
+    UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/libjsig.$(LIBRARY_SUFFIX)
+    UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/libsaproc.$(LIBRARY_SUFFIX)
+    UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/server/libjvm.$(LIBRARY_SUFFIX)
+    UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/client/libjvm.$(LIBRARY_SUFFIX)
+
+    # Files to simply copy in place
+    UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/server/Xusage.txt
+    UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/client/Xusage.txt
+
+  endif
+endif
--- a/make/bsd/makefiles/dtrace.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/bsd/makefiles/dtrace.make	Thu Feb 09 07:35:48 2012 -0800
@@ -105,11 +105,11 @@
 
 lib$(GENOFFS).dylib: $(DTRACE_SRCDIR)/$(GENOFFS).cpp $(DTRACE_SRCDIR)/$(GENOFFS).h \
                   $(LIBJVM.o)
-	$(QUIETLY) $(CCC) $(CPPFLAGS) $(GENOFFS_CFLAGS) $(SHARED_FLAG) $(PICFLAG) \
+	$(QUIETLY) $(CXX) $(CXXFLAGS) $(GENOFFS_CFLAGS) $(SHARED_FLAG) $(PICFLAG) \
 		 $(LFLAGS_GENOFFS) -o $@ $(DTRACE_SRCDIR)/$(GENOFFS).cpp -ljvm
 
 $(GENOFFS): $(DTRACE_SRCDIR)/$(GENOFFS)Main.c lib$(GENOFFS).dylib
-	$(QUIETLY) $(LINK.CC) -o $@ $(DTRACE_SRCDIR)/$(GENOFFS)Main.c \
+	$(QUIETLY) $(LINK.CXX) -o $@ $(DTRACE_SRCDIR)/$(GENOFFS)Main.c \
 		./lib$(GENOFFS).dylib
 
 # $@.tmp is created first to avoid an empty $(JVMOFFS).h if an error occurs.
@@ -135,7 +135,7 @@
 	fi
 
 $(JVMOFFS.o): $(JVMOFFS).h $(JVMOFFS).cpp 
-	$(QUIETLY) $(CCC) -c -I. -o $@ $(ARCHFLAG) -D$(TYPE) $(JVMOFFS).cpp
+	$(QUIETLY) $(CXX) -c -I. -o $@ $(ARCHFLAG) -D$(TYPE) $(JVMOFFS).cpp
 
 $(LIBJVM_DB): $(DTRACE_SRCDIR)/$(JVM_DB).c $(JVMOFFS.o) $(XLIBJVM_DB) $(LIBJVM_DB_MAPFILE)
 	@echo Making $@
--- a/make/bsd/makefiles/gcc.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/bsd/makefiles/gcc.make	Thu Feb 09 07:35:48 2012 -0800
@@ -25,20 +25,19 @@
 OS_VENDOR = $(shell uname -s)
 
 #------------------------------------------------------------------------
-# CC, CPP & AS
+# CC, CXX & AS
 
 # When cross-compiling the ALT_COMPILER_PATH points
 # to the cross-compilation toolset
 ifdef CROSS_COMPILE_ARCH
- CPP = $(ALT_COMPILER_PATH)/g++
+ CXX = $(ALT_COMPILER_PATH)/g++
  CC  = $(ALT_COMPILER_PATH)/gcc
- HOSTCPP = g++
+ HOSTCXX = g++
  HOSTCC  = gcc
 else ifneq ($(OS_VENDOR), Darwin)
  CXX = g++
- CPP = $(CXX)
  CC  = gcc
- HOSTCPP = $(CPP)
+ HOSTCXX = $(CXX)
  HOSTCC  = $(CC)
 endif
 
@@ -53,7 +52,6 @@
   ifeq ($(origin CC), default)
    CC  = llvm-gcc
   endif
-  CPP  = $(CXX)
 
   ifeq ($(ARCH), i486)
   LLVM_SUPPORTS_STACKREALIGN := $(shell \
@@ -67,11 +65,11 @@
     CXX32 ?= g++-4.0
     CC32  ?= gcc-4.0
   endif
-  CPP = $(CXX32)
+  CXX = $(CXX32)
   CC  = $(CC32)
   endif
 
-  HOSTCPP = $(CPP)
+  HOSTCXX = $(CXX)
   HOSTCC  = $(CC)
 endif
 
--- a/make/bsd/makefiles/launcher.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/bsd/makefiles/launcher.make	Thu Feb 09 07:35:48 2012 -0800
@@ -71,10 +71,10 @@
   LIBS_LAUNCHER             += -l$(JVM) $(LIBS)
 endif
 
-LINK_LAUNCHER = $(LINK.c)
+LINK_LAUNCHER = $(LINK.CC)
 
-LINK_LAUNCHER/PRE_HOOK  = $(LINK_LIB.CC/PRE_HOOK)
-LINK_LAUNCHER/POST_HOOK = $(LINK_LIB.CC/POST_HOOK)
+LINK_LAUNCHER/PRE_HOOK  = $(LINK_LIB.CXX/PRE_HOOK)
+LINK_LAUNCHER/POST_HOOK = $(LINK_LIB.CXX/POST_HOOK)
 
 LAUNCHER_OUT = launcher
 
@@ -90,11 +90,11 @@
 
 $(LAUNCHER_OUT)/%.o: $(LAUNCHERDIR_SHARE)/%.c
 	$(QUIETLY) [ -d $(LAUNCHER_OUT) ] || { mkdir -p $(LAUNCHER_OUT); }
-	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CPPFLAGS)
+	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CXXFLAGS)
 
 $(LAUNCHER_OUT)/%.o: $(LAUNCHERDIR)/%.c
 	$(QUIETLY) [ -d $(LAUNCHER_OUT) ] || { mkdir -p $(LAUNCHER_OUT); }
-	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CPPFLAGS)
+	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CXXFLAGS)
 
 $(LAUNCHER): $(OBJS) $(LIBJVM) $(LAUNCHER_MAPFILE)
 	$(QUIETLY) echo Linking launcher...
--- a/make/bsd/makefiles/product.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/bsd/makefiles/product.make	Thu Feb 09 07:35:48 2012 -0800
@@ -55,4 +55,4 @@
 STRIP_AOUT   = $(STRIP) -x $@ || exit 1;
 
 # Don't strip in VM build; JDK build will strip libraries later
-# LINK_LIB.CC/POST_HOOK += $(STRIP_$(LINK_INTO))
+# LINK_LIB.CXX/POST_HOOK += $(STRIP_$(LINK_INTO))
--- a/make/bsd/makefiles/rules.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/bsd/makefiles/rules.make	Thu Feb 09 07:35:48 2012 -0800
@@ -27,52 +27,39 @@
 # Tell make that .cpp is important
 .SUFFIXES: .cpp $(SUFFIXES)
 
-# For now.  Other makefiles use CPP as the c++ compiler, but that should really
-# name the preprocessor.
-ifeq    ($(CCC),)
-CCC             = $(CPP)
-endif
-
 DEMANGLER       = c++filt
 DEMANGLE        = $(DEMANGLER) < $@ > .$@ && mv -f .$@ $@
 
-# $(CC) is the c compiler (cc/gcc), $(CCC) is the c++ compiler (CC/g++).
-C_COMPILE       = $(CC) $(CPPFLAGS) $(CFLAGS)
-CC_COMPILE      = $(CCC) $(CPPFLAGS) $(CFLAGS)
+# $(CC) is the c compiler (cc/gcc), $(CXX) is the c++ compiler (CC/g++).
+CC_COMPILE       = $(CC) $(CXXFLAGS) $(CFLAGS)
+CXX_COMPILE      = $(CXX) $(CXXFLAGS) $(CFLAGS)
 
 AS.S            = $(AS) $(ASFLAGS)
 
-COMPILE.c       = $(C_COMPILE) -c
-GENASM.c        = $(C_COMPILE) -S
-LINK.c          = $(CC) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
-LINK_LIB.c      = $(CC) $(LFLAGS) $(SHARED_FLAG)
-PREPROCESS.c    = $(C_COMPILE) -E
+COMPILE.CC       = $(CC_COMPILE) -c
+GENASM.CC        = $(CC_COMPILE) -S
+LINK.CC          = $(CC) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
+LINK_LIB.CC      = $(CC) $(LFLAGS) $(SHARED_FLAG)
+PREPROCESS.CC    = $(CC_COMPILE) -E
 
-COMPILE.CC      = $(CC_COMPILE) -c
-GENASM.CC       = $(CC_COMPILE) -S
-LINK.CC         = $(CCC) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
-LINK_NOPROF.CC  = $(CCC) $(LFLAGS) $(AOUT_FLAGS)
-LINK_LIB.CC     = $(CCC) $(LFLAGS) $(SHARED_FLAG)
-PREPROCESS.CC   = $(CC_COMPILE) -E
+COMPILE.CXX      = $(CXX_COMPILE) -c
+GENASM.CXX       = $(CXX_COMPILE) -S
+LINK.CXX         = $(CXX) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
+LINK_NOPROF.CXX  = $(CXX) $(LFLAGS) $(AOUT_FLAGS)
+LINK_LIB.CXX     = $(CXX) $(LFLAGS) $(SHARED_FLAG)
+PREPROCESS.CXX   = $(CXX_COMPILE) -E
 
 # cross compiling the jvm with c2 requires host compilers to build
 # adlc tool
 
-HOST.CC_COMPILE      = $(HOSTCPP) $(CPPFLAGS) $(CFLAGS)
-HOST.COMPILE.CC      = $(HOST.CC_COMPILE) -c
-HOST.LINK_NOPROF.CC  = $(HOSTCPP) $(LFLAGS) $(AOUT_FLAGS)
+HOST.CXX_COMPILE      = $(HOSTCXX) $(CXXFLAGS) $(CFLAGS)
+HOST.COMPILE.CXX      = $(HOST.CXX_COMPILE) -c
+HOST.LINK_NOPROF.CXX  = $(HOSTCXX) $(LFLAGS) $(AOUT_FLAGS)
 
 
 # Effect of REMOVE_TARGET is to delete out-of-date files during "gnumake -k".
 REMOVE_TARGET   = rm -f $@
 
-# Synonyms.
-COMPILE.cpp     = $(COMPILE.CC)
-GENASM.cpp      = $(GENASM.CC)
-LINK.cpp        = $(LINK.CC)
-LINK_LIB.cpp    = $(LINK_LIB.CC)
-PREPROCESS.cpp  = $(PREPROCESS.CC)
-
 # Note use of ALT_BOOTDIR to explicitly specify location of java and
 # javac; this is the same environment variable used in the J2SE build
 # process for overriding the default spec, which is BOOTDIR.
@@ -161,14 +148,14 @@
 %.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
-	$(QUIETLY) $(COMPILE.CC) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE)
+	$(QUIETLY) $(COMPILE.CXX) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE)
 else
 %.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
 	$(QUIETLY) $(if $(findstring $@, $(NONPIC_OBJ_FILES)), \
-	   $(subst $(VM_PICFLAG), ,$(COMPILE.CC)) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE), \
-	   $(COMPILE.CC) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE))
+	   $(subst $(VM_PICFLAG), ,$(COMPILE.CXX)) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE), \
+	   $(COMPILE.CXX) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE))
 endif
 
 %.o: %.s
@@ -178,13 +165,13 @@
 
 %.s: %.cpp
 	@echo Generating assembly for $<
-	$(QUIETLY) $(GENASM.CC) -o $@ $<
+	$(QUIETLY) $(GENASM.CXX) -o $@ $<
 	$(QUIETLY) $(DEMANGLE) $(COMPILE_DONE)
 
 # Intermediate files (for debugging macros)
 %.i: %.cpp
 	@echo Preprocessing $< to $@
-	$(QUIETLY) $(PREPROCESS.CC) $< > $@ $(COMPILE_DONE)
+	$(QUIETLY) $(PREPROCESS.CXX) $< > $@ $(COMPILE_DONE)
 
 #  Override gnumake built-in rules which do sccs get operations badly.
 #  (They put the checked out code in the current directory, not in the
--- a/make/bsd/makefiles/sparcWorks.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/bsd/makefiles/sparcWorks.make	Thu Feb 09 07:35:48 2012 -0800
@@ -23,13 +23,13 @@
 #
 
 #------------------------------------------------------------------------
-# CC, CPP & AS
+# CC, CXX & AS
 
-CPP = CC
+CXX = CC
 CC  = cc
 AS  = $(CC) -c
 
-HOSTCPP = $(CPP)
+HOSTCXX = $(CXX)
 HOSTCC  = $(CC)
 
 ARCHFLAG = $(ARCHFLAG/$(BUILDARCH))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/bsd/makefiles/universal.gmk	Thu Feb 09 07:35:48 2012 -0800
@@ -0,0 +1,113 @@
+#
+# Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#  
+#
+
+# macosx universal builds: each target below re-invokes make on its all_*_universal counterpart
+universal_product:
+	$(MAKE) MACOSX_UNIVERSAL=true all_product_universal
+universal_fastdebug:
+	$(MAKE) MACOSX_UNIVERSAL=true all_fastdebug_universal
+universal_debug:
+	$(MAKE) MACOSX_UNIVERSAL=true all_debug_universal
+
+
+# Universal builds include 1 or more architectures in a single binary (the 32-bit sub-builds below are commented out, so only ARCH_DATA_MODEL=64 is built)
+all_product_universal:
+#	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=32 $(COMMON_VM_PRODUCT_TARGETS)
+	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=64 $(COMMON_VM_PRODUCT_TARGETS)
+	$(QUIETLY) $(MAKE) EXPORT_SUBDIR= universalize
+all_fastdebug_universal:
+#	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=32 $(COMMON_VM_FASTDEBUG_TARGETS)
+	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=64 $(COMMON_VM_FASTDEBUG_TARGETS)
+	$(QUIETLY) $(MAKE) EXPORT_SUBDIR=/fastdebug universalize
+all_debug_universal:
+#	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=32 $(COMMON_VM_DEBUG_TARGETS)
+	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=64 $(COMMON_VM_DEBUG_TARGETS)
+	$(QUIETLY) $(MAKE) EXPORT_SUBDIR=/debug universalize
+
+
+# Consolidate architecture builds into a single Universal binary, then remove the per-arch {i386,amd64} export dirs
+universalize: $(UNIVERSAL_LIPO_LIST) $(UNIVERSAL_COPY_LIST)
+	$(RM) -r $(EXPORT_PATH)/jre/lib/{i386,amd64}
+
+
+# Package built libraries in a universal binary (does nothing when no per-arch build of the library is found)
+$(UNIVERSAL_LIPO_LIST):
+	BUILT_LIPO_FILES="`find $(EXPORT_JRE_LIB_DIR)/{i386,amd64}/$(subst $(EXPORT_JRE_LIB_DIR)/,,$@) 2>/dev/null`"; \
+	if [ -n "$${BUILT_LIPO_FILES}" ]; then \
+	  $(MKDIR) -p $(@D); \
+	  lipo -create -output $@ $${BUILT_LIPO_FILES}; \
+	fi
+
+
+# Copy built non-universal binaries in place (if several per-arch copies exist, each is copied to $@ in turn, so the last one wins)
+$(UNIVERSAL_COPY_LIST):
+	BUILT_COPY_FILES="`find $(EXPORT_JRE_LIB_DIR)/{i386,amd64}/$(subst $(EXPORT_JRE_LIB_DIR)/,,$@) 2>/dev/null`"; \
+	if [ -n "$${BUILT_COPY_FILES}" ]; then \
+	  for i in $${BUILT_COPY_FILES}; do \
+	    if [ -f $${i} ]; then \
+	      $(MKDIR) -p $(@D); \
+	      $(CP) $${i} $@; \
+	    fi; \
+	  done; \
+	fi
+
+
+# Replace arch specific binaries with universal binaries: drop the per-arch dirs and per-VM libjsig, then tar EXPORT_PATH over JDK_IMAGE_DIR
+export_universal:
+	$(RM) -r $(EXPORT_PATH)/jre/lib/{i386,amd64}
+	$(RM) -r $(JDK_IMAGE_DIR)/jre/lib/{i386,amd64}
+	$(RM) $(JDK_IMAGE_DIR)/jre/lib/{client,server}/libjsig.$(LIBRARY_SUFFIX)
+	($(CD) $(EXPORT_PATH) && \
+	  $(TAR) -cf - *) | \
+	  ($(CD) $(JDK_IMAGE_DIR) && $(TAR) -xpf -)
+
+
+# Overlay universal binaries: same cleanup and tar copy as export_universal, but under the $(COPY_SUBDIR) subtree of both paths
+copy_universal:
+	$(RM) -r $(JDK_IMAGE_DIR)$(COPY_SUBDIR)/jre/lib/{i386,amd64}
+	$(RM) $(JDK_IMAGE_DIR)$(COPY_SUBDIR)/jre/lib/{client,server}/libjsig.$(LIBRARY_SUFFIX)
+	($(CD) $(EXPORT_PATH)$(COPY_SUBDIR) && \
+	  $(TAR) -cf - *) | \
+	  ($(CD) $(JDK_IMAGE_DIR)$(COPY_SUBDIR) && $(TAR) -xpf -)
+
+
+# Additional processing for universal builds (double-colon rules: each recipe here is an independent rule for its target)
+export_product_jdk::
+	$(MAKE) EXPORT_SUBDIR=           export_universal
+export_optimized_jdk::
+	$(MAKE) EXPORT_SUBDIR=           export_universal
+export_fastdebug_jdk::
+	$(MAKE) EXPORT_SUBDIR=/fastdebug export_universal
+export_debug_jdk::
+	$(MAKE) EXPORT_SUBDIR=/debug     export_universal
+copy_product_jdk::
+	$(MAKE) COPY_SUBDIR=             copy_universal
+copy_fastdebug_jdk::
+	$(MAKE) COPY_SUBDIR=/fastdebug   copy_universal
+copy_debug_jdk::
+	$(MAKE) COPY_SUBDIR=/debug       copy_universal
+# The lipo/copy outputs have no prerequisites; declare them phony so an existing file never suppresses regeneration.
+.PHONY:	universal_product universal_fastdebug universal_debug \
+	all_product_universal all_fastdebug_universal all_debug_universal \
+	universalize export_universal copy_universal $(UNIVERSAL_LIPO_LIST) $(UNIVERSAL_COPY_LIST)
--- a/make/bsd/makefiles/vm.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/bsd/makefiles/vm.make	Thu Feb 09 07:35:48 2012 -0800
@@ -82,18 +82,22 @@
 BUILD_USER    = -DHOTSPOT_BUILD_USER="\"$(HOTSPOT_BUILD_USER)\""
 VM_DISTRO     = -DHOTSPOT_VM_DISTRO="\"$(HOTSPOT_VM_DISTRO)\""
 
-CPPFLAGS =           \
+CXXFLAGS =           \
   ${SYSDEFS}         \
   ${INCLUDES}        \
   ${BUILD_VERSION}   \
   ${BUILD_TARGET}    \
   ${BUILD_USER}      \
   ${HS_LIB_ARCH}     \
-  ${JRE_VERSION}     \
   ${VM_DISTRO}
 
+# This is VERY important! The version define must be supplied only to vm_version.o.
+# Otherwise ccache cannot reuse any cached object, since the version string might
+# contain a time and date.
+vm_version.o: CXXFLAGS += ${JRE_VERSION} 
+
 ifdef DEFAULT_LIBPATH
-CPPFLAGS += -DDEFAULT_LIBPATH="\"$(DEFAULT_LIBPATH)\""
+CXXFLAGS += -DDEFAULT_LIBPATH="\"$(DEFAULT_LIBPATH)\""
 endif
 
 ifndef JAVASE_EMBEDDED
@@ -260,9 +264,9 @@
   ifeq ($(STATIC_CXX), true)
     LFLAGS_VM              += $(STATIC_LIBGCC)
     LIBS_VM                += $(STATIC_STDCXX)
-    LINK_VM                = $(LINK_LIB.c)
+    LINK_VM                = $(LINK_LIB.CC)
   else
-    LINK_VM                = $(LINK_LIB.CC)
+    LINK_VM                = $(LINK_LIB.CXX)
   endif
 
   LIBS_VM                  += $(LIBS)
@@ -280,7 +284,7 @@
 $(PRECOMPILED_HEADER):
 	$(QUIETLY) echo Generating precompiled header $@
 	$(QUIETLY) mkdir -p $(PRECOMPILED_HEADER_DIR)
-	$(QUIETLY) $(COMPILE.CC) $(DEPFLAGS) -x c++-header $(PRECOMPILED_HEADER_SRC) -o $@ $(COMPILE_DONE)
+	$(QUIETLY) $(COMPILE.CXX) $(DEPFLAGS) -x c++-header $(PRECOMPILED_HEADER_SRC) -o $@ $(COMPILE_DONE)
 
 # making the library:
 
@@ -305,10 +309,10 @@
 $(LIBJVM): $(LIBJVM.o) $(LIBJVM_MAPFILE) $(LD_SCRIPT)
 	$(QUIETLY) {                                                    \
 	    echo Linking vm...;                                         \
-	    $(LINK_LIB.CC/PRE_HOOK)                                     \
+	    $(LINK_LIB.CXX/PRE_HOOK)                                     \
 	    $(LINK_VM) $(LD_SCRIPT_FLAG)                                \
 		       $(LFLAGS_VM) -o $@ $(LIBJVM.o) $(LIBS_VM);       \
-	    $(LINK_LIB.CC/POST_HOOK)                                    \
+	    $(LINK_LIB.CXX/POST_HOOK)                                    \
 	    rm -f $@.1; ln -s $@ $@.1;                                  \
 	    [ -f $(LIBJVM_G) ] || { ln -s $@ $(LIBJVM_G); ln -s $@.1 $(LIBJVM_G).1; }; \
 	}
--- a/make/defs.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/defs.make	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2006, 2011, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -185,6 +185,15 @@
   BOOTDIR=$(ALT_BOOTDIR)
 endif
 
+# Select name of the export directory and honor ALT overrides
+EXPORT_PATH=$(OUTPUTDIR)/export-$(PLATFORM)$(EXPORT_SUBDIR)
+ifneq ($(ALT_EXPORT_PATH),)
+  EXPORT_PATH=$(ALT_EXPORT_PATH)
+endif
+
+# Default jdk image if one is created for you with create_jdk
+JDK_IMAGE_DIR=$(OUTPUTDIR)/jdk-$(PLATFORM)
+
 # The platform dependent defs.make defines platform specific variable such 
 # as ARCH, EXPORT_LIST etc. We must place the include here after BOOTDIR is defined.
 include $(GAMMADIR)/make/$(OSNAME)/makefiles/defs.make
@@ -263,15 +272,6 @@
 # includes this make/defs.make file.
 MAKE_ARGS += HOTSPOT_BUILD_VERSION=$(HOTSPOT_BUILD_VERSION)
 
-# Select name of export directory
-EXPORT_PATH=$(OUTPUTDIR)/export-$(PLATFORM)$(EXPORT_SUBDIR)
-ifneq ($(ALT_EXPORT_PATH),)
-  EXPORT_PATH=$(ALT_EXPORT_PATH)
-endif
-
-# Default jdk image if one is created for you with create_jdk
-JDK_IMAGE_DIR=$(OUTPUTDIR)/jdk-$(PLATFORM)
-
 # Various export sub directories
 EXPORT_INCLUDE_DIR = $(EXPORT_PATH)/include
 EXPORT_DOCS_DIR = $(EXPORT_PATH)/docs
--- a/make/hotspot_version	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/hotspot_version	Thu Feb 09 07:35:48 2012 -0800
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=23
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=12
+HS_BUILD_NUMBER=15
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/make/jprt.properties	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/jprt.properties	Thu Feb 09 07:35:48 2012 -0800
@@ -438,12 +438,12 @@
     ${jprt.my.macosx.x64}-{product|fastdebug}-c2-GCOld_ParNewGC, \
     ${jprt.my.macosx.x64}-{product|fastdebug}-c2-GCOld_CMS, \
     ${jprt.my.macosx.x64}-{product|fastdebug}-c2-GCOld_G1, \
-    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-GCOld_ParOldGC \
-    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-jbb_default, \
-    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-jbb_default_tiered, \
-    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-jbb_ParallelGC, \
-    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-jbb_G1, \
-    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-jbb_ParOldGC
+    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-GCOld_ParOldGC
+#    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-jbb_default, \
+#    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-jbb_default_tiered, \
+#    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-jbb_ParallelGC, \
+#    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-jbb_G1, \
+#    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-jbb_ParOldGC
 
 jprt.my.windows.i586.test.targets = \
     ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-jvm98, \
--- a/make/linux/makefiles/adlc.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/linux/makefiles/adlc.make	Thu Feb 09 07:35:48 2012 -0800
@@ -61,10 +61,10 @@
 INCLUDES += $(Src_Dirs_I:%=-I%)
 
 # set flags for adlc compilation
-CPPFLAGS = $(SYSDEFS) $(INCLUDES)
+CXXFLAGS = $(SYSDEFS) $(INCLUDES)
 
 # Force assertions on.
-CPPFLAGS += -DASSERT
+CXXFLAGS += -DASSERT
 
 # CFLAGS_WARN holds compiler options to suppress/enable warnings.
 # Compiler warnings are treated as errors
@@ -109,7 +109,7 @@
 
 $(EXEC) : $(OBJECTS)
 	@echo Making adlc
-	$(QUIETLY) $(HOST.LINK_NOPROF.CC) -o $(EXEC) $(OBJECTS)
+	$(QUIETLY) $(HOST.LINK_NOPROF.CXX) -o $(EXEC) $(OBJECTS)
 
 # Random dependencies:
 $(OBJECTS): opcodes.hpp classes.hpp adlc.hpp adlcVMDeps.hpp adlparse.hpp archDesc.hpp arena.hpp dict2.hpp filebuff.hpp forms.hpp formsopt.hpp formssel.hpp
@@ -211,14 +211,14 @@
 $(OUTDIR)/%.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
-	$(QUIETLY) $(HOST.COMPILE.CC) -o $@ $< $(COMPILE_DONE)
+	$(QUIETLY) $(HOST.COMPILE.CXX) -o $@ $< $(COMPILE_DONE)
 
 # Some object files are given a prefix, to disambiguate
 # them from objects of the same name built for the VM.
 $(OUTDIR)/adlc-%.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
-	$(QUIETLY) $(HOST.COMPILE.CC) -o $@ $< $(COMPILE_DONE)
+	$(QUIETLY) $(HOST.COMPILE.CXX) -o $@ $< $(COMPILE_DONE)
 
 # #########################################################################
 
--- a/make/linux/makefiles/gcc.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/linux/makefiles/gcc.make	Thu Feb 09 07:35:48 2012 -0800
@@ -23,19 +23,19 @@
 #
 
 #------------------------------------------------------------------------
-# CC, CPP & AS
+# CC, CXX & AS
 
 # When cross-compiling the ALT_COMPILER_PATH points
 # to the cross-compilation toolset
 ifdef CROSS_COMPILE_ARCH
-CPP = $(ALT_COMPILER_PATH)/g++
+CXX = $(ALT_COMPILER_PATH)/g++
 CC  = $(ALT_COMPILER_PATH)/gcc
-HOSTCPP = g++
+HOSTCXX = g++
 HOSTCC  = gcc
 else
-CPP = g++
+CXX = g++
 CC  = gcc
-HOSTCPP = $(CPP)
+HOSTCXX = $(CXX)
 HOSTCC  = $(CC)
 endif
 
--- a/make/linux/makefiles/launcher.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/linux/makefiles/launcher.make	Thu Feb 09 07:35:48 2012 -0800
@@ -54,10 +54,10 @@
   LIBS_LAUNCHER             += -l$(JVM) $(LIBS)
 endif
 
-LINK_LAUNCHER = $(LINK.c)
+LINK_LAUNCHER = $(LINK.CC)
 
-LINK_LAUNCHER/PRE_HOOK  = $(LINK_LIB.CC/PRE_HOOK)
-LINK_LAUNCHER/POST_HOOK = $(LINK_LIB.CC/POST_HOOK)
+LINK_LAUNCHER/PRE_HOOK  = $(LINK_LIB.CXX/PRE_HOOK)
+LINK_LAUNCHER/POST_HOOK = $(LINK_LIB.CXX/POST_HOOK)
 
 LAUNCHER_OUT = launcher
 
@@ -73,11 +73,11 @@
 
 $(LAUNCHER_OUT)/%.o: $(LAUNCHERDIR_SHARE)/%.c
 	$(QUIETLY) [ -d $(LAUNCHER_OUT) ] || { mkdir -p $(LAUNCHER_OUT); }
-	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CPPFLAGS)
+	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CXXFLAGS)
 
 $(LAUNCHER_OUT)/%.o: $(LAUNCHERDIR)/%.c
 	$(QUIETLY) [ -d $(LAUNCHER_OUT) ] || { mkdir -p $(LAUNCHER_OUT); }
-	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CPPFLAGS)
+	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CXXFLAGS)
 
 $(LAUNCHER): $(OBJS) $(LIBJVM) $(LAUNCHER_MAPFILE)
 	$(QUIETLY) echo Linking launcher...
--- a/make/linux/makefiles/product.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/linux/makefiles/product.make	Thu Feb 09 07:35:48 2012 -0800
@@ -52,4 +52,4 @@
 
 # If we can create .debuginfo files, then the VM is stripped in vm.make
 # and this macro is not used.
-# LINK_LIB.CC/POST_HOOK += $(STRIP_$(LINK_INTO))
+# LINK_LIB.CXX/POST_HOOK += $(STRIP_$(LINK_INTO))
--- a/make/linux/makefiles/rules.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/linux/makefiles/rules.make	Thu Feb 09 07:35:48 2012 -0800
@@ -27,52 +27,39 @@
 # Tell make that .cpp is important
 .SUFFIXES: .cpp $(SUFFIXES)
 
-# For now.  Other makefiles use CPP as the c++ compiler, but that should really
-# name the preprocessor.
-ifeq    ($(CCC),)
-CCC             = $(CPP)
-endif
-
 DEMANGLER       = c++filt
 DEMANGLE        = $(DEMANGLER) < $@ > .$@ && mv -f .$@ $@
 
-# $(CC) is the c compiler (cc/gcc), $(CCC) is the c++ compiler (CC/g++).
-C_COMPILE       = $(CC) $(CPPFLAGS) $(CFLAGS)
-CC_COMPILE      = $(CCC) $(CPPFLAGS) $(CFLAGS)
+# $(CC) is the c compiler (cc/gcc), $(CXX) is the c++ compiler (CC/g++).
+CC_COMPILE       = $(CC) $(CXXFLAGS) $(CFLAGS)
+CXX_COMPILE      = $(CXX) $(CXXFLAGS) $(CFLAGS)
 
 AS.S            = $(AS) $(ASFLAGS)
 
-COMPILE.c       = $(C_COMPILE) -c
-GENASM.c        = $(C_COMPILE) -S
-LINK.c          = $(CC) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
-LINK_LIB.c      = $(CC) $(LFLAGS) $(SHARED_FLAG)
-PREPROCESS.c    = $(C_COMPILE) -E
+COMPILE.CC       = $(CC_COMPILE) -c
+GENASM.CC        = $(CC_COMPILE) -S
+LINK.CC          = $(CC) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
+LINK_LIB.CC      = $(CC) $(LFLAGS) $(SHARED_FLAG)
+PREPROCESS.CC    = $(CC_COMPILE) -E
 
-COMPILE.CC      = $(CC_COMPILE) -c
-GENASM.CC       = $(CC_COMPILE) -S
-LINK.CC         = $(CCC) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
-LINK_NOPROF.CC  = $(CCC) $(LFLAGS) $(AOUT_FLAGS)
-LINK_LIB.CC     = $(CCC) $(LFLAGS) $(SHARED_FLAG)
-PREPROCESS.CC   = $(CC_COMPILE) -E
+COMPILE.CXX      = $(CXX_COMPILE) -c
+GENASM.CXX       = $(CXX_COMPILE) -S
+LINK.CXX         = $(CXX) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
+LINK_NOPROF.CXX  = $(CXX) $(LFLAGS) $(AOUT_FLAGS)
+LINK_LIB.CXX     = $(CXX) $(LFLAGS) $(SHARED_FLAG)
+PREPROCESS.CXX   = $(CXX_COMPILE) -E
 
 # cross compiling the jvm with c2 requires host compilers to build
 # adlc tool
 
-HOST.CC_COMPILE      = $(HOSTCPP) $(CPPFLAGS) $(CFLAGS)
-HOST.COMPILE.CC      = $(HOST.CC_COMPILE) -c
-HOST.LINK_NOPROF.CC  = $(HOSTCPP) $(LFLAGS) $(AOUT_FLAGS)
+HOST.CXX_COMPILE      = $(HOSTCXX) $(CXXFLAGS) $(CFLAGS)
+HOST.COMPILE.CXX      = $(HOST.CXX_COMPILE) -c
+HOST.LINK_NOPROF.CXX  = $(HOSTCXX) $(LFLAGS) $(AOUT_FLAGS)
 
 
 # Effect of REMOVE_TARGET is to delete out-of-date files during "gnumake -k".
 REMOVE_TARGET   = rm -f $@
 
-# Synonyms.
-COMPILE.cpp     = $(COMPILE.CC)
-GENASM.cpp      = $(GENASM.CC)
-LINK.cpp        = $(LINK.CC)
-LINK_LIB.cpp    = $(LINK_LIB.CC)
-PREPROCESS.cpp  = $(PREPROCESS.CC)
-
 # Note use of ALT_BOOTDIR to explicitly specify location of java and
 # javac; this is the same environment variable used in the J2SE build
 # process for overriding the default spec, which is BOOTDIR.
@@ -161,14 +148,14 @@
 %.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
-	$(QUIETLY) $(COMPILE.CC) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE)
+	$(QUIETLY) $(COMPILE.CXX) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE)
 else
 %.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
 	$(QUIETLY) $(if $(findstring $@, $(NONPIC_OBJ_FILES)), \
-	   $(subst $(VM_PICFLAG), ,$(COMPILE.CC)) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE), \
-	   $(COMPILE.CC) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE))
+	   $(subst $(VM_PICFLAG), ,$(COMPILE.CXX)) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE), \
+	   $(COMPILE.CXX) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE))
 endif
 
 %.o: %.s
@@ -178,13 +165,13 @@
 
 %.s: %.cpp
 	@echo Generating assembly for $<
-	$(QUIETLY) $(GENASM.CC) -o $@ $<
+	$(QUIETLY) $(GENASM.CXX) -o $@ $<
 	$(QUIETLY) $(DEMANGLE) $(COMPILE_DONE)
 
 # Intermediate files (for debugging macros)
 %.i: %.cpp
 	@echo Preprocessing $< to $@
-	$(QUIETLY) $(PREPROCESS.CC) $< > $@ $(COMPILE_DONE)
+	$(QUIETLY) $(PREPROCESS.CXX) $< > $@ $(COMPILE_DONE)
 
 #  Override gnumake built-in rules which do sccs get operations badly.
 #  (They put the checked out code in the current directory, not in the
--- a/make/linux/makefiles/sparcWorks.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/linux/makefiles/sparcWorks.make	Thu Feb 09 07:35:48 2012 -0800
@@ -23,13 +23,13 @@
 #
 
 #------------------------------------------------------------------------
-# CC, CPP & AS
+# CC, CXX & AS
 
-CPP = CC
+CXX = CC
 CC  = cc
 AS  = $(CC) -c
 
-HOSTCPP = $(CPP)
+HOSTCXX = $(CXX)
 HOSTCC  = $(CC)
 
 ARCHFLAG = $(ARCHFLAG/$(BUILDARCH))
--- a/make/linux/makefiles/vm.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/linux/makefiles/vm.make	Thu Feb 09 07:35:48 2012 -0800
@@ -88,16 +88,20 @@
 BUILD_USER    = -DHOTSPOT_BUILD_USER="\"$(HOTSPOT_BUILD_USER)\""
 VM_DISTRO     = -DHOTSPOT_VM_DISTRO="\"$(HOTSPOT_VM_DISTRO)\""
 
-CPPFLAGS =           \
+CXXFLAGS =           \
   ${SYSDEFS}         \
   ${INCLUDES}        \
   ${BUILD_VERSION}   \
   ${BUILD_TARGET}    \
   ${BUILD_USER}      \
   ${HS_LIB_ARCH}     \
-  ${JRE_VERSION}     \
   ${VM_DISTRO}
 
+# This is VERY important! The version define must be supplied only to vm_version.o.
+# Otherwise ccache cannot reuse any cached object, since the version string might
+# contain a time and date.
+vm_version.o: CXXFLAGS += ${JRE_VERSION}
+
 ifndef JAVASE_EMBEDDED
 CFLAGS += -DINCLUDE_TRACE
 endif
@@ -272,13 +276,13 @@
   LIBS_VM   += $(LLVM_LIBS)
 endif
 
-LINK_VM = $(LINK_LIB.c)
+LINK_VM = $(LINK_LIB.CC)
 
 # rule for building precompiled header
 $(PRECOMPILED_HEADER):
 	$(QUIETLY) echo Generating precompiled header $@
 	$(QUIETLY) mkdir -p $(PRECOMPILED_HEADER_DIR)
-	$(QUIETLY) $(COMPILE.CC) $(DEPFLAGS) -x c++-header $(PRECOMPILED_HEADER_SRC) -o $@ $(COMPILE_DONE)
+	$(QUIETLY) $(COMPILE.CXX) $(DEPFLAGS) -x c++-header $(PRECOMPILED_HEADER_SRC) -o $@ $(COMPILE_DONE)
 
 # making the library:
 
@@ -308,10 +312,10 @@
 $(LIBJVM): $(LIBJVM.o) $(LIBJVM_MAPFILE) $(LD_SCRIPT)
 	$(QUIETLY) {                                                    \
 	    echo Linking vm...;                                         \
-	    $(LINK_LIB.CC/PRE_HOOK)                                     \
+	    $(LINK_LIB.CXX/PRE_HOOK)                                     \
 	    $(LINK_VM) $(LD_SCRIPT_FLAG)                                \
 		       $(LFLAGS_VM) -o $@ $(LIBJVM.o) $(LIBS_VM);       \
-	    $(LINK_LIB.CC/POST_HOOK)                                    \
+	    $(LINK_LIB.CXX/POST_HOOK)                                    \
 	    rm -f $@.1; ln -s $@ $@.1;                                  \
 	    [ -f $(LIBJVM_G) ] || { ln -s $@ $(LIBJVM_G); ln -s $@.1 $(LIBJVM_G).1; }; \
             if [ \"$(CROSS_COMPILE_ARCH)\" = \"\" ] ; then                    \
--- a/make/solaris/makefiles/adlc.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/solaris/makefiles/adlc.make	Thu Feb 09 07:35:48 2012 -0800
@@ -62,10 +62,10 @@
 INCLUDES += $(Src_Dirs_I:%=-I%)
 
 # set flags for adlc compilation
-CPPFLAGS = $(SYSDEFS) $(INCLUDES)
+CXXFLAGS = $(SYSDEFS) $(INCLUDES)
 
 # Force assertions on.
-CPPFLAGS += -DASSERT
+CXXFLAGS += -DASSERT
 
 ifndef USE_GCC
   # We need libCstd.so for adlc 
@@ -130,7 +130,7 @@
 
 $(EXEC) : $(OBJECTS)
 	@echo Making adlc
-	$(QUIETLY) $(LINK_NOPROF.CC) -o $(EXEC) $(OBJECTS)
+	$(QUIETLY) $(LINK_NOPROF.CXX) -o $(EXEC) $(OBJECTS)
 
 # Random dependencies:
 $(OBJECTS): opcodes.hpp classes.hpp adlc.hpp adlcVMDeps.hpp adlparse.hpp archDesc.hpp arena.hpp dict2.hpp filebuff.hpp forms.hpp formsopt.hpp formssel.hpp
@@ -228,14 +228,14 @@
 $(OUTDIR)/%.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
-	$(QUIETLY) $(COMPILE.CC) -o $@ $< $(COMPILE_DONE)
+	$(QUIETLY) $(COMPILE.CXX) -o $@ $< $(COMPILE_DONE)
 
 # Some object files are given a prefix, to disambiguate
 # them from objects of the same name built for the VM.
 $(OUTDIR)/adlc-%.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
-	$(QUIETLY) $(COMPILE.CC) -o $@ $< $(COMPILE_DONE)
+	$(QUIETLY) $(COMPILE.CXX) -o $@ $< $(COMPILE_DONE)
 
 # #########################################################################
 
--- a/make/solaris/makefiles/dtrace.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/solaris/makefiles/dtrace.make	Thu Feb 09 07:35:48 2012 -0800
@@ -150,11 +150,11 @@
 
 lib$(GENOFFS).so: $(DTRACE_SRCDIR)/$(GENOFFS).cpp $(DTRACE_SRCDIR)/$(GENOFFS).h \
                   $(LIBJVM.o)
-	$(QUIETLY) $(CCC) $(CPPFLAGS) $(GENOFFS_CFLAGS) $(SHARED_FLAG) $(PICFLAG) \
+	$(QUIETLY) $(CXX) $(CXXFLAGS) $(GENOFFS_CFLAGS) $(SHARED_FLAG) $(PICFLAG) \
 		 $(LFLAGS_GENOFFS) -o $@ $(DTRACE_SRCDIR)/$(GENOFFS).cpp -lc
 
 $(GENOFFS): $(DTRACE_SRCDIR)/$(GENOFFS)Main.c lib$(GENOFFS).so
-	$(QUIETLY) $(LINK.CC) -z nodefs -o $@ $(DTRACE_SRCDIR)/$(GENOFFS)Main.c \
+	$(QUIETLY) $(LINK.CXX) -z nodefs -o $@ $(DTRACE_SRCDIR)/$(GENOFFS)Main.c \
 		./lib$(GENOFFS).so
 
 CONDITIONALLY_UPDATE_JVMOFFS_TARGET = \
@@ -178,7 +178,7 @@
 	$(QUIETLY) $(CONDITIONALLY_UPDATE_JVMOFFS_TARGET)
 
 $(JVMOFFS.o): $(JVMOFFS).h $(JVMOFFS).cpp 
-	$(QUIETLY) $(CCC) -c -I. -o $@ $(ARCHFLAG) -D$(TYPE) $(JVMOFFS).cpp
+	$(QUIETLY) $(CXX) -c -I. -o $@ $(ARCHFLAG) -D$(TYPE) $(JVMOFFS).cpp
 
 $(LIBJVM_DB): $(DTRACE_SRCDIR)/$(JVM_DB).c $(JVMOFFS.o) $(XLIBJVM_DB) $(LIBJVM_DB_MAPFILE)
 	@echo Making $@
--- a/make/solaris/makefiles/gcc.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/solaris/makefiles/gcc.make	Thu Feb 09 07:35:48 2012 -0800
@@ -23,9 +23,9 @@
 #
 
 #------------------------------------------------------------------------
-# CC, CPP & AS
+# CC, CXX & AS
 
-CPP = g++
+CXX = g++
 CC  = gcc
 AS  = $(CC) -c
 
@@ -36,12 +36,12 @@
 CC_VER_MAJOR := $(shell $(CC) -dumpversion | sed 's/egcs-//' | cut -d'.' -f1)
 CC_VER_MINOR := $(shell $(CC) -dumpversion | sed 's/egcs-//' | cut -d'.' -f2)
 
-# Check for the versions of C++ and C compilers ($CPP and $CC) used. 
+# Check for the versions of C++ and C compilers ($CXX and $CC) used. 
 
 # Get the last thing on the line that looks like x.x+ (x is a digit).
 COMPILER_REV := \
-$(shell $(CPP) -dumpversion | sed 's/egcs-//' | cut -d'.' -f1)
-C_COMPILER_REV := \
+$(shell $(CXX) -dumpversion | sed 's/egcs-//' | cut -d'.' -f1)
+CC_COMPILER_REV := \
 $(shell $(CC) -dumpversion | sed 's/egcs-//' | cut -d'.' -f2)
 
 
--- a/make/solaris/makefiles/launcher.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/solaris/makefiles/launcher.make	Thu Feb 09 07:35:48 2012 -0800
@@ -52,10 +52,10 @@
   LIBS_LAUNCHER             += -l$(JVM) $(LIBS)
 endif
 
-LINK_LAUNCHER = $(LINK.CC)
+LINK_LAUNCHER = $(LINK.CXX)
 
-LINK_LAUNCHER/PRE_HOOK  = $(LINK_LIB.CC/PRE_HOOK)
-LINK_LAUNCHER/POST_HOOK = $(LINK_LIB.CC/POST_HOOK)
+LINK_LAUNCHER/PRE_HOOK  = $(LINK_LIB.CXX/PRE_HOOK)
+LINK_LAUNCHER/POST_HOOK = $(LINK_LIB.CXX/POST_HOOK)
 
 ifeq ("${Platform_compiler}", "sparcWorks")
 # Enable the following LAUNCHERFLAGS addition if you need to compare the
@@ -86,11 +86,11 @@
 
 $(LAUNCHER_OUT)/%.o: $(LAUNCHERDIR_SHARE)/%.c
 	$(QUIETLY) [ -d $(LAUNCHER_OUT) ] || { mkdir -p $(LAUNCHER_OUT); }
-	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CPPFLAGS)
+	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CXXFLAGS)
 
 $(LAUNCHER_OUT)/%.o: $(LAUNCHERDIR)/%.c
 	$(QUIETLY) [ -d $(LAUNCHER_OUT) ] || { mkdir -p $(LAUNCHER_OUT); }
-	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CPPFLAGS)
+	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CXXFLAGS)
 
 $(LAUNCHER): $(OBJS) $(LIBJVM) $(LAUNCHER_MAPFILE)
 ifeq ($(filter -sbfast -xsbfast, $(CFLAGS_BROWSE)),)
--- a/make/solaris/makefiles/product.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/solaris/makefiles/product.make	Thu Feb 09 07:35:48 2012 -0800
@@ -70,7 +70,7 @@
 
 # If we can create .debuginfo files, then the VM is stripped in vm.make
 # and this macro is not used.
-# LINK_LIB.CC/POST_HOOK += $(STRIP_LIB.CC/POST_HOOK)
+# LINK_LIB.CXX/POST_HOOK += $(STRIP_LIB.CXX/POST_HOOK)
 
 G_SUFFIX =
 SYSDEFS += -DPRODUCT
--- a/make/solaris/makefiles/rules.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/solaris/makefiles/rules.make	Thu Feb 09 07:35:48 2012 -0800
@@ -27,44 +27,31 @@
 # Tell make that .cpp is important
 .SUFFIXES: .cpp $(SUFFIXES)
 
-# For now.  Other makefiles use CPP as the c++ compiler, but that should really
-# name the preprocessor.
-ifeq    ($(CCC),)
-CCC             = $(CPP)
-endif
-
 DEMANGLER       = c++filt
 DEMANGLE        = $(DEMANGLER) < $@ > .$@ && mv -f .$@ $@
 
-# $(CC) is the c compiler (cc/gcc), $(CCC) is the c++ compiler (CC/g++).
-C_COMPILE       = $(CC) $(CPPFLAGS) $(CFLAGS)
-CC_COMPILE      = $(CCC) $(CPPFLAGS) $(CFLAGS)
+# $(CC) is the c compiler (cc/gcc), $(CXX) is the c++ compiler (CC/g++).
+CC_COMPILE       = $(CC) $(CXXFLAGS) $(CFLAGS)
+CXX_COMPILE      = $(CXX) $(CXXFLAGS) $(CFLAGS)
 
 AS.S            = $(AS) $(ASFLAGS)
 
-COMPILE.c       = $(C_COMPILE) -c
-GENASM.c        = $(C_COMPILE) -S
-LINK.c          = $(CC) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
-LINK_LIB.c      = $(CC) $(LFLAGS) $(SHARED_FLAG)
-PREPROCESS.c    = $(C_COMPILE) -E
+COMPILE.CC       = $(CC_COMPILE) -c
+GENASM.CC        = $(CC_COMPILE) -S
+LINK.CC          = $(CC) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
+LINK_LIB.CC      = $(CC) $(LFLAGS) $(SHARED_FLAG)
+PREPROCESS.CC    = $(CC_COMPILE) -E
 
-COMPILE.CC      = $(CC_COMPILE) -c
-GENASM.CC       = $(CC_COMPILE) -S
-LINK.CC         = $(CCC) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
-LINK_NOPROF.CC  = $(CCC) $(LFLAGS) $(AOUT_FLAGS)
-LINK_LIB.CC     = $(CCC) $(LFLAGS) $(SHARED_FLAG)
-PREPROCESS.CC   = $(CC_COMPILE) -E
+COMPILE.CXX      = $(CXX_COMPILE) -c
+GENASM.CXX       = $(CXX_COMPILE) -S
+LINK.CXX         = $(CXX) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
+LINK_NOPROF.CXX  = $(CXX) $(LFLAGS) $(AOUT_FLAGS)
+LINK_LIB.CXX     = $(CXX) $(LFLAGS) $(SHARED_FLAG)
+PREPROCESS.CXX   = $(CXX_COMPILE) -E
 
 # Effect of REMOVE_TARGET is to delete out-of-date files during "gnumake -k".
 REMOVE_TARGET   = rm -f $@
 
-# Synonyms.
-COMPILE.cpp     = $(COMPILE.CC)
-GENASM.cpp      = $(GENASM.CC)
-LINK.cpp        = $(LINK.CC)
-LINK_LIB.cpp    = $(LINK_LIB.CC)
-PREPROCESS.cpp  = $(PREPROCESS.CC)
-
 # Note use of ALT_BOOTDIR to explicitly specify location of java and
 # javac; this is the same environment variable used in the J2SE build
 # process for overriding the default spec, which is BOOTDIR.
@@ -153,14 +140,14 @@
 %.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
-	$(QUIETLY) $(COMPILE.CC) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE)
+	$(QUIETLY) $(COMPILE.CXX) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE)
 else
 %.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
 	$(QUIETLY) $(if $(findstring $@, $(NONPIC_OBJ_FILES)), \
-	   $(subst $(VM_PICFLAG), ,$(COMPILE.CC)) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE), \
-	   $(COMPILE.CC) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE))
+	   $(subst $(VM_PICFLAG), ,$(COMPILE.CXX)) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE), \
+	   $(COMPILE.CXX) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE))
 endif
 
 %.o: %.s
@@ -170,13 +157,13 @@
 
 %.s: %.cpp
 	@echo Generating assembly for $<
-	$(QUIETLY) $(GENASM.CC) -o $@ $<
+	$(QUIETLY) $(GENASM.CXX) -o $@ $<
 	$(QUIETLY) $(DEMANGLE) $(COMPILE_DONE)
 
 # Intermediate files (for debugging macros)
 %.i: %.cpp
 	@echo Preprocessing $< to $@
-	$(QUIETLY) $(PREPROCESS.CC) $< > $@ $(COMPILE_DONE)
+	$(QUIETLY) $(PREPROCESS.CXX) $< > $@ $(COMPILE_DONE)
 
 #  Override gnumake built-in rules which do sccs get operations badly.
 #  (They put the checked out code in the current directory, not in the
--- a/make/solaris/makefiles/saproc.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/solaris/makefiles/saproc.make	Thu Feb 09 07:35:48 2012 -0800
@@ -93,7 +93,7 @@
 	  exit 1; \
 	fi
 	@echo Making SA debugger back-end...
-	$(QUIETLY) $(CPP)                                               \
+	$(QUIETLY) $(CXX)                                               \
                    $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG)     \
 	           -I$(SASRCDIR)                                        \
 	           -I$(GENERATED)                                       \
--- a/make/solaris/makefiles/sparcWorks.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/solaris/makefiles/sparcWorks.make	Thu Feb 09 07:35:48 2012 -0800
@@ -26,7 +26,7 @@
 
 # tell make which C and C++ compilers to use
 CC	= cc
-CPP	= CC
+CXX	= CC
 
 # Note that this 'as' is an older version of the Sun Studio 'fbe', and will
 #   use the older style options. The 'fbe' options will match 'cc' and 'CC'.
@@ -37,23 +37,23 @@
 
 REORDER_FLAG = -xF
 
-# Check for the versions of C++ and C compilers ($CPP and $CC) used. 
+# Check for the versions of C++ and C compilers ($CXX and $CC) used. 
 
 # Get the last thing on the line that looks like x.x+ (x is a digit).
 COMPILER_REV := \
-$(shell $(CPP) -V 2>&1 | sed -n 's/^.*[ ,\t]C++[ ,\t]\([1-9]\.[0-9][0-9]*\).*/\1/p')
-C_COMPILER_REV := \
+$(shell $(CXX) -V 2>&1 | sed -n 's/^.*[ ,\t]C++[ ,\t]\([1-9]\.[0-9][0-9]*\).*/\1/p')
+CC_COMPILER_REV := \
 $(shell $(CC) -V 2>&1 | sed -n 's/^.*[ ,\t]C[ ,\t]\([1-9]\.[0-9][0-9]*\).*/\1/p')
 
 # Pick which compiler is validated
 ifeq ($(JRE_RELEASE_VER),1.6.0)
   # Validated compiler for JDK6 is SS11 (5.8)
   VALIDATED_COMPILER_REVS   := 5.8
-  VALIDATED_C_COMPILER_REVS := 5.8
+  VALIDATED_CC_COMPILER_REVS := 5.8
 else
   # Validated compiler for JDK7 is SS12 update 1 + patches (5.10)
   VALIDATED_COMPILER_REVS   := 5.10
-  VALIDATED_C_COMPILER_REVS := 5.10
+  VALIDATED_CC_COMPILER_REVS := 5.10
 endif
 
 # Warning messages about not using the above validated versions
@@ -67,13 +67,13 @@
 	warning.)
 endif
 
-ENFORCE_C_COMPILER_REV${ENFORCE_C_COMPILER_REV} := $(strip ${VALIDATED_C_COMPILER_REVS})
-ifeq ($(filter ${ENFORCE_C_COMPILER_REV},${C_COMPILER_REV}),)
-PRINTABLE_C_REVS := $(subst $(shell echo ' '), or ,${ENFORCE_C_COMPILER_REV})
+ENFORCE_CC_COMPILER_REV${ENFORCE_CC_COMPILER_REV} := $(strip ${VALIDATED_CC_COMPILER_REVS})
+ifeq ($(filter ${ENFORCE_CC_COMPILER_REV},${CC_COMPILER_REV}),)
+PRINTABLE_C_REVS := $(subst $(shell echo ' '), or ,${ENFORCE_CC_COMPILER_REV})
 dummy_var_to_enforce_c_compiler_rev := $(shell \
-	echo >&2 WARNING: You are using cc version ${C_COMPILER_REV} and \
+	echo >&2 WARNING: You are using cc version ${CC_COMPILER_REV} and \
 	should be using version ${PRINTABLE_C_REVS}.; \
-	echo >&2 Set ENFORCE_C_COMPILER_REV=${C_COMPILER_REV} to avoid this \
+	echo >&2 Set ENFORCE_CC_COMPILER_REV=${CC_COMPILER_REV} to avoid this \
 	warning.)
 endif
 
@@ -98,7 +98,7 @@
                        } \
 	      END      { exit rc; }'
 
-LINK_LIB.CC/PRE_HOOK += $(JVM_CHECK_SYMBOLS) || exit 1;
+LINK_LIB.CXX/PRE_HOOK += $(JVM_CHECK_SYMBOLS) || exit 1;
 
 # New architecture options started in SS12 (5.9), we need both styles to build.
 #   The older arch options for SS11 (5.8) or older and also for /usr/ccs/bin/as.
@@ -518,7 +518,7 @@
 #FASTDEBUG_CFLAGS += -Qoption ccfe -xglobalstatic
 
 ifeq	(${COMPILER_REV_NUMERIC}, 502)
-COMPILER_DATE := $(shell $(CPP) -V 2>&1 | sed -n '/^.*[ ]C++[ ]\([1-9]\.[0-9][0-9]*\)/p' | awk '{ print $$NF; }')
+COMPILER_DATE := $(shell $(CXX) -V 2>&1 | sed -n '/^.*[ ]C++[ ]\([1-9]\.[0-9][0-9]*\)/p' | awk '{ print $$NF; }')
 ifeq	(${COMPILER_DATE}, 2001/01/31)
 # disable -g0 in fastdebug since SC6.1 dated 2001/01/31 seems to be buggy
 # use an innocuous value because it will get -g if it's empty
@@ -568,7 +568,7 @@
 # removing repeated lines.  The data can be extracted from
 # binaries in the field by using "mcs -p libjvm.so" or the older
 # command "what libjvm.so".
-LINK_LIB.CC/POST_HOOK += $(MCS) -c $@ || exit 1;
+LINK_LIB.CXX/POST_HOOK += $(MCS) -c $@ || exit 1;
 # (The exit 1 is necessary to cause a build failure if the command fails and
 # multiple commands are strung together, and the final semicolon is necessary
 # since the hook must terminate itself as a valid command.)
@@ -576,7 +576,7 @@
 # Also, strip debug and line number information (worth about 1.7Mb).
 # If we can create .debuginfo files, then the VM is stripped in vm.make
 # and this macro is not used.
-STRIP_LIB.CC/POST_HOOK = $(STRIP) -x $@ || exit 1;
-# STRIP_LIB.CC/POST_HOOK is incorporated into LINK_LIB.CC/POST_HOOK
+STRIP_LIB.CXX/POST_HOOK = $(STRIP) -x $@ || exit 1;
+# STRIP_LIB.CXX/POST_HOOK is incorporated into LINK_LIB.CXX/POST_HOOK
 # in certain configurations, such as product.make.  Other configurations,
 # such as debug.make, do not include the strip operation.
--- a/make/solaris/makefiles/vm.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/solaris/makefiles/vm.make	Thu Feb 09 07:35:48 2012 -0800
@@ -76,16 +76,20 @@
 BUILD_USER    = -DHOTSPOT_BUILD_USER="\"$(HOTSPOT_BUILD_USER)\""
 VM_DISTRO     = -DHOTSPOT_VM_DISTRO="\"$(HOTSPOT_VM_DISTRO)\""
 
-CPPFLAGS =           \
+CXXFLAGS =           \
   ${SYSDEFS}         \
   ${INCLUDES}        \
   ${BUILD_VERSION}   \
   ${BUILD_TARGET}    \
   ${BUILD_USER}      \
   ${HS_LIB_ARCH}     \
-  ${JRE_VERSION}     \
   ${VM_DISTRO}
 
+# This is VERY important! The version define must be supplied only to vm_version.o.
+# Otherwise ccache cannot reuse any cached object, since the version string might
+# contain a time and date.
+vm_version.o: CXXFLAGS += ${JRE_VERSION} 
+
 # CFLAGS_WARN holds compiler options to suppress/enable warnings.
 CFLAGS += $(CFLAGS_WARN)
 
@@ -265,17 +269,17 @@
 endif
 
 ifdef USE_GCC
-LINK_VM = $(LINK_LIB.c)
+LINK_VM = $(LINK_LIB.CC)
 else
-LINK_VM = $(LINK_LIB.CC)
+LINK_VM = $(LINK_LIB.CXX)
 endif
 # making the library:
 $(LIBJVM): $(LIBJVM.o) $(LIBJVM_MAPFILE) 
 ifeq ($(filter -sbfast -xsbfast, $(CFLAGS_BROWSE)),)
 	@echo Linking vm...
-	$(QUIETLY) $(LINK_LIB.CC/PRE_HOOK)
+	$(QUIETLY) $(LINK_LIB.CXX/PRE_HOOK)
 	$(QUIETLY) $(LINK_VM) $(LFLAGS_VM) -o $@ $(LIBJVM.o) $(LIBS_VM)
-	$(QUIETLY) $(LINK_LIB.CC/POST_HOOK)
+	$(QUIETLY) $(LINK_LIB.CXX/POST_HOOK)
 	$(QUIETLY) rm -f $@.1 && ln -s $@ $@.1
 	$(QUIETLY) [ -f $(LIBJVM_G) ] || ln -s $@ $(LIBJVM_G)
 	$(QUIETLY) [ -f $(LIBJVM_G).1 ] || ln -s $@.1 $(LIBJVM_G).1
--- a/make/windows/build_vm_def.sh	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/windows/build_vm_def.sh	Thu Feb 09 07:35:48 2012 -0800
@@ -57,10 +57,10 @@
 
 # When called from IDE the first param should contain the link version, otherwise may be nill
 if [ "x$1" != "x" ]; then
-LINK_VER="$1"
+LD_VER="$1"
 fi
 
-if [ "x$LINK_VER" != "x800" -a  "x$LINK_VER" != "x900" -a "x$LINK_VER" != "x1000" ]; then
+if [ "x$LD_VER" != "x800" -a  "x$LD_VER" != "x900" -a "x$LD_VER" != "x1000" ]; then
 $DUMPBIN /symbols *.obj | "$GREP" "??_7.*@@6B@" | "$GREP" -v "type_info" | "$AWK" '{print $7}' | "$SORT" | "$UNIQ" > vm2.def
 else
 # Can't use pipes when calling cl.exe or link.exe from IDE. Using transit file vm3.def
--- a/make/windows/get_msc_ver.sh	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/windows/get_msc_ver.sh	Thu Feb 09 07:35:48 2012 -0800
@@ -72,8 +72,8 @@
   echo "MSC_VER_RAW=$MSC_VER_RAW"
 fi
 
-if [ "x$FORCE_LINK_VER" != "x" ]; then
-  echo "LINK_VER=$FORCE_LINK_VER"
+if [ "x$FORCE_LD_VER" != "x" ]; then
+  echo "LD_VER=$FORCE_LD_VER"
 else
   # use the "link" command that is co-located with the "cl" command
   cl_cmd=`which cl`
@@ -83,11 +83,11 @@
     # which can't find "cl" so just use which ever "link" we find
     link_cmd="link"
   fi
-  LINK_VER_RAW=`"$link_cmd" 2>&1 | "$HEAD" -n 1 | "$SED" 's/.*Version[\ ]*\([0-9][0-9.]*\).*/\1/'`
-  LINK_VER_MAJOR=`"$ECHO" $LINK_VER_RAW | "$CUT" -d'.' -f1`
-  LINK_VER_MINOR=`"$ECHO" $LINK_VER_RAW | "$CUT" -d'.' -f2`
-  LINK_VER_MICRO=`"$ECHO" $LINK_VER_RAW | "$CUT" -d'.' -f3`
-  LINK_VER=`"$EXPR" $LINK_VER_MAJOR \* 100 + $LINK_VER_MINOR`
-  echo "LINK_VER=$LINK_VER"
-  echo "LINK_VER_RAW=$LINK_VER_RAW"
+  LD_VER_RAW=`"$link_cmd" 2>&1 | "$HEAD" -n 1 | "$SED" 's/.*Version[\ ]*\([0-9][0-9.]*\).*/\1/'`
+  LD_VER_MAJOR=`"$ECHO" $LD_VER_RAW | "$CUT" -d'.' -f1`
+  LD_VER_MINOR=`"$ECHO" $LD_VER_RAW | "$CUT" -d'.' -f2`
+  LD_VER_MICRO=`"$ECHO" $LD_VER_RAW | "$CUT" -d'.' -f3`
+  LD_VER=`"$EXPR" $LD_VER_MAJOR \* 100 + $LD_VER_MINOR`
+  echo "LD_VER=$LD_VER"
+  echo "LD_VER_RAW=$LD_VER_RAW"
 fi
--- a/make/windows/makefiles/adlc.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/windows/makefiles/adlc.make	Thu Feb 09 07:35:48 2012 -0800
@@ -45,9 +45,9 @@
 ADLCFLAGS=-q -T -U_LP64
 !endif
 
-ADLC_CPP_FLAGS=$(CPP_FLAGS) /D _CRT_SECURE_NO_WARNINGS /D _CRT_SECURE_NO_DEPRECATE
+ADLC_CXX_FLAGS=$(CXX_FLAGS) /D _CRT_SECURE_NO_WARNINGS /D _CRT_SECURE_NO_DEPRECATE
 
-CPP_INCLUDE_DIRS=\
+CXX_INCLUDE_DIRS=\
   /I "..\generated" \
   /I "$(WorkSpace)\src\share\vm" \
   /I "$(WorkSpace)\src\os\windows\vm" \
@@ -94,14 +94,14 @@
   $(AdlcOutDir)\dfa_$(Platform_arch_model).cpp
 
 {$(WorkSpace)\src\share\vm\adlc}.cpp.obj::
-        $(CPP) $(ADLC_CPP_FLAGS) $(EXH_FLAGS) $(CPP_INCLUDE_DIRS) /c $<
+        $(CXX) $(ADLC_CXX_FLAGS) $(EXH_FLAGS) $(CXX_INCLUDE_DIRS) /c $<
 
 {$(WorkSpace)\src\share\vm\opto}.cpp.obj::
-        $(CPP) $(ADLC_CPP_FLAGS) $(EXH_FLAGS) $(CPP_INCLUDE_DIRS) /c $<
+        $(CXX) $(ADLC_CXX_FLAGS) $(EXH_FLAGS) $(CXX_INCLUDE_DIRS) /c $<
 
 adlc.exe: main.obj adlparse.obj archDesc.obj arena.obj dfa.obj dict2.obj filebuff.obj \
           forms.obj formsopt.obj formssel.obj opcodes.obj output_c.obj output_h.obj
-	$(LINK) $(LINK_FLAGS) /subsystem:console /out:$@ $**
+	$(LD) $(LD_FLAGS) /subsystem:console /out:$@ $**
 !if "$(MT)" != ""
 # The previous link command created a .manifest file that we want to
 # insert into the linked artifact so we do not need to track it
--- a/make/windows/makefiles/compile.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/windows/makefiles/compile.make	Thu Feb 09 07:35:48 2012 -0800
@@ -23,9 +23,9 @@
 #
 
 # Generic compiler settings
-CPP=cl.exe
+CXX=cl.exe
 
-# CPP Flags: (these vary slightly from VC6->VS2003->VS2005 compilers)
+# CXX Flags: (these vary slightly from VC6->VS2003->VS2005 compilers)
 #   /nologo   Supress copyright message at every cl.exe startup
 #   /W3       Warning level 3
 #   /Zi       Include debugging information
@@ -50,47 +50,47 @@
 # improving the quality of crash log stack traces involving jvm.dll.
 
 # These are always used in all compiles
-CPP_FLAGS=/nologo /W3 /WX
+CXX_FLAGS=/nologo /W3 /WX
 
 # Let's add debug information always too.
-CPP_FLAGS=$(CPP_FLAGS) /Zi
+CXX_FLAGS=$(CXX_FLAGS) /Zi
 
 # Based on BUILDARCH we add some flags and select the default compiler name
 !if "$(BUILDARCH)" == "ia64"
 MACHINE=IA64
 DEFAULT_COMPILER_NAME=VS2003
-CPP_FLAGS=$(CPP_FLAGS) /D "CC_INTERP" /D "_LP64" /D "IA64"
+CXX_FLAGS=$(CXX_FLAGS) /D "CC_INTERP" /D "_LP64" /D "IA64"
 !endif
 
 !if "$(BUILDARCH)" == "amd64"
 MACHINE=AMD64
 DEFAULT_COMPILER_NAME=VS2005
-CPP_FLAGS=$(CPP_FLAGS) /D "_LP64" /D "AMD64"
+CXX_FLAGS=$(CXX_FLAGS) /D "_LP64" /D "AMD64"
 LP64=1
 !endif
 
 !if "$(BUILDARCH)" == "i486"
 MACHINE=I386
 DEFAULT_COMPILER_NAME=VS2003
-CPP_FLAGS=$(CPP_FLAGS) /D "IA32"
+CXX_FLAGS=$(CXX_FLAGS) /D "IA32"
 !endif
 
 # Sanity check, this is the default if not amd64, ia64, or i486
 !ifndef DEFAULT_COMPILER_NAME
-CPP=ARCH_ERROR
+CXX=ARCH_ERROR
 !endif
 
-CPP_FLAGS=$(CPP_FLAGS) /D "WIN32" /D "_WINDOWS"
+CXX_FLAGS=$(CXX_FLAGS) /D "WIN32" /D "_WINDOWS"
 # Must specify this for sharedRuntimeTrig.cpp
-CPP_FLAGS=$(CPP_FLAGS) /D "VM_LITTLE_ENDIAN"
+CXX_FLAGS=$(CXX_FLAGS) /D "VM_LITTLE_ENDIAN"
 
 # Used for platform dispatching
-CPP_FLAGS=$(CPP_FLAGS) /D TARGET_OS_FAMILY_windows
-CPP_FLAGS=$(CPP_FLAGS) /D TARGET_ARCH_$(Platform_arch)
-CPP_FLAGS=$(CPP_FLAGS) /D TARGET_ARCH_MODEL_$(Platform_arch_model)
-CPP_FLAGS=$(CPP_FLAGS) /D TARGET_OS_ARCH_windows_$(Platform_arch)
-CPP_FLAGS=$(CPP_FLAGS) /D TARGET_OS_ARCH_MODEL_windows_$(Platform_arch_model)
-CPP_FLAGS=$(CPP_FLAGS) /D TARGET_COMPILER_visCPP
+CXX_FLAGS=$(CXX_FLAGS) /D TARGET_OS_FAMILY_windows
+CXX_FLAGS=$(CXX_FLAGS) /D TARGET_ARCH_$(Platform_arch)
+CXX_FLAGS=$(CXX_FLAGS) /D TARGET_ARCH_MODEL_$(Platform_arch_model)
+CXX_FLAGS=$(CXX_FLAGS) /D TARGET_OS_ARCH_windows_$(Platform_arch)
+CXX_FLAGS=$(CXX_FLAGS) /D TARGET_OS_ARCH_MODEL_windows_$(Platform_arch_model)
+CXX_FLAGS=$(CXX_FLAGS) /D TARGET_COMPILER_visCPP
 
 
 # MSC_VER is a 4 digit number that tells us what compiler is being used
@@ -150,14 +150,14 @@
 # Always add the _STATIC_CPPLIB flag
 STATIC_CPPLIB_OPTION = /D _STATIC_CPPLIB /D _DISABLE_DEPRECATE_STATIC_CPPLIB
 MS_RUNTIME_OPTION = $(MS_RUNTIME_OPTION) $(STATIC_CPPLIB_OPTION)
-CPP_FLAGS=$(CPP_FLAGS) $(MS_RUNTIME_OPTION)
+CXX_FLAGS=$(CXX_FLAGS) $(MS_RUNTIME_OPTION)
 
 # How /GX option is spelled
 GX_OPTION = /GX
 
 # Optimization settings for various versions of the compilers and types of
 #    builds. Three basic sets of settings: product, fastdebug, and debug.
-#    These get added into CPP_FLAGS as needed by other makefiles.
+#    These get added into CXX_FLAGS as needed by other makefiles.
 !if "$(COMPILER_NAME)" == "VC6"
 PRODUCT_OPT_OPTION   = /Ox /Os /Gy /GF
 FASTDEBUG_OPT_OPTION = /Ox /Os /Gy /GF
@@ -180,7 +180,7 @@
 #    externals at link time. Even with /GS-, you need bufferoverflowU.lib.
 #    NOTE: Currently we decided to not use /GS-
 BUFFEROVERFLOWLIB = bufferoverflowU.lib
-LINK_FLAGS = /manifest $(LINK_FLAGS) $(BUFFEROVERFLOWLIB)
+LD_FLAGS = /manifest $(LD_FLAGS) $(BUFFEROVERFLOWLIB)
 # Manifest Tool - used in VS2005 and later to adjust manifests stored
 # as resources inside build artifacts.
 MT=mt.exe
@@ -191,7 +191,7 @@
 FASTDEBUG_OPT_OPTION = /O2 /Oy-
 DEBUG_OPT_OPTION     = /Od
 GX_OPTION = /EHsc
-LINK_FLAGS = /manifest $(LINK_FLAGS)
+LD_FLAGS = /manifest $(LD_FLAGS)
 # Manifest Tool - used in VS2005 and later to adjust manifests stored
 # as resources inside build artifacts.
 MT=mt.exe
@@ -202,12 +202,12 @@
 FASTDEBUG_OPT_OPTION = /O2 /Oy-
 DEBUG_OPT_OPTION     = /Od
 GX_OPTION = /EHsc
-LINK_FLAGS = /manifest $(LINK_FLAGS)
+LD_FLAGS = /manifest $(LD_FLAGS)
 # Manifest Tool - used in VS2005 and later to adjust manifests stored
 # as resources inside build artifacts.
 MT=mt.exe
 !if "$(BUILDARCH)" == "i486"
-LINK_FLAGS = /SAFESEH $(LINK_FLAGS)
+LD_FLAGS = /SAFESEH $(LD_FLAGS)
 !endif
 !endif
 
@@ -225,15 +225,15 @@
 !endif
 
 # Generic linker settings
-LINK=link.exe
-LINK_FLAGS= $(LINK_FLAGS) kernel32.lib user32.lib gdi32.lib winspool.lib \
+LD=link.exe
+LD_FLAGS= $(LD_FLAGS) kernel32.lib user32.lib gdi32.lib winspool.lib \
  comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib \
  uuid.lib Wsock32.lib winmm.lib /nologo /machine:$(MACHINE) /opt:REF \
  /opt:ICF,8 /map /debug
 
 
 !if $(MSC_VER) >= 1600 
-LINK_FLAGS= $(LINK_FLAGS) psapi.lib
+LD_FLAGS= $(LD_FLAGS) psapi.lib
 !endif
 
 # Resource compiler settings
@@ -250,7 +250,7 @@
 	 /D "HS_INTERNAL_NAME=$(HS_INTERNAL_NAME)" \
 	 /D "HS_NAME=$(HS_NAME)"
 
-# Need this to match the CPP_FLAGS settings
+# Need this to match the CXX_FLAGS settings
 !if "$(MFC_DEBUG)" == "true"
 RC_FLAGS = $(RC_FLAGS) /D "_DEBUG"
 !endif
--- a/make/windows/makefiles/debug.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/windows/makefiles/debug.make	Thu Feb 09 07:35:48 2012 -0800
@@ -38,7 +38,7 @@
 !include ../local.make
 !include compile.make
 
-CPP_FLAGS=$(CPP_FLAGS) $(DEBUG_OPT_OPTION)
+CXX_FLAGS=$(CXX_FLAGS) $(DEBUG_OPT_OPTION)
 
 !include $(WorkSpace)/make/windows/makefiles/vm.make
 !include local.make
@@ -52,8 +52,8 @@
 	sh $(WorkSpace)/make/windows/build_vm_def.sh
 
 $(AOUT): $(Res_Files) $(Obj_Files) vm.def
-	$(LINK) @<<
-  $(LINK_FLAGS) /out:$@ /implib:$*.lib /def:vm.def $(Obj_Files) $(Res_Files)
+	$(LD) @<<
+  $(LD_FLAGS) /out:$@ /implib:$*.lib /def:vm.def $(Obj_Files) $(Res_Files)
 <<
 !if "$(MT)" != ""
 # The previous link command created a .manifest file that we want to
--- a/make/windows/makefiles/fastdebug.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/windows/makefiles/fastdebug.make	Thu Feb 09 07:35:48 2012 -0800
@@ -38,7 +38,7 @@
 !include ../local.make
 !include compile.make
 
-CPP_FLAGS=$(CPP_FLAGS) $(FASTDEBUG_OPT_OPTION)
+CXX_FLAGS=$(CXX_FLAGS) $(FASTDEBUG_OPT_OPTION)
 
 !include $(WorkSpace)/make/windows/makefiles/vm.make
 !include local.make
@@ -52,8 +52,8 @@
 	sh $(WorkSpace)/make/windows/build_vm_def.sh
 
 $(AOUT): $(Res_Files) $(Obj_Files) vm.def
-	$(LINK) @<<
-  $(LINK_FLAGS) /out:$@ /implib:$*.lib /def:vm.def $(Obj_Files) $(Res_Files)
+	$(LD) @<<
+  $(LD_FLAGS) /out:$@ /implib:$*.lib /def:vm.def $(Obj_Files) $(Res_Files)
 <<
 !if "$(MT)" != ""
 # The previous link command created a .manifest file that we want to
--- a/make/windows/makefiles/launcher.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/windows/makefiles/launcher.make	Thu Feb 09 07:35:48 2012 -0800
@@ -23,7 +23,7 @@
 #
 
 
-LAUNCHER_FLAGS=$(CPP_FLAGS) $(ARCHFLAG) \
+LAUNCHER_FLAGS=$(CXX_FLAGS) $(ARCHFLAG) \
 	/D FULL_VERSION=\"$(HOTSPOT_RELEASE_VERSION)\" \
 	/D JDK_MAJOR_VERSION=\"$(JDK_MAJOR_VERSION)\" \
 	/D JDK_MINOR_VERSION=\"$(JDK_MINOR_VERSION)\" \
@@ -39,18 +39,18 @@
 	/I $(WorkSpace)\src\cpu\$(Platform_arch)\vm \
 	/I $(WorkSpace)\src\os\windows\vm
 
-LINK_FLAGS=/manifest $(HS_INTERNAL_NAME).lib kernel32.lib user32.lib /nologo /machine:$(MACHINE) /map /debug /subsystem:console 
+LD_FLAGS=/manifest $(HS_INTERNAL_NAME).lib kernel32.lib user32.lib /nologo /machine:$(MACHINE) /map /debug /subsystem:console 
 
 !if "$(COMPILER_NAME)" == "VS2005"
 # This VS2005 compiler has /GS as a default and requires bufferoverflowU.lib
 #    on the link command line, otherwise we get missing __security_check_cookie
 #    externals at link time. Even with /GS-, you need bufferoverflowU.lib.
 BUFFEROVERFLOWLIB = bufferoverflowU.lib
-LINK_FLAGS = $(LINK_FLAGS) $(BUFFEROVERFLOWLIB)
+LD_FLAGS = $(LD_FLAGS) $(BUFFEROVERFLOWLIB)
 !endif
 
 !if "$(COMPILER_NAME)" == "VS2010" && "$(BUILDARCH)" == "i486"
-LINK_FLAGS = /SAFESEH $(LINK_FLAGS)
+LD_FLAGS = /SAFESEH $(LD_FLAGS)
 !endif
 
 LAUNCHERDIR = $(WorkSpace)/src/os/windows/launcher
@@ -60,14 +60,14 @@
 
 {$(LAUNCHERDIR)}.c{$(OUTDIR)}.obj:
 	-mkdir $(OUTDIR) 2>NUL >NUL
-        $(CPP) $(LAUNCHER_FLAGS) /c /Fo$@ $<
+        $(CXX) $(LAUNCHER_FLAGS) /c /Fo$@ $<
 
 {$(LAUNCHERDIR_SHARE)}.c{$(OUTDIR)}.obj:
 	-mkdir $(OUTDIR) 2>NUL >NUL
-        $(CPP) $(LAUNCHER_FLAGS) /c /Fo$@ $<
+        $(CXX) $(LAUNCHER_FLAGS) /c /Fo$@ $<
 
 $(OUTDIR)\*.obj: $(LAUNCHERDIR)\*.c $(LAUNCHERDIR)\*.h $(LAUNCHERDIR_SHARE)\*.c $(LAUNCHERDIR_SHARE)\*.h
 
 launcher: $(OUTDIR)\java.obj $(OUTDIR)\java_md.obj $(OUTDIR)\jli_util.obj
 	echo $(JAVA_HOME) > jdkpath.txt  
-	$(LINK) $(LINK_FLAGS) /out:hotspot.exe $**
+	$(LD) $(LD_FLAGS) /out:hotspot.exe $**
--- a/make/windows/makefiles/product.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/windows/makefiles/product.make	Thu Feb 09 07:35:48 2012 -0800
@@ -37,7 +37,7 @@
 !include ../local.make
 !include compile.make
 
-CPP_FLAGS=$(CPP_FLAGS) $(PRODUCT_OPT_OPTION)
+CXX_FLAGS=$(CXX_FLAGS) $(PRODUCT_OPT_OPTION)
 
 RELEASE=
 
@@ -54,16 +54,16 @@
 # Kernel doesn't need exported vtbl symbols.
 !if "$(Variant)" == "kernel"
 $(AOUT): $(Res_Files) $(Obj_Files)
-	$(LINK) @<<
-  $(LINK_FLAGS) /out:$@ /implib:$*.lib $(Obj_Files) $(Res_Files)
+	$(LD) @<<
+  $(LD_FLAGS) /out:$@ /implib:$*.lib $(Obj_Files) $(Res_Files)
 <<
 !else
 vm.def: $(Obj_Files)
 	sh $(WorkSpace)/make/windows/build_vm_def.sh
 
 $(AOUT): $(Res_Files) $(Obj_Files) vm.def
-	$(LINK) @<<
-  $(LINK_FLAGS) /out:$@ /implib:$*.lib /def:vm.def $(Obj_Files) $(Res_Files)
+	$(LD) @<<
+  $(LD_FLAGS) /out:$@ /implib:$*.lib /def:vm.def $(Obj_Files) $(Res_Files)
 <<
 !endif
 !if "$(MT)" != ""
--- a/make/windows/makefiles/projectcreator.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/windows/makefiles/projectcreator.make	Thu Feb 09 07:35:48 2012 -0800
@@ -89,7 +89,7 @@
         -jdkTargetRoot $(HOTSPOTJDKDIST) \
         -define ALIGN_STACK_FRAMES \
         -define VM_LITTLE_ENDIAN \
-        -prelink  "" "Generating vm.def..." "cd $(HOTSPOTBUILDSPACE)\%f\%b	set HOTSPOTMKSHOME=$(HOTSPOTMKSHOME)	set JAVA_HOME=$(HOTSPOTJDKDIST)	$(HOTSPOTMKSHOME)\sh $(HOTSPOTWORKSPACE)\make\windows\build_vm_def.sh $(LINK_VER)" \
+        -prelink  "" "Generating vm.def..." "cd $(HOTSPOTBUILDSPACE)\%f\%b	set HOTSPOTMKSHOME=$(HOTSPOTMKSHOME)	set JAVA_HOME=$(HOTSPOTJDKDIST)	$(HOTSPOTMKSHOME)\sh $(HOTSPOTWORKSPACE)\make\windows\build_vm_def.sh $(LD_VER)" \
         -postbuild "" "Building hotspot.exe..." "cd $(HOTSPOTBUILDSPACE)\%f\%b	set HOTSPOTMKSHOME=$(HOTSPOTMKSHOME)	nmake -f $(HOTSPOTWORKSPACE)\make\windows\projectfiles\common\Makefile LOCAL_MAKE=$(HOTSPOTBUILDSPACE)\%f\local.make JAVA_HOME=$(HOTSPOTJDKDIST) launcher" \
         -ignoreFile jsig.c \
         -ignoreFile jvmtiEnvRecommended.cpp \
--- a/make/windows/makefiles/sa.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/windows/makefiles/sa.make	Thu Feb 09 07:35:48 2012 -0800
@@ -91,16 +91,16 @@
 !if "$(COMPILER_NAME)" == "VS2005"
 # On amd64, VS2005 compiler requires bufferoverflowU.lib on the link command line, 
 # otherwise we get missing __security_check_cookie externals at link time. 
-SA_LINK_FLAGS = bufferoverflowU.lib
+SA_LD_FLAGS = bufferoverflowU.lib
 !endif
 !else
 SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 /Gm $(GX_OPTION) /ZI /Od /D "WIN32" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
 !endif
 !if "$(MT)" != ""
-SA_LINK_FLAGS = /manifest $(SA_LINK_FLAGS)
+SA_LD_FLAGS = /manifest $(SA_LD_FLAGS)
 !endif
 SASRCFILE = $(AGENT_DIR)/src/os/win32/windbg/sawindbg.cpp
-SA_LFLAGS = $(SA_LINK_FLAGS) /nologo /subsystem:console /map /debug /machine:$(MACHINE)
+SA_LFLAGS = $(SA_LD_FLAGS) /nologo /subsystem:console /map /debug /machine:$(MACHINE)
 
 # Note that we do not keep sawindbj.obj around as it would then
 # get included in the dumpbin command in build_vm_def.sh
@@ -110,14 +110,14 @@
 # Use ";#2" for .dll and ";#1" for .exe in the MT command below:
 $(SAWINDBG): $(SASRCFILE)
 	set INCLUDE=$(SA_INCLUDE)$(INCLUDE)
-	$(CPP) @<<
+	$(CXX) @<<
 	  /I"$(BootStrapDir)/include" /I"$(BootStrapDir)/include/win32" 
 	  /I"$(GENERATED)" $(SA_CFLAGS)
 	  $(SASRCFILE)
 	  /out:sawindbg.obj
 <<
 	set LIB=$(SA_LIB)$(LIB)
-	$(LINK) /out:$@ /DLL sawindbg.obj dbgeng.lib $(SA_LFLAGS)
+	$(LD) /out:$@ /DLL sawindbg.obj dbgeng.lib $(SA_LFLAGS)
 !if "$(MT)" != ""
 	$(MT) /manifest $(@F).manifest /outputresource:$(@F);#2
 !endif
--- a/make/windows/makefiles/sanity.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/windows/makefiles/sanity.make	Thu Feb 09 07:35:48 2012 -0800
@@ -31,5 +31,5 @@
 	echo *** WARNING *** unrecognized cl.exe version $(MSC_VER) ($(RAW_MSC_VER)).  Use FORCE_MSC_VER to override automatic detection.
 
 checkLink:
-	@ if "$(LINK_VER)" NEQ "710" if "$(LINK_VER)" NEQ "800" if "$(LINK_VER)" NEQ "900" if "$(LINK_VER)" NEQ "1000" \
-	echo *** WARNING *** unrecognized link.exe version $(LINK_VER) ($(RAW_LINK_VER)).  Use FORCE_LINK_VER to override automatic detection.
+	@ if "$(LD_VER)" NEQ "710" if "$(LD_VER)" NEQ "800" if "$(LD_VER)" NEQ "900" if "$(LD_VER)" NEQ "1000" \
+	echo *** WARNING *** unrecognized link.exe version $(LD_VER) ($(LD_VER_RAW)).  Use FORCE_LD_VER to override automatic detection.
--- a/make/windows/makefiles/shared.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/windows/makefiles/shared.make	Thu Feb 09 07:35:48 2012 -0800
@@ -30,8 +30,8 @@
 DIR=.
 !endif
 
-!ifndef CPP
-CPP=cl.exe
+!ifndef CXX
+CXX=cl.exe
 !endif
 
 
--- a/make/windows/makefiles/vm.make	Thu Feb 09 10:16:26 2012 -0500
+++ b/make/windows/makefiles/vm.make	Thu Feb 09 07:35:48 2012 -0800
@@ -32,12 +32,12 @@
 
 !ifdef RELEASE
 !ifdef DEVELOP
-CPP_FLAGS=$(CPP_FLAGS) /D "DEBUG"
+CXX_FLAGS=$(CXX_FLAGS) /D "DEBUG"
 !else
-CPP_FLAGS=$(CPP_FLAGS) /D "PRODUCT"
+CXX_FLAGS=$(CXX_FLAGS) /D "PRODUCT"
 !endif
 !else
-CPP_FLAGS=$(CPP_FLAGS) /D "ASSERT"
+CXX_FLAGS=$(CXX_FLAGS) /D "ASSERT"
 !endif
 
 !if "$(Variant)" == "core"
@@ -45,19 +45,19 @@
 !endif
 
 !if "$(Variant)" == "kernel"
-CPP_FLAGS=$(CPP_FLAGS) /D "KERNEL"
+CXX_FLAGS=$(CXX_FLAGS) /D "KERNEL"
 !endif
 
 !if "$(Variant)" == "compiler1"
-CPP_FLAGS=$(CPP_FLAGS) /D "COMPILER1"
+CXX_FLAGS=$(CXX_FLAGS) /D "COMPILER1"
 !endif
 
 !if "$(Variant)" == "compiler2"
-CPP_FLAGS=$(CPP_FLAGS) /D "COMPILER2"
+CXX_FLAGS=$(CXX_FLAGS) /D "COMPILER2"
 !endif
 
 !if "$(Variant)" == "tiered"
-CPP_FLAGS=$(CPP_FLAGS) /D "COMPILER1" /D "COMPILER2"
+CXX_FLAGS=$(CXX_FLAGS) /D "COMPILER1" /D "COMPILER2"
 !endif
 
 !if "$(BUILDARCH)" == "i486"
@@ -67,21 +67,21 @@
 !endif
 
 # The following variables are defined in the generated local.make file.
-CPP_FLAGS=$(CPP_FLAGS) /D "HOTSPOT_RELEASE_VERSION=\"$(HS_BUILD_VER)\""
-CPP_FLAGS=$(CPP_FLAGS) /D "JRE_RELEASE_VERSION=\"$(JRE_RELEASE_VER)\""
-CPP_FLAGS=$(CPP_FLAGS) /D "HOTSPOT_LIB_ARCH=\"$(HOTSPOT_LIB_ARCH)\""
-CPP_FLAGS=$(CPP_FLAGS) /D "HOTSPOT_BUILD_TARGET=\"$(BUILD_FLAVOR)\""
-CPP_FLAGS=$(CPP_FLAGS) /D "HOTSPOT_BUILD_USER=\"$(BuildUser)\""
-CPP_FLAGS=$(CPP_FLAGS) /D "HOTSPOT_VM_DISTRO=\"$(HOTSPOT_VM_DISTRO)\""
+CXX_FLAGS=$(CXX_FLAGS) /D "HOTSPOT_RELEASE_VERSION=\"$(HS_BUILD_VER)\""
+CXX_FLAGS=$(CXX_FLAGS) /D "JRE_RELEASE_VERSION=\"$(JRE_RELEASE_VER)\""
+CXX_FLAGS=$(CXX_FLAGS) /D "HOTSPOT_LIB_ARCH=\"$(HOTSPOT_LIB_ARCH)\""
+CXX_FLAGS=$(CXX_FLAGS) /D "HOTSPOT_BUILD_TARGET=\"$(BUILD_FLAVOR)\""
+CXX_FLAGS=$(CXX_FLAGS) /D "HOTSPOT_BUILD_USER=\"$(BuildUser)\""
+CXX_FLAGS=$(CXX_FLAGS) /D "HOTSPOT_VM_DISTRO=\"$(HOTSPOT_VM_DISTRO)\""
 
 !ifndef JAVASE_EMBEDDED
-CPP_FLAGS=$(CPP_FLAGS) /D "INCLUDE_TRACE"
+CXX_FLAGS=$(CXX_FLAGS) /D "INCLUDE_TRACE"
 !endif
 
-CPP_FLAGS=$(CPP_FLAGS) $(CPP_INCLUDE_DIRS)
+CXX_FLAGS=$(CXX_FLAGS) $(CXX_INCLUDE_DIRS)
 
 # Define that so jni.h is on correct side
-CPP_FLAGS=$(CPP_FLAGS) /D "_JNI_IMPLEMENTATION_"
+CXX_FLAGS=$(CXX_FLAGS) /D "_JNI_IMPLEMENTATION_"
 
 !if "$(BUILDARCH)" == "ia64"
 STACK_SIZE="/STACK:1048576,262144"
@@ -102,7 +102,7 @@
 
 # If you modify exports below please do the corresponding changes in
 # src/share/tools/ProjectCreator/WinGammaPlatformVC7.java
-LINK_FLAGS=$(LINK_FLAGS) $(STACK_SIZE) /subsystem:windows /dll /base:0x8000000 \
+LD_FLAGS=$(LD_FLAGS) $(STACK_SIZE) /subsystem:windows /dll /base:0x8000000 \
   /export:JNI_GetDefaultJavaVMInitArgs       \
   /export:JNI_CreateJavaVM                   \
   /export:JVM_FindClassFromBootLoader        \
@@ -118,25 +118,25 @@
   /export:JVM_GetThreadStateValues           \
   /export:JVM_InitAgentProperties
 
-CPP_INCLUDE_DIRS=/I "..\generated"
+CXX_INCLUDE_DIRS=/I "..\generated"
 
 !if exists($(ALTSRC)\share\vm)
-CPP_INCLUDE_DIRS=$(CPP_INCLUDE_DIRS) /I "$(ALTSRC)\share\vm"
+CXX_INCLUDE_DIRS=$(CXX_INCLUDE_DIRS) /I "$(ALTSRC)\share\vm"
 !endif
 
 !if exists($(ALTSRC)\os\windows\vm)
-CPP_INCLUDE_DIRS=$(CPP_INCLUDE_DIRS) /I "$(ALTSRC)\os\windows\vm"
+CXX_INCLUDE_DIRS=$(CXX_INCLUDE_DIRS) /I "$(ALTSRC)\os\windows\vm"
 !endif
 
 !if exists($(ALTSRC)\os_cpu\windows_$(Platform_arch)\vm)
-CPP_INCLUDE_DIRS=$(CPP_INCLUDE_DIRS) /I "$(ALTSRC)\os_cpu\windows_$(Platform_arch)\vm"
+CXX_INCLUDE_DIRS=$(CXX_INCLUDE_DIRS) /I "$(ALTSRC)\os_cpu\windows_$(Platform_arch)\vm"
 !endif
 
 !if exists($(ALTSRC)\cpu\$(Platform_arch)\vm)
-CPP_INCLUDE_DIRS=$(CPP_INCLUDE_DIRS) /I "$(ALTSRC)\cpu\$(Platform_arch)\vm"
+CXX_INCLUDE_DIRS=$(CXX_INCLUDE_DIRS) /I "$(ALTSRC)\cpu\$(Platform_arch)\vm"
 !endif
 
-CPP_INCLUDE_DIRS=$(CPP_INCLUDE_DIRS) \
+CXX_INCLUDE_DIRS=$(CXX_INCLUDE_DIRS) \
   /I "$(COMMONSRC)\share\vm" \
   /I "$(COMMONSRC)\share\vm\precompiled" \
   /I "$(COMMONSRC)\share\vm\prims" \
@@ -144,12 +144,12 @@
   /I "$(COMMONSRC)\os_cpu\windows_$(Platform_arch)\vm" \
   /I "$(COMMONSRC)\cpu\$(Platform_arch)\vm"
 
-CPP_DONT_USE_PCH=/D DONT_USE_PRECOMPILED_HEADER
+CXX_DONT_USE_PCH=/D DONT_USE_PRECOMPILED_HEADER
 
 !if "$(USE_PRECOMPILED_HEADER)" != "0"
-CPP_USE_PCH=/Fp"vm.pch" /Yu"precompiled.hpp"
+CXX_USE_PCH=/Fp"vm.pch" /Yu"precompiled.hpp"
 !else
-CPP_USE_PCH=$(CPP_DONT_USE_PCH)
+CXX_USE_PCH=$(CXX_DONT_USE_PCH)
 !endif
 
 # Where to find the source code for the virtual machine (is this used?)
@@ -194,101 +194,101 @@
 # Special case files not using precompiled header files.
 
 c1_RInfo_$(Platform_arch).obj: $(WorkSpace)\src\cpu\$(Platform_arch)\vm\c1_RInfo_$(Platform_arch).cpp 
-	 $(CPP) $(CPP_FLAGS) $(CPP_DONT_USE_PCH) /c $(WorkSpace)\src\cpu\$(Platform_arch)\vm\c1_RInfo_$(Platform_arch).cpp
+	 $(CXX) $(CXX_FLAGS) $(CXX_DONT_USE_PCH) /c $(WorkSpace)\src\cpu\$(Platform_arch)\vm\c1_RInfo_$(Platform_arch).cpp
 
 os_windows.obj: $(WorkSpace)\src\os\windows\vm\os_windows.cpp
-        $(CPP) $(CPP_FLAGS) $(CPP_DONT_USE_PCH) /c $(WorkSpace)\src\os\windows\vm\os_windows.cpp
+        $(CXX) $(CXX_FLAGS) $(CXX_DONT_USE_PCH) /c $(WorkSpace)\src\os\windows\vm\os_windows.cpp
 
 os_windows_$(Platform_arch).obj: $(WorkSpace)\src\os_cpu\windows_$(Platform_arch)\vm\os_windows_$(Platform_arch).cpp
-        $(CPP) $(CPP_FLAGS) $(CPP_DONT_USE_PCH) /c $(WorkSpace)\src\os_cpu\windows_$(Platform_arch)\vm\os_windows_$(Platform_arch).cpp
+        $(CXX) $(CXX_FLAGS) $(CXX_DONT_USE_PCH) /c $(WorkSpace)\src\os_cpu\windows_$(Platform_arch)\vm\os_windows_$(Platform_arch).cpp
 
 osThread_windows.obj: $(WorkSpace)\src\os\windows\vm\osThread_windows.cpp
-        $(CPP) $(CPP_FLAGS) $(CPP_DONT_USE_PCH) /c $(WorkSpace)\src\os\windows\vm\osThread_windows.cpp
+        $(CXX) $(CXX_FLAGS) $(CXX_DONT_USE_PCH) /c $(WorkSpace)\src\os\windows\vm\osThread_windows.cpp
 
 conditionVar_windows.obj: $(WorkSpace)\src\os\windows\vm\conditionVar_windows.cpp
-        $(CPP) $(CPP_FLAGS) $(CPP_DONT_USE_PCH) /c $(WorkSpace)\src\os\windows\vm\conditionVar_windows.cpp
+        $(CXX) $(CXX_FLAGS) $(CXX_DONT_USE_PCH) /c $(WorkSpace)\src\os\windows\vm\conditionVar_windows.cpp
 
 getThread_windows_$(Platform_arch).obj: $(WorkSpace)\src\os_cpu\windows_$(Platform_arch)\vm\getThread_windows_$(Platform_arch).cpp
-        $(CPP) $(CPP_FLAGS) $(CPP_DONT_USE_PCH) /c $(WorkSpace)\src\os_cpu\windows_$(Platform_arch)\vm\getThread_windows_$(Platform_arch).cpp
+        $(CXX) $(CXX_FLAGS) $(CXX_DONT_USE_PCH) /c $(WorkSpace)\src\os_cpu\windows_$(Platform_arch)\vm\getThread_windows_$(Platform_arch).cpp
 
 opcodes.obj: $(WorkSpace)\src\share\vm\opto\opcodes.cpp
-        $(CPP) $(CPP_FLAGS) $(CPP_DONT_USE_PCH) /c $(WorkSpace)\src\share\vm\opto\opcodes.cpp
+        $(CXX) $(CXX_FLAGS) $(CXX_DONT_USE_PCH) /c $(WorkSpace)\src\share\vm\opto\opcodes.cpp
 
 bytecodeInterpreter.obj: $(WorkSpace)\src\share\vm\interpreter\bytecodeInterpreter.cpp
-        $(CPP) $(CPP_FLAGS) $(CPP_DONT_USE_PCH) /c $(WorkSpace)\src\share\vm\interpreter\bytecodeInterpreter.cpp
+        $(CXX) $(CXX_FLAGS) $(CXX_DONT_USE_PCH) /c $(WorkSpace)\src\share\vm\interpreter\bytecodeInterpreter.cpp
 
 bytecodeInterpreterWithChecks.obj: ..\generated\jvmtifiles\bytecodeInterpreterWithChecks.cpp
-        $(CPP) $(CPP_FLAGS) $(CPP_DONT_USE_PCH) /c ..\generated\jvmtifiles\bytecodeInterpreterWithChecks.cpp
+        $(CXX) $(CXX_FLAGS) $(CXX_DONT_USE_PCH) /c ..\generated\jvmtifiles\bytecodeInterpreterWithChecks.cpp
 
 # Default rules for the Virtual Machine
 {$(COMMONSRC)\share\vm\c1}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\compiler}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\code}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\interpreter}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\ci}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\classfile}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\gc_implementation\parallelScavenge}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\gc_implementation\shared}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\gc_implementation\parNew}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\gc_implementation\concurrentMarkSweep}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\gc_implementation\g1}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\gc_interface}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\asm}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\memory}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\oops}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\prims}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\runtime}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\services}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\trace}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\utilities}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\libadt}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\opto}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\os\windows\vm}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 # This guy should remain a single colon rule because
 # otherwise we can't specify the output filename.
@@ -296,113 +296,113 @@
         @$(RC) $(RC_FLAGS) /fo"$@" $<
 
 {$(COMMONSRC)\cpu\$(Platform_arch)\vm}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\os_cpu\windows_$(Platform_arch)\vm}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\c1}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\compiler}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\code}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\interpreter}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\ci}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\classfile}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\gc_implementation\parallelScavenge}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\gc_implementation\shared}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\gc_implementation\parNew}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\gc_implementation\concurrentMarkSweep}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\gc_implementation\g1}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\gc_interface}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\asm}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\memory}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\oops}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\prims}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\runtime}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\services}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\trace}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\utilities}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\libadt}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\opto}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\os\windows\vm}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 # otherwise we can't specify the output filename.
 {$(ALTSRC)\os\windows\vm}.rc.res:
         @$(RC) $(RC_FLAGS) /fo"$@" $<
 
 {$(ALTSRC)\cpu\$(Platform_arch)\vm}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\os_cpu\windows_$(Platform_arch)\vm}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {..\generated\incls}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {..\generated\adfiles}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {..\generated\jvmtifiles}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\jfr}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\jfr\agent}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\jfr\agent\isolated_deps\util}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\jfr\jvm}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 default::
 
 _build_pch_file.obj:
         @echo #include "precompiled.hpp" > ../generated/_build_pch_file.cpp
-        $(CPP) $(CPP_FLAGS) /Fp"vm.pch" /Yc"precompiled.hpp" /c ../generated/_build_pch_file.cpp
+        $(CXX) $(CXX_FLAGS) /Fp"vm.pch" /Yc"precompiled.hpp" /c ../generated/_build_pch_file.cpp
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2134,6 +2134,7 @@
   // address pseudos: make these names unlike instruction names to avoid confusion
   inline intptr_t load_pc_address( Register reg, int bytes_to_skip );
   inline void load_contents(const AddressLiteral& addrlit, Register d, int offset = 0);
+  inline void load_bool_contents(const AddressLiteral& addrlit, Register d, int offset = 0);
   inline void load_ptr_contents(const AddressLiteral& addrlit, Register d, int offset = 0);
   inline void store_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset = 0);
   inline void store_ptr_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset = 0);
@@ -2249,7 +2250,7 @@
   // this platform we assume byte size
 
   inline void stbool(Register d, const Address& a) { stb(d, a); }
-  inline void ldbool(const Address& a, Register d) { ldsb(a, d); }
+  inline void ldbool(const Address& a, Register d) { ldub(a, d); }
   inline void movbool( bool boolconst, Register d) { mov( (int) boolconst, d); }
 
   // klass oop manipulations if compressed
--- a/src/cpu/sparc/vm/assembler_sparc.inline.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/cpu/sparc/vm/assembler_sparc.inline.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -692,6 +692,17 @@
 }
 
 
+inline void MacroAssembler::load_bool_contents(const AddressLiteral& addrlit, Register d, int offset) {
+  assert_not_delayed();
+  if (ForceUnreachable) {
+    patchable_sethi(addrlit, d);
+  } else {
+    sethi(addrlit, d);
+  }
+  ldub(d, addrlit.low10() + offset, d);
+}
+
+
 inline void MacroAssembler::load_ptr_contents(const AddressLiteral& addrlit, Register d, int offset) {
   assert_not_delayed();
   if (ForceUnreachable) {
--- a/src/cpu/sparc/vm/c2_globals_sparc.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/cpu/sparc/vm/c2_globals_sparc.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -42,7 +42,7 @@
 #else
 define_pd_global(bool, ProfileInterpreter,           true);
 #endif // CC_INTERP
-define_pd_global(bool, TieredCompilation,            true);
+define_pd_global(bool, TieredCompilation,            trueInTiered);
 define_pd_global(intx, CompileThreshold,             10000);
 define_pd_global(intx, BackEdgeThreshold,            140000);
 
--- a/src/cpu/sparc/vm/frame_sparc.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/cpu/sparc/vm/frame_sparc.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,7 @@
 #include "oops/markOop.hpp"
 #include "oops/methodOop.hpp"
 #include "oops/oop.inline.hpp"
+#include "prims/methodHandles.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/javaCalls.hpp"
@@ -810,7 +811,7 @@
 }
 
 
-#ifdef ASSERT
+#ifndef PRODUCT
 
 #define DESCRIBE_FP_OFFSET(name) \
   values.describe(frame_no, fp() + frame::name##_offset, #name)
@@ -820,11 +821,19 @@
     values.describe(frame_no, sp() + w, err_msg("register save area word %d", w), 1);
   }
 
-  if (is_interpreted_frame()) {
+  if (is_ricochet_frame()) {
+    MethodHandles::RicochetFrame::describe(this, values, frame_no);
+  } else if (is_interpreted_frame()) {
     DESCRIBE_FP_OFFSET(interpreter_frame_d_scratch_fp);
     DESCRIBE_FP_OFFSET(interpreter_frame_l_scratch_fp);
     DESCRIBE_FP_OFFSET(interpreter_frame_padding);
     DESCRIBE_FP_OFFSET(interpreter_frame_oop_temp);
+
+    // esp, according to Lesp (e.g. not depending on bci), if it seems valid
+    intptr_t* esp = *interpreter_frame_esp_addr();
+    if ((esp >= sp()) && (esp < fp())) {
+      values.describe(-1, esp, "*Lesp");
+    }
   }
 
   if (!is_compiled_frame()) {
@@ -844,4 +853,3 @@
   // unused... but returns fp() to minimize changes introduced by 7087445
   return fp();
 }
-
--- a/src/cpu/sparc/vm/methodHandles_sparc.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -177,7 +177,7 @@
   BLOCK_COMMENT("ricochet_blob.bounce");
 
   if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
-  trace_method_handle(_masm, "ricochet_blob.bounce");
+  trace_method_handle(_masm, "return/ricochet_blob.bounce");
 
   __ JMP(L1_continuation, 0);
   __ delayed()->nop();
@@ -268,14 +268,16 @@
 }
 
 // Emit code to verify that FP is pointing at a valid ricochet frame.
-#ifdef ASSERT
+#ifndef PRODUCT
 enum {
   ARG_LIMIT = 255, SLOP = 45,
   // use this parameter for checking for garbage stack movements:
   UNREASONABLE_STACK_MOVE = (ARG_LIMIT + SLOP)
   // the slop defends against false alarms due to fencepost errors
 };
+#endif
 
+#ifdef ASSERT
 void MethodHandles::RicochetFrame::verify_clean(MacroAssembler* _masm) {
   // The stack should look like this:
   //    ... keep1 | dest=42 | keep2 | magic | handler | magic | recursive args | [RF]
@@ -1001,31 +1003,142 @@
 }
 
 #ifndef PRODUCT
+void MethodHandles::RicochetFrame::describe(const frame* fr, FrameValues& values, int frame_no)  {
+    RicochetFrame* rf = new RicochetFrame(*fr);
+
+    // ricochet slots (kept in registers for sparc)
+    values.describe(frame_no, rf->register_addr(I5_savedSP), err_msg("exact_sender_sp reg for #%d", frame_no));
+    values.describe(frame_no, rf->register_addr(L5_conversion), err_msg("conversion reg for #%d", frame_no));
+    values.describe(frame_no, rf->register_addr(L4_saved_args_base), err_msg("saved_args_base reg for #%d", frame_no));
+    values.describe(frame_no, rf->register_addr(L3_saved_args_layout), err_msg("saved_args_layout reg for #%d", frame_no));
+    values.describe(frame_no, rf->register_addr(L2_saved_target), err_msg("saved_target reg for #%d", frame_no));
+    values.describe(frame_no, rf->register_addr(L1_continuation), err_msg("continuation reg for #%d", frame_no));
+
+    // relevant ricochet targets (in caller frame)
+    values.describe(-1, rf->saved_args_base(),  err_msg("*saved_args_base for #%d", frame_no));
+    values.describe(-1, (intptr_t *)(STACK_BIAS+(uintptr_t)rf->exact_sender_sp()),  err_msg("*exact_sender_sp+STACK_BIAS for #%d", frame_no));
+}
+#endif // PRODUCT
+
+#ifndef PRODUCT
 extern "C" void print_method_handle(oop mh);
 void trace_method_handle_stub(const char* adaptername,
                               oopDesc* mh,
-                              intptr_t* saved_sp) {
+                              intptr_t* saved_sp,
+                              intptr_t* args,
+                              intptr_t* tracing_fp) {
   bool has_mh = (strstr(adaptername, "return/") == NULL);  // return adapters don't have mh
-  tty->print_cr("MH %s mh="INTPTR_FORMAT " saved_sp=" INTPTR_FORMAT, adaptername, (intptr_t) mh, saved_sp);
-  if (has_mh)
+
+  tty->print_cr("MH %s mh="INTPTR_FORMAT " saved_sp=" INTPTR_FORMAT " args=" INTPTR_FORMAT, adaptername, (intptr_t) mh, saved_sp, args);
+
+  if (Verbose) {
+    // dumping last frame with frame::describe
+
+    JavaThread* p = JavaThread::active();
+
+    ResourceMark rm;
+    PRESERVE_EXCEPTION_MARK; // may not be needed but safer and inexpensive here
+    FrameValues values;
+
+    // Note: We want to allow trace_method_handle from any call site.
+    // While trace_method_handle creates a frame, it may be entered
+    // without a valid return PC in O7 (e.g. not just after a call).
+    // Walking that frame could lead to failures due to that invalid PC.
+    // => carefully detect that frame when doing the stack walking
+
+    // walk up to the right frame using the "tracing_fp" argument
+    intptr_t* cur_sp = StubRoutines::Sparc::flush_callers_register_windows_func()();
+    frame cur_frame(cur_sp, frame::unpatchable, NULL);
+
+    while (cur_frame.fp() != (intptr_t *)(STACK_BIAS+(uintptr_t)tracing_fp)) {
+      cur_frame = os::get_sender_for_C_frame(&cur_frame);
+    }
+
+    // safely create a frame and call frame::describe
+    intptr_t *dump_sp = cur_frame.sender_sp();
+    intptr_t *dump_fp = cur_frame.link();
+
+    bool walkable = has_mh; // whether the traced frame should be walkable
+
+    // the sender for cur_frame is the caller of trace_method_handle
+    if (walkable) {
+      // The previous definition of walkable may have to be refined
+      // if new call sites cause the next frame constructor to start
+      // failing. Alternatively, frame constructors could be
+      // modified to support the current or future non walkable
+      // frames (but this is more intrusive and is not considered as
+      // part of this RFE, which will instead use a simpler output).
+      frame dump_frame = frame(dump_sp,
+                               cur_frame.sp(), // younger_sp
+                               false); // no adaptation
+      dump_frame.describe(values, 1);
+    } else {
+      // Robust dump for frames which cannot be constructed from sp/younger_sp
+      // Add descriptions without building a Java frame to avoid issues
+      values.describe(-1, dump_fp, "fp for #1 <not parsed, cannot trust pc>");
+      values.describe(-1, dump_sp, "sp");
+    }
+
+    bool has_args = has_mh; // whether Gargs is meaningful
+
+    // mark args, if it seems valid (may not be valid for some adapters)
+    if (has_args) {
+      if ((args >= dump_sp) && (args < dump_fp)) {
+        values.describe(-1, args, "*G4_args");
+      }
+    }
+
+    // mark saved_sp, if it seems valid (may not be valid for some adapters)
+    intptr_t *unbiased_sp = (intptr_t *)(STACK_BIAS+(uintptr_t)saved_sp);
+    if ((unbiased_sp >= dump_sp - UNREASONABLE_STACK_MOVE) && (unbiased_sp < dump_fp)) {
+      values.describe(-1, unbiased_sp, "*saved_sp+STACK_BIAS");
+    }
+
+    // Note: the unextended_sp may not be correct
+    tty->print_cr("  stack layout:");
+    values.print(p);
+  }
+
+  if (has_mh) {
     print_method_handle(mh);
+  }
 }
+
 void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {
   if (!TraceMethodHandles)  return;
   BLOCK_COMMENT("trace_method_handle {");
   // save: Gargs, O5_savedSP
-  __ save_frame(16);
+  __ save_frame(16); // need space for saving required FPU state
+
   __ set((intptr_t) adaptername, O0);
   __ mov(G3_method_handle, O1);
   __ mov(I5_savedSP, O2);
+  __ mov(Gargs, O3);
+  __ mov(I6, O4); // frame identifier for safe stack walking
+
+  // Save scratched registers that might be needed. Robustness is more
+  // important than optimizing the saves for this debug only code.
+
+  // save FP result, valid at some call sites (adapter_opt_return_float, ...)
+  Address d_save(FP, -sizeof(jdouble) + STACK_BIAS);
+  __ stf(FloatRegisterImpl::D, Ftos_d, d_save);
+  // Safely save all globals but G2 (handled by call_VM_leaf) and G7
+  // (OS reserved).
   __ mov(G3_method_handle, L3);
   __ mov(Gargs, L4);
   __ mov(G5_method_type, L5);
-  __ call_VM_leaf(L7, CAST_FROM_FN_PTR(address, trace_method_handle_stub));
+  __ mov(G6, L6);
+  __ mov(G1, L1);
+
+  __ call_VM_leaf(L2 /* for G2 */, CAST_FROM_FN_PTR(address, trace_method_handle_stub));
 
   __ mov(L3, G3_method_handle);
   __ mov(L4, Gargs);
   __ mov(L5, G5_method_type);
+  __ mov(L6, G6);
+  __ mov(L1, G1);
+  __ ldf(FloatRegisterImpl::D, d_save, Ftos_d);
+
   __ restore();
   BLOCK_COMMENT("} trace_method_handle");
 }
@@ -1250,7 +1363,7 @@
         move_typed_arg(_masm, arg_type, false,
                        prim_value_addr,
                        Address(O0_argslot, 0),
-                       O2_scratch);  // must be an even register for !_LP64 long moves (uses O2/O3)
+                      O2_scratch);  // must be an even register for !_LP64 long moves (uses O2/O3)
       }
 
       if (direct_to_method) {
--- a/src/cpu/sparc/vm/methodHandles_sparc.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/cpu/sparc/vm/methodHandles_sparc.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -145,6 +145,8 @@
   }
 
   static void verify_clean(MacroAssembler* _masm) NOT_DEBUG_RETURN;
+
+  static void describe(const frame* fr, FrameValues& values, int frame_no) PRODUCT_RETURN;
 };
 
 // Additional helper methods for MethodHandles code generation:
--- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -321,6 +321,16 @@
   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
 }
 
+static VMRegPair reg64_to_VMRegPair(Register r) {
+  VMRegPair ret;
+  if (wordSize == 8) {
+    ret.set2(r->as_VMReg());
+  } else {
+    ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
+  }
+  return ret;
+}
+
 // ---------------------------------------------------------------------------
 // Read the array of BasicTypes from a signature, and compute where the
 // arguments should go.  Values in the VMRegPair regs array refer to 4-byte (VMRegImpl::stack_slot_size)
@@ -1444,6 +1454,25 @@
 }
 
 
+static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      // stack to stack
+      __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, L5);
+      __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
+    } else {
+      // stack to reg
+      __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
+    }
+  } else if (dst.first()->is_stack()) {
+    // reg to stack
+    __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
+  } else {
+    __ mov(src.first()->as_Register(), dst.first()->as_Register());
+  }
+}
+
+
 // An oop arg. Must pass a handle not the oop itself
 static void object_move(MacroAssembler* masm,
                         OopMap* map,
@@ -1748,6 +1777,166 @@
   }
 }
 
+
+static void save_or_restore_arguments(MacroAssembler* masm,
+                                      const int stack_slots,
+                                      const int total_in_args,
+                                      const int arg_save_area,
+                                      OopMap* map,
+                                      VMRegPair* in_regs,
+                                      BasicType* in_sig_bt) {
+  // if map is non-NULL then the code should store the values,
+  // otherwise it should load them.
+  if (map != NULL) {
+    // Fill in the map
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        if (in_regs[i].first()->is_stack()) {
+          int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
+          map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
+        } else if (in_regs[i].first()->is_Register()) {
+          map->set_oop(in_regs[i].first());
+        } else {
+          ShouldNotReachHere();
+        }
+      }
+    }
+  }
+
+  // Save or restore double word values
+  int handle_index = 0;
+  for (int i = 0; i < total_in_args; i++) {
+    int slot = handle_index + arg_save_area;
+    int offset = slot * VMRegImpl::stack_slot_size;
+    if (in_sig_bt[i] == T_LONG && in_regs[i].first()->is_Register()) {
+      const Register reg = in_regs[i].first()->as_Register();
+      if (reg->is_global()) {
+        handle_index += 2;
+        assert(handle_index <= stack_slots, "overflow");
+        if (map != NULL) {
+          __ stx(reg, SP, offset + STACK_BIAS);
+        } else {
+          __ ldx(SP, offset + STACK_BIAS, reg);
+        }
+      }
+    } else if (in_sig_bt[i] == T_DOUBLE && in_regs[i].first()->is_FloatRegister()) {
+      handle_index += 2;
+      assert(handle_index <= stack_slots, "overflow");
+      if (map != NULL) {
+        __ stf(FloatRegisterImpl::D, in_regs[i].first()->as_FloatRegister(), SP, offset + STACK_BIAS);
+      } else {
+        __ ldf(FloatRegisterImpl::D, SP, offset + STACK_BIAS, in_regs[i].first()->as_FloatRegister());
+      }
+    }
+  }
+  // Save or restore floats
+  for (int i = 0; i < total_in_args; i++) {
+    int slot = handle_index + arg_save_area;
+    int offset = slot * VMRegImpl::stack_slot_size;
+    if (in_sig_bt[i] == T_FLOAT && in_regs[i].first()->is_FloatRegister()) {
+      handle_index++;
+      assert(handle_index <= stack_slots, "overflow");
+      if (map != NULL) {
+        __ stf(FloatRegisterImpl::S, in_regs[i].first()->as_FloatRegister(), SP, offset + STACK_BIAS);
+      } else {
+        __ ldf(FloatRegisterImpl::S, SP, offset + STACK_BIAS, in_regs[i].first()->as_FloatRegister());
+      }
+    }
+  }
+
+}
+
+
+// Check GC_locker::needs_gc and enter the runtime if it's true.  This
+// keeps a new JNI critical region from starting until a GC has been
+// forced.  Save down any oops in registers and describe them in an
+// OopMap.
+static void check_needs_gc_for_critical_native(MacroAssembler* masm,
+                                               const int stack_slots,
+                                               const int total_in_args,
+                                               const int arg_save_area,
+                                               OopMapSet* oop_maps,
+                                               VMRegPair* in_regs,
+                                               BasicType* in_sig_bt) {
+  __ block_comment("check GC_locker::needs_gc");
+  Label cont;
+  AddressLiteral sync_state(GC_locker::needs_gc_address());
+  __ load_bool_contents(sync_state, G3_scratch);
+  __ cmp_zero_and_br(Assembler::equal, G3_scratch, cont);
+  __ delayed()->nop();
+
+  // Save down any values that are live in registers and call into the
+  // runtime to halt for a GC
+  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, map, in_regs, in_sig_bt);
+
+  __ mov(G2_thread, L7_thread_cache);
+
+  __ set_last_Java_frame(SP, noreg);
+
+  __ block_comment("block_for_jni_critical");
+  __ call(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical), relocInfo::runtime_call_type);
+  __ delayed()->mov(L7_thread_cache, O0);
+  oop_maps->add_gc_map( __ offset(), map);
+
+  __ restore_thread(L7_thread_cache); // restore G2_thread
+  __ reset_last_Java_frame();
+
+  // Reload all the register arguments
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, NULL, in_regs, in_sig_bt);
+
+  __ bind(cont);
+#ifdef ASSERT
+  if (StressCriticalJNINatives) {
+    // Stress register saving
+    OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, map, in_regs, in_sig_bt);
+    // Destroy argument registers
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        const Register reg = in_regs[i].first()->as_Register();
+        if (reg->is_global()) {
+          __ mov(G0, reg);
+        }
+      } else if (in_regs[i].first()->is_FloatRegister()) {
+        __ fneg(FloatRegisterImpl::D, in_regs[i].first()->as_FloatRegister(), in_regs[i].first()->as_FloatRegister());
+      }
+    }
+
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, NULL, in_regs, in_sig_bt);
+  }
+#endif
+}
+
+// Unpack an array argument into a pointer to the body and the length
+// if the array is non-null, otherwise pass 0 for both.
+static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) {
+  // Pass the length, ptr pair
+  Label is_null, done;
+  if (reg.first()->is_stack()) {
+    VMRegPair tmp  = reg64_to_VMRegPair(L2);
+    // Load the arg up from the stack
+    move_ptr(masm, reg, tmp);
+    reg = tmp;
+  }
+  __ cmp(reg.first()->as_Register(), G0);
+  __ brx(Assembler::equal, false, Assembler::pt, is_null);
+  __ delayed()->add(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type), L4);
+  move_ptr(masm, reg64_to_VMRegPair(L4), body_arg);
+  __ ld(reg.first()->as_Register(), arrayOopDesc::length_offset_in_bytes(), L4);
+  move32_64(masm, reg64_to_VMRegPair(L4), length_arg);
+  __ ba_short(done);
+  __ bind(is_null);
+  // Pass zeros
+  move_ptr(masm, reg64_to_VMRegPair(G0), body_arg);
+  move32_64(masm, reg64_to_VMRegPair(G0), length_arg);
+  __ bind(done);
+}
+
 // ---------------------------------------------------------------------------
 // Generate a native wrapper for a given method.  The method takes arguments
 // in the Java compiled code convention, marshals them to the native
@@ -1762,6 +1951,13 @@
                                                 BasicType *in_sig_bt,
                                                 VMRegPair *in_regs,
                                                 BasicType ret_type) {
+  bool is_critical_native = true;
+  address native_func = method->critical_native_function();
+  if (native_func == NULL) {
+    native_func = method->native_function();
+    is_critical_native = false;
+  }
+  assert(native_func != NULL, "must have function");
 
   // Native nmethod wrappers never take possesion of the oop arguments.
   // So the caller will gc the arguments. The only thing we need an
@@ -1841,22 +2037,70 @@
   // we convert the java signature to a C signature by inserting
   // the hidden arguments as arg[0] and possibly arg[1] (static method)
 
-  int total_c_args = total_in_args + 1;
-  if (method->is_static()) {
-    total_c_args++;
+  int total_c_args = total_in_args;
+  int total_save_slots = 6 * VMRegImpl::slots_per_word;
+  if (!is_critical_native) {
+    total_c_args += 1;
+    if (method->is_static()) {
+      total_c_args++;
+    }
+  } else {
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        // These have to be saved and restored across the safepoint
+        total_c_args++;
+      }
+    }
   }
 
   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
-  VMRegPair  * out_regs   = NEW_RESOURCE_ARRAY(VMRegPair,   total_c_args);
+  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
+  BasicType* in_elem_bt = NULL;
 
   int argc = 0;
-  out_sig_bt[argc++] = T_ADDRESS;
-  if (method->is_static()) {
-    out_sig_bt[argc++] = T_OBJECT;
-  }
-
-  for (int i = 0; i < total_in_args ; i++ ) {
-    out_sig_bt[argc++] = in_sig_bt[i];
+  if (!is_critical_native) {
+    out_sig_bt[argc++] = T_ADDRESS;
+    if (method->is_static()) {
+      out_sig_bt[argc++] = T_OBJECT;
+    }
+
+    for (int i = 0; i < total_in_args ; i++ ) {
+      out_sig_bt[argc++] = in_sig_bt[i];
+    }
+  } else {
+    Thread* THREAD = Thread::current();
+    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
+    SignatureStream ss(method->signature());
+    for (int i = 0; i < total_in_args ; i++ ) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        // Arrays are passed as int, elem* pair
+        out_sig_bt[argc++] = T_INT;
+        out_sig_bt[argc++] = T_ADDRESS;
+        Symbol* atype = ss.as_symbol(CHECK_NULL);
+        const char* at = atype->as_C_string();
+        if (strlen(at) == 2) {
+          assert(at[0] == '[', "must be");
+          switch (at[1]) {
+            case 'B': in_elem_bt[i]  = T_BYTE; break;
+            case 'C': in_elem_bt[i]  = T_CHAR; break;
+            case 'D': in_elem_bt[i]  = T_DOUBLE; break;
+            case 'F': in_elem_bt[i]  = T_FLOAT; break;
+            case 'I': in_elem_bt[i]  = T_INT; break;
+            case 'J': in_elem_bt[i]  = T_LONG; break;
+            case 'S': in_elem_bt[i]  = T_SHORT; break;
+            case 'Z': in_elem_bt[i]  = T_BOOLEAN; break;
+            default: ShouldNotReachHere();
+          }
+        }
+      } else {
+        out_sig_bt[argc++] = in_sig_bt[i];
+        in_elem_bt[i] = T_VOID;
+      }
+      if (in_sig_bt[i] != T_VOID) {
+        assert(in_sig_bt[i] == ss.type(), "must match");
+        ss.next();
+      }
+    }
   }
 
   // Now figure out where the args must be stored and how much stack space
@@ -1866,6 +2110,35 @@
   int out_arg_slots;
   out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
 
+  if (is_critical_native) {
+    // Critical natives may have to call out so they need a save area
+    // for register arguments.
+    int double_slots = 0;
+    int single_slots = 0;
+    for ( int i = 0; i < total_in_args; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        const Register reg = in_regs[i].first()->as_Register();
+        switch (in_sig_bt[i]) {
+          case T_ARRAY:
+          case T_BOOLEAN:
+          case T_BYTE:
+          case T_SHORT:
+          case T_CHAR:
+          case T_INT:  assert(reg->is_in(), "don't need to save these"); break;
+          case T_LONG: if (reg->is_global()) double_slots++; break;
+          default:  ShouldNotReachHere();
+        }
+      } else if (in_regs[i].first()->is_FloatRegister()) {
+        switch (in_sig_bt[i]) {
+          case T_FLOAT:  single_slots++; break;
+          case T_DOUBLE: double_slots++; break;
+          default:  ShouldNotReachHere();
+        }
+      }
+    }
+    total_save_slots = double_slots * 2 + single_slots;
+  }
+
   // Compute framesize for the wrapper.  We need to handlize all oops in
   // registers. We must create space for them here that is disjoint from
   // the windowed save area because we have no control over when we might
@@ -1885,12 +2158,11 @@
 
   // Now the space for the inbound oop handle area
 
-  int oop_handle_offset = stack_slots;
-  stack_slots += 6*VMRegImpl::slots_per_word;
+  int oop_handle_offset = round_to(stack_slots, 2);
+  stack_slots += total_save_slots;
 
   // Now any space we need for handlizing a klass if static method
 
-  int oop_temp_slot_offset = 0;
   int klass_slot_offset = 0;
   int klass_offset = -1;
   int lock_slot_offset = 0;
@@ -1954,6 +2226,10 @@
 
   __ verify_thread();
 
+  if (is_critical_native) {
+    check_needs_gc_for_critical_native(masm, stack_slots,  total_in_args,
+                                       oop_handle_offset, oop_maps, in_regs, in_sig_bt);
+  }
 
   //
   // We immediately shuffle the arguments so that any vm call we have to
@@ -1982,7 +2258,6 @@
   // caller.
   //
   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
-  int c_arg = total_c_args - 1;
   // Record sp-based slot for receiver on stack for non-static methods
   int receiver_offset = -1;
 
@@ -2002,7 +2277,7 @@
 
 #endif /* ASSERT */
 
-  for ( int i = total_in_args - 1; i >= 0 ; i--, c_arg-- ) {
+  for ( int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0 ; i--, c_arg-- ) {
 
 #ifdef ASSERT
     if (in_regs[i].first()->is_Register()) {
@@ -2019,7 +2294,13 @@
 
     switch (in_sig_bt[i]) {
       case T_ARRAY:
+        if (is_critical_native) {
+          unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg], out_regs[c_arg - 1]);
+          c_arg--;
+          break;
+        }
       case T_OBJECT:
+        assert(!is_critical_native, "no oop arguments");
         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
                     ((i == 0) && (!is_static)),
                     &receiver_offset);
@@ -2029,7 +2310,7 @@
 
       case T_FLOAT:
         float_move(masm, in_regs[i], out_regs[c_arg]);
-          break;
+        break;
 
       case T_DOUBLE:
         assert( i + 1 < total_in_args &&
@@ -2051,7 +2332,7 @@
 
   // Pre-load a static method's oop into O1.  Used both by locking code and
   // the normal JNI call code.
-  if (method->is_static()) {
+  if (method->is_static() && !is_critical_native) {
     __ set_oop_constant(JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()), O1);
 
     // Now handlize the static class mirror in O1.  It's known not-null.
@@ -2064,13 +2345,13 @@
   const Register L6_handle = L6;
 
   if (method->is_synchronized()) {
+    assert(!is_critical_native, "unhandled");
     __ mov(O1, L6_handle);
   }
 
   // We have all of the arguments setup at this point. We MUST NOT touch any Oregs
   // except O6/O7. So if we must call out we must push a new frame. We immediately
   // push a new frame and flush the windows.
-
 #ifdef _LP64
   intptr_t thepc = (intptr_t) __ pc();
   {
@@ -2202,32 +2483,28 @@
   }
 
   // get JNIEnv* which is first argument to native
-
-  __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0);
+  if (!is_critical_native) {
+    __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0);
+  }
 
   // Use that pc we placed in O7 a while back as the current frame anchor
-
   __ set_last_Java_frame(SP, O7);
 
+  // We flushed the windows ages ago now mark them as flushed before transitioning.
+  __ set(JavaFrameAnchor::flushed, G3_scratch);
+  __ st(G3_scratch, G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
+
   // Transition from _thread_in_Java to _thread_in_native.
   __ set(_thread_in_native, G3_scratch);
-  __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
-
-  // We flushed the windows ages ago now mark them as flushed
-
-  // mark windows as flushed
-  __ set(JavaFrameAnchor::flushed, G3_scratch);
-
-  Address flags(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
 
 #ifdef _LP64
-  AddressLiteral dest(method->native_function());
+  AddressLiteral dest(native_func);
   __ relocate(relocInfo::runtime_call_type);
   __ jumpl_to(dest, O7, O7);
 #else
-  __ call(method->native_function(), relocInfo::runtime_call_type);
+  __ call(native_func, relocInfo::runtime_call_type);
 #endif
-  __ delayed()->st(G3_scratch, flags);
+  __ delayed()->st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
 
   __ restore_thread(L7_thread_cache); // restore G2_thread
 
@@ -2259,6 +2536,7 @@
     ShouldNotReachHere();
   }
 
+  Label after_transition;
   // must we block?
 
   // Block, if necessary, before resuming in _thread_in_Java state.
@@ -2303,22 +2581,34 @@
     // a distinct one for this pc
     //
     save_native_result(masm, ret_type, stack_slots);
-    __ call_VM_leaf(L7_thread_cache,
-                    CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans),
-                    G2_thread);
+    if (!is_critical_native) {
+      __ call_VM_leaf(L7_thread_cache,
+                      CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans),
+                      G2_thread);
+    } else {
+      __ call_VM_leaf(L7_thread_cache,
+                      CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition),
+                      G2_thread);
+    }
 
     // Restore any method result value
     restore_native_result(masm, ret_type, stack_slots);
+
+    if (is_critical_native) {
+      // The call above performed the transition to thread_in_Java so
+      // skip the transition logic below.
+      __ ba(after_transition);
+      __ delayed()->nop();
+    }
+
     __ bind(no_block);
   }
 
   // thread state is thread_in_native_trans. Any safepoint blocking has already
   // happened so we can now change state to _thread_in_Java.
-
-
   __ set(_thread_in_Java, G3_scratch);
   __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
-
+  __ bind(after_transition);
 
   Label no_reguard;
   __ ld(G2_thread, JavaThread::stack_guard_state_offset(), G3_scratch);
@@ -2416,12 +2706,14 @@
       __ verify_oop(I0);
   }
 
-  // reset handle block
-  __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5);
-  __ st_ptr(G0, L5, JNIHandleBlock::top_offset_in_bytes());
-
-  __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch);
-  check_forward_pending_exception(masm, G3_scratch);
+  if (!is_critical_native) {
+    // reset handle block
+    __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5);
+    __ st_ptr(G0, L5, JNIHandleBlock::top_offset_in_bytes());
+
+    __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch);
+    check_forward_pending_exception(masm, G3_scratch);
+  }
 
 
   // Return
@@ -2450,6 +2742,10 @@
                                             (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
                                             in_ByteSize(lock_offset),
                                             oop_maps);
+
+  if (is_critical_native) {
+    nm->set_lazy_critical_native(true);
+  }
   return nm;
 
 }
@@ -2473,17 +2769,6 @@
 static int  fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
 static bool offsets_initialized = false;
 
-static VMRegPair reg64_to_VMRegPair(Register r) {
-  VMRegPair ret;
-  if (wordSize == 8) {
-    ret.set2(r->as_VMReg());
-  } else {
-    ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
-  }
-  return ret;
-}
-
-
 nmethod *SharedRuntime::generate_dtrace_nmethod(
     MacroAssembler *masm, methodHandle method) {
 
--- a/src/cpu/x86/vm/c2_globals_x86.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/cpu/x86/vm/c2_globals_x86.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -44,7 +44,7 @@
 #else
 define_pd_global(bool, ProfileInterpreter,           true);
 #endif // CC_INTERP
-define_pd_global(bool, TieredCompilation,            true);
+define_pd_global(bool, TieredCompilation,            trueInTiered);
 define_pd_global(intx, CompileThreshold,             10000);
 define_pd_global(intx, BackEdgeThreshold,            100000);
 
--- a/src/cpu/x86/vm/frame_x86.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/cpu/x86/vm/frame_x86.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -28,6 +28,7 @@
 #include "oops/markOop.hpp"
 #include "oops/methodOop.hpp"
 #include "oops/oop.inline.hpp"
+#include "prims/methodHandles.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/javaCalls.hpp"
@@ -651,13 +652,15 @@
   return &interpreter_frame_tos_address()[index];
 }
 
-#ifdef ASSERT
+#ifndef PRODUCT
 
 #define DESCRIBE_FP_OFFSET(name) \
   values.describe(frame_no, fp() + frame::name##_offset, #name)
 
 void frame::describe_pd(FrameValues& values, int frame_no) {
-  if (is_interpreted_frame()) {
+  if (is_ricochet_frame()) {
+    MethodHandles::RicochetFrame::describe(this, values, frame_no);
+  } else if (is_interpreted_frame()) {
     DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp);
     DESCRIBE_FP_OFFSET(interpreter_frame_last_sp);
     DESCRIBE_FP_OFFSET(interpreter_frame_method);
@@ -667,7 +670,6 @@
     DESCRIBE_FP_OFFSET(interpreter_frame_bcx);
     DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp);
   }
-
 }
 #endif
 
--- a/src/cpu/x86/vm/methodHandles_x86.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/cpu/x86/vm/methodHandles_x86.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -279,14 +279,16 @@
 }
 
 // Emit code to verify that RBP is pointing at a valid ricochet frame.
-#ifdef ASSERT
+#ifndef PRODUCT
 enum {
   ARG_LIMIT = 255, SLOP = 4,
   // use this parameter for checking for garbage stack movements:
   UNREASONABLE_STACK_MOVE = (ARG_LIMIT + SLOP)
   // the slop defends against false alarms due to fencepost errors
 };
+#endif
 
+#ifdef ASSERT
 void MethodHandles::RicochetFrame::verify_clean(MacroAssembler* _masm) {
   // The stack should look like this:
   //    ... keep1 | dest=42 | keep2 | RF | magic | handler | magic | recursive args |
@@ -990,6 +992,26 @@
   BLOCK_COMMENT("} move_return_value");
 }
 
+#ifndef PRODUCT
+#define DESCRIBE_RICOCHET_OFFSET(rf, name) \
+  values.describe(frame_no, (intptr_t *) (((uintptr_t)rf) + MethodHandles::RicochetFrame::name##_offset_in_bytes()), #name)
+
+void MethodHandles::RicochetFrame::describe(const frame* fr, FrameValues& values, int frame_no)  {
+    address bp = (address) fr->fp();
+    RicochetFrame* rf = (RicochetFrame*)(bp - sender_link_offset_in_bytes());
+
+    // ricochet slots
+    DESCRIBE_RICOCHET_OFFSET(rf, exact_sender_sp);
+    DESCRIBE_RICOCHET_OFFSET(rf, conversion);
+    DESCRIBE_RICOCHET_OFFSET(rf, saved_args_base);
+    DESCRIBE_RICOCHET_OFFSET(rf, saved_args_layout);
+    DESCRIBE_RICOCHET_OFFSET(rf, saved_target);
+    DESCRIBE_RICOCHET_OFFSET(rf, continuation);
+
+    // relevant ricochet targets (in caller frame)
+    values.describe(-1, rf->saved_args_base(),  err_msg("*saved_args_base for #%d", frame_no));
+}
+#endif // PRODUCT
 
 #ifndef PRODUCT
 extern "C" void print_method_handle(oop mh);
@@ -1001,6 +1023,7 @@
                               intptr_t* saved_bp) {
   // called as a leaf from native code: do not block the JVM!
   bool has_mh = (strstr(adaptername, "return/") == NULL);  // return adapters don't have rcx_mh
+
   intptr_t* last_sp = (intptr_t*) saved_bp[frame::interpreter_frame_last_sp_offset];
   intptr_t* base_sp = last_sp;
   typedef MethodHandles::RicochetFrame RicochetFrame;
@@ -1030,13 +1053,64 @@
     tty->cr();
     if (last_sp != saved_sp && last_sp != NULL)
       tty->print_cr("*** last_sp="PTR_FORMAT, (intptr_t)last_sp);
-    int stack_dump_count = 16;
-    if (stack_dump_count < (int)(saved_bp + 2 - saved_sp))
-      stack_dump_count = (int)(saved_bp + 2 - saved_sp);
-    if (stack_dump_count > 64)  stack_dump_count = 48;
-    for (i = 0; i < stack_dump_count; i += 4) {
-      tty->print_cr(" dump at SP[%d] "PTR_FORMAT": "PTR_FORMAT" "PTR_FORMAT" "PTR_FORMAT" "PTR_FORMAT,
-                    i, (intptr_t) &entry_sp[i+0], entry_sp[i+0], entry_sp[i+1], entry_sp[i+2], entry_sp[i+3]);
+
+    {
+     // dumping last frame with frame::describe
+
+      JavaThread* p = JavaThread::active();
+
+      ResourceMark rm;
+      PRESERVE_EXCEPTION_MARK; // may not be needed but safer and inexpensive here
+      FrameValues values;
+
+      // Note: We want to allow trace_method_handle from any call site.
+      // While trace_method_handle creates a frame, it may be entered
+      // without a PC on the stack top (e.g. not just after a call).
+      // Walking that frame could lead to failures due to that invalid PC.
+      // => carefully detect that frame when doing the stack walking
+
+      // Current C frame
+      frame cur_frame = os::current_frame();
+
+      // Robust search of trace_calling_frame (independent of inlining).
+      // Assumes saved_regs comes from a pusha in the trace_calling_frame.
+      assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?");
+      frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame);
+      while (trace_calling_frame.fp() < saved_regs) {
+        trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame);
+      }
+
+      // safely create a frame and call frame::describe
+      intptr_t *dump_sp = trace_calling_frame.sender_sp();
+      intptr_t *dump_fp = trace_calling_frame.link();
+
+      bool walkable = has_mh; // whether the traced frame should be walkable
+
+      if (walkable) {
+        // The previous definition of walkable may have to be refined
+        // if new call sites cause the next frame constructor to start
+        // failing. Alternatively, frame constructors could be
+        // modified to support the current or future non walkable
+        // frames (but this is more intrusive and is not considered as
+        // part of this RFE, which will instead use a simpler output).
+        frame dump_frame = frame(dump_sp, dump_fp);
+        dump_frame.describe(values, 1);
+      } else {
+        // Stack may not be walkable (invalid PC above FP):
+        // Add descriptions without building a Java frame to avoid issues
+        values.describe(-1, dump_fp, "fp for #1 <not parsed, cannot trust pc>");
+        values.describe(-1, dump_sp, "sp for #1");
+      }
+
+      // mark saved_sp if seems valid
+      if (has_mh) {
+        if ((saved_sp >= dump_sp - UNREASONABLE_STACK_MOVE) && (saved_sp < dump_fp)) {
+          values.describe(-1, saved_sp, "*saved_sp");
+        }
+      }
+
+      tty->print_cr("  stack layout:");
+      values.print(p);
     }
     if (has_mh)
       print_method_handle(mh);
@@ -1066,26 +1140,49 @@
 void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {
   if (!TraceMethodHandles)  return;
   BLOCK_COMMENT("trace_method_handle {");
-  __ push(rax);
-  __ lea(rax, Address(rsp, wordSize * NOT_LP64(6) LP64_ONLY(14))); // entry_sp  __ pusha();
+  __ enter();
+  __ andptr(rsp, -16); // align stack if needed for FPU state
   __ pusha();
-  __ mov(rbx, rsp);
-  __ enter();
+  __ mov(rbx, rsp); // for retrieving saved_regs
+  // Note: saved_regs must be in the entered frame for the
+  // robust stack walking implemented in trace_method_handle_stub.
+
+  // save FP result, valid at some call sites (adapter_opt_return_float, ...)
+  __ increment(rsp, -2 * wordSize);
+  if  (UseSSE >= 2) {
+    __ movdbl(Address(rsp, 0), xmm0);
+  } else if (UseSSE == 1) {
+    __ movflt(Address(rsp, 0), xmm0);
+  } else {
+    __ fst_d(Address(rsp, 0));
+  }
+
   // incoming state:
   // rcx: method handle
   // r13 or rsi: saved sp
   // To avoid calling convention issues, build a record on the stack and pass the pointer to that instead.
+  // Note: fix the increment below if pushing more arguments
   __ push(rbp);               // saved_bp
-  __ push(rsi);               // saved_sp
-  __ push(rax);               // entry_sp
+  __ push(saved_last_sp_register()); // saved_sp
+  __ push(rbp);               // entry_sp (with extra align space)
   __ push(rbx);               // pusha saved_regs
   __ push(rcx);               // mh
-  __ push(rcx);               // adaptername
+  __ push(rcx);               // slot for adaptername
   __ movptr(Address(rsp, 0), (intptr_t) adaptername);
   __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub_wrapper), rsp);
+  __ increment(rsp, 6 * wordSize); // MethodHandleStubArguments
+
+  if  (UseSSE >= 2) {
+    __ movdbl(xmm0, Address(rsp, 0));
+  } else if (UseSSE == 1) {
+    __ movflt(xmm0, Address(rsp, 0));
+  } else {
+    __ fld_d(Address(rsp, 0));
+  }
+  __ increment(rsp, 2 * wordSize);
+
+  __ popa();
   __ leave();
-  __ popa();
-  __ pop(rax);
   BLOCK_COMMENT("} trace_method_handle");
 }
 #endif //PRODUCT
@@ -2267,23 +2364,19 @@
 
       // grab another temp
       Register rsi_temp = rsi;
-      { if (rsi_temp == saved_last_sp)  __ push(saved_last_sp); }
-      // (preceding push must be done after argslot address is taken!)
-#define UNPUSH_RSI \
-      { if (rsi_temp == saved_last_sp)  __ pop(saved_last_sp); }
 
       // arx_argslot points both to the array and to the first output arg
       vmarg = Address(rax_argslot, 0);
 
       // Get the array value.
-      Register  rsi_array       = rsi_temp;
+      Register  rdi_array       = rdi_temp;
       Register  rdx_array_klass = rdx_temp;
       BasicType elem_type = ek_adapter_opt_spread_type(ek);
       int       elem_slots = type2size[elem_type];  // 1 or 2
       int       array_slots = 1;  // array is always a T_OBJECT
       int       length_offset   = arrayOopDesc::length_offset_in_bytes();
       int       elem0_offset    = arrayOopDesc::base_offset_in_bytes(elem_type);
-      __ movptr(rsi_array, vmarg);
+      __ movptr(rdi_array, vmarg);
 
       Label L_array_is_empty, L_insert_arg_space, L_copy_args, L_args_done;
       if (length_can_be_zero) {
@@ -2294,12 +2387,30 @@
           __ testl(rbx_temp, rbx_temp);
           __ jcc(Assembler::notZero, L_skip);
         }
-        __ testptr(rsi_array, rsi_array);
-        __ jcc(Assembler::zero, L_array_is_empty);
+        __ testptr(rdi_array, rdi_array);
+        __ jcc(Assembler::notZero, L_skip);
+
+        // If 'rsi' contains the 'saved_last_sp' (this is only the
+        // case in a 32-bit version of the VM) we have to save 'rsi'
+        // on the stack because later on (at 'L_array_is_empty') 'rsi'
+        // will be overwritten.
+        { if (rsi_temp == saved_last_sp)  __ push(saved_last_sp); }
+        // Also prepare a handy macro which restores 'rsi' if required.
+#define UNPUSH_RSI                                                      \
+        { if (rsi_temp == saved_last_sp)  __ pop(saved_last_sp); }
+
+        __ jmp(L_array_is_empty);
         __ bind(L_skip);
       }
-      __ null_check(rsi_array, oopDesc::klass_offset_in_bytes());
-      __ load_klass(rdx_array_klass, rsi_array);
+      __ null_check(rdi_array, oopDesc::klass_offset_in_bytes());
+      __ load_klass(rdx_array_klass, rdi_array);
+
+      // Save 'rsi' if required (see comment above).  Do this only
+      // after the null check such that the exception handler which is
+      // called in the case of a null pointer exception will not be
+      // confused by the extra value on the stack (it expects the
+      // return pointer on top of the stack)
+      { if (rsi_temp == saved_last_sp)  __ push(saved_last_sp); }
 
       // Check the array type.
       Register rbx_klass = rbx_temp;
@@ -2307,18 +2418,18 @@
       load_klass_from_Class(_masm, rbx_klass);
 
       Label ok_array_klass, bad_array_klass, bad_array_length;
-      __ check_klass_subtype(rdx_array_klass, rbx_klass, rdi_temp, ok_array_klass);
+      __ check_klass_subtype(rdx_array_klass, rbx_klass, rsi_temp, ok_array_klass);
       // If we get here, the type check failed!
       __ jmp(bad_array_klass);
       __ BIND(ok_array_klass);
 
       // Check length.
       if (length_constant >= 0) {
-        __ cmpl(Address(rsi_array, length_offset), length_constant);
+        __ cmpl(Address(rdi_array, length_offset), length_constant);
       } else {
         Register rbx_vminfo = rbx_temp;
         load_conversion_vminfo(_masm, rbx_vminfo, rcx_amh_conversion);
-        __ cmpl(rbx_vminfo, Address(rsi_array, length_offset));
+        __ cmpl(rbx_vminfo, Address(rdi_array, length_offset));
       }
       __ jcc(Assembler::notEqual, bad_array_length);
 
@@ -2330,9 +2441,9 @@
         __ lea(rdx_argslot_limit, Address(rax_argslot, Interpreter::stackElementSize));
         // 'stack_move' is negative number of words to insert
         // This number already accounts for elem_slots.
-        Register rdi_stack_move = rdi_temp;
-        load_stack_move(_masm, rdi_stack_move, rcx_recv, true);
-        __ cmpptr(rdi_stack_move, 0);
+        Register rsi_stack_move = rsi_temp;
+        load_stack_move(_masm, rsi_stack_move, rcx_recv, true);
+        __ cmpptr(rsi_stack_move, 0);
         assert(stack_move_unit() < 0, "else change this comparison");
         __ jcc(Assembler::less, L_insert_arg_space);
         __ jcc(Assembler::equal, L_copy_args);
@@ -2343,12 +2454,12 @@
         __ jmp(L_args_done);  // no spreading to do
         __ BIND(L_insert_arg_space);
         // come here in the usual case, stack_move < 0 (2 or more spread arguments)
-        Register rsi_temp = rsi_array;  // spill this
-        insert_arg_slots(_masm, rdi_stack_move,
-                         rax_argslot, rbx_temp, rsi_temp);
+        Register rdi_temp = rdi_array;  // spill this
+        insert_arg_slots(_masm, rsi_stack_move,
+                         rax_argslot, rbx_temp, rdi_temp);
         // reload the array since rsi was killed
         // reload from rdx_argslot_limit since rax_argslot is now decremented
-        __ movptr(rsi_array, Address(rdx_argslot_limit, -Interpreter::stackElementSize));
+        __ movptr(rdi_array, Address(rdx_argslot_limit, -Interpreter::stackElementSize));
       } else if (length_constant >= 1) {
         int new_slots = (length_constant * elem_slots) - array_slots;
         insert_arg_slots(_masm, new_slots * stack_move_unit(),
@@ -2371,16 +2482,16 @@
       if (length_constant == -1) {
         // [rax_argslot, rdx_argslot_limit) is the area we are inserting into.
         // Array element [0] goes at rdx_argslot_limit[-wordSize].
-        Register rsi_source = rsi_array;
-        __ lea(rsi_source, Address(rsi_array, elem0_offset));
+        Register rdi_source = rdi_array;
+        __ lea(rdi_source, Address(rdi_array, elem0_offset));
         Register rdx_fill_ptr = rdx_argslot_limit;
         Label loop;
         __ BIND(loop);
         __ addptr(rdx_fill_ptr, -Interpreter::stackElementSize * elem_slots);
         move_typed_arg(_masm, elem_type, true,
-                       Address(rdx_fill_ptr, 0), Address(rsi_source, 0),
-                       rbx_temp, rdi_temp);
-        __ addptr(rsi_source, type2aelembytes(elem_type));
+                       Address(rdx_fill_ptr, 0), Address(rdi_source, 0),
+                       rbx_temp, rsi_temp);
+        __ addptr(rdi_source, type2aelembytes(elem_type));
         __ cmpptr(rdx_fill_ptr, rax_argslot);
         __ jcc(Assembler::above, loop);
       } else if (length_constant == 0) {
@@ -2391,8 +2502,8 @@
         for (int index = 0; index < length_constant; index++) {
           slot_offset -= Interpreter::stackElementSize * elem_slots;  // fill backward
           move_typed_arg(_masm, elem_type, true,
-                         Address(rax_argslot, slot_offset), Address(rsi_array, elem_offset),
-                         rbx_temp, rdi_temp);
+                         Address(rax_argslot, slot_offset), Address(rdi_array, elem_offset),
+                         rbx_temp, rsi_temp);
           elem_offset += type2aelembytes(elem_type);
         }
       }
--- a/src/cpu/x86/vm/methodHandles_x86.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/cpu/x86/vm/methodHandles_x86.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -224,6 +224,8 @@
   }
 
   static void verify_clean(MacroAssembler* _masm) NOT_DEBUG_RETURN;
+
+  static void describe(const frame* fr, FrameValues& values, int frame_no) PRODUCT_RETURN;
 };
 
 // Additional helper methods for MethodHandles code generation:
--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1091,12 +1091,238 @@
   }
 }
 
+
+static void save_or_restore_arguments(MacroAssembler* masm,
+                                      const int stack_slots,
+                                      const int total_in_args,
+                                      const int arg_save_area,
+                                      OopMap* map,
+                                      VMRegPair* in_regs,
+                                      BasicType* in_sig_bt) {
+  // if map is non-NULL then the code should store the values,
+  // otherwise it should load them.
+  int handle_index = 0;
+  // Save down double word first
+  for ( int i = 0; i < total_in_args; i++) {
+    if (in_regs[i].first()->is_XMMRegister() && in_sig_bt[i] == T_DOUBLE) {
+      int slot = handle_index * VMRegImpl::slots_per_word + arg_save_area;
+      int offset = slot * VMRegImpl::stack_slot_size;
+      handle_index += 2;
+      assert(handle_index <= stack_slots, "overflow");
+      if (map != NULL) {
+        __ movdbl(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
+      } else {
+        __ movdbl(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
+      }
+    }
+    if (in_regs[i].first()->is_Register() && in_sig_bt[i] == T_LONG) {
+      int slot = handle_index * VMRegImpl::slots_per_word + arg_save_area;
+      int offset = slot * VMRegImpl::stack_slot_size;
+      handle_index += 2;
+      assert(handle_index <= stack_slots, "overflow");
+      if (map != NULL) {
+        __ movl(Address(rsp, offset), in_regs[i].first()->as_Register());
+        if (in_regs[i].second()->is_Register()) {
+          __ movl(Address(rsp, offset + 4), in_regs[i].second()->as_Register());
+        }
+      } else {
+        __ movl(in_regs[i].first()->as_Register(), Address(rsp, offset));
+        if (in_regs[i].second()->is_Register()) {
+          __ movl(in_regs[i].second()->as_Register(), Address(rsp, offset + 4));
+        }
+      }
+    }
+  }
+  // Save or restore single word registers
+  for ( int i = 0; i < total_in_args; i++) {
+    if (in_regs[i].first()->is_Register()) {
+      int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
+      int offset = slot * VMRegImpl::stack_slot_size;
+      assert(handle_index <= stack_slots, "overflow");
+      if (in_sig_bt[i] == T_ARRAY && map != NULL) {
+        map->set_oop(VMRegImpl::stack2reg(slot));;
+      }
+
+      // Value is in an input register; we must flush it to the stack
+      const Register reg = in_regs[i].first()->as_Register();
+      switch (in_sig_bt[i]) {
+        case T_ARRAY:
+          if (map != NULL) {
+            __ movptr(Address(rsp, offset), reg);
+          } else {
+            __ movptr(reg, Address(rsp, offset));
+          }
+          break;
+        case T_BOOLEAN:
+        case T_CHAR:
+        case T_BYTE:
+        case T_SHORT:
+        case T_INT:
+          if (map != NULL) {
+            __ movl(Address(rsp, offset), reg);
+          } else {
+            __ movl(reg, Address(rsp, offset));
+          }
+          break;
+        case T_OBJECT:
+        default: ShouldNotReachHere();
+      }
+    } else if (in_regs[i].first()->is_XMMRegister()) {
+      if (in_sig_bt[i] == T_FLOAT) {
+        int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
+        int offset = slot * VMRegImpl::stack_slot_size;
+        assert(handle_index <= stack_slots, "overflow");
+        if (map != NULL) {
+          __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
+        } else {
+          __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
+        }
+      }
+    } else if (in_regs[i].first()->is_stack()) {
+      if (in_sig_bt[i] == T_ARRAY && map != NULL) {
+        int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
+        map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
+      }
+    }
+  }
+}
+
+// Check GC_locker::needs_gc and enter the runtime if it's true.  This
+// keeps a new JNI critical region from starting until a GC has been
+// forced.  Save down any oops in registers and describe them in an
+// OopMap.
+static void check_needs_gc_for_critical_native(MacroAssembler* masm,
+                                               Register thread,
+                                               int stack_slots,
+                                               int total_c_args,
+                                               int total_in_args,
+                                               int arg_save_area,
+                                               OopMapSet* oop_maps,
+                                               VMRegPair* in_regs,
+                                               BasicType* in_sig_bt) {
+  __ block_comment("check GC_locker::needs_gc");
+  Label cont;
+  __ cmp8(ExternalAddress((address)GC_locker::needs_gc_address()), false);
+  __ jcc(Assembler::equal, cont);
+
+  // Save down any incoming oops and call into the runtime to halt for a GC
+
+  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, map, in_regs, in_sig_bt);
+
+  address the_pc = __ pc();
+  oop_maps->add_gc_map( __ offset(), map);
+  __ set_last_Java_frame(thread, rsp, noreg, the_pc);
+
+  __ block_comment("block_for_jni_critical");
+  __ push(thread);
+  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical)));
+  __ increment(rsp, wordSize);
+
+  __ get_thread(thread);
+  __ reset_last_Java_frame(thread, false, true);
+
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, NULL, in_regs, in_sig_bt);
+
+  __ bind(cont);
+#ifdef ASSERT
+  if (StressCriticalJNINatives) {
+    // Stress register saving
+    OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, map, in_regs, in_sig_bt);
+    // Destroy argument registers
+    for (int i = 0; i < total_in_args - 1; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        const Register reg = in_regs[i].first()->as_Register();
+        __ xorptr(reg, reg);
+      } else if (in_regs[i].first()->is_XMMRegister()) {
+        __ xorpd(in_regs[i].first()->as_XMMRegister(), in_regs[i].first()->as_XMMRegister());
+      } else if (in_regs[i].first()->is_FloatRegister()) {
+        ShouldNotReachHere();
+      } else if (in_regs[i].first()->is_stack()) {
+        // Nothing to do
+      } else {
+        ShouldNotReachHere();
+      }
+      if (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_DOUBLE) {
+        i++;
+      }
+    }
+
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, NULL, in_regs, in_sig_bt);
+  }
+#endif
+}
+
+// Unpack an array argument into a pointer to the body and the length
+// if the array is non-null, otherwise pass 0 for both.
+static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) {
+  Register tmp_reg = rax;
+  assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
+         "possible collision");
+  assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
+         "possible collision");
+
+  // Pass the length, ptr pair
+  Label is_null, done;
+  VMRegPair tmp(tmp_reg->as_VMReg());
+  if (reg.first()->is_stack()) {
+    // Load the arg up from the stack
+    simple_move32(masm, reg, tmp);
+    reg = tmp;
+  }
+  __ testptr(reg.first()->as_Register(), reg.first()->as_Register());
+  __ jccb(Assembler::equal, is_null);
+  __ lea(tmp_reg, Address(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type)));
+  simple_move32(masm, tmp, body_arg);
+  // load the length relative to the body.
+  __ movl(tmp_reg, Address(tmp_reg, arrayOopDesc::length_offset_in_bytes() -
+                           arrayOopDesc::base_offset_in_bytes(in_elem_type)));
+  simple_move32(masm, tmp, length_arg);
+  __ jmpb(done);
+  __ bind(is_null);
+  // Pass zeros
+  __ xorptr(tmp_reg, tmp_reg);
+  simple_move32(masm, tmp, body_arg);
+  simple_move32(masm, tmp, length_arg);
+  __ bind(done);
+}
+
+
 // ---------------------------------------------------------------------------
 // Generate a native wrapper for a given method.  The method takes arguments
 // in the Java compiled code convention, marshals them to the native
 // convention (handlizes oops, etc), transitions to native, makes the call,
 // returns to java state (possibly blocking), unhandlizes any result and
 // returns.
+//
+// Critical native functions are a shorthand for the use of
+// GetPrimitiveArrayCritical and disallow the use of any other JNI
+// functions.  The wrapper is expected to unpack the arguments before
+// passing them to the callee and perform checks before and after the
+// native call to ensure that the GC_locker
+// lock_critical/unlock_critical semantics are followed.  Some other
+// parts of JNI setup are skipped like the tear down of the JNI handle
+// block and the check for pending exceptions, since it's impossible
+// for them to be thrown.
+//
+// They are roughly structured like this:
+//    if (GC_locker::needs_gc())
+//      SharedRuntime::block_for_jni_critical();
+//    transition to thread_in_native
+//    unpack array arguments and call native entry point
+//    check for safepoint in progress
+//    check if any thread suspend flags are set
+//      call into JVM and possibly unlock the JNI critical
+//      if a GC was suppressed while in the critical native.
+//    transition back to thread_in_Java
+//    return to caller
+//
 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
                                                 methodHandle method,
                                                 int compile_id,
@@ -1105,6 +1331,13 @@
                                                 BasicType *in_sig_bt,
                                                 VMRegPair *in_regs,
                                                 BasicType ret_type) {
+  bool is_critical_native = true;
+  address native_func = method->critical_native_function();
+  if (native_func == NULL) {
+    native_func = method->native_function();
+    is_critical_native = false;
+  }
+  assert(native_func != NULL, "must have function");
 
   // An OopMap for lock (and class if static)
   OopMapSet *oop_maps = new OopMapSet();
@@ -1115,30 +1348,72 @@
   // we convert the java signature to a C signature by inserting
   // the hidden arguments as arg[0] and possibly arg[1] (static method)
 
-  int total_c_args = total_in_args + 1;
-  if (method->is_static()) {
-    total_c_args++;
+  int total_c_args = total_in_args;
+  if (!is_critical_native) {
+    total_c_args += 1;
+    if (method->is_static()) {
+      total_c_args++;
+    }
+  } else {
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        total_c_args++;
+      }
+    }
   }
 
   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
-  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair,   total_c_args);
+  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
+  BasicType* in_elem_bt = NULL;
 
   int argc = 0;
-  out_sig_bt[argc++] = T_ADDRESS;
-  if (method->is_static()) {
-    out_sig_bt[argc++] = T_OBJECT;
+  if (!is_critical_native) {
+    out_sig_bt[argc++] = T_ADDRESS;
+    if (method->is_static()) {
+      out_sig_bt[argc++] = T_OBJECT;
+    }
+
+    for (int i = 0; i < total_in_args ; i++ ) {
+      out_sig_bt[argc++] = in_sig_bt[i];
+    }
+  } else {
+    Thread* THREAD = Thread::current();
+    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
+    SignatureStream ss(method->signature());
+    for (int i = 0; i < total_in_args ; i++ ) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        // Arrays are passed as int, elem* pair
+        out_sig_bt[argc++] = T_INT;
+        out_sig_bt[argc++] = T_ADDRESS;
+        Symbol* atype = ss.as_symbol(CHECK_NULL);
+        const char* at = atype->as_C_string();
+        if (strlen(at) == 2) {
+          assert(at[0] == '[', "must be");
+          switch (at[1]) {
+            case 'B': in_elem_bt[i]  = T_BYTE; break;
+            case 'C': in_elem_bt[i]  = T_CHAR; break;
+            case 'D': in_elem_bt[i]  = T_DOUBLE; break;
+            case 'F': in_elem_bt[i]  = T_FLOAT; break;
+            case 'I': in_elem_bt[i]  = T_INT; break;
+            case 'J': in_elem_bt[i]  = T_LONG; break;
+            case 'S': in_elem_bt[i]  = T_SHORT; break;
+            case 'Z': in_elem_bt[i]  = T_BOOLEAN; break;
+            default: ShouldNotReachHere();
+          }
+        }
+      } else {
+        out_sig_bt[argc++] = in_sig_bt[i];
+        in_elem_bt[i] = T_VOID;
+      }
+      if (in_sig_bt[i] != T_VOID) {
+        assert(in_sig_bt[i] == ss.type(), "must match");
+        ss.next();
+      }
+    }
   }
 
-  int i;
-  for (i = 0; i < total_in_args ; i++ ) {
-    out_sig_bt[argc++] = in_sig_bt[i];
-  }
-
-
   // Now figure out where the args must be stored and how much stack space
-  // they require (neglecting out_preserve_stack_slots but space for storing
-  // the 1st six register arguments). It's weird see int_stk_helper.
-  //
+  // they require.
   int out_arg_slots;
   out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
 
@@ -1151,9 +1426,44 @@
   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
 
   // Now the space for the inbound oop handle area
+  int total_save_slots = 2 * VMRegImpl::slots_per_word; // 2 arguments passed in registers
+  if (is_critical_native) {
+    // Critical natives may have to call out so they need a save area
+    // for register arguments.
+    int double_slots = 0;
+    int single_slots = 0;
+    for ( int i = 0; i < total_in_args; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        const Register reg = in_regs[i].first()->as_Register();
+        switch (in_sig_bt[i]) {
+          case T_ARRAY:
+          case T_BOOLEAN:
+          case T_BYTE:
+          case T_SHORT:
+          case T_CHAR:
+          case T_INT:  single_slots++; break;
+          case T_LONG: double_slots++; break;
+          default:  ShouldNotReachHere();
+        }
+      } else if (in_regs[i].first()->is_XMMRegister()) {
+        switch (in_sig_bt[i]) {
+          case T_FLOAT:  single_slots++; break;
+          case T_DOUBLE: double_slots++; break;
+          default:  ShouldNotReachHere();
+        }
+      } else if (in_regs[i].first()->is_FloatRegister()) {
+        ShouldNotReachHere();
+      }
+    }
+    total_save_slots = double_slots * 2 + single_slots;
+    // align the save area
+    if (double_slots != 0) {
+      stack_slots = round_to(stack_slots, 2);
+    }
+  }
 
   int oop_handle_offset = stack_slots;
-  stack_slots += 2*VMRegImpl::slots_per_word;
+  stack_slots += total_save_slots;
 
   // Now any space we need for handlizing a klass if static method
 
@@ -1161,7 +1471,6 @@
   int klass_offset = -1;
   int lock_slot_offset = 0;
   bool is_static = false;
-  int oop_temp_slot_offset = 0;
 
   if (method->is_static()) {
     klass_slot_offset = stack_slots;
@@ -1221,7 +1530,7 @@
   // First thing make an ic check to see if we should even be here
 
   // We are free to use all registers as temps without saving them and
-  // restoring them except rbp,. rbp, is the only callee save register
+  // restoring them except rbp. rbp is the only callee save register
   // as far as the interpreter and the compiler(s) are concerned.
 
 
@@ -1230,7 +1539,6 @@
   Label hit;
   Label exception_pending;
 
-
   __ verify_oop(receiver);
   __ cmpptr(ic_reg, Address(receiver, oopDesc::klass_offset_in_bytes()));
   __ jcc(Assembler::equal, hit);
@@ -1292,11 +1600,10 @@
 
   // Generate a new frame for the wrapper.
   __ enter();
-  // -2 because return address is already present and so is saved rbp,
+  // -2 because return address is already present and so is saved rbp
   __ subptr(rsp, stack_size - 2*wordSize);
 
-  // Frame is now completed as far a size and linkage.
-
+  // Frame is now completed as far as size and linkage.
   int frame_complete = ((intptr_t)__ pc()) - start;
 
   // Calculate the difference between rsp and rbp,. We need to know it
@@ -1319,7 +1626,6 @@
   // Compute the rbp, offset for any slots used after the jni call
 
   int lock_slot_rbp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
-  int oop_temp_slot_rbp_offset = (oop_temp_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
 
   // We use rdi as a thread pointer because it is callee save and
   // if we load it once it is usable thru the entire wrapper
@@ -1332,6 +1638,10 @@
 
   __ get_thread(thread);
 
+  if (is_critical_native) {
+    check_needs_gc_for_critical_native(masm, thread, stack_slots, total_c_args, total_in_args,
+                                       oop_handle_offset, oop_maps, in_regs, in_sig_bt);
+  }
 
   //
   // We immediately shuffle the arguments so that any vm call we have to
@@ -1353,7 +1663,7 @@
   // vectors we have in our possession. We simply walk the java vector to
   // get the source locations and the c vector to get the destinations.
 
-  int c_arg = method->is_static() ? 2 : 1 ;
+  int c_arg = is_critical_native ? 0 : (method->is_static() ? 2 : 1 );
 
   // Record rsp-based slot for receiver on stack for non-static methods
   int receiver_offset = -1;
@@ -1373,10 +1683,16 @@
   // Are free to temporaries if we have to do  stack to steck moves.
   // All inbound args are referenced based on rbp, and all outbound args via rsp.
 
-  for (i = 0; i < total_in_args ; i++, c_arg++ ) {
+  for (int i = 0; i < total_in_args ; i++, c_arg++ ) {
     switch (in_sig_bt[i]) {
       case T_ARRAY:
+        if (is_critical_native) {
+          unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
+          c_arg++;
+          break;
+        }
       case T_OBJECT:
+        assert(!is_critical_native, "no oop arguments");
         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
                     ((i == 0) && (!is_static)),
                     &receiver_offset);
@@ -1408,7 +1724,7 @@
 
   // Pre-load a static method's oop into rsi.  Used both by locking code and
   // the normal JNI call code.
-  if (method->is_static()) {
+  if (method->is_static() && !is_critical_native) {
 
     //  load opp into a register
     __ movoop(oop_handle_reg, JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()));
@@ -1463,6 +1779,7 @@
 
   // Lock a synchronized method
   if (method->is_synchronized()) {
+    assert(!is_critical_native, "unhandled");
 
 
     const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
@@ -1529,14 +1846,15 @@
 
 
   // get JNIEnv* which is first argument to native
-
-  __ lea(rdx, Address(thread, in_bytes(JavaThread::jni_environment_offset())));
-  __ movptr(Address(rsp, 0), rdx);
+  if (!is_critical_native) {
+    __ lea(rdx, Address(thread, in_bytes(JavaThread::jni_environment_offset())));
+    __ movptr(Address(rsp, 0), rdx);
+  }
 
   // Now set thread in native
   __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native);
 
-  __ call(RuntimeAddress(method->native_function()));
+  __ call(RuntimeAddress(native_func));
 
   // WARNING - on Windows Java Natives use pascal calling convention and pop the
   // arguments off of the stack. We could just re-adjust the stack pointer here
@@ -1591,6 +1909,8 @@
     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
   }
 
+  Label after_transition;
+
   // check for safepoint operation in progress and/or pending suspend requests
   { Label Continue;
 
@@ -1611,17 +1931,29 @@
     //
     save_native_result(masm, ret_type, stack_slots);
     __ push(thread);
-    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
-                                            JavaThread::check_special_condition_for_native_trans)));
+    if (!is_critical_native) {
+      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
+                                              JavaThread::check_special_condition_for_native_trans)));
+    } else {
+      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
+                                              JavaThread::check_special_condition_for_native_trans_and_transition)));
+    }
     __ increment(rsp, wordSize);
     // Restore any method result value
     restore_native_result(masm, ret_type, stack_slots);
 
+    if (is_critical_native) {
+      // The call above performed the transition to thread_in_Java so
+      // skip the transition logic below.
+      __ jmpb(after_transition);
+    }
+
     __ bind(Continue);
   }
 
   // change thread state
   __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_Java);
+  __ bind(after_transition);
 
   Label reguard;
   Label reguard_done;
@@ -1710,15 +2042,15 @@
       __ verify_oop(rax);
   }
 
-  // reset handle block
-  __ movptr(rcx, Address(thread, JavaThread::active_handles_offset()));
-
-  __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD);
-
-  // Any exception pending?
-  __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
-  __ jcc(Assembler::notEqual, exception_pending);
-
+  if (!is_critical_native) {
+    // reset handle block
+    __ movptr(rcx, Address(thread, JavaThread::active_handles_offset()));
+    __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD);
+
+    // Any exception pending?
+    __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
+    __ jcc(Assembler::notEqual, exception_pending);
+  }
 
   // no exception, we're almost done
 
@@ -1829,16 +2161,18 @@
 
   // BEGIN EXCEPTION PROCESSING
 
-  // Forward  the exception
-  __ bind(exception_pending);
-
-  // remove possible return value from FPU register stack
-  __ empty_FPU_stack();
-
-  // pop our frame
-  __ leave();
-  // and forward the exception
-  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+  if (!is_critical_native) {
+    // Forward  the exception
+    __ bind(exception_pending);
+
+    // remove possible return value from FPU register stack
+    __ empty_FPU_stack();
+
+    // pop our frame
+    __ leave();
+    // and forward the exception
+    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+  }
 
   __ flush();
 
@@ -1851,6 +2185,11 @@
                                             (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
                                             in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
                                             oop_maps);
+
+  if (is_critical_native) {
+    nm->set_lazy_critical_native(true);
+  }
+
   return nm;
 
 }
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -938,6 +938,25 @@
   }
 }
 
+static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      // stack to stack
+      __ movq(rax, Address(rbp, reg2offset_in(src.first())));
+      __ movq(Address(rsp, reg2offset_out(dst.first())), rax);
+    } else {
+      // stack to reg
+      __ movq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first())));
+    }
+  } else if (dst.first()->is_stack()) {
+    // reg to stack
+    __ movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
+  } else {
+    if (dst.first() != src.first()) {
+      __ movq(dst.first()->as_Register(), src.first()->as_Register());
+    }
+  }
+}
 
 // An oop arg. Must pass a handle not the oop itself
 static void object_move(MacroAssembler* masm,
@@ -1152,6 +1171,203 @@
     }
 }
 
+
+static void save_or_restore_arguments(MacroAssembler* masm,
+                                      const int stack_slots,
+                                      const int total_in_args,
+                                      const int arg_save_area,
+                                      OopMap* map,
+                                      VMRegPair* in_regs,
+                                      BasicType* in_sig_bt) {
+  // if map is non-NULL then the code should store the values,
+  // otherwise it should load them.
+  int handle_index = 0;
+  // Save down double word first
+  for ( int i = 0; i < total_in_args; i++) {
+    if (in_regs[i].first()->is_XMMRegister() && in_sig_bt[i] == T_DOUBLE) {
+      int slot = handle_index * VMRegImpl::slots_per_word + arg_save_area;
+      int offset = slot * VMRegImpl::stack_slot_size;
+      handle_index += 2;
+      assert(handle_index <= stack_slots, "overflow");
+      if (map != NULL) {
+        __ movdbl(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
+      } else {
+        __ movdbl(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
+      }
+    }
+    if (in_regs[i].first()->is_Register() &&
+        (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
+      int slot = handle_index * VMRegImpl::slots_per_word + arg_save_area;
+      int offset = slot * VMRegImpl::stack_slot_size;
+      handle_index += 2;
+      assert(handle_index <= stack_slots, "overflow");
+      if (map != NULL) {
+        __ movq(Address(rsp, offset), in_regs[i].first()->as_Register());
+        if (in_sig_bt[i] == T_ARRAY) {
+          map->set_oop(VMRegImpl::stack2reg(slot));;
+        }
+      } else {
+        __ movq(in_regs[i].first()->as_Register(), Address(rsp, offset));
+      }
+    }
+  }
+  // Save or restore single word registers
+  for ( int i = 0; i < total_in_args; i++) {
+    if (in_regs[i].first()->is_Register()) {
+      int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
+      int offset = slot * VMRegImpl::stack_slot_size;
+      assert(handle_index <= stack_slots, "overflow");
+
+      // Value is in an input register; we must flush it to the stack
+      const Register reg = in_regs[i].first()->as_Register();
+      switch (in_sig_bt[i]) {
+        case T_BOOLEAN:
+        case T_CHAR:
+        case T_BYTE:
+        case T_SHORT:
+        case T_INT:
+          if (map != NULL) {
+            __ movl(Address(rsp, offset), reg);
+          } else {
+            __ movl(reg, Address(rsp, offset));
+          }
+          break;
+        case T_ARRAY:
+        case T_LONG:
+          // handled above
+          break;
+        case T_OBJECT:
+        default: ShouldNotReachHere();
+      }
+    } else if (in_regs[i].first()->is_XMMRegister()) {
+      if (in_sig_bt[i] == T_FLOAT) {
+        int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
+        int offset = slot * VMRegImpl::stack_slot_size;
+        assert(handle_index <= stack_slots, "overflow");
+        if (map != NULL) {
+          __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
+        } else {
+          __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
+        }
+      }
+    } else if (in_regs[i].first()->is_stack()) {
+      if (in_sig_bt[i] == T_ARRAY && map != NULL) {
+        int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
+        map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
+      }
+    }
+  }
+}
+
+
+// Check GC_locker::needs_gc and enter the runtime if it's true.  This
+// keeps a new JNI critical region from starting until a GC has been
+// forced.  Save down any oops in registers and describe them in an
+// OopMap.
+static void check_needs_gc_for_critical_native(MacroAssembler* masm,
+                                               int stack_slots,
+                                               int total_c_args,
+                                               int total_in_args,
+                                               int arg_save_area,
+                                               OopMapSet* oop_maps,
+                                               VMRegPair* in_regs,
+                                               BasicType* in_sig_bt) {
+  __ block_comment("check GC_locker::needs_gc");
+  Label cont;
+  __ cmp8(ExternalAddress((address)GC_locker::needs_gc_address()), false);
+  __ jcc(Assembler::equal, cont);
+
+  // Save down any incoming oops and call into the runtime to halt for a GC
+
+  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, map, in_regs, in_sig_bt);
+
+  address the_pc = __ pc();
+  oop_maps->add_gc_map( __ offset(), map);
+  __ set_last_Java_frame(rsp, noreg, the_pc);
+
+  __ block_comment("block_for_jni_critical");
+  __ movptr(c_rarg0, r15_thread);
+  __ mov(r12, rsp); // remember sp
+  __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
+  __ andptr(rsp, -16); // align stack as required by ABI
+  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical)));
+  __ mov(rsp, r12); // restore sp
+  __ reinit_heapbase();
+
+  __ reset_last_Java_frame(false, true);
+
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, NULL, in_regs, in_sig_bt);
+
+  __ bind(cont);
+#ifdef ASSERT
+  if (StressCriticalJNINatives) {
+    // Stress register saving
+    OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, map, in_regs, in_sig_bt);
+    // Destroy argument registers
+    for (int i = 0; i < total_in_args - 1; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        const Register reg = in_regs[i].first()->as_Register();
+        __ xorptr(reg, reg);
+      } else if (in_regs[i].first()->is_XMMRegister()) {
+        __ xorpd(in_regs[i].first()->as_XMMRegister(), in_regs[i].first()->as_XMMRegister());
+      } else if (in_regs[i].first()->is_FloatRegister()) {
+        ShouldNotReachHere();
+      } else if (in_regs[i].first()->is_stack()) {
+        // Nothing to do
+      } else {
+        ShouldNotReachHere();
+      }
+      if (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_DOUBLE) {
+        i++;
+      }
+    }
+
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, NULL, in_regs, in_sig_bt);
+  }
+#endif
+}
+
+// Unpack an array argument into a pointer to the body and the length
+// if the array is non-null, otherwise pass 0 for both.
+static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) {
+  Register tmp_reg = rax;
+  assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
+         "possible collision");
+  assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
+         "possible collision");
+
+  // Pass the length, ptr pair
+  Label is_null, done;
+  VMRegPair tmp;
+  tmp.set_ptr(tmp_reg->as_VMReg());
+  if (reg.first()->is_stack()) {
+    // Load the arg up from the stack
+    move_ptr(masm, reg, tmp);
+    reg = tmp;
+  }
+  __ testptr(reg.first()->as_Register(), reg.first()->as_Register());
+  __ jccb(Assembler::equal, is_null);
+  __ lea(tmp_reg, Address(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type)));
+  move_ptr(masm, tmp, body_arg);
+  // load the length relative to the body.
+  __ movl(tmp_reg, Address(tmp_reg, arrayOopDesc::length_offset_in_bytes() -
+                           arrayOopDesc::base_offset_in_bytes(in_elem_type)));
+  move32_64(masm, tmp, length_arg);
+  __ jmpb(done);
+  __ bind(is_null);
+  // Pass zeros
+  __ xorptr(tmp_reg, tmp_reg);
+  move_ptr(masm, tmp, body_arg);
+  move32_64(masm, tmp, length_arg);
+  __ bind(done);
+}
+
 // ---------------------------------------------------------------------------
 // Generate a native wrapper for a given method.  The method takes arguments
 // in the Java compiled code convention, marshals them to the native
@@ -1166,10 +1382,14 @@
                                                 BasicType *in_sig_bt,
                                                 VMRegPair *in_regs,
                                                 BasicType ret_type) {
-  // Native nmethod wrappers never take possesion of the oop arguments.
-  // So the caller will gc the arguments. The only thing we need an
-  // oopMap for is if the call is static
-  //
+  bool is_critical_native = true;
+  address native_func = method->critical_native_function();
+  if (native_func == NULL) {
+    native_func = method->native_function();
+    is_critical_native = false;
+  }
+  assert(native_func != NULL, "must have function");
+
   // An OopMap for lock (and class if static)
   OopMapSet *oop_maps = new OopMapSet();
   intptr_t start = (intptr_t)__ pc();
@@ -1180,27 +1400,72 @@
   // we convert the java signature to a C signature by inserting
   // the hidden arguments as arg[0] and possibly arg[1] (static method)
 
-  int total_c_args = total_in_args + 1;
-  if (method->is_static()) {
-    total_c_args++;
+  int total_c_args = total_in_args;
+  if (!is_critical_native) {
+    total_c_args += 1;
+    if (method->is_static()) {
+      total_c_args++;
+    }
+  } else {
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        total_c_args++;
+      }
+    }
   }
 
   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
-  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair,   total_c_args);
+  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
+  BasicType* in_elem_bt = NULL;
 
   int argc = 0;
-  out_sig_bt[argc++] = T_ADDRESS;
-  if (method->is_static()) {
-    out_sig_bt[argc++] = T_OBJECT;
-  }
-
-  for (int i = 0; i < total_in_args ; i++ ) {
-    out_sig_bt[argc++] = in_sig_bt[i];
+  if (!is_critical_native) {
+    out_sig_bt[argc++] = T_ADDRESS;
+    if (method->is_static()) {
+      out_sig_bt[argc++] = T_OBJECT;
+    }
+
+    for (int i = 0; i < total_in_args ; i++ ) {
+      out_sig_bt[argc++] = in_sig_bt[i];
+    }
+  } else {
+    Thread* THREAD = Thread::current();
+    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
+    SignatureStream ss(method->signature());
+    for (int i = 0; i < total_in_args ; i++ ) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        // Arrays are passed as int, elem* pair
+        out_sig_bt[argc++] = T_INT;
+        out_sig_bt[argc++] = T_ADDRESS;
+        Symbol* atype = ss.as_symbol(CHECK_NULL);
+        const char* at = atype->as_C_string();
+        if (strlen(at) == 2) {
+          assert(at[0] == '[', "must be");
+          switch (at[1]) {
+            case 'B': in_elem_bt[i]  = T_BYTE; break;
+            case 'C': in_elem_bt[i]  = T_CHAR; break;
+            case 'D': in_elem_bt[i]  = T_DOUBLE; break;
+            case 'F': in_elem_bt[i]  = T_FLOAT; break;
+            case 'I': in_elem_bt[i]  = T_INT; break;
+            case 'J': in_elem_bt[i]  = T_LONG; break;
+            case 'S': in_elem_bt[i]  = T_SHORT; break;
+            case 'Z': in_elem_bt[i]  = T_BOOLEAN; break;
+            default: ShouldNotReachHere();
+          }
+        }
+      } else {
+        out_sig_bt[argc++] = in_sig_bt[i];
+        in_elem_bt[i] = T_VOID;
+      }
+      if (in_sig_bt[i] != T_VOID) {
+        assert(in_sig_bt[i] == ss.type(), "must match");
+        ss.next();
+      }
+    }
   }
 
   // Now figure out where the args must be stored and how much stack space
   // they require.
-  //
   int out_arg_slots;
   out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
 
@@ -1213,13 +1478,47 @@
   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
 
   // Now the space for the inbound oop handle area
+  int total_save_slots = 6 * VMRegImpl::slots_per_word;  // 6 arguments passed in registers
+  if (is_critical_native) {
+    // Critical natives may have to call out so they need a save area
+    // for register arguments.
+    int double_slots = 0;
+    int single_slots = 0;
+    for ( int i = 0; i < total_in_args; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        const Register reg = in_regs[i].first()->as_Register();
+        switch (in_sig_bt[i]) {
+          case T_ARRAY:
+          case T_BOOLEAN:
+          case T_BYTE:
+          case T_SHORT:
+          case T_CHAR:
+          case T_INT:  single_slots++; break;
+          case T_LONG: double_slots++; break;
+          default:  ShouldNotReachHere();
+        }
+      } else if (in_regs[i].first()->is_XMMRegister()) {
+        switch (in_sig_bt[i]) {
+          case T_FLOAT:  single_slots++; break;
+          case T_DOUBLE: double_slots++; break;
+          default:  ShouldNotReachHere();
+        }
+      } else if (in_regs[i].first()->is_FloatRegister()) {
+        ShouldNotReachHere();
+      }
+    }
+    total_save_slots = double_slots * 2 + single_slots;
+    // align the save area
+    if (double_slots != 0) {
+      stack_slots = round_to(stack_slots, 2);
+    }
+  }
 
   int oop_handle_offset = stack_slots;
-  stack_slots += 6*VMRegImpl::slots_per_word;
+  stack_slots += total_save_slots;
 
   // Now any space we need for handlizing a klass if static method
 
-  int oop_temp_slot_offset = 0;
   int klass_slot_offset = 0;
   int klass_offset = -1;
   int lock_slot_offset = 0;
@@ -1272,7 +1571,6 @@
 
   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
 
-
   // First thing make an ic check to see if we should even be here
 
   // We are free to use all registers as temps without saving them and
@@ -1283,22 +1581,22 @@
   const Register ic_reg = rax;
   const Register receiver = j_rarg0;
 
-  Label ok;
+  Label hit;
   Label exception_pending;
 
   assert_different_registers(ic_reg, receiver, rscratch1);
   __ verify_oop(receiver);
   __ load_klass(rscratch1, receiver);
   __ cmpq(ic_reg, rscratch1);
-  __ jcc(Assembler::equal, ok);
+  __ jcc(Assembler::equal, hit);
 
   __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 
-  __ bind(ok);
-
   // Verified entry point must be aligned
   __ align(8);
 
+  __ bind(hit);
+
   int vep_offset = ((intptr_t)__ pc()) - start;
 
   // The instruction at the verified entry point must be 5 bytes or longer
@@ -1319,9 +1617,8 @@
   // -2 because return address is already present and so is saved rbp
   __ subptr(rsp, stack_size - 2*wordSize);
 
-    // Frame is now completed as far as size and linkage.
-
-    int frame_complete = ((intptr_t)__ pc()) - start;
+  // Frame is now completed as far as size and linkage.
+  int frame_complete = ((intptr_t)__ pc()) - start;
 
 #ifdef ASSERT
     {
@@ -1341,7 +1638,10 @@
 
   const Register oop_handle_reg = r14;
 
-
+  if (is_critical_native) {
+    check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args,
+                                       oop_handle_offset, oop_maps, in_regs, in_sig_bt);
+  }
 
   //
   // We immediately shuffle the arguments so that any vm call we have to
@@ -1390,9 +1690,36 @@
 
 #endif /* ASSERT */
 
-
+  if (is_critical_native) {
+    // The mapping of Java and C arguments passed in registers is
+    // rotated by one, which helps when passing arguments to regular
+    // Java methods, but for critical natives that creates a cycle which
+    // can cause arguments to be killed before they are used.  Break
+    // the cycle by moving the first argument into a temporary
+    // register.
+    for (int i = 0; i < total_c_args; i++) {
+      if (in_regs[i].first()->is_Register() &&
+          in_regs[i].first()->as_Register() == rdi) {
+        __ mov(rbx, rdi);
+        in_regs[i].set1(rbx->as_VMReg());
+      }
+    }
+  }
+
+  // This may iterate in two different directions depending on the
+  // kind of native it is.  The reason is that for regular JNI natives
+  // the incoming and outgoing registers are offset upwards and for
+  // critical natives they are offset down.
   int c_arg = total_c_args - 1;
-  for ( int i = total_in_args - 1; i >= 0 ; i--, c_arg-- ) {
+  int stride = -1;
+  int init = total_in_args - 1;
+  if (is_critical_native) {
+    // stride forwards
+    c_arg = 0;
+    stride = 1;
+    init = 0;
+  }
+  for (int i = init, count = 0; count < total_in_args; i += stride, c_arg += stride, count++ ) {
 #ifdef ASSERT
     if (in_regs[i].first()->is_Register()) {
       assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
@@ -1407,7 +1734,20 @@
 #endif /* ASSERT */
     switch (in_sig_bt[i]) {
       case T_ARRAY:
+        if (is_critical_native) {
+          unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
+          c_arg++;
+#ifdef ASSERT
+          if (out_regs[c_arg].first()->is_Register()) {
+            reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
+          } else if (out_regs[c_arg].first()->is_XMMRegister()) {
+            freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true;
+          }
+#endif
+          break;
+        }
       case T_OBJECT:
+        assert(!is_critical_native, "no oop arguments");
         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
                     ((i == 0) && (!is_static)),
                     &receiver_offset);
@@ -1443,7 +1783,7 @@
 
   // Pre-load a static method's oop into r14.  Used both by locking code and
   // the normal JNI call code.
-  if (method->is_static()) {
+  if (method->is_static() && !is_critical_native) {
 
     //  load oop into a register
     __ movoop(oop_handle_reg, JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()));
@@ -1509,6 +1849,7 @@
   Label lock_done;
 
   if (method->is_synchronized()) {
+    assert(!is_critical_native, "unhandled");
 
 
     const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
@@ -1572,13 +1913,14 @@
 
 
   // get JNIEnv* which is first argument to native
-
-  __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset())));
+  if (!is_critical_native) {
+    __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset())));
+  }
 
   // Now set thread in native
   __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native);
 
-  __ call(RuntimeAddress(method->native_function()));
+  __ call(RuntimeAddress(native_func));
 
     // Either restore the MXCSR register after returning from the JNI Call
     // or verify that it wasn't changed.
@@ -1634,6 +1976,7 @@
     }
   }
 
+  Label after_transition;
 
   // check for safepoint operation in progress and/or pending suspend requests
   {
@@ -1659,16 +2002,28 @@
     __ mov(r12, rsp); // remember sp
     __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
     __ andptr(rsp, -16); // align stack as required by ABI
-    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
+    if (!is_critical_native) {
+      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
+    } else {
+      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
+    }
     __ mov(rsp, r12); // restore sp
     __ reinit_heapbase();
     // Restore any method result value
     restore_native_result(masm, ret_type, stack_slots);
+
+    if (is_critical_native) {
+      // The call above performed the transition to thread_in_Java so
+      // skip the transition logic below.
+      __ jmpb(after_transition);
+    }
+
     __ bind(Continue);
   }
 
   // change thread state
   __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java);
+  __ bind(after_transition);
 
   Label reguard;
   Label reguard_done;
@@ -1746,17 +2101,21 @@
       __ verify_oop(rax);
   }
 
-  // reset handle block
-  __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset()));
-  __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
+  if (!is_critical_native) {
+    // reset handle block
+    __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset()));
+    __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
+  }
 
   // pop our frame
 
   __ leave();
 
-  // Any exception pending?
-  __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
-  __ jcc(Assembler::notEqual, exception_pending);
+  if (!is_critical_native) {
+    // Any exception pending?
+    __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
+    __ jcc(Assembler::notEqual, exception_pending);
+  }
 
   // Return
 
@@ -1764,12 +2123,13 @@
 
   // Unexpected paths are out of line and go here
 
-  // forward the exception
-  __ bind(exception_pending);
-
-  // and forward the exception
-  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
-
+  if (!is_critical_native) {
+    // forward the exception
+    __ bind(exception_pending);
+
+    // and forward the exception
+    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+  }
 
   // Slow path locking & unlocking
   if (method->is_synchronized()) {
@@ -1876,6 +2236,11 @@
                                             (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
                                             in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
                                             oop_maps);
+
+  if (is_critical_native) {
+    nm->set_lazy_critical_native(true);
+  }
+
   return nm;
 
 }
--- a/src/cpu/zero/vm/frame_zero.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/cpu/zero/vm/frame_zero.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -418,7 +418,7 @@
   }
 }
 
-#ifdef ASSERT
+#ifndef PRODUCT
 
 void frame::describe_pd(FrameValues& values, int frame_no) {
 
--- a/src/os/solaris/vm/os_solaris.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/os/solaris/vm/os_solaris.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -4007,7 +4007,6 @@
     iaInfo->ia_uprilim = cur_class == new_class
                            ? IA_NOCHANGE : (pri_t)iaLimits.maxPrio;
     iaInfo->ia_mode    = IA_NOCHANGE;
-    iaInfo->ia_nice    = cur_class == new_class ? IA_NOCHANGE : NZERO;
     if (ThreadPriorityVerbose) {
       tty->print_cr("IA: [%d...%d] %d->%d\n",
                     iaLimits.minPrio, maxClamped, newPrio, iaInfo->ia_upri);
--- a/src/os/windows/vm/os_windows.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/os/windows/vm/os_windows.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -2088,7 +2088,6 @@
 #elif _M_AMD64
   PCONTEXT ctx = exceptionInfo->ContextRecord;
   address pc = (address)ctx->Rip;
-  NOT_PRODUCT(Events::log("idiv overflow exception at " INTPTR_FORMAT , pc));
   assert(pc[0] == 0xF7, "not an idiv opcode");
   assert((pc[1] & ~0x7) == 0xF8, "cannot handle non-register operands");
   assert(ctx->Rax == min_jint, "unexpected idiv exception");
@@ -2100,7 +2099,6 @@
 #else
   PCONTEXT ctx = exceptionInfo->ContextRecord;
   address pc = (address)ctx->Eip;
-  NOT_PRODUCT(Events::log("idiv overflow exception at " INTPTR_FORMAT , pc));
   assert(pc[0] == 0xF7, "not an idiv opcode");
   assert((pc[1] & ~0x7) == 0xF8, "cannot handle non-register operands");
   assert(ctx->Eax == min_jint, "unexpected idiv exception");
@@ -5336,4 +5334,3 @@
 }
 
 #endif
-
--- a/src/share/tools/ProjectCreator/BuildConfig.java	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/tools/ProjectCreator/BuildConfig.java	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -243,6 +243,7 @@
         sysDefines.add("_WINDOWS");
         sysDefines.add("HOTSPOT_BUILD_USER=\\\""+System.getProperty("user.name")+"\\\"");
         sysDefines.add("HOTSPOT_BUILD_TARGET=\\\""+get("Build")+"\\\"");
+        sysDefines.add("INCLUDE_TRACE");
         sysDefines.add("_JNI_IMPLEMENTATION_");
         if (vars.get("PlatformName").equals("Win32")) {
             sysDefines.add("HOTSPOT_LIB_ARCH=\\\"i386\\\"");
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1592,6 +1592,7 @@
   // this happened while running the JCK invokevirtual tests under doit.  TKR
   ciMethod* cha_monomorphic_target = NULL;
   ciMethod* exact_target = NULL;
+  Value better_receiver = NULL;
   if (UseCHA && DeoptC1 && klass->is_loaded() && target->is_loaded() &&
       !target->is_method_handle_invoke()) {
     Value receiver = NULL;
@@ -1653,6 +1654,18 @@
       ciInstanceKlass* singleton = NULL;
       if (target->holder()->nof_implementors() == 1) {
         singleton = target->holder()->implementor(0);
+
+        assert(holder->is_interface(), "invokeinterface to non interface?");
+        ciInstanceKlass* decl_interface = (ciInstanceKlass*)holder;
+        // the number of implementors for decl_interface is less or
+        // equal to the number of implementors for target->holder() so
+        // if number of implementors of target->holder() == 1 then
+        // number of implementors for decl_interface is 0 or 1. If
+        // it's 0 then no class implements decl_interface and there's
+        // no point in inlining.
+        if (!holder->is_loaded() || decl_interface->nof_implementors() != 1) {
+          singleton = NULL;
+        }
       }
       if (singleton) {
         cha_monomorphic_target = target->find_monomorphic_target(calling_klass, target->holder(), singleton);
@@ -1667,7 +1680,9 @@
           CheckCast* c = new CheckCast(klass, receiver, copy_state_for_exception());
           c->set_incompatible_class_change_check();
           c->set_direct_compare(klass->is_final());
-          append_split(c);
+          // pass the result of the checkcast so that the compiler has
+          // more accurate type info in the inlinee
+          better_receiver = append_split(c);
         }
       }
     }
@@ -1709,7 +1724,7 @@
       }
       if (!success) {
         // static binding => check if callee is ok
-        success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL));
+        success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL), better_receiver);
       }
       CHECK_BAILOUT();
 
@@ -3034,7 +3049,7 @@
 }
 
 
-bool GraphBuilder::try_inline(ciMethod* callee, bool holder_known) {
+bool GraphBuilder::try_inline(ciMethod* callee, bool holder_known, Value receiver) {
   // Clear out any existing inline bailout condition
   clear_inline_bailout();
 
@@ -3056,7 +3071,7 @@
   } else if (callee->is_abstract()) {
     INLINE_BAILOUT("abstract")
   } else {
-    return try_inline_full(callee, holder_known);
+    return try_inline_full(callee, holder_known, NULL, receiver);
   }
 }
 
@@ -3405,7 +3420,7 @@
 }
 
 
-bool GraphBuilder::try_inline_full(ciMethod* callee, bool holder_known, BlockBegin* cont_block) {
+bool GraphBuilder::try_inline_full(ciMethod* callee, bool holder_known, BlockBegin* cont_block, Value receiver) {
   assert(!callee->is_native(), "callee must not be native");
   if (CompilationPolicy::policy()->should_not_inline(compilation()->env(), callee)) {
     INLINE_BAILOUT("inlining prohibited by policy");
@@ -3541,6 +3556,9 @@
       Value  arg = caller_state->stack_at_inc(i);
       // NOTE: take base() of arg->type() to avoid problems storing
       // constants
+      if (receiver != NULL && par_no == 0) {
+        arg = receiver;
+      }
       store_local(callee_state, arg, arg->type()->base(), par_no);
     }
   }
@@ -3683,56 +3701,61 @@
       // Get the two MethodHandle inputs from the Phi.
       Value op1 = phi->operand_at(0);
       Value op2 = phi->operand_at(1);
-      ciMethodHandle* mh1 = op1->type()->as_ObjectType()->constant_value()->as_method_handle();
-      ciMethodHandle* mh2 = op2->type()->as_ObjectType()->constant_value()->as_method_handle();
-
-      // Set the callee to have access to the class and signature in
-      // the MethodHandleCompiler.
-      mh1->set_callee(callee);
-      mh1->set_caller(method());
-      mh2->set_callee(callee);
-      mh2->set_caller(method());
-
-      // Get adapters for the MethodHandles.
-      ciMethod* mh1_adapter = mh1->get_method_handle_adapter();
-      ciMethod* mh2_adapter = mh2->get_method_handle_adapter();
-
-      if (mh1_adapter != NULL && mh2_adapter != NULL) {
-        set_inline_cleanup_info();
-
-        // Build the If guard
-        BlockBegin* one = new BlockBegin(next_bci());
-        BlockBegin* two = new BlockBegin(next_bci());
-        BlockBegin* end = new BlockBegin(next_bci());
-        Instruction* iff = append(new If(phi, If::eql, false, op1, one, two, NULL, false));
-        block()->set_end(iff->as_BlockEnd());
-
-        // Connect up the states
-        one->merge(block()->end()->state());
-        two->merge(block()->end()->state());
-
-        // Save the state for the second inlinee
-        ValueStack* state_before = copy_state_before();
-
-        // Parse first adapter
-        _last = _block = one;
-        if (!try_inline_full(mh1_adapter, /*holder_known=*/ true, end)) {
-          restore_inline_cleanup_info();
-          block()->clear_end();  // remove appended iff
-          return false;
+      ObjectType* op1type = op1->type()->as_ObjectType();
+      ObjectType* op2type = op2->type()->as_ObjectType();
+
+      if (op1type->is_constant() && op2type->is_constant()) {
+        ciMethodHandle* mh1 = op1type->constant_value()->as_method_handle();
+        ciMethodHandle* mh2 = op2type->constant_value()->as_method_handle();
+
+        // Set the callee to have access to the class and signature in
+        // the MethodHandleCompiler.
+        mh1->set_callee(callee);
+        mh1->set_caller(method());
+        mh2->set_callee(callee);
+        mh2->set_caller(method());
+
+        // Get adapters for the MethodHandles.
+        ciMethod* mh1_adapter = mh1->get_method_handle_adapter();
+        ciMethod* mh2_adapter = mh2->get_method_handle_adapter();
+
+        if (mh1_adapter != NULL && mh2_adapter != NULL) {
+          set_inline_cleanup_info();
+
+          // Build the If guard
+          BlockBegin* one = new BlockBegin(next_bci());
+          BlockBegin* two = new BlockBegin(next_bci());
+          BlockBegin* end = new BlockBegin(next_bci());
+          Instruction* iff = append(new If(phi, If::eql, false, op1, one, two, NULL, false));
+          block()->set_end(iff->as_BlockEnd());
+
+          // Connect up the states
+          one->merge(block()->end()->state());
+          two->merge(block()->end()->state());
+
+          // Save the state for the second inlinee
+          ValueStack* state_before = copy_state_before();
+
+          // Parse first adapter
+          _last = _block = one;
+          if (!try_inline_full(mh1_adapter, /*holder_known=*/ true, end, NULL)) {
+            restore_inline_cleanup_info();
+            block()->clear_end();  // remove appended iff
+            return false;
+          }
+
+          // Parse second adapter
+          _last = _block = two;
+          _state = state_before;
+          if (!try_inline_full(mh2_adapter, /*holder_known=*/ true, end, NULL)) {
+            restore_inline_cleanup_info();
+            block()->clear_end();  // remove appended iff
+            return false;
+          }
+
+          connect_to_end(end);
+          return true;
         }
-
-        // Parse second adapter
-        _last = _block = two;
-        _state = state_before;
-        if (!try_inline_full(mh2_adapter, /*holder_known=*/ true, end)) {
-          restore_inline_cleanup_info();
-          block()->clear_end();  // remove appended iff
-          return false;
-        }
-
-        connect_to_end(end);
-        return true;
       }
     }
   }
--- a/src/share/vm/c1/c1_GraphBuilder.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/c1/c1_GraphBuilder.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -337,9 +337,9 @@
   void fill_sync_handler(Value lock, BlockBegin* sync_handler, bool default_handler = false);
 
   // inliners
-  bool try_inline(           ciMethod* callee, bool holder_known);
+  bool try_inline(           ciMethod* callee, bool holder_known, Value receiver = NULL);
   bool try_inline_intrinsics(ciMethod* callee);
-  bool try_inline_full(      ciMethod* callee, bool holder_known, BlockBegin* cont_block = NULL);
+  bool try_inline_full(      ciMethod* callee, bool holder_known, BlockBegin* cont_block, Value receiver);
   bool try_inline_jsr(int jsr_dest_bci);
 
   // JSR 292 support
--- a/src/share/vm/c1/c1_LIR.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/c1/c1_LIR.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1354,9 +1354,10 @@
   CodeStub*     _stub;   // if this is a branch to a stub, this is the stub
 
  public:
-  LIR_OpBranch(LIR_Condition cond, Label* lbl)
+  LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl)
     : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL)
     , _cond(cond)
+    , _type(type)
     , _label(lbl)
     , _block(NULL)
     , _ublock(NULL)
@@ -2053,7 +2054,7 @@
   void jump(CodeStub* stub) {
     append(new LIR_OpBranch(lir_cond_always, T_ILLEGAL, stub));
   }
-  void branch(LIR_Condition cond, Label* lbl)        { append(new LIR_OpBranch(cond, lbl)); }
+  void branch(LIR_Condition cond, BasicType type, Label* lbl)        { append(new LIR_OpBranch(cond, type, lbl)); }  // appends a LIR_OpBranch targeting lbl, forwarding cond and type
   void branch(LIR_Condition cond, BasicType type, BlockBegin* block) {
     assert(type != T_FLOAT && type != T_DOUBLE, "no fp comparisons");
     append(new LIR_OpBranch(cond, type, block));
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -2350,7 +2350,7 @@
     } else {
       LabelObj* L = new LabelObj();
       __ cmp(lir_cond_less, value, low_key);
-      __ branch(lir_cond_less, L->label());
+      __ branch(lir_cond_less, T_INT, L->label());
       __ cmp(lir_cond_lessEqual, value, high_key);
       __ branch(lir_cond_lessEqual, T_INT, dest);
       __ branch_destination(L->label());
--- a/src/share/vm/c1/c1_Runtime1.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/c1/c1_Runtime1.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -413,8 +413,9 @@
     }
     bci = branch_bci + offset;
   }
-
+  assert(!HAS_PENDING_EXCEPTION, "Should not have any exceptions pending");
   osr_nm = CompilationPolicy::policy()->event(enclosing_method, method, branch_bci, bci, level, nm, THREAD);
+  assert(!HAS_PENDING_EXCEPTION, "Event handler should not throw any exceptions");
   return osr_nm;
 }
 
@@ -596,7 +597,6 @@
 
 JRT_ENTRY(void, Runtime1::throw_range_check_exception(JavaThread* thread, int index))
   NOT_PRODUCT(_throw_range_check_exception_count++;)
-  Events::log("throw_range_check");
   char message[jintAsStringSize];
   sprintf(message, "%d", index);
   SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_ArrayIndexOutOfBoundsException(), message);
@@ -605,7 +605,6 @@
 
 JRT_ENTRY(void, Runtime1::throw_index_exception(JavaThread* thread, int index))
   NOT_PRODUCT(_throw_index_exception_count++;)
-  Events::log("throw_index");
   char message[16];
   sprintf(message, "%d", index);
   SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_IndexOutOfBoundsException(), message);
@@ -803,11 +802,7 @@
   // Note also that in the presence of inlining it is not guaranteed
   // that caller_method() == caller_code->method()
 
-
   int bci = vfst.bci();
-
-  Events::log("patch_code @ " INTPTR_FORMAT , caller_frame.pc());
-
   Bytecodes::Code code = caller_method()->java_code_at(bci);
 
 #ifndef PRODUCT
--- a/src/share/vm/c1/c1_ValueMap.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/c1/c1_ValueMap.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -125,6 +125,7 @@
             // otherwise it is possible that they are not evaluated
             f->pin(Instruction::PinGlobalValueNumbering);
           }
+          assert(x->type()->tag() == f->type()->tag(), "should have same type");
 
           return f;
 
--- a/src/share/vm/ci/bcEscapeAnalyzer.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/ci/bcEscapeAnalyzer.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -359,7 +359,7 @@
       case Bytecodes::_nop:
         break;
       case Bytecodes::_aconst_null:
-        state.apush(empty_map);
+        state.apush(unknown_obj);
         break;
       case Bytecodes::_iconst_m1:
       case Bytecodes::_iconst_0:
@@ -392,6 +392,8 @@
         if (tag.is_long() || tag.is_double()) {
           // Only longs and doubles use 2 stack slots.
           state.lpush();
+        } else if (tag.basic_type() == T_OBJECT) {
+          state.apush(unknown_obj);
         } else {
           state.spush();
         }
--- a/src/share/vm/ci/ciEnv.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/ci/ciEnv.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -284,6 +284,20 @@
   // Return state of appropriate compilability
   int compilable() { return _compilable; }
 
+  // Maps the current _compilable state to a short retry note;
+  // yields NULL when the method is still compilable.
+  const char* retry_message() const {
+    if (_compilable == ciEnv::MethodCompilable) {
+      return NULL;
+    } else if (_compilable == ciEnv::MethodCompilable_never) {
+      return "not retryable";
+    } else if (_compilable == ciEnv::MethodCompilable_not_at_tier) {
+      return "retry at different tier";
+    }
+    ShouldNotReachHere();  // any other value is unexpected
+    return NULL;
+  }
+
   bool break_at_compile() { return _break_at_compile; }
   void set_break_at_compile(bool z) { _break_at_compile = z; }
 
--- a/src/share/vm/classfile/javaClasses.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/classfile/javaClasses.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1347,7 +1347,13 @@
     return _backtrace();
   }
 
-  inline void push(methodOop method, short bci, TRAPS) {
+  inline void push(methodOop method, int bci, TRAPS) {
+    // Smear the -1 bci to 0 since the array only holds unsigned
+    // shorts.  The later line number lookup would just smear the -1
+    // to a 0 even if it could be recorded.
+    if (bci == SynchronizationEntryBCI) bci = 0;
+    assert(bci == (jushort)bci, "doesn't fit");
+
     if (_index >= trace_chunk_size) {
       methodHandle mhandle(THREAD, method);
       expand(CHECK);
@@ -1574,8 +1580,13 @@
   int chunk_count = 0;
 
   for (;!st.at_end(); st.next()) {
-    // add element
-    bcis->ushort_at_put(chunk_count, st.bci());
+    // Add entry and smear the -1 bci to 0 since the array only holds
+    // unsigned shorts.  The later line number lookup would just smear
+    // the -1 to a 0 even if it could be recorded.
+    int bci = st.bci();
+    if (bci == SynchronizationEntryBCI) bci = 0;
+    assert(bci == (jushort)bci, "doesn't fit");
+    bcis->ushort_at_put(chunk_count, bci);
     methods->obj_at_put(chunk_count, st.method());
 
     chunk_count++;
--- a/src/share/vm/classfile/vmSymbols.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/classfile/vmSymbols.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -284,6 +284,7 @@
   template(run_method_name,                           "run")                                      \
   template(exit_method_name,                          "exit")                                     \
   template(add_method_name,                           "add")                                      \
+  template(remove_method_name,                        "remove")                                   \
   template(parent_name,                               "parent")                                   \
   template(threads_name,                              "threads")                                  \
   template(groups_name,                               "groups")                                   \
--- a/src/share/vm/code/compiledIC.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/code/compiledIC.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -165,7 +165,6 @@
                    instruction_address(), method->print_value_string(), entry);
   }
 
-  Events::log("compiledIC " INTPTR_FORMAT " --> megamorphic " INTPTR_FORMAT, this, (address)method());
   // We can't check this anymore. With lazy deopt we could have already
   // cleaned this IC entry before we even return. This is possible if
   // we ran out of space in the inline cache buffer trying to do the
--- a/src/share/vm/code/nmethod.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/code/nmethod.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -462,6 +462,7 @@
   _speculatively_disconnected = 0;
   _has_unsafe_access          = 0;
   _has_method_handle_invokes  = 0;
+  _lazy_critical_native       = 0;
   _marked_for_deoptimization  = 0;
   _lock_count                 = 0;
   _stack_traversal_mark       = 0;
@@ -704,7 +705,6 @@
       xtty->tail("print_native_nmethod");
     }
   }
-  Events::log("Create nmethod " INTPTR_FORMAT, this);
 }
 
 // For dtrace wrappers
@@ -781,7 +781,6 @@
       xtty->tail("print_dtrace_nmethod");
     }
   }
-  Events::log("Create nmethod " INTPTR_FORMAT, this);
 }
 #endif // def HAVE_DTRACE_H
 
@@ -889,13 +888,6 @@
   if (printnmethods || PrintDebugInfo || PrintRelocations || PrintDependencies || PrintExceptionHandlers) {
     print_nmethod(printnmethods);
   }
-
-  // Note: Do not verify in here as the CodeCache_lock is
-  //       taken which would conflict with the CompiledIC_lock
-  //       which taken during the verification of call sites.
-  //       (was bug - gri 10/25/99)
-
-  Events::log("Create nmethod " INTPTR_FORMAT, this);
 }
 
 
@@ -1386,7 +1378,7 @@
   assert_locked_or_safepoint(CodeCache_lock);
 
   // completely deallocate this method
-  EventMark m("flushing nmethod " INTPTR_FORMAT " %s", this, "");
+  Events::log(JavaThread::current(), "flushing nmethod " INTPTR_FORMAT, this);
   if (PrintMethodFlushing) {
     tty->print_cr("*flushing nmethod %3d/" INTPTR_FORMAT ". Live blobs:" UINT32_FORMAT "/Free CodeCache:" SIZE_FORMAT "Kb",
         _compile_id, this, CodeCache::nof_blobs(), CodeCache::unallocated_capacity()/1024);
--- a/src/share/vm/code/nmethod.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/code/nmethod.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -175,6 +175,7 @@
   // set during construction
   unsigned int _has_unsafe_access:1;         // May fault due to unsafe access.
   unsigned int _has_method_handle_invokes:1; // Has this method MethodHandle invokes?
+  unsigned int _lazy_critical_native:1;      // Lazy JNI critical native
 
   // Protected by Patching_lock
   unsigned char _state;                      // {alive, not_entrant, zombie, unloaded}
@@ -430,7 +431,10 @@
   void  set_has_method_handle_invokes(bool z)     { _has_method_handle_invokes = z; }
 
   bool  is_speculatively_disconnected() const     { return _speculatively_disconnected; }
-  void  set_speculatively_disconnected(bool z)     { _speculatively_disconnected = z; }
+  void  set_speculatively_disconnected(bool z)    { _speculatively_disconnected = z; }
+
+  bool  is_lazy_critical_native() const           { return _lazy_critical_native; }  // reads the _lazy_critical_native flag bit
+  void  set_lazy_critical_native(bool z)          { _lazy_critical_native = z; }     // stores z into the _lazy_critical_native flag bit
 
   int   comp_level() const                        { return _comp_level; }
 
--- a/src/share/vm/compiler/compileBroker.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/compiler/compileBroker.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -44,6 +44,7 @@
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/sweeper.hpp"
 #include "utilities/dtrace.hpp"
+#include "utilities/events.hpp"
 #ifdef COMPILER1
 #include "c1/c1_Compiler.hpp"
 #endif
@@ -189,6 +190,43 @@
 GrowableArray<CompilerThread*>* CompileBroker::_method_threads = NULL;
 
 
+// Event log titled "Compilation events"; entries are recorded through the inherited log().
+class CompilationLog : public StringEventLog {
+ public:
+  CompilationLog() : StringEventLog("Compilation events") {}
+  // Logs the short-form print_compilation() text for task on behalf of thread.
+  void log_compile(JavaThread* thread, CompileTask* task) {
+    StringLogMessage lm;
+    stringStream msg = lm.stream();
+    // msg.time_stamp().update_to(tty->time_stamp().ticks());
+    task->print_compilation(&msg, true);
+    log(thread, "%s", (const char*)lm);
+  }
+  // Logs the address of nm together with its [code_begin, code_end] range.
+  void log_nmethod(JavaThread* thread, nmethod* nm) {
+    // Whitespace is required before INTPTR_FORMAT: C++11 lexes "..."IDENT as a literal suffix.
+    log(thread, "nmethod " INTPTR_FORMAT " code [" INTPTR_FORMAT ", " INTPTR_FORMAT "]",
+        nm, nm->code_begin(), nm->code_end());
+  }
+  // Logs a "COMPILE SKIPPED" entry for task with reason and, when non-NULL, retry_message.
+  void log_failure(JavaThread* thread, CompileTask* task, const char* reason, const char* retry_message) {
+    StringLogMessage lm;
+    lm.print("%4d   COMPILE SKIPPED: %s", task->compile_id(), reason);
+    if (retry_message != NULL) {
+      lm.append(" (%s)", retry_message);
+    }
+    lm.print("\n");  // NOTE(review): print() after print()/append() - confirm it does not overwrite the text built above
+    log(thread, "%s", (const char*)lm);
+  }
+};
+
+static CompilationLog* _compilation_log = NULL;
+
+void compileBroker_init() {
+  // Allocate the compilation event log only when LogEvents is enabled.
+  if (!LogEvents) return;
+  _compilation_log = new CompilationLog();
+}
+
 CompileTaskWrapper::CompileTaskWrapper(CompileTask* task) {
   CompilerThread* thread = CompilerThread::current();
   thread->set_task(task);
@@ -326,8 +364,12 @@
 
 // ------------------------------------------------------------------
 // CompileTask::print_compilation_impl
-void CompileTask::print_compilation_impl(outputStream* st, methodOop method, int compile_id, int comp_level, bool is_osr_method, int osr_bci, bool is_blocking, const char* msg) {
-  st->print("%7d ", (int) st->time_stamp().milliseconds());  // print timestamp
+void CompileTask::print_compilation_impl(outputStream* st, methodOop method, int compile_id, int comp_level,
+                                         bool is_osr_method, int osr_bci, bool is_blocking,
+                                         const char* msg, bool short_form) {
+  if (!short_form) {
+    st->print("%7d ", (int) st->time_stamp().milliseconds());  // print timestamp
+  }
   st->print("%4d ", compile_id);    // print compilation number
 
   // For unloaded methods the transition to zombie occurs after the
@@ -370,7 +412,9 @@
   if (msg != NULL) {
     st->print("   %s", msg);
   }
-  st->cr();
+  if (!short_form) {
+    st->cr();
+  }
 }
 
 // ------------------------------------------------------------------
@@ -426,12 +470,12 @@
 
 // ------------------------------------------------------------------
 // CompileTask::print_compilation
-void CompileTask::print_compilation(outputStream* st) {
+void CompileTask::print_compilation(outputStream* st, bool short_form) {
   oop rem = JNIHandles::resolve(method_handle());
   assert(rem != NULL && rem->is_method(), "must be");
   methodOop method = (methodOop) rem;
   bool is_osr_method = osr_bci() != InvocationEntryBci;
-  print_compilation_impl(st, method, compile_id(), comp_level(), is_osr_method, osr_bci(), is_blocking());
+  print_compilation_impl(st, method, compile_id(), comp_level(), is_osr_method, osr_bci(), is_blocking(), NULL, short_form);
 }
 
 // ------------------------------------------------------------------
@@ -962,7 +1006,7 @@
                                         methodHandle hot_method,
                                         int hot_count,
                                         const char* comment,
-                                        TRAPS) {
+                                        Thread* thread) {
   // do nothing if compiler thread(s) is not available
   if (!_initialized ) {
     return;
@@ -1038,7 +1082,7 @@
 
   // Acquire our lock.
   {
-    MutexLocker locker(queue->lock(), THREAD);
+    MutexLocker locker(queue->lock(), thread);
 
     // Make sure the method has not slipped into the queues since
     // last we checked; note that those checks were "fast bail-outs".
@@ -1120,7 +1164,7 @@
 nmethod* CompileBroker::compile_method(methodHandle method, int osr_bci,
                                        int comp_level,
                                        methodHandle hot_method, int hot_count,
-                                       const char* comment, TRAPS) {
+                                       const char* comment, Thread* THREAD) {
   // make sure arguments make sense
   assert(method->method_holder()->klass_part()->oop_is_instance(), "not an instance method");
   assert(osr_bci == InvocationEntryBci || (0 <= osr_bci && osr_bci < method->code_size()), "bci out of range");
@@ -1174,10 +1218,10 @@
   assert(!HAS_PENDING_EXCEPTION, "No exception should be present");
   // some prerequisites that are compiler specific
   if (compiler(comp_level)->is_c2() || compiler(comp_level)->is_shark()) {
-    method->constants()->resolve_string_constants(CHECK_0);
+    method->constants()->resolve_string_constants(CHECK_AND_CLEAR_NULL);
     // Resolve all classes seen in the signature of the method
     // we are compiling.
-    methodOopDesc::load_signature_classes(method, CHECK_0);
+    methodOopDesc::load_signature_classes(method, CHECK_AND_CLEAR_NULL);
   }
 
   // If the method is native, do the lookup in the thread requesting
@@ -1231,7 +1275,7 @@
       return NULL;
     }
   } else {
-    compile_method_base(method, osr_bci, comp_level, hot_method, hot_count, comment, CHECK_0);
+    compile_method_base(method, osr_bci, comp_level, hot_method, hot_count, comment, THREAD);
   }
 
   // return requested nmethod
@@ -1649,6 +1693,10 @@
   CompilerThread* thread = CompilerThread::current();
   ResourceMark rm(thread);
 
+  if (LogEvents) {
+    _compilation_log->log_compile(thread, task);
+  }
+
   // Common flags.
   uint compile_id = task->compile_id();
   int osr_bci = task->osr_bci();
@@ -1717,22 +1765,30 @@
       ci_env.record_method_not_compilable("compile failed", !TieredCompilation);
     }
 
+    // Copy this bit to the enclosing block:
+    compilable = ci_env.compilable();
+
     if (ci_env.failing()) {
-      // Copy this bit to the enclosing block:
-      compilable = ci_env.compilable();
+      const char* retry_message = ci_env.retry_message();
+      if (_compilation_log != NULL) {
+        _compilation_log->log_failure(thread, task, ci_env.failure_reason(), retry_message);
+      }
       if (PrintCompilation) {
-        const char* reason = ci_env.failure_reason();
-        if (compilable == ciEnv::MethodCompilable_not_at_tier) {
-          tty->print_cr("%4d   COMPILE SKIPPED: %s (retry at different tier)", compile_id, reason);
-        } else if (compilable == ciEnv::MethodCompilable_never) {
-          tty->print_cr("%4d   COMPILE SKIPPED: %s (not retryable)", compile_id, reason);
-        } else if (compilable == ciEnv::MethodCompilable) {
-          tty->print_cr("%4d   COMPILE SKIPPED: %s", compile_id, reason);
+        tty->print("%4d   COMPILE SKIPPED: %s", compile_id, ci_env.failure_reason());
+        if (retry_message != NULL) {
+          tty->print(" (%s)", retry_message);
         }
+        tty->cr();
       }
     } else {
       task->mark_success();
       task->set_num_inlined_bytecodes(ci_env.num_inlined_bytecodes());
+      if (_compilation_log != NULL) {
+        nmethod* code = task->code();
+        if (code != NULL) {
+          _compilation_log->log_nmethod(thread, code);
+        }
+      }
     }
   }
   pop_jni_handle_block();
--- a/src/share/vm/compiler/compileBroker.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/compiler/compileBroker.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -98,12 +98,16 @@
   void         set_prev(CompileTask* prev)       { _prev = prev; }
 
 private:
-  static void  print_compilation_impl(outputStream* st, methodOop method, int compile_id, int comp_level, bool is_osr_method = false, int osr_bci = -1, bool is_blocking = false, const char* msg = NULL);
+  static void  print_compilation_impl(outputStream* st, methodOop method, int compile_id, int comp_level,
+                                      bool is_osr_method = false, int osr_bci = -1, bool is_blocking = false,
+                                      const char* msg = NULL, bool short_form = false);
 
 public:
-  void         print_compilation(outputStream* st = tty);
+  void         print_compilation(outputStream* st = tty, bool short_form = false);
   static void  print_compilation(outputStream* st, const nmethod* nm, const char* msg = NULL) {
-    print_compilation_impl(st, nm->method(), nm->compile_id(), nm->comp_level(), nm->is_osr_method(), nm->is_osr_method() ? nm->osr_entry_bci() : -1, /*is_blocking*/ false, msg);
+    print_compilation_impl(st, nm->method(), nm->compile_id(), nm->comp_level(),
+                           nm->is_osr_method(), nm->is_osr_method() ? nm->osr_entry_bci() : -1, /*is_blocking*/ false,
+                           msg);
   }
 
   static void  print_inlining(outputStream* st, ciMethod* method, int inline_level, int bci, const char* msg = NULL);
@@ -333,7 +337,7 @@
                                   methodHandle hot_method,
                                   int hot_count,
                                   const char* comment,
-                                  TRAPS);
+                                  Thread* thread);
   static CompileQueue* compile_queue(int comp_level) {
     if (is_c2_compile(comp_level)) return _c2_method_queue;
     if (is_c1_compile(comp_level)) return _c1_method_queue;
@@ -363,7 +367,7 @@
                                  int comp_level,
                                  methodHandle hot_method,
                                  int hot_count,
-                                 const char* comment, TRAPS);
+                                 const char* comment, Thread* thread);
 
   static void compiler_thread_loop();
 
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -5594,6 +5594,7 @@
     GenCollectedHeap::StrongRootsScope srs(gch);
     workers->run_task(&tsk);
   } else {
+    ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), false);
     GenCollectedHeap::StrongRootsScope srs(gch);
     tsk.work(0);
   }
@@ -5608,6 +5609,8 @@
   ResourceMark rm;
   HandleMark   hm;
   GenCollectedHeap* gch = GenCollectedHeap::heap();
+  ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), false);
+
   MarkRefsIntoAndScanClosure
     mrias_cl(_span, ref_processor(), &_markBitMap, &_modUnionTable,
              &_markStack, &_revisitStack, this,
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -42,8 +42,7 @@
 #include "runtime/handles.inline.hpp"
 #include "runtime/java.hpp"
 
-//
-// CMS Bit Map Wrapper
+// Concurrent marking bit map wrapper
 
 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
   _bm((uintptr_t*)NULL,0),
@@ -53,13 +52,13 @@
   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
 
-  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
+  guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
   // For now we'll just commit all of the bit map up fromt.
   // Later on we'll try to be more parsimonious with swap.
   guarantee(_virtual_space.initialize(brs, brs.size()),
-            "couldn't reseve backing store for CMS bit map");
+            "couldn't reseve backing store for concurrent marking bit map");
   assert(_virtual_space.committed_size() == brs.size(),
-         "didn't reserve backing store for all of CMS bit map?");
+         "didn't reserve backing store for all of concurrent marking bit map?");
   _bm.set_map((uintptr_t*)_virtual_space.low());
   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
          _bmWordSize, "inconsistency in bit map sizing");
@@ -104,17 +103,6 @@
   return (int) (diff >> _shifter);
 }
 
-bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
-  HeapWord* left  = MAX2(_bmStartWord, mr.start());
-  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
-  if (right > left) {
-    // Right-open interval [leftOffset, rightOffset).
-    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
-  } else {
-    return true;
-  }
-}
-
 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                              size_t    from_start_index,
                                              HeapWord* to_start_word,
@@ -431,8 +419,6 @@
     assert(newOop->is_oop(), "Expected an oop");
     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
            "only grey objects on this stack");
-    // iterate over the oops in this oop, marking and pushing
-    // the ones in CMS generation.
     newOop->oop_iterate(cl);
     if (yield_after && _cm->do_yield_check()) {
       res = false;
@@ -474,6 +460,84 @@
               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 }
 
+CMRootRegions::CMRootRegions() :
+  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
+  _should_abort(false),  _next_survivor(NULL) { }
+
+void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
+  _young_list = g1h->young_list();
+  _cm = cm;
+}
+
+void CMRootRegions::prepare_for_scan() {
+  assert(!scan_in_progress(), "pre-condition");
+
+  // Currently, only survivors can be root regions.
+  assert(_next_survivor == NULL, "pre-condition");
+  _next_survivor = _young_list->first_survivor_region();
+  _scan_in_progress = (_next_survivor != NULL);
+  _should_abort = false;
+}
+
+HeapRegion* CMRootRegions::claim_next() {
+  if (_should_abort) {
+    // If someone has set the should_abort flag, we return NULL to
+    // force the caller to bail out of their loop.
+    return NULL;
+  }
+
+  // Currently, only survivors can be root regions.
+  HeapRegion* res = _next_survivor;
+  if (res != NULL) {
+    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
+    // Read it again in case it changed while we were waiting for the lock.
+    res = _next_survivor;
+    if (res != NULL) {
+      if (res == _young_list->last_survivor_region()) {
+        // We just claimed the last survivor so store NULL to indicate
+        // that we're done.
+        _next_survivor = NULL;
+      } else {
+        _next_survivor = res->get_next_young_region();
+      }
+    } else {
+      // Someone else claimed the last survivor while we were trying
+      // to take the lock so nothing else to do.
+    }
+  }
+  assert(res == NULL || res->is_survivor(), "post-condition");
+
+  return res;
+}
+
+void CMRootRegions::scan_finished() {
+  assert(scan_in_progress(), "pre-condition");
+
+  // Currently, only survivors can be root regions.
+  if (!_should_abort) {
+    assert(_next_survivor == NULL, "we should have claimed all survivors");
+  }
+  _next_survivor = NULL;
+
+  {
+    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
+    _scan_in_progress = false;
+    RootRegionScan_lock->notify_all();
+  }
+}
+
+bool CMRootRegions::wait_until_scan_finished() {
+  if (!scan_in_progress()) return false;
+
+  {
+    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
+    while (scan_in_progress()) {
+      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
+    }
+  }
+  return true;
+}
+
 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 #endif // _MSC_VER
@@ -498,6 +562,7 @@
   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),
+
   _prevMarkBitMap(&_markBitMap1),
   _nextMarkBitMap(&_markBitMap2),
   _at_least_one_mark_complete(false),
@@ -526,7 +591,11 @@
   _cleanup_times(),
   _total_counting_time(0.0),
   _total_rs_scrub_time(0.0),
-  _parallel_workers(NULL) {
+
+  _parallel_workers(NULL),
+
+  _count_card_bitmaps(NULL),
+  _count_marked_bytes(NULL) {
   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
   if (verbose_level < no_verbose) {
     verbose_level = no_verbose;
@@ -557,9 +626,16 @@
   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   satb_qs.set_buffer_size(G1SATBBufferSize);
 
+  _root_regions.init(_g1h, this);
+
   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 
+  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_task_num);
+  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num);
+
+  BitMap::idx_t card_bm_size = _card_bm.size();
+
   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   _active_tasks = _max_task_num;
   for (int i = 0; i < (int) _max_task_num; ++i) {
@@ -567,10 +643,26 @@
     task_queue->initialize();
     _task_queues->register_queue(i, task_queue);
 
-    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
+    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
+    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions);
+
+    _tasks[i] = new CMTask(i, this,
+                           _count_marked_bytes[i],
+                           &_count_card_bitmaps[i],
+                           task_queue, _task_queues);
+
     _accum_task_vtime[i] = 0.0;
   }
 
+  // Calculate the card number for the bottom of the heap. Used
+  // in biasing indexes into the accounting card bitmaps.
+  _heap_bottom_card_num =
+    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
+                                CardTableModRefBS::card_shift);
+
+  // Clear all the liveness counting data
+  clear_all_count_data();
+
   if (ConcGCThreads > ParallelGCThreads) {
     vm_exit_during_initialization("Can't have more ConcGCThreads "
                                   "than ParallelGCThreads.");
@@ -750,11 +842,6 @@
   ShouldNotReachHere();
 }
 
-// This closure is used to mark refs into the g1 generation
-// from external roots in the CMS bit map.
-// Called at the first checkpoint.
-//
-
 void ConcurrentMark::clearNextBitmap() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   G1CollectorPolicy* g1p = g1h->g1_policy();
@@ -794,6 +881,9 @@
     assert(!g1h->mark_in_progress(), "invariant");
   }
 
+  // Clear the liveness counting data
+  clear_all_count_data();
+
   // Repeat the asserts from above.
   guarantee(cmThread()->during_cycle(), "invariant");
   guarantee(!g1h->mark_in_progress(), "invariant");
@@ -854,6 +944,8 @@
   satb_mq_set.set_active_all_threads(true, /* new active value */
                                      false /* expected_active */);
 
+  _root_regions.prepare_for_scan();
+
   // update_g1_committed() will be called at the end of an evac pause
   // when marking is on. So, it's also called at the end of the
   // initial-mark pause to update the heap end, if the heap expands
@@ -1147,6 +1239,69 @@
   return 0;
 }
 
+void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
+  // Currently, only survivors can be root regions.
+  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
+  G1RootRegionScanClosure cl(_g1h, this, worker_id);
+
+  const uintx interval = PrefetchScanIntervalInBytes;
+  HeapWord* curr = hr->bottom();
+  const HeapWord* end = hr->top();
+  while (curr < end) {
+    Prefetch::read(curr, interval);
+    oop obj = oop(curr);
+    int size = obj->oop_iterate(&cl);
+    assert(size == obj->size(), "sanity");
+    curr += size;
+  }
+}
+
+class CMRootRegionScanTask : public AbstractGangTask {
+private:
+  ConcurrentMark* _cm;
+
+public:
+  CMRootRegionScanTask(ConcurrentMark* cm) :
+    AbstractGangTask("Root Region Scan"), _cm(cm) { }
+
+  void work(uint worker_id) {
+    assert(Thread::current()->is_ConcurrentGC_thread(),
+           "this should only be done by a conc GC thread");
+
+    CMRootRegions* root_regions = _cm->root_regions();
+    HeapRegion* hr = root_regions->claim_next();
+    while (hr != NULL) {
+      _cm->scanRootRegion(hr, worker_id);
+      hr = root_regions->claim_next();
+    }
+  }
+};
+
+void ConcurrentMark::scanRootRegions() {
+  // scan_in_progress() will have been set to true only if there was
+  // at least one root region to scan. So, if it's false, we
+  // should not attempt to do any further work.
+  if (root_regions()->scan_in_progress()) {
+    _parallel_marking_threads = calc_parallel_marking_threads();
+    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
+           "Maximum number of marking threads exceeded");
+    uint active_workers = MAX2(1U, parallel_marking_threads());
+
+    CMRootRegionScanTask task(this);
+    if (parallel_marking_threads() > 0) {
+      _parallel_workers->set_active_workers((int) active_workers);
+      _parallel_workers->run_task(&task);
+    } else {
+      task.work(0);
+    }
+
+    // It's possible that has_aborted() is true here without actually
+    // aborting the survivor scan earlier. This is OK as it's
+    // mainly used for sanity checking.
+    root_regions()->scan_finished();
+  }
+}
+
 void ConcurrentMark::markFromRoots() {
   // we might be tempted to assert that:
   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
@@ -1225,6 +1380,10 @@
       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
     }
   } else {
+    // Aggregate the per-task counting data that we have accumulated
+    // while marking.
+    aggregate_count_data();
+
     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
     // We're done with marking.
     // This is the end of  the marking cycle, we're expected all
@@ -1262,48 +1421,41 @@
   g1p->record_concurrent_mark_remark_end();
 }
 
-#define CARD_BM_TEST_MODE 0
-
+// Used to calculate the # live objects per region
+// for verification purposes
 class CalcLiveObjectsClosure: public HeapRegionClosure {
 
   CMBitMapRO* _bm;
   ConcurrentMark* _cm;
-  bool _changed;
-  bool _yield;
-  size_t _words_done;
+  BitMap* _region_bm;
+  BitMap* _card_bm;
+
+  // Debugging
+  size_t _tot_words_done;
   size_t _tot_live;
   size_t _tot_used;
-  size_t _regions_done;
-  double _start_vtime_sec;
-
-  BitMap* _region_bm;
-  BitMap* _card_bm;
+
+  size_t _region_marked_bytes;
+
   intptr_t _bottom_card_num;
-  bool _final;
 
   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
-    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
-#if CARD_BM_TEST_MODE
-      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
-#else
-      _card_bm->par_at_put(i - _bottom_card_num, 1);
-#endif
+    assert(start_card_num <= last_card_num, "sanity");
+    BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
+    BitMap::idx_t last_idx = last_card_num - _bottom_card_num;
+
+    for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
+      _card_bm->par_at_put(i, 1);
     }
   }
 
 public:
-  CalcLiveObjectsClosure(bool final,
-                         CMBitMapRO *bm, ConcurrentMark *cm,
+  CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
                          BitMap* region_bm, BitMap* card_bm) :
-    _bm(bm), _cm(cm), _changed(false), _yield(true),
-    _words_done(0), _tot_live(0), _tot_used(0),
-    _region_bm(region_bm), _card_bm(card_bm),_final(final),
-    _regions_done(0), _start_vtime_sec(0.0)
-  {
-    _bottom_card_num =
-      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
-               CardTableModRefBS::card_shift);
-  }
+    _bm(bm), _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
+    _region_marked_bytes(0), _tot_words_done(0),
+    _tot_live(0), _tot_used(0),
+    _bottom_card_num(cm->heap_bottom_card_num()) { }
 
   // It takes a region that's not empty (i.e., it has at least one
   // live object in it and sets its corresponding bit on the region
@@ -1319,29 +1471,16 @@
       _region_bm->par_at_put((BitMap::idx_t) index, true);
     } else {
       // Starts humongous case: calculate how many regions are part of
-      // this humongous region and then set the bit range. It might
-      // have been a bit more efficient to look at the object that
-      // spans these humongous regions to calculate their number from
-      // the object's size. However, it's a good idea to calculate
-      // this based on the metadata itself, and not the region
-      // contents, so that this code is not aware of what goes into
-      // the humongous regions (in case this changes in the future).
+      // this humongous region and then set the bit range.
       G1CollectedHeap* g1h = G1CollectedHeap::heap();
-      size_t end_index = index + 1;
-      while (end_index < g1h->n_regions()) {
-        HeapRegion* chr = g1h->region_at(end_index);
-        if (!chr->continuesHumongous()) break;
-        end_index += 1;
-      }
+      HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
+      size_t end_index = last_hr->hrs_index() + 1;
       _region_bm->par_at_put_range((BitMap::idx_t) index,
                                    (BitMap::idx_t) end_index, true);
     }
   }
 
   bool doHeapRegion(HeapRegion* hr) {
-    if (!_final && _regions_done == 0) {
-      _start_vtime_sec = os::elapsedVTime();
-    }
 
     if (hr->continuesHumongous()) {
       // We will ignore these here and process them when their
@@ -1355,48 +1494,41 @@
     }
 
     HeapWord* nextTop = hr->next_top_at_mark_start();
-    HeapWord* start   = hr->top_at_conc_mark_count();
-    assert(hr->bottom() <= start && start <= hr->end() &&
-           hr->bottom() <= nextTop && nextTop <= hr->end() &&
-           start <= nextTop,
-           "Preconditions.");
-    // Otherwise, record the number of word's we'll examine.
+    HeapWord* start   = hr->bottom();
+
+    assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
+           err_msg("Preconditions not met - "
+                   "start: "PTR_FORMAT", nextTop: "PTR_FORMAT", end: "PTR_FORMAT,
+                   start, nextTop, hr->end()));
+
+    // Record the number of words we'll examine.
     size_t words_done = (nextTop - start);
+
     // Find the first marked object at or after "start".
     start = _bm->getNextMarkedWordAddress(start, nextTop);
+
     size_t marked_bytes = 0;
 
     // Below, the term "card num" means the result of shifting an address
     // by the card shift -- address 0 corresponds to card number 0.  One
     // must subtract the card num of the bottom of the heap to obtain a
     // card table index.
+
     // The first card num of the sequence of live cards currently being
     // constructed.  -1 ==> no sequence.
     intptr_t start_card_num = -1;
+
     // The last card num of the sequence of live cards currently being
     // constructed.  -1 ==> no sequence.
     intptr_t last_card_num = -1;
 
     while (start < nextTop) {
-      if (_yield && _cm->do_yield_check()) {
-        // We yielded.  It might be for a full collection, in which case
-        // all bets are off; terminate the traversal.
-        if (_cm->has_aborted()) {
-          _changed = false;
-          return true;
-        } else {
-          // Otherwise, it might be a collection pause, and the region
-          // we're looking at might be in the collection set.  We'll
-          // abandon this region.
-          return false;
-        }
-      }
       oop obj = oop(start);
       int obj_sz = obj->size();
+
       // The card num of the start of the current object.
       intptr_t obj_card_num =
         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
-
       HeapWord* obj_last = start + obj_sz - 1;
       intptr_t obj_last_card_num =
         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
@@ -1414,110 +1546,404 @@
             start_card_num = obj_card_num;
           }
         }
-#if CARD_BM_TEST_MODE
-        /*
-        gclog_or_tty->print_cr("Setting bits from %d/%d.",
-                               obj_card_num - _bottom_card_num,
-                               obj_last_card_num - _bottom_card_num);
-        */
-        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
-          _card_bm->par_at_put(j - _bottom_card_num, 1);
-        }
-#endif
       }
       // In any case, we set the last card num.
       last_card_num = obj_last_card_num;
 
       marked_bytes += (size_t)obj_sz * HeapWordSize;
+
       // Find the next marked object after this one.
       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
-      _changed = true;
     }
+
     // Handle the last range, if any.
     if (start_card_num != -1) {
       mark_card_num_range(start_card_num, last_card_num);
     }
-    if (_final) {
-      // Mark the allocated-since-marking portion...
-      HeapWord* tp = hr->top();
-      if (nextTop < tp) {
-        start_card_num =
-          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
-        last_card_num =
-          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
-        mark_card_num_range(start_card_num, last_card_num);
-        // This definitely means the region has live objects.
-        set_bit_for_region(hr);
-      }
+
+    // Mark the allocated-since-marking portion...
+    HeapWord* top = hr->top();
+    if (nextTop < top) {
+      start_card_num = intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
+      last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);
+
+      mark_card_num_range(start_card_num, last_card_num);
+
+      // This definitely means the region has live objects.
+      set_bit_for_region(hr);
     }
 
-    hr->add_to_marked_bytes(marked_bytes);
     // Update the live region bitmap.
     if (marked_bytes > 0) {
       set_bit_for_region(hr);
     }
-    hr->set_top_at_conc_mark_count(nextTop);
+
+    // Set the marked bytes for the current region so that
+    // it can be queried by a calling verification routine
+    _region_marked_bytes = marked_bytes;
+
     _tot_live += hr->next_live_bytes();
     _tot_used += hr->used();
-    _words_done = words_done;
-
-    if (!_final) {
-      ++_regions_done;
-      if (_regions_done % 10 == 0) {
-        double end_vtime_sec = os::elapsedVTime();
-        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
-        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
-          jlong sleep_time_ms =
-            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
-          os::sleep(Thread::current(), sleep_time_ms, false);
-          _start_vtime_sec = end_vtime_sec;
+    _tot_words_done = words_done;
+
+    return false;
+  }
+
+  size_t region_marked_bytes() const { return _region_marked_bytes; }
+
+  // Debugging
+  size_t tot_words_done() const      { return _tot_words_done; }
+  size_t tot_live() const            { return _tot_live; }
+  size_t tot_used() const            { return _tot_used; }
+};
+
+// Heap region closure used for verifying the counting data
+// that was accumulated concurrently and aggregated during
+// the remark pause. This closure is applied to the heap
+// regions during the STW cleanup pause.
+
+class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
+  ConcurrentMark* _cm;
+  CalcLiveObjectsClosure _calc_cl;
+  BitMap* _region_bm;   // Region BM to be verified
+  BitMap* _card_bm;     // Card BM to be verified
+  bool _verbose;        // verbose output?
+
+  BitMap* _exp_region_bm; // Expected Region BM values
+  BitMap* _exp_card_bm;   // Expected card BM values
+
+  int _failures;
+
+public:
+  VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
+                                BitMap* region_bm,
+                                BitMap* card_bm,
+                                BitMap* exp_region_bm,
+                                BitMap* exp_card_bm,
+                                bool verbose) :
+    _cm(cm),
+    _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
+    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
+    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
+    _failures(0) { }
+
+  int failures() const { return _failures; }
+
+  bool doHeapRegion(HeapRegion* hr) {
+    if (hr->continuesHumongous()) {
+      // We will ignore these here and process them when their
+      // associated "starts humongous" region is processed (see
+      // set_bit_for_region()). Note that we cannot rely on their
+      // associated "starts humongous" region to have their bit set to
+      // 1 since, due to the region chunking in the parallel region
+      // iteration, a "continues humongous" region might be visited
+      // before its associated "starts humongous".
+      return false;
+    }
+
+    int failures = 0;
+
+    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
+    // this region and set the corresponding bits in the expected region
+    // and card bitmaps.
+    bool res = _calc_cl.doHeapRegion(hr);
+    assert(res == false, "should be continuing");
+
+    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
+                    Mutex::_no_safepoint_check_flag);
+
+    // Verify that top_at_conc_mark_count() == ntams
+    if (hr->top_at_conc_mark_count() != hr->next_top_at_mark_start()) {
+      if (_verbose) {
+        gclog_or_tty->print_cr("Region " SIZE_FORMAT ": top at conc count incorrect: "
+                               "expected " PTR_FORMAT ", actual: " PTR_FORMAT,
+                               hr->hrs_index(), hr->next_top_at_mark_start(),
+                               hr->top_at_conc_mark_count());
+      }
+      failures += 1;
+    }
+
+    // Verify the marked bytes for this region.
+    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
+    size_t act_marked_bytes = hr->next_marked_bytes();
+
+    // We're not OK if expected marked bytes > actual marked bytes. It means
+    // we have missed accounting some objects during the actual marking.
+    if (exp_marked_bytes > act_marked_bytes) {
+      if (_verbose) {
+        gclog_or_tty->print_cr("Region " SIZE_FORMAT ": marked bytes mismatch: "
+                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
+                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
+      }
+      failures += 1;
+    }
+
+    // Verify the bit, for this region, in the actual and expected
+    // (which was just calculated) region bit maps.
+    // We're not OK if the bit in the calculated expected region
+    // bitmap is set and the bit in the actual region bitmap is not.
+    BitMap::idx_t index = (BitMap::idx_t)hr->hrs_index();
+
+    bool expected = _exp_region_bm->at(index);
+    bool actual = _region_bm->at(index);
+    if (expected && !actual) {
+      if (_verbose) {
+        gclog_or_tty->print_cr("Region " SIZE_FORMAT ": region bitmap mismatch: "
+                               "expected: %d, actual: %d",
+                               hr->hrs_index(), expected, actual);
+      }
+      failures += 1;
+    }
+
+    // Verify that the card bit maps for the cards spanned by the current
+    // region match. We have an error if we have a set bit in the expected
+    // bit map and the corresponding bit in the actual bitmap is not set.
+
+    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
+    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
+
+    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
+      expected = _exp_card_bm->at(i);
+      actual = _card_bm->at(i);
+
+      if (expected && !actual) {
+        if (_verbose) {
+          gclog_or_tty->print_cr("Region " SIZE_FORMAT ": card bitmap mismatch at " SIZE_FORMAT ": "
+                                 "expected: %d, actual: %d",
+                                 hr->hrs_index(), i, expected, actual);
         }
+        failures += 1;
       }
     }
 
+    if (failures > 0 && _verbose)  {
+      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
+                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
+                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
+                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
+    }
+
+    _failures += failures;
+
+    // We could stop iteration over the heap when we
+    // find the first violating region by returning true.
     return false;
   }
-
-  bool changed() { return _changed;  }
-  void reset()   { _changed = false; _words_done = 0; }
-  void no_yield() { _yield = false; }
-  size_t words_done() { return _words_done; }
-  size_t tot_live() { return _tot_live; }
-  size_t tot_used() { return _tot_used; }
 };
 
 
-void ConcurrentMark::calcDesiredRegions() {
-  _region_bm.clear();
-  _card_bm.clear();
-  CalcLiveObjectsClosure calccl(false /*final*/,
-                                nextMarkBitMap(), this,
-                                &_region_bm, &_card_bm);
-  G1CollectedHeap *g1h = G1CollectedHeap::heap();
-  g1h->heap_region_iterate(&calccl);
-
-  do {
-    calccl.reset();
-    g1h->heap_region_iterate(&calccl);
-  } while (calccl.changed());
-}
+class G1ParVerifyFinalCountTask: public AbstractGangTask {
+protected:
+  G1CollectedHeap* _g1h;
+  ConcurrentMark* _cm;
+  BitMap* _actual_region_bm;
+  BitMap* _actual_card_bm;
+
+  uint    _n_workers;
+
+  BitMap* _expected_region_bm;
+  BitMap* _expected_card_bm;
+
+  int  _failures;
+  bool _verbose;
+
+public:
+  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
+                            BitMap* region_bm, BitMap* card_bm,
+                            BitMap* expected_region_bm, BitMap* expected_card_bm)
+    : AbstractGangTask("G1 verify final counting"),
+      _g1h(g1h), _cm(_g1h->concurrent_mark()),
+      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
+      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
+      _failures(0), _verbose(false),
+      _n_workers(0) {
+    assert(VerifyDuringGC, "don't call this otherwise");
+
+    // Use the value already set as the number of active threads
+    // in the call to run_task().
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      assert( _g1h->workers()->active_workers() > 0,
+        "Should have been previously set");
+      _n_workers = _g1h->workers()->active_workers();
+    } else {
+      _n_workers = 1;
+    }
+
+    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
+    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
+
+    _verbose = _cm->verbose_medium();
+  }
+
+  void work(uint worker_id) {
+    assert(worker_id < _n_workers, "invariant");
+
+    VerifyLiveObjectDataHRClosure verify_cl(_cm,
+                                            _actual_region_bm, _actual_card_bm,
+                                            _expected_region_bm,
+                                            _expected_card_bm,
+                                            _verbose);
+
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      _g1h->heap_region_par_iterate_chunked(&verify_cl,
+                                            worker_id,
+                                            _n_workers,
+                                            HeapRegion::VerifyCountClaimValue);
+    } else {
+      _g1h->heap_region_iterate(&verify_cl);
+    }
+
+    Atomic::add(verify_cl.failures(), &_failures);
+  }
+
+  int failures() const { return _failures; }
+};
+
+// Final update of count data (during cleanup).
+// Adds [top_at_count, NTAMS) to the marked bytes for each
+// region. Sets the bits in the card bitmap corresponding
+// to the interval [top_at_count, top], and sets the
+// liveness bit for each region containing live data
+// in the region bitmap.
+
+class FinalCountDataUpdateClosure: public HeapRegionClosure {
+  ConcurrentMark* _cm;
+  BitMap* _region_bm;
+  BitMap* _card_bm;
+
+  size_t _total_live_bytes;
+  size_t _total_used_bytes;
+  size_t _total_words_done;
+
+  void set_card_bitmap_range(BitMap::idx_t start_idx, BitMap::idx_t last_idx) {
+    assert(start_idx <= last_idx, "sanity");
+
+    // Set the inclusive bit range [start_idx, last_idx].
+    // For small ranges (up to 8 cards) use a simple loop; otherwise
+    // use par_at_put_range.
+    if ((last_idx - start_idx) <= 8) {
+      for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
+        _card_bm->par_set_bit(i);
+      }
+    } else {
+      assert(last_idx < _card_bm->size(), "sanity");
+      // Note BitMap::par_at_put_range() is exclusive.
+      _card_bm->par_at_put_range(start_idx, last_idx+1, true);
+    }
+  }
+
+  // It takes a region that's not empty (i.e., it has at least one
+  // live object in it) and sets its corresponding bit on the region
+  // bitmap to 1. If the region is "starts humongous" it will also set
+  // to 1 the bits on the region bitmap that correspond to its
+  // associated "continues humongous" regions.
+  void set_bit_for_region(HeapRegion* hr) {
+    assert(!hr->continuesHumongous(), "should have filtered those out");
+
+    size_t index = hr->hrs_index();
+    if (!hr->startsHumongous()) {
+      // Normal (non-humongous) case: just set the bit.
+      _region_bm->par_set_bit((BitMap::idx_t) index);
+    } else {
+      // Starts humongous case: calculate how many regions are part of
+      // this humongous region and then set the bit range.
+      G1CollectedHeap* g1h = G1CollectedHeap::heap();
+      HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
+      size_t end_index = last_hr->hrs_index() + 1;
+      _region_bm->par_at_put_range((BitMap::idx_t) index,
+                                   (BitMap::idx_t) end_index, true);
+    }
+  }
+
+ public:
+  FinalCountDataUpdateClosure(ConcurrentMark* cm,
+                              BitMap* region_bm,
+                              BitMap* card_bm) :
+    _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
+    _total_words_done(0), _total_live_bytes(0), _total_used_bytes(0) { }
+
+  bool doHeapRegion(HeapRegion* hr) {
+
+    if (hr->continuesHumongous()) {
+      // We will ignore these here and process them when their
+      // associated "starts humongous" region is processed (see
+      // set_bit_for_region()). Note that we cannot rely on their
+      // associated "starts humongous" region to have their bit set to
+      // 1 since, due to the region chunking in the parallel region
+      // iteration, a "continues humongous" region might be visited
+      // before its associated "starts humongous".
+      return false;
+    }
+
+    HeapWord* start = hr->top_at_conc_mark_count();
+    HeapWord* ntams = hr->next_top_at_mark_start();
+    HeapWord* top   = hr->top();
+
+    assert(hr->bottom() <= start && start <= hr->end() &&
+           hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
+
+    size_t words_done = ntams - hr->bottom();
+
+    if (start < ntams) {
+      // Region was changed between remark and cleanup pauses
+      // We need to add (ntams - start) to the marked bytes
+      // for this region, and set bits for the range
+      // [ card_idx(start), card_idx(ntams) ) in the card bitmap.
+      size_t live_bytes = (ntams - start) * HeapWordSize;
+      hr->add_to_marked_bytes(live_bytes);
+
+      // Record the new top at conc count
+      hr->set_top_at_conc_mark_count(ntams);
+
+      // The setting of the bits in the card bitmap takes place below
+    }
+
+    // Mark the allocated-since-marking portion...
+    if (ntams < top) {
+      // This definitely means the region has live objects.
+      set_bit_for_region(hr);
+    }
+
+    // Now set the bits for [start, top]
+    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
+    BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top);
+    set_card_bitmap_range(start_idx, last_idx);
+
+    // Set the bit for the region if it contains live data
+    if (hr->next_marked_bytes() > 0) {
+      set_bit_for_region(hr);
+    }
+
+    _total_words_done += words_done;
+    _total_used_bytes += hr->used();
+    _total_live_bytes += hr->next_marked_bytes();
+
+    return false;
+  }
+
+  size_t total_words_done() const { return _total_words_done; }
+  size_t total_live_bytes() const { return _total_live_bytes; }
+  size_t total_used_bytes() const { return _total_used_bytes; }
+};
 
 class G1ParFinalCountTask: public AbstractGangTask {
 protected:
   G1CollectedHeap* _g1h;
-  CMBitMap* _bm;
+  ConcurrentMark* _cm;
+  BitMap* _actual_region_bm;
+  BitMap* _actual_card_bm;
+
   uint    _n_workers;
+
   size_t *_live_bytes;
   size_t *_used_bytes;
-  BitMap* _region_bm;
-  BitMap* _card_bm;
+
 public:
-  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
-                      BitMap* region_bm, BitMap* card_bm)
-    : AbstractGangTask("G1 final counting"), _g1h(g1h),
-    _bm(bm), _region_bm(region_bm), _card_bm(card_bm),
-    _n_workers(0)
-  {
+  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
+    : AbstractGangTask("G1 final counting"),
+      _g1h(g1h), _cm(_g1h->concurrent_mark()),
+      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
+      _n_workers(0) {
     // Use the value already set as the number of active threads
     // in the call to run_task().  Needed for the allocation of
     // _live_bytes and _used_bytes.
@@ -1539,29 +1965,32 @@
   }
 
   void work(uint worker_id) {
-    CalcLiveObjectsClosure calccl(true /*final*/,
-                                  _bm, _g1h->concurrent_mark(),
-                                  _region_bm, _card_bm);
-    calccl.no_yield();
+    assert(worker_id < _n_workers, "invariant");
+
+    FinalCountDataUpdateClosure final_update_cl(_cm,
+                                                _actual_region_bm,
+                                                _actual_card_bm);
+
     if (G1CollectedHeap::use_parallel_gc_threads()) {
-      _g1h->heap_region_par_iterate_chunked(&calccl, worker_id,
-                                            (int) _n_workers,
+      _g1h->heap_region_par_iterate_chunked(&final_update_cl,
+                                            worker_id,
+                                            _n_workers,
                                             HeapRegion::FinalCountClaimValue);
     } else {
-      _g1h->heap_region_iterate(&calccl);
+      _g1h->heap_region_iterate(&final_update_cl);
     }
-    assert(calccl.complete(), "Shouldn't have yielded!");
-
-    assert(worker_id < _n_workers, "invariant");
-    _live_bytes[worker_id] = calccl.tot_live();
-    _used_bytes[worker_id] = calccl.tot_used();
-  }
+
+    _live_bytes[worker_id] = final_update_cl.total_live_bytes();
+    _used_bytes[worker_id] = final_update_cl.total_used_bytes();
+  }
+
   size_t live_bytes()  {
     size_t live_bytes = 0;
     for (uint i = 0; i < _n_workers; ++i)
       live_bytes += _live_bytes[i];
     return live_bytes;
   }
+
   size_t used_bytes()  {
     size_t used_bytes = 0;
     for (uint i = 0; i < _n_workers; ++i)
@@ -1724,8 +2153,7 @@
   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                        BitMap* region_bm, BitMap* card_bm) :
     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
-    _region_bm(region_bm), _card_bm(card_bm)
-  {}
+    _region_bm(region_bm), _card_bm(card_bm) { }
 
   void work(uint worker_id) {
     if (G1CollectedHeap::use_parallel_gc_threads()) {
@@ -1772,11 +2200,10 @@
   uint n_workers;
 
   // Do counting once more with the world stopped for good measure.
-  G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
-                                        &_region_bm, &_card_bm);
+  G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
+
   if (G1CollectedHeap::use_parallel_gc_threads()) {
-    assert(g1h->check_heap_region_claim_values(
-                                               HeapRegion::InitialClaimValue),
+   assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
            "sanity check");
 
     g1h->set_par_threads();
@@ -1787,14 +2214,42 @@
     // Done with the parallel phase so reset to 0.
     g1h->set_par_threads(0);
 
-    assert(g1h->check_heap_region_claim_values(
-                                             HeapRegion::FinalCountClaimValue),
+    assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
            "sanity check");
   } else {
     n_workers = 1;
     g1_par_count_task.work(0);
   }
 
+  if (VerifyDuringGC) {
+    // Verify that the counting data accumulated during marking matches
+    // that calculated by walking the marking bitmap.
+
+    // Bitmaps to hold expected values
+    BitMap expected_region_bm(_region_bm.size(), false);
+    BitMap expected_card_bm(_card_bm.size(), false);
+
+    G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
+                                                 &_region_bm,
+                                                 &_card_bm,
+                                                 &expected_region_bm,
+                                                 &expected_card_bm);
+
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      g1h->set_par_threads((int)n_workers);
+      g1h->workers()->run_task(&g1_par_verify_task);
+      // Done with the parallel phase so reset to 0.
+      g1h->set_par_threads(0);
+
+      assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
+             "sanity check");
+    } else {
+      g1_par_verify_task.work(0);
+    }
+
+    guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
+  }
+
   size_t known_garbage_bytes =
     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
   g1p->set_known_garbage_bytes(known_garbage_bytes);
@@ -1905,6 +2360,10 @@
   // races with it goes around and waits for completeCleanup to finish.
   g1h->increment_total_collections();
 
+  // We reclaimed old regions so we should calculate the sizes to make
+  // sure we update the old gen/space data.
+  g1h->g1mm()->update_sizes();
+
   if (VerifyDuringGC) {
     HandleMark hm;  // handle scope
     gclog_or_tty->print(" VerifyDuringGC:(after)");
@@ -1983,12 +2442,11 @@
 class G1CMKeepAliveClosure: public OopClosure {
   G1CollectedHeap* _g1;
   ConcurrentMark*  _cm;
-  CMBitMap*        _bitMap;
  public:
-  G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
-                       CMBitMap* bitMap) :
-    _g1(g1), _cm(cm),
-    _bitMap(bitMap) {}
+  G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
+    _g1(g1), _cm(cm) {
+    assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
+  }
 
   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
   virtual void do_oop(      oop* p) { do_oop_work(p); }
@@ -2004,26 +2462,25 @@
     }
 
     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
-      _bitMap->mark(addr);
+      _cm->mark_and_count(obj);
       _cm->mark_stack_push(obj);
     }
   }
 };
 
 class G1CMDrainMarkingStackClosure: public VoidClosure {
+  ConcurrentMark*               _cm;
   CMMarkStack*                  _markStack;
-  CMBitMap*                     _bitMap;
   G1CMKeepAliveClosure*         _oopClosure;
  public:
-  G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
+  G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
                                G1CMKeepAliveClosure* oopClosure) :
-    _bitMap(bitMap),
+    _cm(cm),
     _markStack(markStack),
-    _oopClosure(oopClosure)
-  {}
+    _oopClosure(oopClosure) { }
 
   void do_void() {
-    _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
+    _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false);
   }
 };
 
@@ -2102,8 +2559,7 @@
   CMTask* _task;
  public:
   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
-    _cm(cm), _task(task)
-  {}
+    _cm(cm), _task(task) { }
 
   void do_void() {
     do {
@@ -2242,9 +2698,9 @@
     rp->setup_policy(clear_all_soft_refs);
     assert(_markStack.isEmpty(), "mark stack should be empty");
 
-    G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
+    G1CMKeepAliveClosure g1_keep_alive(g1h, this);
     G1CMDrainMarkingStackClosure
-      g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
+      g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
 
     // We use the work gang from the G1CollectedHeap and we utilize all
     // the worker threads.
@@ -2616,18 +3072,6 @@
 // during an evacuation pause). This was a late change to the code and
 // is currently not being taken advantage of.
 
-class CMGlobalObjectClosure : public ObjectClosure {
-private:
-  ConcurrentMark* _cm;
-
-public:
-  void do_object(oop obj) {
-    _cm->deal_with_reference(obj);
-  }
-
-  CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
-};
-
 void ConcurrentMark::deal_with_reference(oop obj) {
   if (verbose_high()) {
     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
@@ -2672,6 +3116,18 @@
   }
 }
 
+class CMGlobalObjectClosure : public ObjectClosure {
+private:
+  ConcurrentMark* _cm;
+
+public:
+  void do_object(oop obj) {
+    _cm->deal_with_reference(obj);
+  }
+
+  CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
+};
+
 void ConcurrentMark::drainAllSATBBuffers() {
   guarantee(false, "drainAllSATBBuffers(): don't call this any more");
 
@@ -2693,15 +3149,6 @@
   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
 }
 
-void ConcurrentMark::clear(oop p) {
-  assert(p != NULL && p->is_oop(), "expected an oop");
-  HeapWord* addr = (HeapWord*)p;
-  assert(addr >= _nextMarkBitMap->startWord() ||
-         addr < _nextMarkBitMap->endWord(), "in a region");
-
-  _nextMarkBitMap->clear(addr);
-}
-
 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
   // Note we are overriding the read-only view of the prev map here, via
   // the cast.
@@ -3015,6 +3462,192 @@
   }
 }
 
+// Aggregate the counting data that was constructed concurrently
+// with marking.
+class AggregateCountDataHRClosure: public HeapRegionClosure {
+  ConcurrentMark* _cm;
+  BitMap* _cm_card_bm;
+  size_t _max_task_num;
+
+ public:
+  AggregateCountDataHRClosure(ConcurrentMark *cm,
+                              BitMap* cm_card_bm,
+                              size_t max_task_num) :
+    _cm(cm), _cm_card_bm(cm_card_bm),
+    _max_task_num(max_task_num) { }
+
+  bool is_card_aligned(HeapWord* p) {
+    return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0);
+  }
+
+  bool doHeapRegion(HeapRegion* hr) {
+    if (hr->continuesHumongous()) {
+      // We will ignore these here and process them when their
+      // associated "starts humongous" region is processed.
+      // Note that we cannot rely on their associated
+      // "starts humongous" region to have their bit set to 1
+      // since, due to the region chunking in the parallel region
+      // iteration, a "continues humongous" region might be visited
+      // before its associated "starts humongous".
+      return false;
+    }
+
+    HeapWord* start = hr->bottom();
+    HeapWord* limit = hr->next_top_at_mark_start();
+    HeapWord* end = hr->end();
+
+    assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
+           err_msg("Preconditions not met - "
+                   "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
+                   "top: "PTR_FORMAT", end: "PTR_FORMAT,
+                   start, limit, hr->top(), hr->end()));
+
+    assert(hr->next_marked_bytes() == 0, "Precondition");
+
+    if (start == limit) {
+      // NTAMS of this region has not been set so nothing to do.
+      return false;
+    }
+
+    assert(is_card_aligned(start), "sanity");
+    assert(is_card_aligned(end), "sanity");
+
+    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
+    BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
+    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
+
+    // If ntams is not card aligned then we bump the index for
+    // limit so that we get the card spanning ntams.
+    if (!is_card_aligned(limit)) {
+      limit_idx += 1;
+    }
+
+    assert(limit_idx <= end_idx, "or else use atomics");
+
+    // Aggregate the "stripe" in the count data associated with hr.
+    size_t hrs_index = hr->hrs_index();
+    size_t marked_bytes = 0;
+
+    for (int i = 0; (size_t)i < _max_task_num; i += 1) {
+      size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
+      BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
+
+      // Fetch the marked_bytes in this region for task i and
+      // add it to the running total for this region.
+      marked_bytes += marked_bytes_array[hrs_index];
+
+      // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
+      // into the global card bitmap.
+      BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
+
+      while (scan_idx < limit_idx) {
+        assert(task_card_bm->at(scan_idx) == true, "should be");
+        _cm_card_bm->set_bit(scan_idx);
+        assert(_cm_card_bm->at(scan_idx) == true, "should be");
+
+        // BitMap::get_next_one_offset() can handle the case when
+        // its left_offset parameter is greater than its right_offset
+        // parameter. It does, however, have an early exit if
+        // left_offset == right_offset. So let's limit the value
+        // passed in for left offset here.
+        BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
+        scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
+      }
+    }
+
+    // Update the marked bytes for this region.
+    hr->add_to_marked_bytes(marked_bytes);
+
+    // Now set the top at count to NTAMS.
+    hr->set_top_at_conc_mark_count(limit);
+
+    // Next heap region
+    return false;
+  }
+};
+
+class G1AggregateCountDataTask: public AbstractGangTask {
+protected:
+  G1CollectedHeap* _g1h;
+  ConcurrentMark* _cm;
+  BitMap* _cm_card_bm;
+  size_t _max_task_num;
+  int _active_workers;
+
+public:
+  G1AggregateCountDataTask(G1CollectedHeap* g1h,
+                           ConcurrentMark* cm,
+                           BitMap* cm_card_bm,
+                           size_t max_task_num,
+                           int n_workers) :
+    AbstractGangTask("Count Aggregation"),
+    _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
+    _max_task_num(max_task_num),
+    _active_workers(n_workers) { }
+
+  void work(uint worker_id) {
+    AggregateCountDataHRClosure cl(_cm, _cm_card_bm, _max_task_num);
+
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
+                                            _active_workers,
+                                            HeapRegion::AggregateCountClaimValue);
+    } else {
+      _g1h->heap_region_iterate(&cl);
+    }
+  }
+};
+
+
+void ConcurrentMark::aggregate_count_data() {
+  int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+                        _g1h->workers()->active_workers() :
+                        1);
+
+  G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
+                                           _max_task_num, n_workers);
+
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
+           "sanity check");
+    _g1h->set_par_threads(n_workers);
+    _g1h->workers()->run_task(&g1_par_agg_task);
+    _g1h->set_par_threads(0);
+
+    assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
+           "sanity check");
+    _g1h->reset_heap_region_claim_values();
+  } else {
+    g1_par_agg_task.work(0);
+  }
+}
+
+// Clear the per-worker arrays used to store the per-region counting data
+void ConcurrentMark::clear_all_count_data() {
+  // Clear the global card bitmap - it will be filled during
+  // liveness count aggregation (during remark) and the
+  // final counting task.
+  _card_bm.clear();
+
+  // Clear the global region bitmap - it will be filled as part
+  // of the final counting task.
+  _region_bm.clear();
+
+  size_t max_regions = _g1h->max_regions();
+  assert(_max_task_num != 0, "unitialized");
+
+  for (int i = 0; (size_t) i < _max_task_num; i += 1) {
+    BitMap* task_card_bm = count_card_bitmap_for(i);
+    size_t* marked_bytes_array = count_marked_bytes_array_for(i);
+
+    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
+    assert(marked_bytes_array != NULL, "uninitialized");
+
+    memset(marked_bytes_array, 0, (max_regions * sizeof(size_t)));
+    task_card_bm->clear();
+  }
+}
+
 void ConcurrentMark::print_stats() {
   if (verbose_stats()) {
     gclog_or_tty->print_cr("---------------------------------------------------------------------");
@@ -3350,6 +3983,8 @@
 void ConcurrentMark::abort() {
   // Clear all marks to force marking thread to do nothing
   _nextMarkBitMap->clearAll();
+  // Clear the liveness counting data
+  clear_all_count_data();
   // Empty mark stack
   clear_marking_state();
   for (int i = 0; i < (int)_max_task_num; ++i) {
@@ -3402,23 +4037,15 @@
                          (_init_times.sum() + _remark_times.sum() +
                           _cleanup_times.sum())/1000.0);
   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
-                "(%8.2f s marking, %8.2f s counting).",
+                "(%8.2f s marking).",
                 cmThread()->vtime_accum(),
-                cmThread()->vtime_mark_accum(),
-                cmThread()->vtime_count_accum());
+                cmThread()->vtime_mark_accum());
 }
 
 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
   _parallel_workers->print_worker_threads_on(st);
 }
 
-// Closures
-// XXX: there seems to be a lot of code  duplication here;
-// should refactor and consolidate the shared code.
-
-// This closure is used to mark refs into the CMS generation in
-// the CMS bit map. Called at the first checkpoint.
-
 // We take a break if someone is trying to stop the world.
 bool ConcurrentMark::do_yield_check(uint worker_id) {
   if (should_yield()) {
@@ -4704,6 +5331,8 @@
 
 CMTask::CMTask(int task_id,
                ConcurrentMark* cm,
+               size_t* marked_bytes,
+               BitMap* card_bm,
                CMTaskQueue* task_queue,
                CMTaskQueueSet* task_queues)
   : _g1h(G1CollectedHeap::heap()),
@@ -4713,7 +5342,9 @@
     _task_queue(task_queue),
     _task_queues(task_queues),
     _cm_oop_closure(NULL),
-    _aborted_region(MemRegion()) {
+    _aborted_region(MemRegion()),
+    _marked_bytes_array(marked_bytes),
+    _card_bm(card_bm) {
   guarantee(task_queue != NULL, "invariant");
   guarantee(task_queues != NULL, "invariant");
 
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -84,8 +84,8 @@
   }
 
   // iteration
-  bool iterate(BitMapClosure* cl) { return _bm.iterate(cl); }
-  bool iterate(BitMapClosure* cl, MemRegion mr);
+  inline bool iterate(BitMapClosure* cl, MemRegion mr);
+  inline bool iterate(BitMapClosure* cl);
 
   // Return the address corresponding to the next marked bit at or after
   // "addr", and before "limit", if "limit" is non-NULL.  If there is no
@@ -349,10 +349,62 @@
   high_verbose       // per object verbose
 } CMVerboseLevel;
 
+class YoungList;
+
+// Root Regions are regions that are not empty at the beginning of a
+// marking cycle and which we might collect during an evacuation pause
+// while the cycle is active. Given that, during evacuation pauses, we
+// do not copy objects that are explicitly marked, what we have to do
+// for the root regions is to scan them and mark all objects reachable
+// from them. According to the SATB assumptions, we only need to visit
+// each object once during marking. So, as long as we finish this scan
+// before the next evacuation pause, we can copy the objects from the
+// root regions without having to mark them or do anything else to them.
+//
+// Currently, we only support root region scanning once (at the start
+// of the marking cycle) and the root regions are all the survivor
+// regions populated during the initial-mark pause.
+class CMRootRegions VALUE_OBJ_CLASS_SPEC {
+private:
+  YoungList*           _young_list;
+  ConcurrentMark*      _cm;
+
+  volatile bool        _scan_in_progress;
+  volatile bool        _should_abort;
+  HeapRegion* volatile _next_survivor;
+
+public:
+  CMRootRegions();
+  // We actually do most of the initialization in this method.
+  void init(G1CollectedHeap* g1h, ConcurrentMark* cm);
+
+  // Reset the claiming / scanning of the root regions.
+  void prepare_for_scan();
+
+  // Forces get_next() to return NULL so that the iteration aborts early.
+  void abort() { _should_abort = true; }
+
+  // Return true if the CM threads are actively scanning root regions,
+  // false otherwise.
+  bool scan_in_progress() { return _scan_in_progress; }
+
+  // Claim the next root region to scan atomically, or return NULL if
+  // all have been claimed.
+  HeapRegion* claim_next();
+
+  // Flag that we're done with root region scanning and notify anyone
+  // who's waiting on it. If aborted is false, assume that all regions
+  // have been claimed.
+  void scan_finished();
+
+  // If CM threads are still scanning root regions, wait until they
+  // are done. Return true if we had to wait, false otherwise.
+  bool wait_until_scan_finished();
+};
 
 class ConcurrentMarkThread;
 
-class ConcurrentMark: public CHeapObj {
+class ConcurrentMark : public CHeapObj {
   friend class ConcurrentMarkThread;
   friend class CMTask;
   friend class CMBitMapClosure;
@@ -386,7 +438,7 @@
 
   FreeRegionList        _cleanup_list;
 
-  // CMS marking support structures
+  // Concurrent marking support structures
   CMBitMap                _markBitMap1;
   CMBitMap                _markBitMap2;
   CMBitMapRO*             _prevMarkBitMap; // completed mark bitmap
@@ -400,6 +452,9 @@
   HeapWord*               _heap_start;
   HeapWord*               _heap_end;
 
+  // Root region tracking and claiming.
+  CMRootRegions           _root_regions;
+
   // For gray objects
   CMMarkStack             _markStack; // Grey objects behind global finger.
   CMRegionStack           _regionStack; // Grey regions behind global finger.
@@ -426,7 +481,6 @@
   WorkGangBarrierSync     _first_overflow_barrier_sync;
   WorkGangBarrierSync     _second_overflow_barrier_sync;
 
-
   // this is set by any task, when an overflow on the global data
   // structures is detected.
   volatile bool           _has_overflown;
@@ -554,9 +608,9 @@
   bool has_overflown()           { return _has_overflown; }
   void set_has_overflown()       { _has_overflown = true; }
   void clear_has_overflown()     { _has_overflown = false; }
+  bool restart_for_overflow()    { return _restart_for_overflow; }
 
   bool has_aborted()             { return _has_aborted; }
-  bool restart_for_overflow()    { return _restart_for_overflow; }
 
   // Methods to enter the two overflow sync barriers
   void enter_first_sync_barrier(int task_num);
@@ -578,6 +632,27 @@
     }
   }
 
+  // Live Data Counting data structures...
+  // These data structures are initialized at the start of
+  // marking. They are written to while marking is active.
+  // They are aggregated during remark; the aggregated values
+  // are then used to populate the _region_bm, _card_bm, and
+  // the total live bytes, which are then subsequently updated
+  // during cleanup.
+
+  // An array of bitmaps (one bit map per task). Each bitmap
+  // is used to record the cards spanned by the live objects
+  // marked by that task/worker.
+  BitMap*  _count_card_bitmaps;
+
+  // Used to record the number of marked live bytes
+  // (for each region, by worker thread).
+  size_t** _count_marked_bytes;
+
+  // Card index of the bottom of the G1 heap. Used for biasing indices into
+  // the card bitmaps.
+  intptr_t _heap_bottom_card_num;
+
 public:
   // Manipulation of the global mark stack.
   // Notice that the first mark_stack_push is CAS-based, whereas the
@@ -671,6 +746,8 @@
   // Returns true if there are any aborted memory regions.
   bool has_aborted_regions();
 
+  CMRootRegions* root_regions() { return &_root_regions; }
+
   bool concurrent_marking_in_progress() {
     return _concurrent_marking_in_progress;
   }
@@ -703,6 +780,7 @@
 
   ConcurrentMark(ReservedSpace rs, int max_regions);
   ~ConcurrentMark();
+
   ConcurrentMarkThread* cmThread() { return _cmThread; }
 
   CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
@@ -720,8 +798,17 @@
   // G1CollectedHeap
 
   // This notifies CM that a root during initial-mark needs to be
-  // grayed. It is MT-safe.
-  inline void grayRoot(oop obj, size_t word_size);
+  // grayed. It is MT-safe. word_size is the size of the object in
+  // words. It is passed explicitly as sometimes we cannot calculate
+  // it from the given object because it might be in an inconsistent
+  // state (e.g., in to-space and being copied). So the caller is
+  // responsible for dealing with this issue (e.g., get the size from
+  // the from-space image when the to-space image might be
+  // inconsistent) and always passing the size. hr is the region that
+  // contains the object and it's passed optionally from callers who
+  // might already have it (no point in recalculating it).
+  inline void grayRoot(oop obj, size_t word_size,
+                       uint worker_id, HeapRegion* hr = NULL);
 
   // It's used during evacuation pauses to gray a region, if
   // necessary, and it's MT-safe. It assumes that the caller has
@@ -772,6 +859,13 @@
   void checkpointRootsInitialPre();
   void checkpointRootsInitialPost();
 
+  // Scan all the root regions and mark everything reachable from
+  // them.
+  void scanRootRegions();
+
+  // Scan a single root region and mark everything reachable from it.
+  void scanRootRegion(HeapRegion* hr, uint worker_id);
+
   // Do concurrent phase of marking, to a tentative transitive closure.
   void markFromRoots();
 
@@ -781,15 +875,13 @@
 
   void checkpointRootsFinal(bool clear_all_soft_refs);
   void checkpointRootsFinalWork();
-  void calcDesiredRegions();
   void cleanup();
   void completeCleanup();
 
   // Mark in the previous bitmap.  NB: this is usually read-only, so use
   // this carefully!
   inline void markPrev(oop p);
-  inline void markNext(oop p);
-  void clear(oop p);
+
   // Clears marks for all objects in the given range, for the prev,
   // next, or both bitmaps.  NB: the previous bitmap is usually
   // read-only, so use this carefully!
@@ -913,6 +1005,114 @@
   bool verbose_high() {
     return _MARKING_VERBOSE_ && _verbose_level >= high_verbose;
   }
+
+  // Counting data structure accessors
+
+  // Returns the card number of the bottom of the G1 heap.
+  // Used in biasing indices into accounting card bitmaps.
+  intptr_t heap_bottom_card_num() const {
+    return _heap_bottom_card_num;
+  }
+
+  // Returns the card bitmap for a given task or worker id.
+  BitMap* count_card_bitmap_for(uint worker_id) {
+    assert(0 <= worker_id && worker_id < _max_task_num, "oob");
+    assert(_count_card_bitmaps != NULL, "uninitialized");
+    BitMap* task_card_bm = &_count_card_bitmaps[worker_id];
+    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
+    return task_card_bm;
+  }
+
+  // Returns the array containing the marked bytes for each region,
+  // for the given worker or task id.
+  size_t* count_marked_bytes_array_for(uint worker_id) {
+    assert(0 <= worker_id && worker_id < _max_task_num, "oob");
+    assert(_count_marked_bytes != NULL, "uninitialized");
+    size_t* marked_bytes_array = _count_marked_bytes[worker_id];
+    assert(marked_bytes_array != NULL, "uninitialized");
+    return marked_bytes_array;
+  }
+
+  // Returns the index in the liveness accounting card table bitmap
+  // for the given address
+  inline BitMap::idx_t card_bitmap_index_for(HeapWord* addr);
+
+  // Counts the size of the given memory region in the given
+  // marked_bytes array slot for the given HeapRegion.
+  // Sets the bits in the given card bitmap that are associated with the
+  // cards that are spanned by the memory region.
+  inline void count_region(MemRegion mr, HeapRegion* hr,
+                           size_t* marked_bytes_array,
+                           BitMap* task_card_bm);
+
+  // Counts the given memory region in the task/worker counting
+  // data structures for the given worker id.
+  inline void count_region(MemRegion mr, HeapRegion* hr, uint worker_id);
+
+  // Counts the given memory region in the task/worker counting
+  // data structures for the given worker id.
+  inline void count_region(MemRegion mr, uint worker_id);
+
+  // Counts the given object in the given task/worker counting
+  // data structures.
+  inline void count_object(oop obj, HeapRegion* hr,
+                           size_t* marked_bytes_array,
+                           BitMap* task_card_bm);
+
+  // Counts the given object in the task/worker counting data
+  // structures for the given worker id.
+  inline void count_object(oop obj, HeapRegion* hr, uint worker_id);
+
+  // Attempts to mark the given object and, if successful, counts
+  // the object in the given task/worker counting structures.
+  inline bool par_mark_and_count(oop obj, HeapRegion* hr,
+                                 size_t* marked_bytes_array,
+                                 BitMap* task_card_bm);
+
+  // Attempts to mark the given object and, if successful, counts
+  // the object in the task/worker counting structures for the
+  // given worker id.
+  inline bool par_mark_and_count(oop obj, size_t word_size,
+                                 HeapRegion* hr, uint worker_id);
+
+  // Attempts to mark the given object and, if successful, counts
+  // the object in the task/worker counting structures for the
+  // given worker id.
+  inline bool par_mark_and_count(oop obj, HeapRegion* hr, uint worker_id);
+
+  // Similar to the above routine but we don't know the heap region that
+  // contains the object to be marked/counted, which this routine looks up.
+  inline bool par_mark_and_count(oop obj, uint worker_id);
+
+  // Similar to the above routine but there are times when we cannot
+  // safely calculate the size of obj due to races and we, therefore,
+  // pass the size in as a parameter. It is the caller's responsibility
+  // to ensure that the size passed in for obj is valid.
+  inline bool par_mark_and_count(oop obj, size_t word_size, uint worker_id);
+
+  // Unconditionally mark the given object, and unconditionally count
+  // the object in the counting structures for worker id 0.
+  // Should *not* be called from parallel code.
+  inline bool mark_and_count(oop obj, HeapRegion* hr);
+
+  // Similar to the above routine but we don't know the heap region that
+  // contains the object to be marked/counted, which this routine looks up.
+  // Should *not* be called from parallel code.
+  inline bool mark_and_count(oop obj);
+
+protected:
+  // Clear all the per-task bitmaps and arrays used to store the
+  // counting data.
+  void clear_all_count_data();
+
+  // Aggregates the counting data for each worker/task
+  // that was constructed while marking. Also sets
+  // the amount of marked bytes for each region and
+  // the top at concurrent mark count.
+  void aggregate_count_data();
+
+  // Verification routine
+  void verify_count_data();
 };
 
 // A class representing a marking task.
@@ -1031,6 +1231,12 @@
 
   TruncatedSeq                _marking_step_diffs_ms;
 
+  // Counting data structures. Embedding the task's marked_bytes_array
+  // and card bitmap into the actual task saves having to go through
+  // the ConcurrentMark object.
+  size_t*                     _marked_bytes_array;
+  BitMap*                     _card_bm;
+
   // LOTS of statistics related with this task
 #if _MARKING_STATS_
   NumberSeq                   _all_clock_intervals_ms;
@@ -1196,6 +1402,7 @@
   }
 
   CMTask(int task_num, ConcurrentMark *cm,
+         size_t* marked_bytes, BitMap* card_bm,
          CMTaskQueue* task_queue, CMTaskQueueSet* task_queues);
 
   // it prints statistics associated with this task
--- a/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -28,6 +28,214 @@
 #include "gc_implementation/g1/concurrentMark.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 
+// Returns the index in the liveness accounting card bitmap
+// for the given address
+inline BitMap::idx_t ConcurrentMark::card_bitmap_index_for(HeapWord* addr) {
+  // Below, the term "card num" means the result of shifting an address
+  // by the card shift -- address 0 corresponds to card number 0.  One
+  // must subtract the card num of the bottom of the heap to obtain a
+  // card table index.
+
+  intptr_t card_num = intptr_t(uintptr_t(addr) >> CardTableModRefBS::card_shift);
+  return card_num - heap_bottom_card_num();
+}
+
+// Counts the given memory region in the given task/worker
+// counting data structures.
+inline void ConcurrentMark::count_region(MemRegion mr, HeapRegion* hr,
+                                         size_t* marked_bytes_array,
+                                         BitMap* task_card_bm) {
+  G1CollectedHeap* g1h = _g1h;
+  HeapWord* start = mr.start();
+  HeapWord* last = mr.last();
+  size_t region_size_bytes = mr.byte_size();
+  size_t index = hr->hrs_index();
+
+  assert(!hr->continuesHumongous(), "should not be HC region");
+  assert(hr == g1h->heap_region_containing(start), "sanity");
+  assert(hr == g1h->heap_region_containing(mr.last()), "sanity");
+  assert(marked_bytes_array != NULL, "pre-condition");
+  assert(task_card_bm != NULL, "pre-condition");
+
+  // Add to the task local marked bytes for this region.
+  marked_bytes_array[index] += region_size_bytes;
+
+  BitMap::idx_t start_idx = card_bitmap_index_for(start);
+  BitMap::idx_t last_idx = card_bitmap_index_for(last);
+
+  // The card bitmap is task/worker specific => no need to use 'par' routines.
+  // Set bits in the inclusive bit range [start_idx, last_idx].
+  //
+  // For small ranges use a simple loop; otherwise use set_range().
+  // The range is the set of cards spanned by the object/region,
+  // so 8 cards will allow objects/regions up to 4K to be handled
+  // using the loop.
+  if ((last_idx - start_idx) <= 8) {
+    for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
+     task_card_bm->set_bit(i);
+    }
+  } else {
+    assert(last_idx < task_card_bm->size(), "sanity");
+    // Note: BitMap::set_range() is exclusive.
+    task_card_bm->set_range(start_idx, last_idx+1);
+  }
+}
+
+// Counts the given memory region in the task/worker counting
+// data structures for the given worker id.
+inline void ConcurrentMark::count_region(MemRegion mr,
+                                         HeapRegion* hr,
+                                         uint worker_id) {
+  size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id);
+  BitMap* task_card_bm = count_card_bitmap_for(worker_id);
+  count_region(mr, hr, marked_bytes_array, task_card_bm);
+}
+
+// Counts the given memory region, which may be a single object, in the
+// task/worker counting data structures for the given worker id.
+inline void ConcurrentMark::count_region(MemRegion mr, uint worker_id) {
+  HeapWord* addr = mr.start();
+  HeapRegion* hr = _g1h->heap_region_containing_raw(addr);
+  count_region(mr, hr, worker_id);
+}
+
+// Counts the given object in the given task/worker counting data structures.
+inline void ConcurrentMark::count_object(oop obj,
+                                         HeapRegion* hr,
+                                         size_t* marked_bytes_array,
+                                         BitMap* task_card_bm) {
+  MemRegion mr((HeapWord*)obj, obj->size());
+  count_region(mr, hr, marked_bytes_array, task_card_bm);
+}
+
+// Counts the given object in the task/worker counting data
+// structures for the given worker id.
+inline void ConcurrentMark::count_object(oop obj,
+                                         HeapRegion* hr,
+                                         uint worker_id) {
+  size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id);
+  BitMap* task_card_bm = count_card_bitmap_for(worker_id);
+  HeapWord* addr = (HeapWord*) obj;
+  count_object(obj, hr, marked_bytes_array, task_card_bm);
+}
+
+// Attempts to mark the given object and, if successful, counts
+// the object in the given task/worker counting structures.
+inline bool ConcurrentMark::par_mark_and_count(oop obj,
+                                               HeapRegion* hr,
+                                               size_t* marked_bytes_array,
+                                               BitMap* task_card_bm) {
+  HeapWord* addr = (HeapWord*)obj;
+  if (_nextMarkBitMap->parMark(addr)) {
+    // Update the task specific count data for the object.
+    count_object(obj, hr, marked_bytes_array, task_card_bm);
+    return true;
+  }
+  return false;
+}
+
+// Attempts to mark the given object and, if successful, counts
+// the object in the task/worker counting structures for the
+// given worker id.
+inline bool ConcurrentMark::par_mark_and_count(oop obj,
+                                               size_t word_size,
+                                               HeapRegion* hr,
+                                               uint worker_id) {
+  HeapWord* addr = (HeapWord*)obj;
+  if (_nextMarkBitMap->parMark(addr)) {
+    MemRegion mr(addr, word_size);
+    count_region(mr, hr, worker_id);
+    return true;
+  }
+  return false;
+}
+
+// Attempts to mark the given object and, if successful, counts
+// the object in the task/worker counting structures for the
+// given worker id.
+inline bool ConcurrentMark::par_mark_and_count(oop obj,
+                                               HeapRegion* hr,
+                                               uint worker_id) {
+  HeapWord* addr = (HeapWord*)obj;
+  if (_nextMarkBitMap->parMark(addr)) {
+    // Update the task specific count data for the object.
+    count_object(obj, hr, worker_id);
+    return true;
+  }
+  return false;
+}
+
+// As above - but we don't know the heap region containing the
+// object and so have to look it up.
+inline bool ConcurrentMark::par_mark_and_count(oop obj, uint worker_id) {
+  HeapWord* addr = (HeapWord*)obj;
+  HeapRegion* hr = _g1h->heap_region_containing_raw(addr);
+  return par_mark_and_count(obj, hr, worker_id);
+}
+
+// Similar to the above routine but we already know the size, in words, of
+// the object that we wish to mark/count
+inline bool ConcurrentMark::par_mark_and_count(oop obj,
+                                               size_t word_size,
+                                               uint worker_id) {
+  HeapWord* addr = (HeapWord*)obj;
+  if (_nextMarkBitMap->parMark(addr)) {
+    // Update the task specific count data for the object.
+    MemRegion mr(addr, word_size);
+    count_region(mr, worker_id);
+    return true;
+  }
+  return false;
+}
+
+// Unconditionally mark the given object, and unconditionally count
+// the object in the counting structures for worker id 0.
+// Should *not* be called from parallel code.
+inline bool ConcurrentMark::mark_and_count(oop obj, HeapRegion* hr) {
+  HeapWord* addr = (HeapWord*)obj;
+  _nextMarkBitMap->mark(addr);
+  // Update the task specific count data for the object.
+  count_object(obj, hr, 0 /* worker_id */);
+  return true;
+}
+
+// As above - but we don't have the heap region containing the
+// object, so we have to look it up.
+inline bool ConcurrentMark::mark_and_count(oop obj) {
+  HeapWord* addr = (HeapWord*)obj;
+  HeapRegion* hr = _g1h->heap_region_containing_raw(addr);
+  return mark_and_count(obj, hr);
+}
+
+inline bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
+  HeapWord* start_addr = MAX2(startWord(), mr.start());
+  HeapWord* end_addr = MIN2(endWord(), mr.end());
+
+  if (end_addr > start_addr) {
+    // Right-open interval [start-offset, end-offset).
+    BitMap::idx_t start_offset = heapWordToOffset(start_addr);
+    BitMap::idx_t end_offset = heapWordToOffset(end_addr);
+
+    start_offset = _bm.get_next_one_offset(start_offset, end_offset);
+    while (start_offset < end_offset) {
+      HeapWord* obj_addr = offsetToHeapWord(start_offset);
+      oop obj = (oop) obj_addr;
+      if (!cl->do_bit(start_offset)) {
+        return false;
+      }
+      HeapWord* next_addr = MIN2(obj_addr + obj->size(), end_addr);
+      BitMap::idx_t next_offset = heapWordToOffset(next_addr);
+      start_offset = _bm.get_next_one_offset(next_offset, end_offset);
+    }
+  }
+  return true;
+}
+
+inline bool CMBitMapRO::iterate(BitMapClosure* cl) {
+  MemRegion mr(startWord(), sizeInWords());
+  return iterate(cl, mr);
+}
+
 inline void CMTask::push(oop obj) {
   HeapWord* objAddr = (HeapWord*) obj;
   assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
@@ -84,7 +292,7 @@
 
   HeapWord* objAddr = (HeapWord*) obj;
   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
- if (_g1h->is_in_g1_reserved(objAddr)) {
+  if (_g1h->is_in_g1_reserved(objAddr)) {
     assert(obj != NULL, "null check is implicit");
     if (!_nextMarkBitMap->isMarked(objAddr)) {
       // Only get the containing region if the object is not marked on the
@@ -98,9 +306,9 @@
         }
 
         // we need to mark it first
-        if (_nextMarkBitMap->parMark(objAddr)) {
+        if (_cm->par_mark_and_count(obj, hr, _marked_bytes_array, _card_bm)) {
           // No OrderAccess:store_load() is needed. It is implicit in the
-          // CAS done in parMark(objAddr) above
+          // CAS done in CMBitMap::parMark() call in the routine above.
           HeapWord* global_finger = _cm->finger();
 
 #if _CHECK_BOTH_FINGERS_
@@ -160,25 +368,20 @@
   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*) p);
 }
 
-inline void ConcurrentMark::markNext(oop p) {
-  assert(!_nextMarkBitMap->isMarked((HeapWord*) p), "sanity");
-  _nextMarkBitMap->mark((HeapWord*) p);
-}
+inline void ConcurrentMark::grayRoot(oop obj, size_t word_size,
+                                     uint worker_id, HeapRegion* hr) {
+  assert(obj != NULL, "pre-condition");
+  HeapWord* addr = (HeapWord*) obj;
+  if (hr == NULL) {
+    hr = _g1h->heap_region_containing_raw(addr);
+  } else {
+    assert(hr->is_in(addr), "pre-condition");
+  }
+  assert(hr != NULL, "sanity");
+  // Given that we're looking for a region that contains an object
+  // header it's impossible to get back a HC region.
+  assert(!hr->continuesHumongous(), "sanity");
 
-inline void ConcurrentMark::grayRoot(oop obj, size_t word_size) {
-  HeapWord* addr = (HeapWord*) obj;
-
-  // Currently we don't do anything with word_size but we will use it
-  // in the very near future in the liveness calculation piggy-backing
-  // changes.
-
-#ifdef ASSERT
-  HeapRegion* hr = _g1h->heap_region_containing(addr);
-  assert(hr != NULL, "sanity");
-  assert(!hr->is_survivor(), "should not allocate survivors during IM");
-  assert(addr < hr->next_top_at_mark_start(),
-         err_msg("addr: "PTR_FORMAT" hr: "HR_FORMAT" NTAMS: "PTR_FORMAT,
-                 addr, HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start()));
   // We cannot assert that word_size == obj->size() given that obj
   // might not be in a consistent state (another thread might be in
   // the process of copying it). So the best thing we can do is to
@@ -188,10 +391,11 @@
          err_msg("size: "SIZE_FORMAT" capacity: "SIZE_FORMAT" "HR_FORMAT,
                  word_size * HeapWordSize, hr->capacity(),
                  HR_FORMAT_PARAMS(hr)));
-#endif // ASSERT
 
-  if (!_nextMarkBitMap->isMarked(addr)) {
-    _nextMarkBitMap->parMark(addr);
+  if (addr < hr->next_top_at_mark_start()) {
+    if (!_nextMarkBitMap->isMarked(addr)) {
+      par_mark_and_count(obj, word_size, hr, worker_id);
+    }
   }
 }
 
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -44,9 +44,7 @@
   _started(false),
   _in_progress(false),
   _vtime_accum(0.0),
-  _vtime_mark_accum(0.0),
-  _vtime_count_accum(0.0)
-{
+  _vtime_mark_accum(0.0) {
   create_and_start();
 }
 
@@ -94,9 +92,36 @@
       ResourceMark rm;
       HandleMark   hm;
       double cycle_start = os::elapsedVTime();
-      double mark_start_sec = os::elapsedTime();
       char verbose_str[128];
 
+      // We have to ensure that we finish scanning the root regions
+      // before the next GC takes place. To ensure this we have to
+      // make sure that we do not join the STS until the root regions
+      // have been scanned. If we did then it's possible that a
+      // subsequent GC could block us from joining the STS and proceed
+      // without the root regions having been scanned which would be a
+      // correctness issue.
+
+      double scan_start = os::elapsedTime();
+      if (!cm()->has_aborted()) {
+        if (PrintGC) {
+          gclog_or_tty->date_stamp(PrintGCDateStamps);
+          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
+        }
+
+        _cm->scanRootRegions();
+
+        double scan_end = os::elapsedTime();
+        if (PrintGC) {
+          gclog_or_tty->date_stamp(PrintGCDateStamps);
+          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf]",
+                                 scan_end - scan_start);
+        }
+      }
+
+      double mark_start_sec = os::elapsedTime();
       if (PrintGC) {
         gclog_or_tty->date_stamp(PrintGCDateStamps);
         gclog_or_tty->stamp(PrintGCTimeStamps);
@@ -148,36 +173,12 @@
         }
       } while (cm()->restart_for_overflow());
 
-      double counting_start_time = os::elapsedVTime();
-      if (!cm()->has_aborted()) {
-        double count_start_sec = os::elapsedTime();
-        if (PrintGC) {
-          gclog_or_tty->date_stamp(PrintGCDateStamps);
-          gclog_or_tty->stamp(PrintGCTimeStamps);
-          gclog_or_tty->print_cr("[GC concurrent-count-start]");
-        }
-
-        _sts.join();
-        _cm->calcDesiredRegions();
-        _sts.leave();
-
-        if (!cm()->has_aborted()) {
-          double count_end_sec = os::elapsedTime();
-          if (PrintGC) {
-            gclog_or_tty->date_stamp(PrintGCDateStamps);
-            gclog_or_tty->stamp(PrintGCTimeStamps);
-            gclog_or_tty->print_cr("[GC concurrent-count-end, %1.7lf]",
-                                   count_end_sec - count_start_sec);
-          }
-        }
-      }
-
       double end_time = os::elapsedVTime();
-      _vtime_count_accum += (end_time - counting_start_time);
       // Update the total virtual time before doing this, since it will try
       // to measure it to get the vtime for this marking.  We purposely
       // neglect the presumably-short "completeCleanup" phase here.
       _vtime_accum = (end_time - _vtime_start);
+
       if (!cm()->has_aborted()) {
         if (g1_policy->adaptive_young_list_length()) {
           double now = os::elapsedTime();
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -40,7 +40,6 @@
   double _vtime_accum;  // Accumulated virtual time.
 
   double _vtime_mark_accum;
-  double _vtime_count_accum;
 
  public:
   virtual void run();
@@ -69,8 +68,6 @@
   double vtime_accum();
   // Marking virtual time so far
   double vtime_mark_accum();
-  // Counting virtual time so far.
-  double vtime_count_accum() { return _vtime_count_accum; }
 
   ConcurrentMark* cm()     { return _cm; }
 
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -174,13 +174,10 @@
   }
 };
 
-YoungList::YoungList(G1CollectedHeap* g1h)
-  : _g1h(g1h), _head(NULL),
-    _length(0),
-    _last_sampled_rs_lengths(0),
-    _survivor_head(NULL), _survivor_tail(NULL), _survivor_length(0)
-{
-  guarantee( check_list_empty(false), "just making sure..." );
+YoungList::YoungList(G1CollectedHeap* g1h) :
+    _g1h(g1h), _head(NULL), _length(0), _last_sampled_rs_lengths(0),
+    _survivor_head(NULL), _survivor_tail(NULL), _survivor_length(0) {
+  guarantee(check_list_empty(false), "just making sure...");
 }
 
 void YoungList::push_region(HeapRegion *hr) {
@@ -1029,6 +1026,15 @@
   assert(isHumongous(word_size), "attempt_allocation_humongous() "
          "should only be called for humongous allocations");
 
+  // Humongous objects can exhaust the heap quickly, so we should check if we
+  // need to start a marking cycle at each humongous object allocation. We do
+  // the check before we do the actual allocation. The reason for doing it
+  // before the allocation is that we avoid having to keep track of the newly
+  // allocated memory while we do a GC.
+  if (g1_policy()->need_to_start_conc_mark("concurrent humongous allocation", word_size)) {
+    collect(GCCause::_g1_humongous_allocation);
+  }
+
   // We will loop until a) we manage to successfully perform the
   // allocation or b) we successfully schedule a collection which
   // fails to perform the allocation. b) is the only case when we'll
@@ -1111,7 +1117,11 @@
     return _mutator_alloc_region.attempt_allocation_locked(word_size,
                                                       false /* bot_updates */);
   } else {
-    return humongous_obj_allocate(word_size);
+    HeapWord* result = humongous_obj_allocate(word_size);
+    if (result != NULL && g1_policy()->need_to_start_conc_mark("STW humongous allocation")) {
+      g1_policy()->set_initiate_conc_mark_if_possible();
+    }
+    return result;
   }
 
   ShouldNotReachHere();
@@ -1228,9 +1238,7 @@
   SvcGCMarker sgcm(SvcGCMarker::FULL);
   ResourceMark rm;
 
-  if (PrintHeapAtGC) {
-    Universe::print_heap_before_gc();
-  }
+  print_heap_before_gc();
 
   HRSPhaseSetter x(HRSPhaseFullGC);
   verify_region_sets_optional();
@@ -1257,7 +1265,18 @@
     double start = os::elapsedTime();
     g1_policy()->record_full_collection_start();
 
+    // Note: When we have a more flexible GC logging framework that
+    // allows us to add optional attributes to a GC log record we
+    // could consider timing and reporting how long we wait in the
+    // following two methods.
     wait_while_free_regions_coming();
+    // If we start the compaction before the CM threads finish
+    // scanning the root regions we might trip them over as we'll
+    // be moving objects / updating references. So let's wait until
+    // they are done. By telling them to abort, they should complete
+    // early.
+    _cm->root_regions()->abort();
+    _cm->root_regions()->wait_until_scan_finished();
     append_secondary_free_list_if_not_empty_with_lock();
 
     gc_prologue(true);
@@ -1286,7 +1305,8 @@
     ref_processor_cm()->verify_no_references_recorded();
 
     // Abandon current iterations of concurrent marking and concurrent
-    // refinement, if any are in progress.
+    // refinement, if any are in progress. We have to do this before
+    // wait_until_scan_finished() below.
     concurrent_mark()->abort();
 
     // Make sure we'll choose a new allocation region afterwards.
@@ -1470,9 +1490,7 @@
   _hrs.verify_optional();
   verify_region_sets_optional();
 
-  if (PrintHeapAtGC) {
-    Universe::print_heap_after_gc();
-  }
+  print_heap_after_gc();
   g1mm()->update_sizes();
   post_full_gc_dump();
 
@@ -2295,7 +2313,8 @@
 bool G1CollectedHeap::should_do_concurrent_full_gc(GCCause::Cause cause) {
   return
     ((cause == GCCause::_gc_locker           && GCLockerInvokesConcurrent) ||
-     (cause == GCCause::_java_lang_system_gc && ExplicitGCInvokesConcurrent));
+     (cause == GCCause::_java_lang_system_gc && ExplicitGCInvokesConcurrent) ||
+      cause == GCCause::_g1_humongous_allocation);
 }
 
 #ifndef PRODUCT
@@ -3537,27 +3556,31 @@
   SvcGCMarker sgcm(SvcGCMarker::MINOR);
   ResourceMark rm;
 
-  if (PrintHeapAtGC) {
-    Universe::print_heap_before_gc();
-  }
+  print_heap_before_gc();
 
   HRSPhaseSetter x(HRSPhaseEvacuation);
   verify_region_sets_optional();
   verify_dirty_young_regions();
 
+  // This call will decide whether this pause is an initial-mark
+  // pause. If it is, during_initial_mark_pause() will return true
+  // for the duration of this pause.
+  g1_policy()->decide_on_conc_mark_initiation();
+
+  // We do not allow initial-mark to be piggy-backed on a mixed GC.
+  assert(!g1_policy()->during_initial_mark_pause() ||
+          g1_policy()->gcs_are_young(), "sanity");
+
+  // We also do not allow mixed GCs during marking.
+  assert(!mark_in_progress() || g1_policy()->gcs_are_young(), "sanity");
+
+  // Record whether this pause is an initial mark. When the current
+  // thread has completed its logging output and it's safe to signal
+  // the CM thread, the flag's value in the policy has been reset.
+  bool should_start_conc_mark = g1_policy()->during_initial_mark_pause();
+
+  // Inner scope for scope based logging, timers, and stats collection
   {
-    // This call will decide whether this pause is an initial-mark
-    // pause. If it is, during_initial_mark_pause() will return true
-    // for the duration of this pause.
-    g1_policy()->decide_on_conc_mark_initiation();
-
-    // We do not allow initial-mark to be piggy-backed on a mixed GC.
-    assert(!g1_policy()->during_initial_mark_pause() ||
-            g1_policy()->gcs_are_young(), "sanity");
-
-    // We also do not allow mixed GCs during marking.
-    assert(!mark_in_progress() || g1_policy()->gcs_are_young(), "sanity");
-
     char verbose_str[128];
     sprintf(verbose_str, "GC pause ");
     if (g1_policy()->gcs_are_young()) {
@@ -3613,7 +3636,6 @@
         Universe::verify(/* allow dirty */ false,
                          /* silent      */ false,
                          /* option      */ VerifyOption_G1UsePrevMarking);
-
       }
 
       COMPILER2_PRESENT(DerivedPointerTable::clear());
@@ -3656,6 +3678,18 @@
         g1_policy()->record_collection_pause_start(start_time_sec,
                                                    start_used_bytes);
 
+        double scan_wait_start = os::elapsedTime();
+        // We have to wait until the CM threads finish scanning the
+        // root regions as it's the only way to ensure that all the
+        // objects on them have been correctly scanned before we start
+        // moving them during the GC.
+        bool waited = _cm->root_regions()->wait_until_scan_finished();
+        if (waited) {
+          double scan_wait_end = os::elapsedTime();
+          double wait_time_ms = (scan_wait_end - scan_wait_start) * 1000.0;
+          g1_policy()->record_root_region_scan_wait_time(wait_time_ms);
+        }
+
 #if YOUNG_LIST_VERBOSE
         gclog_or_tty->print_cr("\nAfter recording pause start.\nYoung_list:");
         _young_list->print();
@@ -3765,16 +3799,14 @@
         }
 
         if (g1_policy()->during_initial_mark_pause()) {
+          // We have to do this before we notify the CM threads that
+          // they can start working to make sure that all the
+          // appropriate initialization is done on the CM object.
           concurrent_mark()->checkpointRootsInitialPost();
           set_marking_started();
-          // CAUTION: after the doConcurrentMark() call below,
-          // the concurrent marking thread(s) could be running
-          // concurrently with us. Make sure that anything after
-          // this point does not assume that we are the only GC thread
-          // running. Note: of course, the actual marking work will
-          // not start until the safepoint itself is released in
-          // ConcurrentGCThread::safepoint_desynchronize().
-          doConcurrentMark();
+          // Note that we don't actually trigger the CM thread at
+          // this point. We do that later when we're sure that
+          // the current thread has completed its logging output.
         }
 
         allocate_dummy_regions();
@@ -3884,15 +3916,22 @@
     }
   }
 
+  // The closing of the inner scope, immediately above, will complete
+  // the PrintGC logging output. The record_collection_pause_end() call
+  // above will complete the logging output of PrintGCDetails.
+  //
+  // It is not yet safe, however, to tell the concurrent mark to
+  // start as we have some optional output below. We don't want the
+  // output from the concurrent mark thread interfering with this
+  // logging output either.
+
   _hrs.verify_optional();
   verify_region_sets_optional();
 
   TASKQUEUE_STATS_ONLY(if (ParallelGCVerbose) print_taskqueue_stats());
   TASKQUEUE_STATS_ONLY(reset_taskqueue_stats());
 
-  if (PrintHeapAtGC) {
-    Universe::print_heap_after_gc();
-  }
+  print_heap_after_gc();
   g1mm()->update_sizes();
 
   if (G1SummarizeRSetStats &&
@@ -3901,6 +3940,21 @@
     g1_rem_set()->print_summary_info();
   }
 
+  // It should now be safe to tell the concurrent mark thread to start
+  // without its logging output interfering with the logging output
+  // that came from the pause.
+
+  if (should_start_conc_mark) {
+    // CAUTION: after the doConcurrentMark() call below,
+    // the concurrent marking thread(s) could be running
+    // concurrently with us. Make sure that anything after
+    // this point does not assume that we are the only GC thread
+    // running. Note: of course, the actual marking work will
+    // not start until the safepoint itself is released in
+    // ConcurrentGCThread::safepoint_desynchronize().
+    doConcurrentMark();
+  }
+
   return true;
 }
 
@@ -4162,7 +4216,7 @@
 G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) :
   ParGCAllocBuffer(gclab_word_size), _retired(false) { }
 
-G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num)
+G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num)
   : _g1h(g1h),
     _refs(g1h->task_queue(queue_num)),
     _dcq(&g1h->dirty_card_queue_set()),
@@ -4283,6 +4337,7 @@
                                      G1ParScanThreadState* par_scan_state) :
   _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()),
   _par_scan_state(par_scan_state),
+  _worker_id(par_scan_state->queue_num()),
   _during_initial_mark(_g1->g1_policy()->during_initial_mark_pause()),
   _mark_in_progress(_g1->mark_in_progress()) { }
 
@@ -4294,7 +4349,7 @@
 #endif // ASSERT
 
   // We know that the object is not moving so it's safe to read its size.
-  _cm->grayRoot(obj, (size_t) obj->size());
+  _cm->grayRoot(obj, (size_t) obj->size(), _worker_id);
 }
 
 void G1ParCopyHelper::mark_forwarded_object(oop from_obj, oop to_obj) {
@@ -4316,7 +4371,7 @@
   // worker so we cannot trust that its to-space image is
   // well-formed. So we have to read its size from its from-space
   // image which we know should not be changing.
-  _cm->grayRoot(to_obj, (size_t) from_obj->size());
+  _cm->grayRoot(to_obj, (size_t) from_obj->size(), _worker_id);
 }
 
 oop G1ParCopyHelper::copy_to_survivor_space(oop old) {
@@ -4406,6 +4461,8 @@
   assert(barrier != G1BarrierRS || obj != NULL,
          "Precondition: G1BarrierRS implies obj is non-NULL");
 
+  assert(_worker_id == _par_scan_state->queue_num(), "sanity");
+
   // here the null check is implicit in the cset_fast_test() test
   if (_g1->in_cset_fast_test(obj)) {
     oop forwardee;
@@ -4424,7 +4481,7 @@
 
     // When scanning the RS, we only care about objs in CS.
     if (barrier == G1BarrierRS) {
-      _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
+      _par_scan_state->update_rs(_from, p, _worker_id);
     }
   } else {
     // The object is not in collection set. If we're a root scanning
@@ -4436,7 +4493,7 @@
   }
 
   if (barrier == G1BarrierEvac && obj != NULL) {
-    _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
+    _par_scan_state->update_rs(_from, p, _worker_id);
   }
 
   if (do_gen_barrier && obj != NULL) {
@@ -5666,16 +5723,6 @@
 
       // And the region is empty.
       assert(!used_mr.is_empty(), "Should not have empty regions in a CS.");
-
-      // If marking is in progress then clear any objects marked in
-      // the current region. Note mark_in_progress() returns false,
-      // even during an initial mark pause, until the set_marking_started()
-      // call which takes place later in the pause.
-      if (mark_in_progress()) {
-        assert(!g1_policy()->during_initial_mark_pause(), "sanity");
-        _cm->nextMarkBitMap()->clearRange(used_mr);
-      }
-
       free_region(cur, &pre_used, &local_free_list, false /* par */);
     } else {
       cur->uninstall_surv_rate_group();
@@ -5742,8 +5789,9 @@
 }
 
 void G1CollectedHeap::reset_free_regions_coming() {
+  assert(free_regions_coming(), "pre-condition");
+
   {
-    assert(free_regions_coming(), "pre-condition");
     MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
     _free_regions_coming = false;
     SecondaryFreeList_lock->notify_all();
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -355,6 +355,7 @@
   // explicitly started if:
   // (a) cause == _gc_locker and +GCLockerInvokesConcurrent, or
   // (b) cause == _java_lang_system_gc and +ExplicitGCInvokesConcurrent.
+  // (c) cause == _g1_humongous_allocation
   bool should_do_concurrent_full_gc(GCCause::Cause cause);
 
   // Keeps track of how many "full collections" (i.e., Full GCs or
@@ -1172,6 +1173,10 @@
     _old_set.remove(hr);
   }
 
+  size_t non_young_capacity_bytes() {
+    return _old_set.total_capacity_bytes() + _humongous_set.total_capacity_bytes();
+  }
+
   void set_free_regions_coming();
   void reset_free_regions_coming();
   bool free_regions_coming() { return _free_regions_coming; }
@@ -1904,7 +1909,7 @@
   G1ParScanPartialArrayClosure* _partial_scan_cl;
 
   int _hash_seed;
-  int _queue_num;
+  uint _queue_num;
 
   size_t _term_attempts;
 
@@ -1948,7 +1953,7 @@
   }
 
 public:
-  G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num);
+  G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num);
 
   ~G1ParScanThreadState() {
     FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base);
@@ -2040,7 +2045,7 @@
   }
 
   int* hash_seed() { return &_hash_seed; }
-  int  queue_num() { return _queue_num; }
+  uint queue_num() { return _queue_num; }
 
   size_t term_attempts() const  { return _term_attempts; }
   void note_term_attempt() { _term_attempts++; }
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -141,6 +141,7 @@
 
   _cur_clear_ct_time_ms(0.0),
   _mark_closure_time_ms(0.0),
+  _root_region_scan_wait_time_ms(0.0),
 
   _cur_ref_proc_time_ms(0.0),
   _cur_ref_enq_time_ms(0.0),
@@ -213,8 +214,6 @@
   _survivor_bytes_before_gc(0),
   _capacity_before_gc(0),
 
-  _prev_collection_pause_used_at_end_bytes(0),
-
   _eden_cset_region_length(0),
   _survivor_cset_region_length(0),
   _old_cset_region_length(0),
@@ -905,19 +904,10 @@
     gclog_or_tty->print(" (%s)", gcs_are_young() ? "young" : "mixed");
   }
 
-  if (!during_initial_mark_pause()) {
-    // We only need to do this here as the policy will only be applied
-    // to the GC we're about to start. so, no point is calculating this
-    // every time we calculate / recalculate the target young length.
-    update_survivors_policy();
-  } else {
-    // The marking phase has a "we only copy implicitly live
-    // objects during marking" invariant. The easiest way to ensure it
-    // holds is not to allocate any survivor regions and tenure all
-    // objects. In the future we might change this and handle survivor
-    // regions specially during marking.
-    tenure_all_objects();
-  }
+  // We only need to do this here as the policy will only be applied
+  // to the GC we're about to start. so, no point is calculating this
+  // every time we calculate / recalculate the target young length.
+  update_survivors_policy();
 
   assert(_g1->used() == _g1->recalculate_used(),
          err_msg("sanity, used: "SIZE_FORMAT" recalculate_used: "SIZE_FORMAT,
@@ -969,6 +959,9 @@
   // This is initialized to zero here and is set during
   // the evacuation pause if marking is in progress.
   _cur_satb_drain_time_ms = 0.0;
+  // This is initialized to zero here and is set during the evacuation
+  // pause if we actually waited for the root region scanning to finish.
+  _root_region_scan_wait_time_ms = 0.0;
 
   _last_gc_was_young = false;
 
@@ -1140,6 +1133,50 @@
   return ret;
 }
 
+bool G1CollectorPolicy::need_to_start_conc_mark(const char* source, size_t alloc_word_size) {
+  if (_g1->concurrent_mark()->cmThread()->during_cycle()) {
+    return false;
+  }
+
+  size_t marking_initiating_used_threshold =
+    (_g1->capacity() / 100) * InitiatingHeapOccupancyPercent;
+  size_t cur_used_bytes = _g1->non_young_capacity_bytes();
+  size_t alloc_byte_size = alloc_word_size * HeapWordSize;
+
+  if ((cur_used_bytes + alloc_byte_size) > marking_initiating_used_threshold) {
+    if (gcs_are_young()) {
+      ergo_verbose5(ErgoConcCycles,
+        "request concurrent cycle initiation",
+        ergo_format_reason("occupancy higher than threshold")
+        ergo_format_byte("occupancy")
+        ergo_format_byte("allocation request")
+        ergo_format_byte_perc("threshold")
+        ergo_format_str("source"),
+        cur_used_bytes,
+        alloc_byte_size,
+        marking_initiating_used_threshold,
+        (double) InitiatingHeapOccupancyPercent,
+        source);
+      return true;
+    } else {
+      ergo_verbose5(ErgoConcCycles,
+        "do not request concurrent cycle initiation",
+        ergo_format_reason("still doing mixed collections")
+        ergo_format_byte("occupancy")
+        ergo_format_byte("allocation request")
+        ergo_format_byte_perc("threshold")
+        ergo_format_str("source"),
+        cur_used_bytes,
+        alloc_byte_size,
+        marking_initiating_used_threshold,
+        (double) InitiatingHeapOccupancyPercent,
+        source);
+    }
+  }
+
+  return false;
+}
+
 // Anything below that is considered to be zero
 #define MIN_TIMER_GRANULARITY 0.0000001
 
@@ -1166,44 +1203,16 @@
 #endif // PRODUCT
 
   last_pause_included_initial_mark = during_initial_mark_pause();
-  if (last_pause_included_initial_mark)
+  if (last_pause_included_initial_mark) {
     record_concurrent_mark_init_end(0.0);
-
-  size_t marking_initiating_used_threshold =
-    (_g1->capacity() / 100) * InitiatingHeapOccupancyPercent;
-
-  if (!_g1->mark_in_progress() && !_last_young_gc) {
-    assert(!last_pause_included_initial_mark, "invariant");
-    if (cur_used_bytes > marking_initiating_used_threshold) {
-      if (cur_used_bytes > _prev_collection_pause_used_at_end_bytes) {
-        assert(!during_initial_mark_pause(), "we should not see this here");
-
-        ergo_verbose3(ErgoConcCycles,
-                      "request concurrent cycle initiation",
-                      ergo_format_reason("occupancy higher than threshold")
-                      ergo_format_byte("occupancy")
-                      ergo_format_byte_perc("threshold"),
-                      cur_used_bytes,
-                      marking_initiating_used_threshold,
-                      (double) InitiatingHeapOccupancyPercent);
-
-        // Note: this might have already been set, if during the last
-        // pause we decided to start a cycle but at the beginning of
-        // this pause we decided to postpone it. That's OK.
-        set_initiate_conc_mark_if_possible();
-      } else {
-        ergo_verbose2(ErgoConcCycles,
-                  "do not request concurrent cycle initiation",
-                  ergo_format_reason("occupancy lower than previous occupancy")
-                  ergo_format_byte("occupancy")
-                  ergo_format_byte("previous occupancy"),
-                  cur_used_bytes,
-                  _prev_collection_pause_used_at_end_bytes);
-      }
-    }
   }
 
-  _prev_collection_pause_used_at_end_bytes = cur_used_bytes;
+  if (!_last_young_gc && need_to_start_conc_mark("end of GC")) {
+    // Note: this might have already been set, if during the last
+    // pause we decided to start a cycle but at the beginning of
+    // this pause we decided to postpone it. That's OK.
+    set_initiate_conc_mark_if_possible();
+  }
 
   _mmu_tracker->add_pause(end_time_sec - elapsed_ms/1000.0,
                           end_time_sec, false);
@@ -1257,6 +1266,10 @@
   // is in progress.
   other_time_ms -= _cur_satb_drain_time_ms;
 
+  // Subtract the root region scanning wait time. It's initialized to
+  // zero at the start of the pause.
+  other_time_ms -= _root_region_scan_wait_time_ms;
+
   if (parallel) {
     other_time_ms -= _cur_collection_par_time_ms;
   } else {
@@ -1289,6 +1302,8 @@
     // each other. Therefore we unconditionally record the SATB drain
     // time - even if it's zero.
     body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
+    body_summary->record_root_region_scan_wait_time_ms(
+                                               _root_region_scan_wait_time_ms);
 
     body_summary->record_ext_root_scan_time_ms(ext_root_scan_time);
     body_summary->record_satb_filtering_time_ms(satb_filtering_time);
@@ -1385,6 +1400,9 @@
                            (last_pause_included_initial_mark) ? " (initial-mark)" : "",
                            elapsed_ms / 1000.0);
 
+    if (_root_region_scan_wait_time_ms > 0.0) {
+      print_stats(1, "Root Region Scan Waiting", _root_region_scan_wait_time_ms);
+    }
     if (parallel) {
       print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
       print_par_stats(2, "GC Worker Start", _par_last_gc_worker_start_times_ms);
@@ -1988,6 +2006,7 @@
   if (summary->get_total_seq()->num() > 0) {
     print_summary_sd(0, "Evacuation Pauses", summary->get_total_seq());
     if (body_summary != NULL) {
+      print_summary(1, "Root Region Scan Wait", body_summary->get_root_region_scan_wait_seq());
       if (parallel) {
         print_summary(1, "Parallel Time", body_summary->get_parallel_seq());
         print_summary(2, "Ext Root Scanning", body_summary->get_ext_root_scan_seq());
@@ -2029,15 +2048,17 @@
           // parallel
           NumberSeq* other_parts[] = {
             body_summary->get_satb_drain_seq(),
+            body_summary->get_root_region_scan_wait_seq(),
             body_summary->get_parallel_seq(),
             body_summary->get_clear_ct_seq()
           };
           calc_other_times_ms = NumberSeq(summary->get_total_seq(),
-                                                3, other_parts);
+                                          4, other_parts);
         } else {
           // serial
           NumberSeq* other_parts[] = {
             body_summary->get_satb_drain_seq(),
+            body_summary->get_root_region_scan_wait_seq(),
             body_summary->get_update_rs_seq(),
             body_summary->get_ext_root_scan_seq(),
             body_summary->get_satb_filtering_seq(),
@@ -2045,7 +2066,7 @@
             body_summary->get_obj_copy_seq()
           };
           calc_other_times_ms = NumberSeq(summary->get_total_seq(),
-                                                6, other_parts);
+                                          7, other_parts);
         }
         check_other_times(1,  summary->get_other_seq(), &calc_other_times_ms);
       }
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -65,6 +65,7 @@
 
 class MainBodySummary: public CHeapObj {
   define_num_seq(satb_drain) // optional
+  define_num_seq(root_region_scan_wait)
   define_num_seq(parallel) // parallel only
     define_num_seq(ext_root_scan)
     define_num_seq(satb_filtering)
@@ -177,7 +178,6 @@
   double _cur_collection_start_sec;
   size_t _cur_collection_pause_used_at_start_bytes;
   size_t _cur_collection_pause_used_regions_at_start;
-  size_t _prev_collection_pause_used_at_end_bytes;
   double _cur_collection_par_time_ms;
   double _cur_satb_drain_time_ms;
   double _cur_clear_ct_time_ms;
@@ -716,6 +716,7 @@
   double _mark_remark_start_sec;
   double _mark_cleanup_start_sec;
   double _mark_closure_time_ms;
+  double _root_region_scan_wait_time_ms;
 
   // Update the young list target length either by setting it to the
   // desired fixed value or by calculating it using G1's pause
@@ -800,6 +801,8 @@
 
   GenRemSet::Name  rem_set_name()     { return GenRemSet::CardTable; }
 
+  bool need_to_start_conc_mark(const char* source, size_t alloc_word_size = 0);
+
   // Update the heuristic info to record a collection pause of the given
   // start time, where the given number of bytes were used at the start.
   // This may involve changing the desired size of a collection set.
@@ -816,6 +819,10 @@
     _mark_closure_time_ms = mark_closure_time_ms;
   }
 
+  void record_root_region_scan_wait_time(double time_ms) {
+    _root_region_scan_wait_time_ms = time_ms;
+  }
+
   void record_concurrent_mark_remark_start();
   void record_concurrent_mark_remark_end();
 
@@ -1146,11 +1153,6 @@
     _survivor_surv_rate_group->stop_adding_regions();
   }
 
-  void tenure_all_objects() {
-    _max_survivor_regions = 0;
-    _tenuring_threshold = 0;
-  }
-
   void record_survivor_regions(size_t      regions,
                                HeapRegion* head,
                                HeapRegion* tail) {
--- a/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -70,16 +70,20 @@
   OopsInHeapRegionClosure *_update_rset_cl;
   bool _during_initial_mark;
   bool _during_conc_mark;
+  uint _worker_id;
+
 public:
   RemoveSelfForwardPtrObjClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
                                  HeapRegion* hr,
                                  OopsInHeapRegionClosure* update_rset_cl,
                                  bool during_initial_mark,
-                                 bool during_conc_mark) :
+                                 bool during_conc_mark,
+                                 uint worker_id) :
     _g1(g1), _cm(cm), _hr(hr), _marked_bytes(0),
     _update_rset_cl(update_rset_cl),
     _during_initial_mark(during_initial_mark),
-    _during_conc_mark(during_conc_mark) { }
+    _during_conc_mark(during_conc_mark),
+    _worker_id(worker_id) { }
 
   size_t marked_bytes() { return _marked_bytes; }
 
@@ -123,7 +127,7 @@
         // explicitly and all objects in the CSet are considered
         // (implicitly) live. So, we won't mark them explicitly and
         // we'll leave them over NTAMS.
-        _cm->markNext(obj);
+        _cm->grayRoot(obj, obj_size, _worker_id, _hr);
       }
       _marked_bytes += (obj_size * HeapWordSize);
       obj->set_mark(markOopDesc::prototype());
@@ -155,12 +159,14 @@
   G1CollectedHeap* _g1h;
   ConcurrentMark* _cm;
   OopsInHeapRegionClosure *_update_rset_cl;
+  uint _worker_id;
 
 public:
   RemoveSelfForwardPtrHRClosure(G1CollectedHeap* g1h,
-                                OopsInHeapRegionClosure* update_rset_cl) :
+                                OopsInHeapRegionClosure* update_rset_cl,
+                                uint worker_id) :
     _g1h(g1h), _update_rset_cl(update_rset_cl),
-    _cm(_g1h->concurrent_mark()) { }
+    _worker_id(worker_id), _cm(_g1h->concurrent_mark()) { }
 
   bool doHeapRegion(HeapRegion *hr) {
     bool during_initial_mark = _g1h->g1_policy()->during_initial_mark_pause();
@@ -173,7 +179,8 @@
       if (hr->evacuation_failed()) {
         RemoveSelfForwardPtrObjClosure rspc(_g1h, _cm, hr, _update_rset_cl,
                                             during_initial_mark,
-                                            during_conc_mark);
+                                            during_conc_mark,
+                                            _worker_id);
 
         MemRegion mr(hr->bottom(), hr->end());
         // We'll recreate the prev marking info so we'll first clear
@@ -226,7 +233,7 @@
       update_rset_cl = &immediate_update;
     }
 
-    RemoveSelfForwardPtrHRClosure rsfp_cl(_g1h, update_rset_cl);
+    RemoveSelfForwardPtrHRClosure rsfp_cl(_g1h, update_rset_cl, worker_id);
 
     HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_id);
     _g1h->collection_set_iterate_from(hr, &rsfp_cl);
--- a/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -126,7 +126,6 @@
 void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading,
                                     bool clear_all_softrefs) {
   // Recursively traverse all live objects and mark them
-  EventMark m("1 mark object");
   TraceTime tm("phase 1", PrintGC && Verbose, true, gclog_or_tty);
   GenMarkSweep::trace(" 1");
 
@@ -292,7 +291,6 @@
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   Generation* pg = g1h->perm_gen();
 
-  EventMark m("2 compute new addresses");
   TraceTime tm("phase 2", PrintGC && Verbose, true, gclog_or_tty);
   GenMarkSweep::trace("2");
 
@@ -337,7 +335,6 @@
   Generation* pg = g1h->perm_gen();
 
   // Adjust the pointers to reflect the new locations
-  EventMark m("3 adjust pointers");
   TraceTime tm("phase 3", PrintGC && Verbose, true, gclog_or_tty);
   GenMarkSweep::trace("3");
 
@@ -402,7 +399,6 @@
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   Generation* pg = g1h->perm_gen();
 
-  EventMark m("4 compact heap");
   TraceTime tm("phase 4", PrintGC && Verbose, true, gclog_or_tty);
   GenMarkSweep::trace("4");
 
--- a/src/share/vm/gc_implementation/g1/g1MonitoringSupport.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1MonitoringSupport.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -89,16 +89,15 @@
 //
 // * Min Capacity
 //
-//    We set this to 0 for all spaces. We could consider setting the old
-//    min capacity to the min capacity of the heap (see 7078465).
+//    We set this to 0 for all spaces.
 //
 // * Max Capacity
 //
 //    For jstat, we set the max capacity of all spaces to heap_capacity,
-//    given that we don't always have a reasonably upper bound on how big
-//    each space can grow. For the memory pools, we actually make the max
-//    capacity undefined. We could consider setting the old max capacity
-//    to the max capacity of the heap (see 7078465).
+//    given that we don't always have a reasonable upper bound on how big
+//    each space can grow. For the memory pools, we make the max
+//    capacity undefined with the exception of the old memory pool for
+//    which we make the max capacity same as the max heap capacity.
 //
 // If we had more accurate occupancy / capacity information per
 // region set the above calculations would be greatly simplified and
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -51,6 +51,7 @@
   G1RemSet* _g1_rem;
   ConcurrentMark* _cm;
   G1ParScanThreadState* _par_scan_state;
+  uint _worker_id;
   bool _during_initial_mark;
   bool _mark_in_progress;
 public:
@@ -219,6 +220,7 @@
 
 // Closure for iterating over object fields during concurrent marking
 class G1CMOopClosure : public OopClosure {
+private:
   G1CollectedHeap*   _g1h;
   ConcurrentMark*    _cm;
   CMTask*            _task;
@@ -229,4 +231,92 @@
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
 };
 
+// Closure to scan the root regions during concurrent marking
+class G1RootRegionScanClosure : public OopClosure {
+private:
+  G1CollectedHeap* _g1h;
+  ConcurrentMark*  _cm;
+  uint _worker_id;
+public:
+  G1RootRegionScanClosure(G1CollectedHeap* g1h, ConcurrentMark* cm,
+                          uint worker_id) :
+    _g1h(g1h), _cm(cm), _worker_id(worker_id) { }
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(      oop* p) { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+};
+
+// Closure that applies the given two closures in sequence.
+// Used by the RSet refinement code (when updating RSets
+// during an evacuation pause) to record cards containing
+// pointers into the collection set.
+
+class G1Mux2Closure : public OopClosure {
+  OopClosure* _c1;
+  OopClosure* _c2;
+public:
+  G1Mux2Closure(OopClosure *c1, OopClosure *c2);
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(oop* p)        { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
+};
+
+// A closure that records whether it has actually been applied
+// to a reference; triggered() returns true once it has been.
+
+class G1TriggerClosure : public OopClosure {
+  bool _triggered;
+public:
+  G1TriggerClosure();
+  bool triggered() const { return _triggered; }
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(oop* p)        { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
+};
+
+// A closure which uses a triggering closure to determine
+// whether to apply an oop closure.
+
+class G1InvokeIfNotTriggeredClosure: public OopClosure {
+  G1TriggerClosure* _trigger_cl;
+  OopClosure* _oop_cl;
+public:
+  G1InvokeIfNotTriggeredClosure(G1TriggerClosure* t, OopClosure* oc);
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(oop* p)        { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
+};
+
+class G1UpdateRSOrPushRefOopClosure: public OopClosure {
+  G1CollectedHeap* _g1;
+  G1RemSet* _g1_rem_set;
+  HeapRegion* _from;
+  OopsInHeapRegionClosure* _push_ref_cl;
+  bool _record_refs_into_cset;
+  int _worker_i;
+
+public:
+  G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
+                                G1RemSet* rs,
+                                OopsInHeapRegionClosure* push_ref_cl,
+                                bool record_refs_into_cset,
+                                int worker_i = 0);
+
+  void set_from(HeapRegion* from) {
+    assert(from != NULL, "from region must be non-NULL");
+    _from = from;
+  }
+
+  bool self_forwarded(oop obj) {
+    bool result = (obj->is_forwarded() && (obj->forwardee()== obj));
+    return result;
+  }
+
+  bool apply_to_weak_ref_discovered_field() { return true; }
+
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+  virtual void do_oop(oop* p)       { do_oop_nv(p); }
+};
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_HPP
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,7 +39,8 @@
 // perf-critical inner loop.
 #define FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT 0
 
-template <class T> inline void FilterIntoCSClosure::do_oop_nv(T* p) {
+template <class T>
+inline void FilterIntoCSClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
   if (!oopDesc::is_null(heap_oop) &&
       _g1->obj_in_cs(oopDesc::decode_heap_oop_not_null(heap_oop))) {
@@ -53,7 +54,8 @@
 
 #define FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT 0
 
-template <class T> inline void FilterOutOfRegionClosure::do_oop_nv(T* p) {
+template <class T>
+inline void FilterOutOfRegionClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
   if (!oopDesc::is_null(heap_oop)) {
     HeapWord* obj_hw = (HeapWord*)oopDesc::decode_heap_oop_not_null(heap_oop);
@@ -67,7 +69,8 @@
 }
 
 // This closure is applied to the fields of the objects that have just been copied.
-template <class T> inline void G1ParScanClosure::do_oop_nv(T* p) {
+template <class T>
+inline void G1ParScanClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
 
   if (!oopDesc::is_null(heap_oop)) {
@@ -96,7 +99,8 @@
   }
 }
 
-template <class T> inline void G1ParPushHeapRSClosure::do_oop_nv(T* p) {
+template <class T>
+inline void G1ParPushHeapRSClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
 
   if (!oopDesc::is_null(heap_oop)) {
@@ -111,7 +115,8 @@
   }
 }
 
-template <class T> inline void G1CMOopClosure::do_oop_nv(T* p) {
+template <class T>
+inline void G1CMOopClosure::do_oop_nv(T* p) {
   assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
   assert(!_g1h->is_on_master_free_list(
                     _g1h->heap_region_containing((HeapWord*) p)), "invariant");
@@ -125,4 +130,97 @@
   _task->deal_with_reference(obj);
 }
 
+template <class T>
+inline void G1RootRegionScanClosure::do_oop_nv(T* p) {
+  T heap_oop = oopDesc::load_heap_oop(p);
+  if (!oopDesc::is_null(heap_oop)) {
+    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+    HeapRegion* hr = _g1h->heap_region_containing((HeapWord*) obj);
+    if (hr != NULL) {
+      _cm->grayRoot(obj, obj->size(), _worker_id, hr);
+    }
+  }
+}
+
+template <class T>
+inline void G1Mux2Closure::do_oop_nv(T* p) {
+  // Apply first closure; then apply the second.
+  _c1->do_oop(p);
+  _c2->do_oop(p);
+}
+
+template <class T>
+inline void G1TriggerClosure::do_oop_nv(T* p) {
+  // Record that this closure was actually applied (triggered).
+  _triggered = true;
+}
+
+template <class T>
+inline void G1InvokeIfNotTriggeredClosure::do_oop_nv(T* p) {
+  if (!_trigger_cl->triggered()) {
+    _oop_cl->do_oop(p);
+  }
+}
+
+template <class T>
+inline void G1UpdateRSOrPushRefOopClosure::do_oop_nv(T* p) {
+  oop obj = oopDesc::load_decode_heap_oop(p);
+#ifdef ASSERT
+  // can't do because of races
+  // assert(obj == NULL || obj->is_oop(), "expected an oop");
+
+  // Do the safe subset of is_oop
+  if (obj != NULL) {
+#ifdef CHECK_UNHANDLED_OOPS
+    oopDesc* o = obj.obj();
+#else
+    oopDesc* o = obj;
+#endif // CHECK_UNHANDLED_OOPS
+    assert((intptr_t)o % MinObjAlignmentInBytes == 0, "not oop aligned");
+    assert(Universe::heap()->is_in_reserved(obj), "must be in heap");
+  }
+#endif // ASSERT
+
+  assert(_from != NULL, "from region must be non-NULL");
+
+  HeapRegion* to = _g1->heap_region_containing(obj);
+  if (to != NULL && _from != to) {
+    // The _record_refs_into_cset flag is true during the RSet
+    // updating part of an evacuation pause. It is false at all
+    // other times:
+    //  * rebuilding the remembered sets after a full GC
+    //  * during concurrent refinement.
+    //  * updating the remembered sets of regions in the collection
+    //    set in the event of an evacuation failure (when deferred
+    //    updates are enabled).
+
+    if (_record_refs_into_cset && to->in_collection_set()) {
+      // We are recording references that point into the collection
+      // set and this particular reference does exactly that...
+      // If the referenced object has already been forwarded
+      // to itself, we are handling an evacuation failure and
+      // we have already visited/tried to copy this object, so
+      // there is no need to retry.
+      if (!self_forwarded(obj)) {
+        assert(_push_ref_cl != NULL, "should not be null");
+        // Push the reference in the refs queue of the G1ParScanThreadState
+        // instance for this worker thread.
+        _push_ref_cl->do_oop(p);
+      }
+
+      // Deferred updates to the CSet are either discarded (in the normal case),
+      // or processed (if an evacuation failure occurs) at the end
+      // of the collection.
+      // See G1RemSet::cleanup_after_oops_into_collection_set_do().
+    } else {
+      // We either don't care about pushing references that point into the
+      // collection set (i.e. we're not in an evacuation pause) _or_
+      // the reference doesn't point into the collection set. Either way
+      // we add the reference directly to the RSet of the region containing
+      // the referenced object.
+      _g1_rem_set->par_write_ref(_from, p, _worker_i);
+    }
+  }
+}
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_INLINE_HPP
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -569,40 +569,26 @@
 
 static IntHistogram out_of_histo(50, 50);
 
-class TriggerClosure : public OopClosure {
-  bool _trigger;
-public:
-  TriggerClosure() : _trigger(false) { }
-  bool value() const { return _trigger; }
-  template <class T> void do_oop_nv(T* p) { _trigger = true; }
-  virtual void do_oop(oop* p)        { do_oop_nv(p); }
-  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
-};
 
-class InvokeIfNotTriggeredClosure: public OopClosure {
-  TriggerClosure* _t;
-  OopClosure* _oc;
-public:
-  InvokeIfNotTriggeredClosure(TriggerClosure* t, OopClosure* oc):
-    _t(t), _oc(oc) { }
-  template <class T> void do_oop_nv(T* p) {
-    if (!_t->value()) _oc->do_oop(p);
-  }
-  virtual void do_oop(oop* p)        { do_oop_nv(p); }
-  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
-};
+G1TriggerClosure::G1TriggerClosure() :
+  _triggered(false) { }
 
-class Mux2Closure : public OopClosure {
-  OopClosure* _c1;
-  OopClosure* _c2;
-public:
-  Mux2Closure(OopClosure *c1, OopClosure *c2) : _c1(c1), _c2(c2) { }
-  template <class T> void do_oop_nv(T* p) {
-    _c1->do_oop(p); _c2->do_oop(p);
-  }
-  virtual void do_oop(oop* p)        { do_oop_nv(p); }
-  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
-};
+G1InvokeIfNotTriggeredClosure::G1InvokeIfNotTriggeredClosure(G1TriggerClosure* t_cl,
+                                                             OopClosure* oop_cl)  :
+  _trigger_cl(t_cl), _oop_cl(oop_cl) { }
+
+G1Mux2Closure::G1Mux2Closure(OopClosure *c1, OopClosure *c2) :
+  _c1(c1), _c2(c2) { }
+
+G1UpdateRSOrPushRefOopClosure::
+G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
+                              G1RemSet* rs,
+                              OopsInHeapRegionClosure* push_ref_cl,
+                              bool record_refs_into_cset,
+                              int worker_i) :
+  _g1(g1h), _g1_rem_set(rs), _from(NULL),
+  _record_refs_into_cset(record_refs_into_cset),
+  _push_ref_cl(push_ref_cl), _worker_i(worker_i) { }
 
 bool G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i,
                                                    bool check_for_refs_into_cset) {
@@ -629,17 +615,17 @@
     assert((size_t)worker_i < n_workers(), "index of worker larger than _cset_rs_update_cl[].length");
     oops_in_heap_closure = _cset_rs_update_cl[worker_i];
   }
-  UpdateRSOrPushRefOopClosure update_rs_oop_cl(_g1,
-                                               _g1->g1_rem_set(),
-                                               oops_in_heap_closure,
-                                               check_for_refs_into_cset,
-                                               worker_i);
+  G1UpdateRSOrPushRefOopClosure update_rs_oop_cl(_g1,
+                                                 _g1->g1_rem_set(),
+                                                 oops_in_heap_closure,
+                                                 check_for_refs_into_cset,
+                                                 worker_i);
   update_rs_oop_cl.set_from(r);
 
-  TriggerClosure trigger_cl;
+  G1TriggerClosure trigger_cl;
   FilterIntoCSClosure into_cs_cl(NULL, _g1, &trigger_cl);
-  InvokeIfNotTriggeredClosure invoke_cl(&trigger_cl, &into_cs_cl);
-  Mux2Closure mux(&invoke_cl, &update_rs_oop_cl);
+  G1InvokeIfNotTriggeredClosure invoke_cl(&trigger_cl, &into_cs_cl);
+  G1Mux2Closure mux(&invoke_cl, &update_rs_oop_cl);
 
   FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r,
                         (check_for_refs_into_cset ?
@@ -688,7 +674,7 @@
     _conc_refine_cards++;
   }
 
-  return trigger_cl.value();
+  return trigger_cl.triggered();
 }
 
 bool G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i,
--- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -191,44 +191,5 @@
   virtual void do_oop(      oop* p) { do_oop_work(p); }
 };
 
-class UpdateRSOrPushRefOopClosure: public OopClosure {
-  G1CollectedHeap* _g1;
-  G1RemSet* _g1_rem_set;
-  HeapRegion* _from;
-  OopsInHeapRegionClosure* _push_ref_cl;
-  bool _record_refs_into_cset;
-  int _worker_i;
-
-  template <class T> void do_oop_work(T* p);
-
-public:
-  UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
-                              G1RemSet* rs,
-                              OopsInHeapRegionClosure* push_ref_cl,
-                              bool record_refs_into_cset,
-                              int worker_i = 0) :
-    _g1(g1h),
-    _g1_rem_set(rs),
-    _from(NULL),
-    _record_refs_into_cset(record_refs_into_cset),
-    _push_ref_cl(push_ref_cl),
-    _worker_i(worker_i) { }
-
-  void set_from(HeapRegion* from) {
-    assert(from != NULL, "from region must be non-NULL");
-    _from = from;
-  }
-
-  bool self_forwarded(oop obj) {
-    bool result = (obj->is_forwarded() && (obj->forwardee()== obj));
-    return result;
-  }
-
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(oop* p)       { do_oop_work(p); }
-
-  bool apply_to_weak_ref_discovered_field() { return true; }
-};
-
 
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1REMSET_HPP
--- a/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -85,66 +85,4 @@
   }
 }
 
-template <class T>
-inline void UpdateRSOrPushRefOopClosure::do_oop_work(T* p) {
-  oop obj = oopDesc::load_decode_heap_oop(p);
-#ifdef ASSERT
-  // can't do because of races
-  // assert(obj == NULL || obj->is_oop(), "expected an oop");
-
-  // Do the safe subset of is_oop
-  if (obj != NULL) {
-#ifdef CHECK_UNHANDLED_OOPS
-    oopDesc* o = obj.obj();
-#else
-    oopDesc* o = obj;
-#endif // CHECK_UNHANDLED_OOPS
-    assert((intptr_t)o % MinObjAlignmentInBytes == 0, "not oop aligned");
-    assert(Universe::heap()->is_in_reserved(obj), "must be in heap");
-  }
-#endif // ASSERT
-
-  assert(_from != NULL, "from region must be non-NULL");
-
-  HeapRegion* to = _g1->heap_region_containing(obj);
-  if (to != NULL && _from != to) {
-    // The _record_refs_into_cset flag is true during the RSet
-    // updating part of an evacuation pause. It is false at all
-    // other times:
-    //  * rebuilding the rembered sets after a full GC
-    //  * during concurrent refinement.
-    //  * updating the remembered sets of regions in the collection
-    //    set in the event of an evacuation failure (when deferred
-    //    updates are enabled).
-
-    if (_record_refs_into_cset && to->in_collection_set()) {
-      // We are recording references that point into the collection
-      // set and this particular reference does exactly that...
-      // If the referenced object has already been forwarded
-      // to itself, we are handling an evacuation failure and
-      // we have already visited/tried to copy this object
-      // there is no need to retry.
-      if (!self_forwarded(obj)) {
-        assert(_push_ref_cl != NULL, "should not be null");
-        // Push the reference in the refs queue of the G1ParScanThreadState
-        // instance for this worker thread.
-        _push_ref_cl->do_oop(p);
-      }
-
-      // Deferred updates to the CSet are either discarded (in the normal case),
-      // or processed (if an evacuation failure occurs) at the end
-      // of the collection.
-      // See G1RemSet::cleanup_after_oops_into_collection_set_do().
-    } else {
-      // We either don't care about pushing references that point into the
-      // collection set (i.e. we're not during an evacuation pause) _or_
-      // the reference doesn't point into the collection set. Either way
-      // we add the reference directly to the RSet of the region containing
-      // the referenced object.
-      _g1_rem_set->par_write_ref(_from, p, _worker_i);
-    }
-  }
-}
-
-
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1REMSET_INLINE_HPP
--- a/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,12 +32,14 @@
 
 // Forward declarations.
 enum G1Barrier {
-  G1BarrierNone, G1BarrierRS, G1BarrierEvac
+  G1BarrierNone,
+  G1BarrierRS,
+  G1BarrierEvac
 };
 
-template<bool do_gen_barrier, G1Barrier barrier,
-         bool do_mark_object>
+template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_object>
 class G1ParCopyClosure;
+
 class G1ParScanClosure;
 class G1ParPushHeapRSClosure;
 
@@ -46,6 +48,13 @@
 class FilterIntoCSClosure;
 class FilterOutOfRegionClosure;
 class G1CMOopClosure;
+class G1RootRegionScanClosure;
+
+// Specialized oop closures from g1RemSet.cpp
+class G1Mux2Closure;
+class G1TriggerClosure;
+class G1InvokeIfNotTriggeredClosure;
+class G1UpdateRSOrPushRefOopClosure;
 
 #ifdef FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES
 #error "FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES already defined."
@@ -57,7 +66,12 @@
       f(G1ParPushHeapRSClosure,_nv)                     \
       f(FilterIntoCSClosure,_nv)                        \
       f(FilterOutOfRegionClosure,_nv)                   \
-      f(G1CMOopClosure,_nv)
+      f(G1CMOopClosure,_nv)                             \
+      f(G1RootRegionScanClosure,_nv)                    \
+      f(G1Mux2Closure,_nv)                              \
+      f(G1TriggerClosure,_nv)                           \
+      f(G1InvokeIfNotTriggeredClosure,_nv)              \
+      f(G1UpdateRSOrPushRefOopClosure,_nv)
 
 #ifdef FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES
 #error "FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES already defined."
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -659,7 +659,7 @@
   // If we're within a stop-world GC, then we might look at a card in a
   // GC alloc region that extends onto a GC LAB, which may not be
   // parseable.  Stop such at the "saved_mark" of the region.
-  if (G1CollectedHeap::heap()->is_gc_active()) {
+  if (g1h->is_gc_active()) {
     mr = mr.intersection(used_region_at_save_marks());
   } else {
     mr = mr.intersection(used_region());
@@ -688,53 +688,63 @@
     OrderAccess::storeload();
   }
 
+  // Cache the boundaries of the memory region in some const locals
+  HeapWord* const start = mr.start();
+  HeapWord* const end = mr.end();
+
   // We used to use "block_start_careful" here.  But we're actually happy
   // to update the BOT while we do this...
-  HeapWord* cur = block_start(mr.start());
-  assert(cur <= mr.start(), "Postcondition");
+  HeapWord* cur = block_start(start);
+  assert(cur <= start, "Postcondition");
 
-  while (cur <= mr.start()) {
-    if (oop(cur)->klass_or_null() == NULL) {
+  oop obj;
+
+  HeapWord* next = cur;
+  while (next <= start) {
+    cur = next;
+    obj = oop(cur);
+    if (obj->klass_or_null() == NULL) {
       // Ran into an unparseable point.
       return cur;
     }
     // Otherwise...
-    int sz = oop(cur)->size();
-    if (cur + sz > mr.start()) break;
-    // Otherwise, go on.
-    cur = cur + sz;
+    next = (cur + obj->size());
   }
-  oop obj;
-  obj = oop(cur);
-  // If we finish this loop...
-  assert(cur <= mr.start()
-         && obj->klass_or_null() != NULL
-         && cur + obj->size() > mr.start(),
+
+  // If we finish the above loop, we have a parseable object that
+  // begins on or before the start of the memory region, and ends
+  // inside or spans the entire region.
+
+  assert(obj == oop(cur), "sanity");
+  assert(cur <= start &&
+         obj->klass_or_null() != NULL &&
+         (cur + obj->size()) > start,
          "Loop postcondition");
+
   if (!g1h->is_obj_dead(obj)) {
     obj->oop_iterate(cl, mr);
   }
 
-  HeapWord* next;
-  while (cur < mr.end()) {
+  while (cur < end) {
     obj = oop(cur);
     if (obj->klass_or_null() == NULL) {
       // Ran into an unparseable point.
       return cur;
     };
+
     // Otherwise:
     next = (cur + obj->size());
+
     if (!g1h->is_obj_dead(obj)) {
-      if (next < mr.end()) {
+      if (next < end || !obj->is_objArray()) {
+        // This object either does not span the MemRegion
+        // boundary, or if it does it's not an array.
+        // Apply closure to whole object.
         obj->oop_iterate(cl);
       } else {
-        // this obj spans the boundary.  If it's an array, stop at the
-        // boundary.
-        if (obj->is_objArray()) {
-          obj->oop_iterate(cl, mr);
-        } else {
-          obj->oop_iterate(cl);
-        }
+        // This obj is an array that spans the boundary.
+        // Stop at the boundary.
+        obj->oop_iterate(cl, mr);
       }
     }
     cur = next;
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -374,7 +374,9 @@
     ParVerifyClaimValue        = 4,
     RebuildRSClaimValue        = 5,
     CompleteMarkCSetClaimValue = 6,
-    ParEvacFailureClaimValue   = 7
+    ParEvacFailureClaimValue   = 7,
+    AggregateCountClaimValue   = 8,
+    VerifyCountClaimValue      = 9
   };
 
   inline HeapWord* par_allocate_no_bot_updates(size_t word_size) {
--- a/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -72,10 +72,11 @@
 }
 
 inline void HeapRegion::note_start_of_copying(bool during_initial_mark) {
-  if (during_initial_mark) {
-    if (is_survivor()) {
-      assert(false, "should not allocate survivors during IM");
-    } else {
+  if (is_survivor()) {
+    // This is how we always allocate survivors.
+    assert(_next_top_at_mark_start == bottom(), "invariant");
+  } else {
+    if (during_initial_mark) {
       // During initial-mark we'll explicitly mark any objects on old
       // regions that are pointed to by roots. Given that explicit
       // marks only make sense under NTAMS it'd be nice if we could
@@ -84,11 +85,6 @@
       // NTAMS to the end of the region so all marks will be below
       // NTAMS. We'll set it to the actual top when we retire this region.
       _next_top_at_mark_start = end();
-    }
-  } else {
-    if (is_survivor()) {
-      // This is how we always allocate survivors.
-      assert(_next_top_at_mark_start == bottom(), "invariant");
     } else {
       // We could have re-used this old region as to-space over a
       // couple of GCs since the start of the concurrent marking
@@ -101,19 +97,15 @@
 }
 
 inline void HeapRegion::note_end_of_copying(bool during_initial_mark) {
-  if (during_initial_mark) {
-    if (is_survivor()) {
-      assert(false, "should not allocate survivors during IM");
-    } else {
+  if (is_survivor()) {
+    // This is how we always allocate survivors.
+    assert(_next_top_at_mark_start == bottom(), "invariant");
+  } else {
+    if (during_initial_mark) {
       // See the comment for note_start_of_copying() for the details
       // on this.
       assert(_next_top_at_mark_start == end(), "pre-condition");
       _next_top_at_mark_start = top();
-    }
-  } else {
-    if (is_survivor()) {
-      // This is how we always allocate survivors.
-      assert(_next_top_at_mark_start == bottom(), "invariant");
     } else {
       // See the comment for note_start_of_copying() for the details
       // on this.
--- a/src/share/vm/gc_implementation/g1/heapRegionSet.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/heapRegionSet.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -59,6 +59,7 @@
 class HeapRegionSetBase VALUE_OBJ_CLASS_SPEC {
   friend class hrs_ext_msg;
   friend class HRSPhaseSetter;
+  friend class VMStructs;
 
 protected:
   static size_t calculate_region_num(HeapRegion* hr);
--- a/src/share/vm/gc_implementation/g1/vmStructs_g1.hpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/vmStructs_g1.hpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -40,6 +40,8 @@
   nonstatic_field(G1CollectedHeap, _g1_committed,       MemRegion)            \
   nonstatic_field(G1CollectedHeap, _summary_bytes_used, size_t)               \
   nonstatic_field(G1CollectedHeap, _g1mm,               G1MonitoringSupport*) \
+  nonstatic_field(G1CollectedHeap, _old_set,            HeapRegionSetBase)    \
+  nonstatic_field(G1CollectedHeap, _humongous_set,      HeapRegionSetBase)    \
                                                                               \
   nonstatic_field(G1MonitoringSupport, _eden_committed,     size_t)           \
   nonstatic_field(G1MonitoringSupport, _eden_used,          size_t)           \
@@ -47,6 +49,10 @@
   nonstatic_field(G1MonitoringSupport, _survivor_used,      size_t)           \
   nonstatic_field(G1MonitoringSupport, _old_committed,      size_t)           \
   nonstatic_field(G1MonitoringSupport, _old_used,           size_t)           \
+                                                                              \
+  nonstatic_field(HeapRegionSetBase,   _length,             size_t)           \
+  nonstatic_field(HeapRegionSetBase,   _region_num,         size_t)           \
+  nonstatic_field(HeapRegionSetBase,   _total_used_bytes,   size_t)           \
 
 
 #define VM_TYPES_G1(declare_type, declare_toplevel_type)                      \
@@ -55,6 +61,7 @@
                                                                               \
   declare_type(HeapRegion, ContiguousSpace)                                   \
   declare_toplevel_type(HeapRegionSeq)                                        \
+  declare_toplevel_type(HeapRegionSetBase)                                    \
   declare_toplevel_type(G1MonitoringSupport)                                  \
                                                                               \
   declare_toplevel_type(G1CollectedHeap*)                                     \
--- a/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -74,8 +74,9 @@
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   assert(!_should_initiate_conc_mark ||
   ((_gc_cause == GCCause::_gc_locker && GCLockerInvokesConcurrent) ||
-   (_gc_cause == GCCause::_java_lang_system_gc && ExplicitGCInvokesConcurrent)),
-         "only a GC locker or a System.gc() induced GC should start a cycle");
+   (_gc_cause == GCCause::_java_lang_system_gc && ExplicitGCInvokesConcurrent) ||
+    _gc_cause == GCCause::_g1_humongous_allocation),
+         "only a GC locker, a System.gc() or a hum allocation induced GC should start a cycle");
 
   if (_word_size > 0) {
     // An allocation has been requested. So, try to do that first.
--- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -132,9 +132,7 @@
 
   AdaptiveSizePolicyOutput(size_policy, heap->total_collections());
 
-  if (PrintHeapAtGC) {
-    Universe::print_heap_before_gc();
-  }
+  heap->print_heap_before_gc();
 
   // Fill in TLABs
   heap->accumulate_statistics_all_tlabs();
@@ -377,9 +375,7 @@
 
   NOT_PRODUCT(ref_processor()->verify_no_references_recorded());
 
-  if (PrintHeapAtGC) {
-    Universe::print_heap_after_gc();
-  }
+  heap->print_heap_after_gc();
 
   heap->post_full_gc_dump();
 
@@ -504,7 +500,6 @@
 
 void PSMarkSweep::mark_sweep_phase1(bool clear_all_softrefs) {
   // Recursively traverse all live objects and mark them
-  EventMark m("1 mark object");
   TraceTime tm("phase 1", PrintGCDetails && Verbose, true, gclog_or_tty);
   trace(" 1");
 
@@ -563,7 +558,6 @@
 
 
 void PSMarkSweep::mark_sweep_phase2() {
-  EventMark m("2 compute new addresses");
   TraceTime tm("phase 2", PrintGCDetails && Verbose, true, gclog_or_tty);
   trace("2");
 
@@ -608,7 +602,6 @@
 
 void PSMarkSweep::mark_sweep_phase3() {
   // Adjust the pointers to reflect the new locations
-  EventMark m("3 adjust pointers");
   TraceTime tm("phase 3", PrintGCDetails && Verbose, true, gclog_or_tty);
   trace("3");
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -983,9 +983,7 @@
   // We need to track unique mark sweep invocations as well.
   _total_invocations++;
 
-  if (PrintHeapAtGC) {
-    Universe::print_heap_before_gc();
-  }
+  heap->print_heap_before_gc();
 
   // Fill in TLABs
   heap->accumulate_statistics_all_tlabs();
@@ -1838,7 +1836,6 @@
 void PSParallelCompact::summary_phase(ParCompactionManager* cm,
                                       bool maximum_compaction)
 {
-  EventMark m("2 summarize");
   TraceTime tm("summary phase", print_phases(), true, gclog_or_tty);
   // trace("2");
 
@@ -2237,9 +2234,7 @@
 
   collection_exit.update();
 
-  if (PrintHeapAtGC) {
-    Universe::print_heap_after_gc();
-  }
+  heap->print_heap_after_gc();
   if (PrintGCTaskTimeStamps) {
     gclog_or_tty->print_cr("VM-Thread " INT64_FORMAT " " INT64_FORMAT " "
                            INT64_FORMAT,
@@ -2352,7 +2347,6 @@
 void PSParallelCompact::marking_phase(ParCompactionManager* cm,
                                       bool maximum_heap_compaction) {
   // Recursively traverse all live objects and mark them
-  EventMark m("1 mark object");
   TraceTime tm("marking phase", print_phases(), true, gclog_or_tty);
 
   ParallelScavengeHeap* heap = gc_heap();
@@ -2438,7 +2432,6 @@
 
 void PSParallelCompact::adjust_roots() {
   // Adjust the pointers to reflect the new locations
-  EventMark m("3 adjust roots");
   TraceTime tm("adjust roots", print_phases(), true, gclog_or_tty);
 
   // General strong roots.
@@ -2469,7 +2462,6 @@
 }
 
 void PSParallelCompact::compact_perm(ParCompactionManager* cm) {
-  EventMark m("4 compact perm");
   TraceTime tm("compact perm gen", print_phases(), true, gclog_or_tty);
   // trace("4");
 
@@ -2647,7 +2639,6 @@
 }
 
 void PSParallelCompact::compact() {
-  EventMark m("5 compact");
   // trace("5");
   TraceTime tm("compaction phase", print_phases(), true, gclog_or_tty);
 
@@ -3502,4 +3493,3 @@
   _updated_int_array_klass_obj = (klassOop)
     summary_data().calc_new_pointer(Universe::intArrayKlassObj());
 }
-
--- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Thu Feb 09 10:16:26 2012 -0500
+++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Thu Feb 09 07:35:48 2012 -0800
@@ -1,5 +1,5 @@
 /*