changeset 3193:1647361df7ba jdk7u4-b05

Merge
author amurillo
date Fri, 16 Dec 2011 15:07:10 -0800
parents 7bb156f60fdc 61165f53f165
children de5af98a6fac
files .hgtags make/hotspot_version src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp
diffstat 141 files changed, 4729 insertions(+), 1813 deletions(-) [+]
line wrap: on
line diff
--- a/.hgtags	Thu Dec 15 12:57:57 2011 -0800
+++ b/.hgtags	Fri Dec 16 15:07:10 2011 -0800
@@ -218,3 +218,8 @@
 35aadd2e739b7231dd4c2a26d9fc24b286afc576 jdk7u4-b03
 278a1c1706f0e83c1c3401cde5bf4cf814adbc9d jdk7u4-b04
 35aadd2e739b7231dd4c2a26d9fc24b286afc576 jdk7u4-b02
+fde2a39ed7f39233b287fbc278f437aac06c275b jdk8-b15
+d1f29d4e0bc60e8bd7ae961f1306d8ab33290212 jdk8-b17
+d1f29d4e0bc60e8bd7ae961f1306d8ab33290212 jdk8-b16
+6de8c9ba5907e4c5ca05ac4b8d84a8e2cbd92399 hs23-b07
+a2fef924d8e6f37dac2a887315e3502876cc8e24 hs23-b08
--- a/make/bsd/makefiles/buildtree.make	Thu Dec 15 12:57:57 2011 -0800
+++ b/make/bsd/makefiles/buildtree.make	Fri Dec 16 15:07:10 2011 -0800
@@ -55,6 +55,9 @@
 # The makefiles are split this way so that "make foo" will run faster by not
 # having to read the dependency files for the vm.
 
+# needs to be set here since this Makefile doesn't include defs.make
+OS_VENDOR:=$(shell uname -s)
+
 include $(GAMMADIR)/make/scm.make
 include $(GAMMADIR)/make/altsrc.make
 
@@ -159,8 +162,15 @@
   endif
 endif
 
-# MACOSX FIXME: we should be able to run test_gamma (see MACOSX_PORT-214)
-ifdef ALWAYS_PASS_TEST_GAMMA
+ifeq ($(OS_VENDOR), Darwin)
+  # MACOSX FIXME: we should be able to run test_gamma (see MACOSX_PORT-214)
+  ifeq ($(ALWAYS_PASS_TEST_GAMMA),)
+    # ALWAYS_PASS_TEST_GAMMA wasn't set so we default to true on MacOS X
+    # until MACOSX_PORT-214 is fixed
+    ALWAYS_PASS_TEST_GAMMA=true
+  endif
+endif
+ifeq ($(ALWAYS_PASS_TEST_GAMMA), true)
   TEST_GAMMA_STATUS= echo 'exit 0';
 else
   TEST_GAMMA_STATUS=
--- a/make/bsd/makefiles/gcc.make	Thu Dec 15 12:57:57 2011 -0800
+++ b/make/bsd/makefiles/gcc.make	Fri Dec 16 15:07:10 2011 -0800
@@ -86,7 +86,6 @@
 ifneq "$(shell expr \( $(CC_VER_MAJOR) \> 3 \) \| \( \( $(CC_VER_MAJOR) = 3 \) \& \( $(CC_VER_MINOR) \>= 4 \) \))" "0"
 # Allow the user to turn off precompiled headers from the command line.
 ifneq ($(USE_PRECOMPILED_HEADER),0)
-USE_PRECOMPILED_HEADER=1
 PRECOMPILED_HEADER_DIR=.
 PRECOMPILED_HEADER_SRC=$(GAMMADIR)/src/share/vm/precompiled/precompiled.hpp
 PRECOMPILED_HEADER=$(PRECOMPILED_HEADER_DIR)/precompiled.hpp.gch
@@ -216,7 +215,7 @@
 endif
 
 # -DDONT_USE_PRECOMPILED_HEADER will exclude all includes in precompiled.hpp.
-ifneq ($(USE_PRECOMPILED_HEADER),1)
+ifeq ($(USE_PRECOMPILED_HEADER),0)
 CFLAGS += -DDONT_USE_PRECOMPILED_HEADER
 endif
 
--- a/make/bsd/makefiles/sa.make	Thu Dec 15 12:57:57 2011 -0800
+++ b/make/bsd/makefiles/sa.make	Fri Dec 16 15:07:10 2011 -0800
@@ -37,11 +37,24 @@
 TOPDIR    = $(shell echo `pwd`)
 GENERATED = $(TOPDIR)/../generated
 
-# tools.jar is needed by the JDI - SA binding
-ifeq ($(SA_APPLE_BOOT_JAVA),true)
-  SA_CLASSPATH = $(BOOT_JAVA_HOME)/bundle/Classes/classes.jar
+# SA-JDI depends on the standard JDI classes.
+# Default SA_CLASSPATH location:
+DEF_SA_CLASSPATH=$(BOOT_JAVA_HOME)/lib/tools.jar
+ifeq ($(ALT_SA_CLASSPATH),)
+  # no alternate specified; see if default exists
+  SA_CLASSPATH=$(shell test -f $(DEF_SA_CLASSPATH) && echo $(DEF_SA_CLASSPATH))
+  ifeq ($(SA_CLASSPATH),)
+    # the default doesn't exist
+    ifeq ($(OS_VENDOR), Darwin)
+      # A JDK from Apple doesn't have tools.jar; the JDI classes are
+      # are in the regular classes.jar file.
+      APPLE_JAR=$(BOOT_JAVA_HOME)/bundle/Classes/classes.jar
+      SA_CLASSPATH=$(shell test -f $(APPLE_JAR) && echo $(APPLE_JAR))
+    endif
+  endif
 else
-  SA_CLASSPATH = $(BOOT_JAVA_HOME)/lib/tools.jar
+  _JUNK_ := $(shell echo >&2 "INFO: ALT_SA_CLASSPATH=$(ALT_SA_CLASSPATH)")
+  SA_CLASSPATH=$(shell test -f $(ALT_SA_CLASSPATH) && echo $(ALT_SA_CLASSPATH))
 endif
 
 # TODO: if it's a modules image, check if SA module is installed.
@@ -72,8 +85,8 @@
 	  echo "ALT_BOOTDIR, BOOTDIR or JAVA_HOME needs to be defined to build SA"; \
 	  exit 1; \
 	fi
-	$(QUIETLY) if [ ! -f $(SA_CLASSPATH) -a ! -d $(MODULELIB_PATH) ] ; then \
-	  echo "Missing $(SA_CLASSPATH) file. Use 1.6.0 or later version of JDK";\
+	$(QUIETLY) if [ ! -f "$(SA_CLASSPATH)" -a ! -d $(MODULELIB_PATH) ] ; then \
+	  echo "Cannot find JDI classes. Use 1.6.0 or later version of JDK."; \
 	  echo ""; \
 	  exit 1; \
 	fi
--- a/make/bsd/makefiles/top.make	Thu Dec 15 12:57:57 2011 -0800
+++ b/make/bsd/makefiles/top.make	Fri Dec 16 15:07:10 2011 -0800
@@ -47,12 +47,10 @@
 Plat_File   = $(Platform_file)
 CDG         = cd $(GENERATED); 
 
-ifdef USE_PRECOMPILED_HEADER
-PrecompiledOption = -DUSE_PRECOMPILED_HEADER
-UpdatePCH         = $(MAKE) -f vm.make $(PRECOMPILED_HEADER) $(MFLAGS) 
+ifneq ($(USE_PRECOMPILED_HEADER),0)
+UpdatePCH = $(MAKE) -f vm.make $(PRECOMPILED_HEADER) $(MFLAGS) 
 else
-UpdatePCH         = \# precompiled header is not used
-PrecompiledOption = 
+UpdatePCH = \# precompiled header is not used
 endif
 
 Cached_plat = $(GENERATED)/platform.current
--- a/make/hotspot_version	Thu Dec 15 12:57:57 2011 -0800
+++ b/make/hotspot_version	Fri Dec 16 15:07:10 2011 -0800
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=23
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=06
+HS_BUILD_NUMBER=08
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/make/jprt.properties	Thu Dec 15 12:57:57 2011 -0800
+++ b/make/jprt.properties	Fri Dec 16 15:07:10 2011 -0800
@@ -248,7 +248,7 @@
 
 jprt.my.solaris.sparc.test.targets= \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jvm98, \
-    ${jprt.my.solaris.sparc}-{product|fastdebug}-c2-jvm98_tiered, \
+    ${jprt.my.solaris.sparc}-{product|fastdebug}-c2-jvm98_nontiered, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-scimark, \
     ${jprt.my.solaris.sparc}-product-{c1|c2}-runThese, \
     ${jprt.my.solaris.sparc}-fastdebug-c1-runThese_Xshare, \
@@ -267,7 +267,7 @@
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_G1, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_ParOldGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_default, \
-    ${jprt.my.solaris.sparc}-{product|fastdebug}-c2-jbb_default_tiered, \
+    ${jprt.my.solaris.sparc}-{product|fastdebug}-c2-jbb_default_nontiered, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_SerialGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_ParallelGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_CMS, \
@@ -276,7 +276,7 @@
 
 jprt.my.solaris.sparcv9.test.targets= \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jvm98, \
-    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jvm98_tiered, \
+    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jvm98_nontiered, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-scimark, \
     ${jprt.my.solaris.sparcv9}-product-c2-runThese, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_default, \
@@ -294,7 +294,7 @@
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_G1, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_ParOldGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_default, \
-    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_default_tiered, \
+    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_default_nontiered, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_SerialGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_ParallelGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_CMS, \
@@ -303,7 +303,7 @@
 
 jprt.my.solaris.x64.test.targets= \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jvm98, \
-    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jvm98_tiered, \
+    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jvm98_nontiered, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-scimark, \
     ${jprt.my.solaris.x64}-product-c2-runThese, \
     ${jprt.my.solaris.x64}-product-c2-runThese_Xcomp, \
@@ -322,7 +322,7 @@
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_G1, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_ParOldGC, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jbb_default, \
-    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jbb_default_tiered, \
+    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jbb_default_nontiered, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jbb_SerialGC, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jbb_ParallelGC, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_CMS, \
@@ -331,7 +331,7 @@
 
 jprt.my.solaris.i586.test.targets= \
     ${jprt.my.solaris.i586}-{product|fastdebug}-{c1|c2}-jvm98, \
-    ${jprt.my.solaris.i586}-{product|fastdebug}-c2-jvm98_tiered, \
+    ${jprt.my.solaris.i586}-{product|fastdebug}-c2-jvm98_nontiered, \
     ${jprt.my.solaris.i586}-{product|fastdebug}-{c1|c2}-scimark, \
     ${jprt.my.solaris.i586}-product-{c1|c2}-runThese_Xcomp, \
     ${jprt.my.solaris.i586}-fastdebug-c1-runThese_Xcomp, \
@@ -358,7 +358,7 @@
     ${jprt.my.solaris.i586}-product-c1-GCOld_G1, \
     ${jprt.my.solaris.i586}-product-c1-GCOld_ParOldGC, \
     ${jprt.my.solaris.i586}-fastdebug-c2-jbb_default, \
-    ${jprt.my.solaris.i586}-fastdebug-c2-jbb_default_tiered, \
+    ${jprt.my.solaris.i586}-fastdebug-c2-jbb_default_nontiered, \
     ${jprt.my.solaris.i586}-fastdebug-c2-jbb_ParallelGC, \
     ${jprt.my.solaris.i586}-fastdebug-c2-jbb_CMS, \
     ${jprt.my.solaris.i586}-fastdebug-c2-jbb_G1, \
@@ -366,7 +366,7 @@
 
 jprt.my.linux.i586.test.targets = \
     ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-jvm98, \
-    ${jprt.my.linux.i586}-{product|fastdebug}-c2-jvm98_tiered, \
+    ${jprt.my.linux.i586}-{product|fastdebug}-c2-jvm98_nontiered, \
     ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-scimark, \
     ${jprt.my.linux.i586}-product-c1-runThese_Xcomp, \
     ${jprt.my.linux.i586}-fastdebug-c1-runThese_Xshare, \
@@ -386,7 +386,7 @@
     ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_G1, \
     ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_ParOldGC, \
     ${jprt.my.linux.i586}-{product|fastdebug}-c1-jbb_default, \
-    ${jprt.my.linux.i586}-{product|fastdebug}-c2-jbb_default_tiered, \
+    ${jprt.my.linux.i586}-{product|fastdebug}-c2-jbb_default_nontiered, \
     ${jprt.my.linux.i586}-{product|fastdebug}-c1-jbb_ParallelGC, \
     ${jprt.my.linux.i586}-{product|fastdebug}-c1-jbb_CMS, \
     ${jprt.my.linux.i586}-{product|fastdebug}-c1-jbb_G1, \
@@ -394,7 +394,7 @@
 
 jprt.my.linux.x64.test.targets = \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-jvm98, \
-    ${jprt.my.linux.x64}-{product|fastdebug}-c2-jvm98_tiered, \
+    ${jprt.my.linux.x64}-{product|fastdebug}-c2-jvm98_nontiered, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-scimark, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCBasher_default, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCBasher_SerialGC, \
@@ -411,14 +411,14 @@
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_G1, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_ParOldGC, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-jbb_default, \
-    ${jprt.my.linux.x64}-{product|fastdebug}-c2-jbb_default_tiered, \
+    ${jprt.my.linux.x64}-{product|fastdebug}-c2-jbb_default_nontiered, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-jbb_ParallelGC, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-jbb_G1, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-jbb_ParOldGC
 
 jprt.my.windows.i586.test.targets = \
     ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-jvm98, \
-    ${jprt.my.windows.i586}-{product|fastdebug}-c2-jvm98_tiered, \
+    ${jprt.my.windows.i586}-{product|fastdebug}-c2-jvm98_nontiered, \
     ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-scimark, \
     ${jprt.my.windows.i586}-product-{c1|c2}-runThese, \
     ${jprt.my.windows.i586}-product-{c1|c2}-runThese_Xcomp, \
@@ -438,7 +438,7 @@
     ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_G1, \
     ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_ParOldGC, \
     ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-jbb_default, \
-    ${jprt.my.windows.i586}-{product|fastdebug}-c2-jbb_default_tiered, \
+    ${jprt.my.windows.i586}-{product|fastdebug}-c2-jbb_default_nontiered, \
     ${jprt.my.windows.i586}-product-{c1|c2}-jbb_ParallelGC, \
     ${jprt.my.windows.i586}-product-{c1|c2}-jbb_CMS, \
     ${jprt.my.windows.i586}-product-{c1|c2}-jbb_G1, \
@@ -446,7 +446,7 @@
 
 jprt.my.windows.x64.test.targets = \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-jvm98, \
-    ${jprt.my.windows.x64}-{product|fastdebug}-c2-jvm98_tiered, \
+    ${jprt.my.windows.x64}-{product|fastdebug}-c2-jvm98_nontiered, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-scimark, \
     ${jprt.my.windows.x64}-product-c2-runThese, \
     ${jprt.my.windows.x64}-product-c2-runThese_Xcomp, \
@@ -465,7 +465,7 @@
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_G1, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_ParOldGC, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-jbb_default, \
-    ${jprt.my.windows.x64}-{product|fastdebug}-c2-jbb_default_tiered, \
+    ${jprt.my.windows.x64}-{product|fastdebug}-c2-jbb_default_nontiered, \
     ${jprt.my.windows.x64}-product-c2-jbb_CMS, \
     ${jprt.my.windows.x64}-product-c2-jbb_ParallelGC, \
     ${jprt.my.windows.x64}-product-c2-jbb_G1, \
@@ -473,9 +473,9 @@
 
 # Some basic "smoke" tests for OpenJDK builds
 jprt.test.targets.open = \
-    ${jprt.my.solaris.x64}-{productOpen|debugOpen|fastdebugOpen}-c2-jvm98_tiered, \
-    ${jprt.my.solaris.i586}-{productOpen|fastdebugOpen}-c2-jvm98_tiered, \
-    ${jprt.my.linux.x64}-{productOpen|fastdebugOpen}-c2-jvm98_tiered
+    ${jprt.my.solaris.x64}-{productOpen|debugOpen|fastdebugOpen}-c2-jvm98, \
+    ${jprt.my.solaris.i586}-{productOpen|fastdebugOpen}-c2-jvm98, \
+    ${jprt.my.linux.x64}-{productOpen|fastdebugOpen}-c2-jvm98
 
 # Testing for actual embedded builds is different to standard
 jprt.my.linux.i586.test.targets.embedded = \
--- a/make/linux/makefiles/gcc.make	Thu Dec 15 12:57:57 2011 -0800
+++ b/make/linux/makefiles/gcc.make	Fri Dec 16 15:07:10 2011 -0800
@@ -50,7 +50,6 @@
 ifneq "$(shell expr \( $(CC_VER_MAJOR) \> 3 \) \| \( \( $(CC_VER_MAJOR) = 3 \) \& \( $(CC_VER_MINOR) \>= 4 \) \))" "0"
 # Allow the user to turn off precompiled headers from the command line.
 ifneq ($(USE_PRECOMPILED_HEADER),0)
-USE_PRECOMPILED_HEADER=1
 PRECOMPILED_HEADER_DIR=.
 PRECOMPILED_HEADER_SRC=$(GAMMADIR)/src/share/vm/precompiled/precompiled.hpp
 PRECOMPILED_HEADER=$(PRECOMPILED_HEADER_DIR)/precompiled.hpp.gch
@@ -165,7 +164,7 @@
 endif
 
 # -DDONT_USE_PRECOMPILED_HEADER will exclude all includes in precompiled.hpp.
-ifneq ($(USE_PRECOMPILED_HEADER),1)
+ifeq ($(USE_PRECOMPILED_HEADER),0)
 CFLAGS += -DDONT_USE_PRECOMPILED_HEADER
 endif
 
--- a/make/linux/makefiles/mapfile-vers-debug	Thu Dec 15 12:57:57 2011 -0800
+++ b/make/linux/makefiles/mapfile-vers-debug	Fri Dec 16 15:07:10 2011 -0800
@@ -1,7 +1,3 @@
-#
-# @(#)mapfile-vers-debug	1.18 07/10/25 16:47:35
-#
-
 #
 # Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -221,6 +217,7 @@
                 JVM_SetArrayElement;
                 JVM_SetClassSigners;
                 JVM_SetLength;
+                JVM_SetNativeThreadName;
                 JVM_SetPrimitiveArrayElement;
                 JVM_SetProtectionDomain;
                 JVM_SetSockOpt;
--- a/make/linux/makefiles/mapfile-vers-product	Thu Dec 15 12:57:57 2011 -0800
+++ b/make/linux/makefiles/mapfile-vers-product	Fri Dec 16 15:07:10 2011 -0800
@@ -1,7 +1,3 @@
-#
-# @(#)mapfile-vers-product	1.19 08/02/12 10:56:37
-#
-
 #
 # Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -221,6 +217,7 @@
                 JVM_SetArrayElement;
                 JVM_SetClassSigners;
                 JVM_SetLength;
+                JVM_SetNativeThreadName;
                 JVM_SetPrimitiveArrayElement;
                 JVM_SetProtectionDomain;
                 JVM_SetSockOpt;
--- a/make/linux/makefiles/top.make	Thu Dec 15 12:57:57 2011 -0800
+++ b/make/linux/makefiles/top.make	Fri Dec 16 15:07:10 2011 -0800
@@ -47,12 +47,10 @@
 Plat_File   = $(Platform_file)
 CDG         = cd $(GENERATED); 
 
-ifdef USE_PRECOMPILED_HEADER
-PrecompiledOption = -DUSE_PRECOMPILED_HEADER
-UpdatePCH         = $(MAKE) -f vm.make $(PRECOMPILED_HEADER) $(MFLAGS) 
+ifneq ($(USE_PRECOMPILED_HEADER),0)
+UpdatePCH = $(MAKE) -f vm.make $(PRECOMPILED_HEADER) $(MFLAGS) 
 else
-UpdatePCH         = \# precompiled header is not used
-PrecompiledOption = 
+UpdatePCH = \# precompiled header is not used
 endif
 
 Cached_plat = $(GENERATED)/platform.current
--- a/make/solaris/makefiles/gcc.make	Thu Dec 15 12:57:57 2011 -0800
+++ b/make/solaris/makefiles/gcc.make	Fri Dec 16 15:07:10 2011 -0800
@@ -49,7 +49,6 @@
 ifneq "$(shell expr \( $(CC_VER_MAJOR) \> 3 \) \| \( \( $(CC_VER_MAJOR) = 3 \) \& \( $(CC_VER_MINOR) \>= 4 \) \))" "0"
 # Allow the user to turn off precompiled headers from the command line.
 ifneq ($(USE_PRECOMPILED_HEADER),0)
-USE_PRECOMPILED_HEADER=1
 PRECOMPILED_HEADER_DIR=.
 PRECOMPILED_HEADER_SRC=$(GAMMADIR)/src/share/vm/precompiled/precompiled.hpp
 PRECOMPILED_HEADER=$(PRECOMPILED_HEADER_DIR)/precompiled.hpp.gch
@@ -142,7 +141,7 @@
 endif
 
 # -DDONT_USE_PRECOMPILED_HEADER will exclude all includes in precompiled.hpp.
-ifneq ($(USE_PRECOMPILED_HEADER),1)
+ifeq ($(USE_PRECOMPILED_HEADER),0)
 CFLAGS += -DDONT_USE_PRECOMPILED_HEADER
 endif
 
--- a/make/solaris/makefiles/mapfile-vers	Thu Dec 15 12:57:57 2011 -0800
+++ b/make/solaris/makefiles/mapfile-vers	Fri Dec 16 15:07:10 2011 -0800
@@ -1,7 +1,3 @@
-#
-# @(#)mapfile-vers	1.32 07/10/25 16:47:36
-#
-
 #
 # Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -221,6 +217,7 @@
 		JVM_SetArrayElement;
 		JVM_SetClassSigners;
 		JVM_SetLength;
+                JVM_SetNativeThreadName;
 		JVM_SetPrimitiveArrayElement;
 		JVM_SetProtectionDomain;
 		JVM_SetSockOpt;
--- a/make/windows/makefiles/projectcreator.make	Thu Dec 15 12:57:57 2011 -0800
+++ b/make/windows/makefiles/projectcreator.make	Fri Dec 16 15:07:10 2011 -0800
@@ -50,6 +50,7 @@
         -relativeInclude src\closed\os_cpu\windows_$(Platform_arch)\vm \
         -relativeInclude src\closed\cpu\$(Platform_arch)\vm \
         -relativeInclude src\share\vm \
+        -relativeInclude src\share\vm\precompiled \
         -relativeInclude src\share\vm\prims \
         -relativeInclude src\os\windows\vm \
         -relativeInclude src\os_cpu\windows_$(Platform_arch)\vm \
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -855,12 +855,6 @@
     Lookaside  = 1 << 4
   };
 
-  // test if x is within signed immediate range for nbits
-  static bool is_simm(intptr_t x, int nbits) { return -( intptr_t(1) << nbits-1 )  <= x   &&   x  <  ( intptr_t(1) << nbits-1 ); }
-
-  // test if -4096 <= x <= 4095
-  static bool is_simm13(intptr_t x) { return is_simm(x, 13); }
-
   static bool is_in_wdisp_range(address a, address b, int nbits) {
     intptr_t d = intptr_t(b) - intptr_t(a);
     return is_simm(d, nbits + 2);
@@ -1203,7 +1197,7 @@
     if (!UseCBCond || cbcond_before()) return false;
     intptr_t x = intptr_t(target_distance(L)) - intptr_t(pc());
     assert( (x & 3) == 0, "not word aligned");
-    return is_simm(x, 12);
+    return is_simm12(x);
   }
 
   // Tells assembler you know that next instruction is delayed
--- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -765,7 +765,7 @@
 void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) {
   add_debug_info_for_null_check_here(op->info());
   __ load_klass(O0, G3_scratch);
-  if (__ is_simm13(op->vtable_offset())) {
+  if (Assembler::is_simm13(op->vtable_offset())) {
     __ ld_ptr(G3_scratch, op->vtable_offset(), G5_method);
   } else {
     // This will generate 2 instructions
--- a/src/cpu/sparc/vm/c2_globals_sparc.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/cpu/sparc/vm/c2_globals_sparc.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -42,7 +42,7 @@
 #else
 define_pd_global(bool, ProfileInterpreter,           true);
 #endif // CC_INTERP
-define_pd_global(bool, TieredCompilation,            false);
+define_pd_global(bool, TieredCompilation,            true);
 define_pd_global(intx, CompileThreshold,             10000);
 define_pd_global(intx, BackEdgeThreshold,            140000);
 
--- a/src/cpu/sparc/vm/methodHandles_sparc.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -315,7 +315,7 @@
   __ cmp_and_br_short(O7_temp, T_VOID, Assembler::equal, Assembler::pt, L_ok_4);
   extract_conversion_vminfo(_masm, L5_conversion, O5_temp);
   __ ld_ptr(L4_saved_args_base, __ argument_offset(O5_temp, O5_temp), O7_temp);
-  assert(__ is_simm13(RETURN_VALUE_PLACEHOLDER), "must be simm13");
+  assert(Assembler::is_simm13(RETURN_VALUE_PLACEHOLDER), "must be simm13");
   __ cmp_and_brx_short(O7_temp, (int32_t) RETURN_VALUE_PLACEHOLDER, Assembler::equal, Assembler::pt, L_ok_4);
   __ stop("damaged ricochet frame: RETURN_VALUE_PLACEHOLDER not found");
   __ BIND(L_ok_4);
--- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -767,7 +767,7 @@
   // In the 64bit build because of wider slots and STACKBIAS we can run
   // out of bits in the displacement to do loads and stores.  Use g3 as
   // temporary displacement.
-  if (! __ is_simm13(extraspace)) {
+  if (!Assembler::is_simm13(extraspace)) {
     __ set(extraspace, G3_scratch);
     __ sub(SP, G3_scratch, SP);
   } else {
--- a/src/cpu/sparc/vm/sparc.ad	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/cpu/sparc/vm/sparc.ad	Fri Dec 16 15:07:10 2011 -0800
@@ -566,7 +566,7 @@
     } else {
       klass_load_size = 1*BytesPerInstWord;
     }
-    if( Assembler::is_simm13(v_off) ) {
+    if (Assembler::is_simm13(v_off)) {
       return klass_load_size +
              (2*BytesPerInstWord +           // ld_ptr, ld_ptr
              NativeCall::instruction_size);  // call; delay slot
@@ -1019,8 +1019,21 @@
 
 
 //=============================================================================
-const bool Matcher::constant_table_absolute_addressing = false;
-const RegMask& MachConstantBaseNode::_out_RegMask = PTR_REG_mask;
+const RegMask& MachConstantBaseNode::_out_RegMask = PTR_REG_mask();
+
+int Compile::ConstantTable::calculate_table_base_offset() const {
+  if (UseRDPCForConstantTableBase) {
+    // The table base offset might be less but then it fits into
+    // simm13 anyway and we are good (cf. MachConstantBaseNode::emit).
+    return Assembler::min_simm13();
+  } else {
+    int offset = -(size() / 2);
+    if (!Assembler::is_simm13(offset)) {
+      offset = Assembler::min_simm13();
+    }
+    return offset;
+  }
+}
 
 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
   Compile* C = ra_->C;
@@ -1028,8 +1041,9 @@
   MacroAssembler _masm(&cbuf);
 
   Register r = as_Register(ra_->get_encode(this));
-  CodeSection* cs = __ code()->consts();
-  int consts_size = cs->align_at_start(cs->size());
+  CodeSection* consts_section = __ code()->consts();
+  int consts_size = consts_section->align_at_start(consts_section->size());
+  assert(constant_table.size() == consts_size, err_msg("must be: %d == %d", constant_table.size(), consts_size));
 
   if (UseRDPCForConstantTableBase) {
     // For the following RDPC logic to work correctly the consts
@@ -1037,30 +1051,37 @@
     // assert checks for that.  The layout and the SECT_* constants
     // are defined in src/share/vm/asm/codeBuffer.hpp.
     assert(CodeBuffer::SECT_CONSTS + 1 == CodeBuffer::SECT_INSTS, "must be");
-    int offset = __ offset();
+    int insts_offset = __ offset();
+
+    // Layout:
+    //
+    // |----------- consts section ------------|----------- insts section -----------...
+    // |------ constant table -----|- padding -|------------------x----
+    //                                                            \ current PC (RDPC instruction)
+    // |<------------- consts_size ----------->|<- insts_offset ->|
+    //                                                            \ table base
+    // The table base offset is later added to the load displacement
+    // so it has to be negative.
+    int table_base_offset = -(consts_size + insts_offset);
     int disp;
 
     // If the displacement from the current PC to the constant table
     // base fits into simm13 we set the constant table base to the
     // current PC.
-    if (__ is_simm13(-(consts_size + offset))) {
-      constant_table.set_table_base_offset(-(consts_size + offset));
+    if (Assembler::is_simm13(table_base_offset)) {
+      constant_table.set_table_base_offset(table_base_offset);
       disp = 0;
     } else {
-      // If the offset of the top constant (last entry in the table)
-      // fits into simm13 we set the constant table base to the actual
-      // table base.
-      if (__ is_simm13(constant_table.top_offset())) {
-        constant_table.set_table_base_offset(0);
-        disp = consts_size + offset;
-      } else {
-        // Otherwise we set the constant table base in the middle of the
-        // constant table.
-        int half_consts_size = consts_size / 2;
-        assert(half_consts_size * 2 == consts_size, "sanity");
-        constant_table.set_table_base_offset(-half_consts_size);  // table base offset gets added to the load displacement.
-        disp = half_consts_size + offset;
-      }
+      // Otherwise we set the constant table base offset to the
+      // maximum negative displacement of load instructions to keep
+      // the disp as small as possible:
+      //
+      // |<------------- consts_size ----------->|<- insts_offset ->|
+      // |<--------- min_simm13 --------->|<-------- disp --------->|
+      //                                  \ table base
+      table_base_offset = Assembler::min_simm13();
+      constant_table.set_table_base_offset(table_base_offset);
+      disp = (consts_size + insts_offset) + table_base_offset;
     }
 
     __ rdpc(r);
@@ -1072,8 +1093,7 @@
   }
   else {
     // Materialize the constant table base.
-    assert(constant_table.size() == consts_size, err_msg("must be: %d == %d", constant_table.size(), consts_size));
-    address baseaddr = cs->start() + -(constant_table.table_base_offset());
+    address baseaddr = consts_section->start() + -(constant_table.table_base_offset());
     RelocationHolder rspec = internal_word_Relocation::spec(baseaddr);
     AddressLiteral base(baseaddr, rspec);
     __ set(base, r);
@@ -1169,6 +1189,13 @@
     __ save(SP, G3, SP);
   }
   C->set_frame_complete( __ offset() );
+
+  if (!UseRDPCForConstantTableBase && C->has_mach_constant_base_node()) {
+    // NOTE: We set the table base offset here because users might be
+    // emitted before MachConstantBaseNode.
+    Compile::ConstantTable& constant_table = C->constant_table();
+    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
+  }
 }
 
 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
@@ -1843,7 +1870,7 @@
 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
   // The passed offset is relative to address of the branch.
   // Don't need to adjust the offset.
-  return UseCBCond && Assembler::is_simm(offset, 12);
+  return UseCBCond && Assembler::is_simm12(offset);
 }
 
 const bool Matcher::isSimpleConstant64(jlong value) {
@@ -1997,7 +2024,7 @@
 }
 
 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
-  return L7_REGP_mask;
+  return L7_REGP_mask();
 }
 
 %}
@@ -2072,8 +2099,8 @@
   %}
 
   enc_class form3_mem_reg_long_unaligned_marshal( memory mem, iRegL reg ) %{
-    assert( Assembler::is_simm13($mem$$disp  ), "need disp and disp+4" );
-    assert( Assembler::is_simm13($mem$$disp+4), "need disp and disp+4" );
+    assert(Assembler::is_simm13($mem$$disp  ), "need disp and disp+4");
+    assert(Assembler::is_simm13($mem$$disp+4), "need disp and disp+4");
     guarantee($mem$$index == R_G0_enc, "double index?");
     emit_form3_mem_reg(cbuf, this, $primary, -1, $mem$$base, $mem$$disp+4, R_G0_enc, R_O7_enc );
     emit_form3_mem_reg(cbuf, this, $primary, -1, $mem$$base, $mem$$disp,   R_G0_enc, $reg$$reg );
@@ -2082,8 +2109,8 @@
   %}
 
   enc_class form3_mem_reg_double_unaligned( memory mem, RegD_low reg ) %{
-    assert( Assembler::is_simm13($mem$$disp  ), "need disp and disp+4" );
-    assert( Assembler::is_simm13($mem$$disp+4), "need disp and disp+4" );
+    assert(Assembler::is_simm13($mem$$disp  ), "need disp and disp+4");
+    assert(Assembler::is_simm13($mem$$disp+4), "need disp and disp+4");
     guarantee($mem$$index == R_G0_enc, "double index?");
     // Load long with 2 instructions
     emit_form3_mem_reg(cbuf, this, $primary, -1, $mem$$base, $mem$$disp,   R_G0_enc, $reg$$reg+0 );
@@ -2563,7 +2590,7 @@
       }
       int entry_offset = instanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size();
       int v_off = entry_offset*wordSize + vtableEntry::method_offset_in_bytes();
-      if( __ is_simm13(v_off) ) {
+      if (Assembler::is_simm13(v_off)) {
         __ ld_ptr(G3, v_off, G5_method);
       } else {
         // Generate 2 instructions
@@ -3336,7 +3363,7 @@
 
 // Integer Immediate: 8-bit
 operand immI8() %{
-  predicate(Assembler::is_simm(n->get_int(), 8));
+  predicate(Assembler::is_simm8(n->get_int()));
   match(ConI);
   op_cost(0);
   format %{ %}
@@ -3365,7 +3392,7 @@
 
 // Integer Immediate: 16-bit
 operand immI16() %{
-  predicate(Assembler::is_simm(n->get_int(), 16));
+  predicate(Assembler::is_simm16(n->get_int()));
   match(ConI);
   op_cost(0);
   format %{ %}
@@ -3393,7 +3420,7 @@
 
 // Integer Immediate: 11-bit
 operand immI11() %{
-  predicate(Assembler::is_simm(n->get_int(),11));
+  predicate(Assembler::is_simm11(n->get_int()));
   match(ConI);
   op_cost(0);
   format %{ %}
@@ -3402,7 +3429,7 @@
 
 // Integer Immediate: 5-bit
 operand immI5() %{
-  predicate(Assembler::is_simm(n->get_int(), 5));
+  predicate(Assembler::is_simm5(n->get_int()));
   match(ConI);
   op_cost(0);
   format %{ %}
@@ -3634,7 +3661,7 @@
 
 // Integer Immediate: 5-bit
 operand immL5() %{
-  predicate(n->get_long() == (int)n->get_long() && Assembler::is_simm((int)n->get_long(), 5));
+  predicate(n->get_long() == (int)n->get_long() && Assembler::is_simm5((int)n->get_long()));
   match(ConL);
   op_cost(0);
   format %{ %}
@@ -9251,13 +9278,16 @@
 
   format %{  "ADD    $constanttablebase, $constantoffset, O7\n\t"
              "LD     [O7 + $switch_val], O7\n\t"
-             "JUMP   O7"
-         %}
+             "JUMP   O7" %}
   ins_encode %{
     // Calculate table address into a register.
     Register table_reg;
     Register label_reg = O7;
-    if (constant_offset() == 0) {
+    // If we are calculating the size of this instruction don't trust
+    // zero offsets because they might change when
+    // MachConstantBaseNode decides to optimize the constant table
+    // base.
+    if ((constant_offset() == 0) && !Compile::current()->in_scratch_emit_size()) {
       table_reg = $constanttablebase;
     } else {
       table_reg = O7;
--- a/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -83,7 +83,7 @@
   }
 #endif
   int v_off = entry_offset*wordSize + vtableEntry::method_offset_in_bytes();
-  if( __ is_simm13(v_off) ) {
+  if (Assembler::is_simm13(v_off)) {
     __ ld_ptr(G3, v_off, G5_method);
   } else {
     __ set(v_off,G5);
--- a/src/cpu/x86/vm/assembler_x86.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -3535,7 +3535,8 @@
 // addressing.
 bool Assembler::is_polling_page_far() {
   intptr_t addr = (intptr_t)os::get_polling_page();
-  return !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
+  return ForceUnreachable ||
+         !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
          !is_simm32(addr - (intptr_t)CodeCache::high_bound());
 }
 
--- a/src/cpu/x86/vm/assembler_x86.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -693,17 +693,6 @@
   static address locate_next_instruction(address inst);
 
   // Utilities
-
-#ifdef _LP64
- static bool is_simm(int64_t x, int nbits) { return -(CONST64(1) << (nbits-1)) <= x &&
-                                                    x < (CONST64(1) << (nbits-1)); }
- static bool is_simm32(int64_t x) { return x == (int64_t)(int32_t)x; }
-#else
- static bool is_simm(int32_t x, int nbits) { return -(1 << (nbits-1)) <= x &&
-                                                    x < (1 << (nbits-1)); }
- static bool is_simm32(int32_t x) { return true; }
-#endif // _LP64
-
   static bool is_polling_page_far() NOT_LP64({ return false;});
 
   // Generic instructions
--- a/src/cpu/x86/vm/c2_globals_x86.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/cpu/x86/vm/c2_globals_x86.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -44,7 +44,7 @@
 #else
 define_pd_global(bool, ProfileInterpreter,           true);
 #endif // CC_INTERP
-define_pd_global(bool, TieredCompilation,            false);
+define_pd_global(bool, TieredCompilation,            true);
 define_pd_global(intx, CompileThreshold,             10000);
 define_pd_global(intx, BackEdgeThreshold,            100000);
 
--- a/src/cpu/x86/vm/methodHandles_x86.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/cpu/x86/vm/methodHandles_x86.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -27,7 +27,7 @@
 
 // Adapters
 enum /* platform_dependent_constants */ {
-  adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 15000)) LP64_ONLY(32000 DEBUG_ONLY(+ 80000))
+  adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 15000)) LP64_ONLY(32000 DEBUG_ONLY(+ 120000))
 };
 
 public:
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -95,6 +95,7 @@
 #define inc_counter_np(counter) (0)
 #else
   void inc_counter_np_(int& counter) {
+    // This can destroy rscratch1 if counter is far from the code cache
     __ incrementl(ExternalAddress((address)&counter));
   }
 #define inc_counter_np(counter) \
@@ -1268,7 +1269,7 @@
            __ subptr(end, start); // number of bytes to copy
 
           intptr_t disp = (intptr_t) ct->byte_map_base;
-          if (__ is_simm32(disp)) {
+          if (Assembler::is_simm32(disp)) {
             Address cardtable(noreg, noreg, Address::no_scale, disp);
             __ lea(scratch, cardtable);
           } else {
@@ -1466,8 +1467,8 @@
     __ movb(Address(end_to, 8), rax);
 
   __ BIND(L_exit);
-    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr);
     restore_arg_regs();
+    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
@@ -1555,8 +1556,8 @@
     __ decrement(qword_count);
     __ jcc(Assembler::notZero, L_copy_8_bytes);
 
-    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr);
     restore_arg_regs();
+    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
@@ -1564,8 +1565,8 @@
     // Copy in 32-bytes chunks
     copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
 
-    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr);
     restore_arg_regs();
+    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
@@ -1658,8 +1659,8 @@
     __ movw(Address(end_to, 8), rax);
 
   __ BIND(L_exit);
-    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr);
     restore_arg_regs();
+    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
@@ -1759,8 +1760,8 @@
     __ decrement(qword_count);
     __ jcc(Assembler::notZero, L_copy_8_bytes);
 
-    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr);
     restore_arg_regs();
+    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
@@ -1768,8 +1769,8 @@
     // Copy in 32-bytes chunks
     copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
 
-    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr);
     restore_arg_regs();
+    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
@@ -1859,8 +1860,8 @@
       __ leaq(end_to, Address(saved_to, dword_count, Address::times_4, -4));
       gen_write_ref_array_post_barrier(saved_to, end_to, rax);
     }
-    inc_counter_np(SharedRuntime::_jint_array_copy_ctr);
     restore_arg_regs();
+    inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
@@ -1940,11 +1941,11 @@
     __ decrement(qword_count);
     __ jcc(Assembler::notZero, L_copy_8_bytes);
 
-    inc_counter_np(SharedRuntime::_jint_array_copy_ctr);
     if (is_oop) {
       __ jmp(L_exit);
     }
     restore_arg_regs();
+    inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
@@ -1952,7 +1953,6 @@
     // Copy in 32-bytes chunks
     copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
 
-   inc_counter_np(SharedRuntime::_jint_array_copy_ctr);
    __ bind(L_exit);
      if (is_oop) {
        Register end_to = rdx;
@@ -1960,6 +1960,7 @@
        gen_write_ref_array_post_barrier(to, end_to, rax);
      }
     restore_arg_regs();
+    inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
@@ -2032,8 +2033,8 @@
     if (is_oop) {
       __ jmp(L_exit);
     } else {
-      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
       restore_arg_regs();
+      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
       __ xorptr(rax, rax); // return 0
       __ leave(); // required for proper stackwalking of RuntimeStub frame
       __ ret(0);
@@ -2045,11 +2046,13 @@
     if (is_oop) {
     __ BIND(L_exit);
       gen_write_ref_array_post_barrier(saved_to, end_to, rax);
-      inc_counter_np(SharedRuntime::_oop_array_copy_ctr);
-    } else {
-      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
     }
     restore_arg_regs();
+    if (is_oop) {
+      inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
+    } else {
+      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
+    }
     __ xorptr(rax, rax); // return 0
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
@@ -2113,8 +2116,8 @@
     if (is_oop) {
       __ jmp(L_exit);
     } else {
-      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
       restore_arg_regs();
+      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
       __ xorptr(rax, rax); // return 0
       __ leave(); // required for proper stackwalking of RuntimeStub frame
       __ ret(0);
@@ -2127,11 +2130,13 @@
     __ BIND(L_exit);
       __ lea(rcx, Address(to, saved_count, Address::times_8, -8));
       gen_write_ref_array_post_barrier(to, rcx, rax);
-      inc_counter_np(SharedRuntime::_oop_array_copy_ctr);
-    } else {
-      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
     }
     restore_arg_regs();
+    if (is_oop) {
+      inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
+    } else {
+      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
+    }
     __ xorptr(rax, rax); // return 0
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
@@ -2331,8 +2336,8 @@
     __ BIND(L_done);
     __ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
     __ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
-    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
     restore_arg_regs();
+    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); // Update counter after rscratch1 is free
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
--- a/src/cpu/x86/vm/x86_32.ad	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/cpu/x86/vm/x86_32.ad	Fri Dec 16 15:07:10 2011 -0800
@@ -507,9 +507,12 @@
 
 
 //=============================================================================
-const bool Matcher::constant_table_absolute_addressing = true;
 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 
+int Compile::ConstantTable::calculate_table_base_offset() const {
+  return 0;  // absolute addressing, no offset
+}
+
 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
   // Empty encoding
 }
@@ -639,6 +642,12 @@
   }
 #endif
 
+  if (C->has_mach_constant_base_node()) {
+    // NOTE: We set the table base offset here because users might be
+    // emitted before MachConstantBaseNode.
+    Compile::ConstantTable& constant_table = C->constant_table();
+    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
+  }
 }
 
 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
@@ -1515,12 +1524,12 @@
 
 // Register for DIVI projection of divmodI
 RegMask Matcher::divI_proj_mask() {
-  return EAX_REG_mask;
+  return EAX_REG_mask();
 }
 
 // Register for MODI projection of divmodI
 RegMask Matcher::modI_proj_mask() {
-  return EDX_REG_mask;
+  return EDX_REG_mask();
 }
 
 // Register for DIVL projection of divmodL
@@ -1536,7 +1545,7 @@
 }
 
 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
-  return EBP_REG_mask;
+  return EBP_REG_mask();
 }
 
 // Returns true if the high 32 bits of the value is known to be zero.
--- a/src/cpu/x86/vm/x86_64.ad	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/cpu/x86/vm/x86_64.ad	Fri Dec 16 15:07:10 2011 -0800
@@ -843,9 +843,12 @@
 
 
 //=============================================================================
-const bool Matcher::constant_table_absolute_addressing = true;
 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 
+int Compile::ConstantTable::calculate_table_base_offset() const {
+  return 0;  // absolute addressing, no offset
+}
+
 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
   // Empty encoding
 }
@@ -977,6 +980,13 @@
     masm.bind(L);
   }
 #endif
+
+  if (C->has_mach_constant_base_node()) {
+    // NOTE: We set the table base offset here because users might be
+    // emitted before MachConstantBaseNode.
+    Compile::ConstantTable& constant_table = C->constant_table();
+    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
+  }
 }
 
 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
@@ -2079,26 +2089,26 @@
 
 // Register for DIVI projection of divmodI
 RegMask Matcher::divI_proj_mask() {
-  return INT_RAX_REG_mask;
+  return INT_RAX_REG_mask();
 }
 
 // Register for MODI projection of divmodI
 RegMask Matcher::modI_proj_mask() {
-  return INT_RDX_REG_mask;
+  return INT_RDX_REG_mask();
 }
 
 // Register for DIVL projection of divmodL
 RegMask Matcher::divL_proj_mask() {
-  return LONG_RAX_REG_mask;
+  return LONG_RAX_REG_mask();
 }
 
 // Register for MODL projection of divmodL
 RegMask Matcher::modL_proj_mask() {
-  return LONG_RDX_REG_mask;
+  return LONG_RDX_REG_mask();
 }
 
 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
-  return PTR_RBP_REG_mask;
+  return PTR_RBP_REG_mask();
 }
 
 static Address build_address(int b, int i, int s, int d) {
--- a/src/os/bsd/vm/os_bsd.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/os/bsd/vm/os_bsd.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -5778,15 +5778,18 @@
 
 // is_headless_jre()
 //
-// Test for the existence of libmawt in motif21 or xawt directories
+// Test for the existence of xawt/libmawt.so or libawt_xawt.so
 // in order to report if we are running in a headless jre
 //
+// Since JDK8 xawt/libmawt.so was moved into the same directory
+// as libawt.so, and renamed libawt_xawt.so
+//
 bool os::is_headless_jre() {
     struct stat statbuf;
     char buf[MAXPATHLEN];
     char libmawtpath[MAXPATHLEN];
     const char *xawtstr  = "/xawt/libmawt" JNI_LIB_SUFFIX;
-    const char *motifstr = "/motif21/libmawt" JNI_LIB_SUFFIX;
+    const char *new_xawtstr = "/libawt_xawt" JNI_LIB_SUFFIX;
     char *p;
 
     // Get path to libjvm.so
@@ -5807,9 +5810,9 @@
     strcat(libmawtpath, xawtstr);
     if (::stat(libmawtpath, &statbuf) == 0) return false;
 
-    // check motif21/libmawt.so
+    // check libawt_xawt.so
     strcpy(libmawtpath, buf);
-    strcat(libmawtpath, motifstr);
+    strcat(libmawtpath, new_xawtstr);
     if (::stat(libmawtpath, &statbuf) == 0) return false;
 
     return true;
--- a/src/os/linux/vm/os_linux.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/os/linux/vm/os_linux.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -5425,15 +5425,18 @@
 
 // is_headless_jre()
 //
-// Test for the existence of libmawt in motif21 or xawt directories
+// Test for the existence of xawt/libmawt.so or libawt_xawt.so
 // in order to report if we are running in a headless jre
 //
+// Since JDK8 xawt/libmawt.so was moved into the same directory
+// as libawt.so, and renamed libawt_xawt.so
+//
 bool os::is_headless_jre() {
     struct stat statbuf;
     char buf[MAXPATHLEN];
     char libmawtpath[MAXPATHLEN];
     const char *xawtstr  = "/xawt/libmawt.so";
-    const char *motifstr = "/motif21/libmawt.so";
+    const char *new_xawtstr = "/libawt_xawt.so";
     char *p;
 
     // Get path to libjvm.so
@@ -5454,9 +5457,9 @@
     strcat(libmawtpath, xawtstr);
     if (::stat(libmawtpath, &statbuf) == 0) return false;
 
-    // check motif21/libmawt.so
+    // check libawt_xawt.so
     strcpy(libmawtpath, buf);
-    strcat(libmawtpath, motifstr);
+    strcat(libmawtpath, new_xawtstr);
     if (::stat(libmawtpath, &statbuf) == 0) return false;
 
     return true;
--- a/src/os/solaris/vm/os_solaris.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/os/solaris/vm/os_solaris.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -6311,15 +6311,18 @@
 
 // is_headless_jre()
 //
-// Test for the existence of libmawt in motif21 or xawt directories
+// Test for the existence of xawt/libmawt.so or libawt_xawt.so
 // in order to report if we are running in a headless jre
 //
+// Since JDK8 xawt/libmawt.so was moved into the same directory
+// as libawt.so, and renamed libawt_xawt.so
+//
 bool os::is_headless_jre() {
     struct stat statbuf;
     char buf[MAXPATHLEN];
     char libmawtpath[MAXPATHLEN];
     const char *xawtstr  = "/xawt/libmawt.so";
-    const char *motifstr = "/motif21/libmawt.so";
+    const char *new_xawtstr = "/libawt_xawt.so";
     char *p;
 
     // Get path to libjvm.so
@@ -6340,9 +6343,9 @@
     strcat(libmawtpath, xawtstr);
     if (::stat(libmawtpath, &statbuf) == 0) return false;
 
-    // check motif21/libmawt.so
+    // check libawt_xawt.so
     strcpy(libmawtpath, buf);
-    strcat(libmawtpath, motifstr);
+    strcat(libmawtpath, new_xawtstr);
     if (::stat(libmawtpath, &statbuf) == 0) return false;
 
     return true;
--- a/src/share/vm/adlc/adlparse.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/adlc/adlparse.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -982,27 +982,9 @@
       }
       if (strcmp(token,"interpreter_frame_pointer")==0) {
         interpreter_frame_pointer_parse(frame, false);
-        // Add  reg_class interpreter_frame_pointer_reg
-        if( _AD._register != NULL ) {
-          RegClass *reg_class = _AD._register->addRegClass("interpreter_frame_pointer_reg");
-          char *interpreter_frame_pointer_reg = frame->_interpreter_frame_pointer_reg;
-          if( interpreter_frame_pointer_reg != NULL ) {
-            RegDef *regDef = _AD._register->getRegDef(interpreter_frame_pointer_reg);
-            reg_class->addReg(regDef);     // add regDef to regClass
-          }
-        }
       }
       if (strcmp(token,"inline_cache_reg")==0) {
         inline_cache_parse(frame, false);
-        // Add  reg_class inline_cache_reg
-        if( _AD._register != NULL ) {
-          RegClass *reg_class = _AD._register->addRegClass("inline_cache_reg");
-          char *inline_cache_reg = frame->_inline_cache_reg;
-          if( inline_cache_reg != NULL ) {
-            RegDef *regDef = _AD._register->getRegDef(inline_cache_reg);
-            reg_class->addReg(regDef);     // add regDef to regClass
-          }
-        }
       }
       if (strcmp(token,"compiler_method_oop_reg")==0) {
         parse_err(WARN, "Using obsolete Token, compiler_method_oop_reg");
@@ -1010,15 +992,6 @@
       }
       if (strcmp(token,"interpreter_method_oop_reg")==0) {
         interpreter_method_oop_parse(frame, false);
-        // Add  reg_class interpreter_method_oop_reg
-        if( _AD._register != NULL ) {
-          RegClass *reg_class = _AD._register->addRegClass("interpreter_method_oop_reg");
-          char *method_oop_reg = frame->_interpreter_method_oop_reg;
-          if( method_oop_reg != NULL ) {
-            RegDef *regDef = _AD._register->getRegDef(method_oop_reg);
-            reg_class->addReg(regDef);     // add regDef to regClass
-          }
-        }
       }
       if (strcmp(token,"cisc_spilling_operand_name")==0) {
         cisc_spilling_operand_name_parse(frame, false);
@@ -2363,6 +2336,14 @@
       }
     }
     next_char();                  // Skip closing ')'
+  } else if (_curchar == '%') {
+    char *code = find_cpp_block("reg class");
+    if (code == NULL) {
+      parse_err(SYNERR, "missing code declaration for reg class.\n");
+      return;
+    }
+    reg_class->_user_defined = code;
+    return;
   }
 
   // Check for terminating ';'
@@ -3115,7 +3096,7 @@
   encoding->add_code("    _constant = C->constant_table().add");
 
   // Parse everything in ( ) expression.
-  encoding->add_code("(");
+  encoding->add_code("(this, ");
   next_char();  // Skip '('
   int parens_depth = 1;
 
@@ -3130,7 +3111,8 @@
     }
     else if (_curchar == ')') {
       parens_depth--;
-      encoding->add_code(")");
+      if (parens_depth > 0)
+        encoding->add_code(")");
       next_char();
     }
     else {
@@ -3157,7 +3139,7 @@
   }
 
   // Finish code line.
-  encoding->add_code(";");
+  encoding->add_code(");");
 
   if (_AD._adlocation_debug) {
     encoding->add_code(end_line_marker());
@@ -3817,7 +3799,7 @@
     return;
   }
   // Get list of effect-operand pairs and insert into dictionary
-  else get_effectlist(instr->_effects, instr->_localNames);
+  else get_effectlist(instr->_effects, instr->_localNames, instr->_has_call);
 
   // Debug Stuff
   if (_AD._adl_debug > 1) fprintf(stderr,"Effect description: %s\n", desc);
@@ -4595,7 +4577,7 @@
 // effect, and the second must be the name of an operand defined in the
 // operand list of this instruction.  Stores the names with a pointer to the
 // effect form in a local effects table.
-void ADLParser::get_effectlist(FormDict &effects, FormDict &operands) {
+void ADLParser::get_effectlist(FormDict &effects, FormDict &operands, bool& has_call) {
   OperandForm *opForm;
   Effect      *eForm;
   char        *ident;
@@ -4628,26 +4610,31 @@
       // Debugging Stuff
     if (_AD._adl_debug > 1) fprintf(stderr, "\tEffect Type: %s\t", ident);
     skipws();
-    // Get name of operand and check that it is in the local name table
-    if( (ident = get_unique_ident(effects, "effect")) == NULL) {
-      parse_err(SYNERR, "missing operand identifier in effect list\n");
-      return;
+    if (eForm->is(Component::CALL)) {
+      if (_AD._adl_debug > 1) fprintf(stderr, "\n");
+      has_call = true;
+    } else {
+      // Get name of operand and check that it is in the local name table
+      if( (ident = get_unique_ident(effects, "effect")) == NULL) {
+        parse_err(SYNERR, "missing operand identifier in effect list\n");
+        return;
+      }
+      const Form *form = operands[ident];
+      opForm = form ? form->is_operand() : NULL;
+      if( opForm == NULL ) {
+        if( form && form->is_opclass() ) {
+          const char* cname = form->is_opclass()->_ident;
+          parse_err(SYNERR, "operand classes are illegal in effect lists (found %s %s)\n", cname, ident);
+        } else {
+          parse_err(SYNERR, "undefined operand %s in effect list\n", ident);
+        }
+        return;
+      }
+      // Add the pair to the effects table
+      effects.Insert(ident, eForm);
+      // Debugging Stuff
+      if (_AD._adl_debug > 1) fprintf(stderr, "\tOperand Name: %s\n", ident);
     }
-    const Form *form = operands[ident];
-    opForm = form ? form->is_operand() : NULL;
-    if( opForm == NULL ) {
-      if( form && form->is_opclass() ) {
-        const char* cname = form->is_opclass()->_ident;
-        parse_err(SYNERR, "operand classes are illegal in effect lists (found %s %s)\n", cname, ident);
-      } else {
-        parse_err(SYNERR, "undefined operand %s in effect list\n", ident);
-      }
-      return;
-    }
-    // Add the pair to the effects table
-    effects.Insert(ident, eForm);
-    // Debugging Stuff
-    if (_AD._adl_debug > 1) fprintf(stderr, "\tOperand Name: %s\n", ident);
     skipws();
   } while(_curchar == ',');
 
--- a/src/share/vm/adlc/adlparse.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/adlc/adlparse.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -232,7 +232,7 @@
   char *get_relation_dup(void);
 
   void  get_oplist(NameList &parameters, FormDict &operands);// Parse type-operand pairs
-  void  get_effectlist(FormDict &effects, FormDict &operands); // Parse effect-operand pairs
+  void  get_effectlist(FormDict &effects, FormDict &operands, bool& has_call); // Parse effect-operand pairs
   // Return the contents of a parenthesized expression.
   // Requires initial '(' and consumes final ')', which is replaced by '\0'.
   char *get_paren_expr(const char *description, bool include_location = false);
--- a/src/share/vm/adlc/archDesc.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/adlc/archDesc.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -823,9 +823,9 @@
   } else {
     char       *rc_name = toUpper(reg_class_name);
     const char *mask    = "_mask";
-    int         length  = (int)strlen(rc_name) + (int)strlen(mask) + 3;
+    int         length  = (int)strlen(rc_name) + (int)strlen(mask) + 5;
     char       *regMask = new char[length];
-    sprintf(regMask,"%s%s", rc_name, mask);
+    sprintf(regMask,"%s%s()", rc_name, mask);
     return regMask;
   }
 }
@@ -1018,6 +1018,9 @@
     ident = "TEMP";
     eForm = new Effect(ident);
     _globalNames.Insert(ident, eForm);
+    ident = "CALL";
+    eForm = new Effect(ident);
+    _globalNames.Insert(ident, eForm);
   }
 
   //
--- a/src/share/vm/adlc/formsopt.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/adlc/formsopt.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -219,7 +219,9 @@
 
 //------------------------------RegClass---------------------------------------
 // Construct a register class into which registers will be inserted
-RegClass::RegClass(const char *classid) : _stack_or_reg(false), _classid(classid), _regDef(cmpstr,hashstr, Form::arena) {
+RegClass::RegClass(const char *classid) : _stack_or_reg(false), _classid(classid), _regDef(cmpstr,hashstr, Form::arena),
+                                          _user_defined(NULL)
+{
 }
 
 // record a register in this class
--- a/src/share/vm/adlc/formsopt.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/adlc/formsopt.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -161,6 +161,7 @@
   NameList    _regDefs;         // List of registers in class
   Dict        _regDef;          // Dictionary of registers in class
   bool        _stack_or_reg;    // Allowed on any stack slot
+  char*       _user_defined;
 
   // Public Methods
   RegClass(const char *classid);// Constructor
--- a/src/share/vm/adlc/formssel.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/adlc/formssel.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -31,7 +31,8 @@
   : _ident(id), _ideal_only(ideal_only),
     _localNames(cmpstr, hashstr, Form::arena),
     _effects(cmpstr, hashstr, Form::arena),
-    _is_mach_constant(false)
+    _is_mach_constant(false),
+    _has_call(false)
 {
       _ftype = Form::INS;
 
@@ -62,7 +63,8 @@
   : _ident(id), _ideal_only(false),
     _localNames(instr->_localNames),
     _effects(instr->_effects),
-    _is_mach_constant(false)
+    _is_mach_constant(false),
+    _has_call(false)
 {
       _ftype = Form::INS;
 
@@ -1754,6 +1756,7 @@
   if(!strcmp(name, "USE_KILL")) return Component::USE_KILL;
   if(!strcmp(name, "TEMP")) return Component::TEMP;
   if(!strcmp(name, "INVALID")) return Component::INVALID;
+  if(!strcmp(name, "CALL")) return Component::CALL;
   assert( false,"Invalid effect name specified\n");
   return Component::INVALID;
 }
--- a/src/share/vm/adlc/formssel.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/adlc/formssel.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -111,6 +111,8 @@
   ComponentList  _components;      // List of Components matches MachNode's
                                    // operand structure
 
+  bool           _has_call;        // contain a call and caller save registers should be saved?
+
   // Public Methods
   InstructForm(const char *id, bool ideal_only = false);
   InstructForm(const char *id, InstructForm *instr, MatchRule *rule);
@@ -895,7 +897,8 @@
     DEF     = 0x2, USE_DEF   = 0x3,
     KILL    = 0x4, USE_KILL  = 0x5,
     SYNTHETIC = 0x8,
-    TEMP = USE | SYNTHETIC
+    TEMP = USE | SYNTHETIC,
+    CALL = 0x10
   };
 };
 
--- a/src/share/vm/adlc/output_c.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/adlc/output_c.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -162,11 +162,17 @@
       RegClass   *reg_class = _register->getRegClass(rc_name);
       assert( reg_class, "Using an undefined register class");
 
-      int len = RegisterForm::RegMask_Size();
-      fprintf(fp_hpp, "extern const RegMask %s%s_mask;\n", prefix, toUpper( rc_name ) );
+      if (reg_class->_user_defined == NULL) {
+        fprintf(fp_hpp, "extern const RegMask _%s%s_mask;\n", prefix, toUpper( rc_name ) );
+        fprintf(fp_hpp, "inline const RegMask &%s%s_mask() { return _%s%s_mask; }\n", prefix, toUpper( rc_name ), prefix, toUpper( rc_name ));
+      } else {
+        fprintf(fp_hpp, "inline const RegMask &%s%s_mask() { %s }\n", prefix, toUpper( rc_name ), reg_class->_user_defined);
+      }
 
       if( reg_class->_stack_or_reg ) {
-        fprintf(fp_hpp, "extern const RegMask %sSTACK_OR_%s_mask;\n", prefix, toUpper( rc_name ) );
+        assert(reg_class->_user_defined == NULL, "no user defined reg class here");
+        fprintf(fp_hpp, "extern const RegMask _%sSTACK_OR_%s_mask;\n", prefix, toUpper( rc_name ) );
+        fprintf(fp_hpp, "inline const RegMask &%sSTACK_OR_%s_mask() { return _%sSTACK_OR_%s_mask; }\n", prefix, toUpper( rc_name ), prefix, toUpper( rc_name ) );
       }
     }
   }
@@ -188,8 +194,10 @@
       RegClass   *reg_class = _register->getRegClass(rc_name);
       assert( reg_class, "Using an undefined register class");
 
+      if (reg_class->_user_defined != NULL) continue;
+
       int len = RegisterForm::RegMask_Size();
-      fprintf(fp_cpp, "const RegMask %s%s_mask(", prefix, toUpper( rc_name ) );
+      fprintf(fp_cpp, "const RegMask _%s%s_mask(", prefix, toUpper( rc_name ) );
       { int i;
         for( i = 0; i < len-1; i++ )
           fprintf(fp_cpp," 0x%x,",reg_class->regs_in_word(i,false));
@@ -198,7 +206,7 @@
 
       if( reg_class->_stack_or_reg ) {
         int i;
-        fprintf(fp_cpp, "const RegMask %sSTACK_OR_%s_mask(", prefix, toUpper( rc_name ) );
+        fprintf(fp_cpp, "const RegMask _%sSTACK_OR_%s_mask(", prefix, toUpper( rc_name ) );
         for( i = 0; i < len-1; i++ )
           fprintf(fp_cpp," 0x%x,",reg_class->regs_in_word(i,true));
         fprintf(fp_cpp," 0x%x );\n",reg_class->regs_in_word(i,true));
@@ -2585,9 +2593,9 @@
   // Output instruction's emit prototype
   fprintf(fp, "void %sNode::eval_constant(Compile* C) {\n", inst._ident);
 
-  // For ideal jump nodes, allocate a jump table.
+  // For ideal jump nodes, add a jump-table entry.
   if (inst.is_ideal_jump()) {
-    fprintf(fp, "  _constant = C->constant_table().allocate_jump_table(this);\n");
+    fprintf(fp, "  _constant = C->constant_table().add_jump_table(this);\n");
   }
 
   // If user did not define an encode section,
@@ -2690,7 +2698,7 @@
       if (strcmp(first_reg_class, "stack_slots") == 0) {
         fprintf(fp,"  return &(Compile::current()->FIRST_STACK_mask());\n");
       } else {
-        fprintf(fp,"  return &%s_mask;\n", toUpper(first_reg_class));
+        fprintf(fp,"  return &%s_mask();\n", toUpper(first_reg_class));
       }
     } else {
       // Build a switch statement to return the desired mask.
@@ -2702,7 +2710,7 @@
         if( !strcmp(reg_class, "stack_slots") ) {
           fprintf(fp, "  case %d: return &(Compile::current()->FIRST_STACK_mask());\n", index);
         } else {
-          fprintf(fp, "  case %d: return &%s_mask;\n", index, toUpper(reg_class));
+          fprintf(fp, "  case %d: return &%s_mask();\n", index, toUpper(reg_class));
         }
       }
       fprintf(fp,"  }\n");
@@ -4080,8 +4088,6 @@
   fprintf(fp_cpp,"OptoReg::Name Matcher::inline_cache_reg() {");
   fprintf(fp_cpp," return OptoReg::Name(%s_num); }\n\n",
           _frame->_inline_cache_reg);
-  fprintf(fp_cpp,"const RegMask &Matcher::inline_cache_reg_mask() {");
-  fprintf(fp_cpp," return INLINE_CACHE_REG_mask; }\n\n");
   fprintf(fp_cpp,"int Matcher::inline_cache_reg_encode() {");
   fprintf(fp_cpp," return _regEncode[inline_cache_reg()]; }\n\n");
 
@@ -4089,8 +4095,6 @@
   fprintf(fp_cpp,"OptoReg::Name Matcher::interpreter_method_oop_reg() {");
   fprintf(fp_cpp," return OptoReg::Name(%s_num); }\n\n",
           _frame->_interpreter_method_oop_reg);
-  fprintf(fp_cpp,"const RegMask &Matcher::interpreter_method_oop_reg_mask() {");
-  fprintf(fp_cpp," return INTERPRETER_METHOD_OOP_REG_mask; }\n\n");
   fprintf(fp_cpp,"int Matcher::interpreter_method_oop_reg_encode() {");
   fprintf(fp_cpp," return _regEncode[interpreter_method_oop_reg()]; }\n\n");
 
@@ -4101,11 +4105,6 @@
   else
     fprintf(fp_cpp," return OptoReg::Name(%s_num); }\n\n",
             _frame->_interpreter_frame_pointer_reg);
-  fprintf(fp_cpp,"const RegMask &Matcher::interpreter_frame_pointer_reg_mask() {");
-  if (_frame->_interpreter_frame_pointer_reg == NULL)
-    fprintf(fp_cpp," static RegMask dummy; return dummy; }\n\n");
-  else
-    fprintf(fp_cpp," return INTERPRETER_FRAME_POINTER_REG_mask; }\n\n");
 
   // Frame Pointer definition
   /* CNC - I can not contemplate having a different frame pointer between
--- a/src/share/vm/adlc/output_h.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/adlc/output_h.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -1720,6 +1720,16 @@
       }
     }
 
+    // flag: if this instruction is implemented with a call
+    if ( instr->_has_call ) {
+      if ( node_flags_set ) {
+        fprintf(fp," | Flag_has_call");
+      } else {
+        fprintf(fp,"init_flags(Flag_has_call");
+        node_flags_set = true;
+      }
+    }
+
     if ( node_flags_set ) {
       fprintf(fp,"); ");
     }
--- a/src/share/vm/asm/assembler.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/asm/assembler.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -257,6 +257,29 @@
   // ensure buf contains all code (call this before using/copying the code)
   void flush();
 
+  // min and max values for signed immediate ranges
+  static int min_simm(int nbits) { return -(intptr_t(1) << (nbits - 1))    ; }
+  static int max_simm(int nbits) { return  (intptr_t(1) << (nbits - 1)) - 1; }
+
+  // Define some:
+  static int min_simm10() { return min_simm(10); }
+  static int min_simm13() { return min_simm(13); }
+  static int min_simm16() { return min_simm(16); }
+
+  // Test if x is within signed immediate range for nbits
+  static bool is_simm(intptr_t x, int nbits) { return min_simm(nbits) <= x && x <= max_simm(nbits); }
+
+  // Define some:
+  static bool is_simm5( intptr_t x) { return is_simm(x, 5 ); }
+  static bool is_simm8( intptr_t x) { return is_simm(x, 8 ); }
+  static bool is_simm10(intptr_t x) { return is_simm(x, 10); }
+  static bool is_simm11(intptr_t x) { return is_simm(x, 11); }
+  static bool is_simm12(intptr_t x) { return is_simm(x, 12); }
+  static bool is_simm13(intptr_t x) { return is_simm(x, 13); }
+  static bool is_simm16(intptr_t x) { return is_simm(x, 16); }
+  static bool is_simm26(intptr_t x) { return is_simm(x, 26); }
+  static bool is_simm32(intptr_t x) { return is_simm(x, 32); }
+
   // Accessors
   CodeBuffer*   code() const;          // _code_section->outer()
   CodeSection*  code_section() const   { return _code_section; }
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -3495,9 +3495,6 @@
     if (profile_calls()) {
       profile_call(recv, holder_known ? callee->holder() : NULL);
     }
-    if (profile_inlined_calls()) {
-      profile_invocation(callee, copy_state_before());
-    }
   }
 
   // Introduce a new callee continuation point - if the callee has
@@ -3571,6 +3568,10 @@
     append(new RuntimeCall(voidType, "dtrace_method_entry", CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), args));
   }
 
+  if (profile_inlined_calls()) {
+    profile_invocation(callee, copy_state_before_with_bci(SynchronizationEntryBCI));
+  }
+
   BlockBegin* callee_start_block = block_at(0);
   if (callee_start_block != NULL) {
     assert(callee_start_block->is_set(BlockBegin::parser_loop_header_flag), "must be loop header");
--- a/src/share/vm/c1/c1_Instruction.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/c1/c1_Instruction.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -501,6 +501,7 @@
   virtual RoundFP*          as_RoundFP()         { return NULL; }
   virtual ExceptionObject*  as_ExceptionObject() { return NULL; }
   virtual UnsafeOp*         as_UnsafeOp()        { return NULL; }
+  virtual ProfileInvoke*    as_ProfileInvoke()   { return NULL; }
 
   virtual void visit(InstructionVisitor* v)      = 0;
 
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -429,7 +429,7 @@
         // all locals are dead on exit from the synthetic unlocker
         liveness.clear();
       } else {
-        assert(x->as_MonitorEnter(), "only other case is MonitorEnter");
+        assert(x->as_MonitorEnter() || x->as_ProfileInvoke(), "only other cases are MonitorEnter and ProfileInvoke");
       }
     }
     if (!liveness.is_valid()) {
--- a/src/share/vm/ci/bcEscapeAnalyzer.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/ci/bcEscapeAnalyzer.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -150,11 +150,23 @@
   clear_bits(vars, _arg_local);
 }
 
-void BCEscapeAnalyzer::set_global_escape(ArgumentMap vars) {
+void BCEscapeAnalyzer::set_global_escape(ArgumentMap vars, bool merge) {
   clear_bits(vars, _arg_local);
   clear_bits(vars, _arg_stack);
   if (vars.contains_allocated())
     _allocated_escapes = true;
+
+  if (merge && !vars.is_empty()) {
+    // Merge new state into already processed block.
+    // New state is not taken into account and
+    // it may invalidate set_returned() result.
+    if (vars.contains_unknown() || vars.contains_allocated()) {
+      _return_local = false;
+    }
+    if (vars.contains_unknown() || vars.contains_vars()) {
+      _return_allocated = false;
+    }
+  }
 }
 
 void BCEscapeAnalyzer::set_dirty(ArgumentMap vars) {
@@ -999,7 +1011,7 @@
       t.set_difference(d_state->_stack[i]);
       extra_vars.set_union(t);
     }
-    set_global_escape(extra_vars);
+    set_global_escape(extra_vars, true);
   }
 }
 
--- a/src/share/vm/ci/bcEscapeAnalyzer.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/ci/bcEscapeAnalyzer.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -81,7 +81,7 @@
   bool is_arg_stack(ArgumentMap vars);
   void clear_bits(ArgumentMap vars, VectorSet &bs);
   void set_method_escape(ArgumentMap vars);
-  void set_global_escape(ArgumentMap vars);
+  void set_global_escape(ArgumentMap vars, bool merge = false);
   void set_dirty(ArgumentMap vars);
   void set_modified(ArgumentMap vars, int offs, int size);
 
--- a/src/share/vm/ci/ciMethod.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/ci/ciMethod.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -295,12 +295,6 @@
   // Print the name of this method in various incarnations.
   void print_name(outputStream* st = tty);
   void print_short_name(outputStream* st = tty);
-
-  methodOop get_method_handle_target() {
-    KlassHandle receiver_limit; int flags = 0;
-    methodHandle m = MethodHandles::decode_method(get_oop(), receiver_limit, flags);
-    return m();
-  }
 };
 
 #endif // SHARE_VM_CI_CIMETHOD_HPP
--- a/src/share/vm/compiler/compileBroker.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/compiler/compileBroker.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -1748,7 +1748,7 @@
     tty->print("%4d ", compile_id);    // print compilation number
     tty->print("%s ", (is_osr ? "%" : " "));
     int code_size = (task->code() == NULL) ? 0 : task->code()->total_size();
-    tty->print_cr("size: %d time: %d inlined: %d bytes", code_size, time.milliseconds(), task->num_inlined_bytecodes());
+    tty->print_cr("size: %d time: %d inlined: %d bytes", code_size, (int)time.milliseconds(), task->num_inlined_bytecodes());
   }
 
   if (compilable == ciEnv::MethodCompilable_never) {
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -668,12 +668,16 @@
 
 // We de-virtualize the block-related calls below, since we know that our
 // space is a CompactibleFreeListSpace.
+
 #define FreeListSpace_DCTOC__walk_mem_region_with_cl_DEFN(ClosureType)          \
 void FreeListSpace_DCTOC::walk_mem_region_with_cl(MemRegion mr,                 \
                                                  HeapWord* bottom,              \
                                                  HeapWord* top,                 \
                                                  ClosureType* cl) {             \
-   if (SharedHeap::heap()->n_par_threads() > 0) {                               \
+   bool is_par = SharedHeap::heap()->n_par_threads() > 0;                       \
+   if (is_par) {                                                                \
+     assert(SharedHeap::heap()->n_par_threads() ==                              \
+            SharedHeap::heap()->workers()->active_workers(), "Mismatch");       \
      walk_mem_region_with_cl_par(mr, bottom, top, cl);                          \
    } else {                                                                     \
      walk_mem_region_with_cl_nopar(mr, bottom, top, cl);                        \
@@ -1925,6 +1929,9 @@
   if (rem_size < SmallForDictionary) {
     bool is_par = (SharedHeap::heap()->n_par_threads() > 0);
     if (is_par) _indexedFreeListParLocks[rem_size]->lock();
+    assert(!is_par ||
+           (SharedHeap::heap()->n_par_threads() ==
+            SharedHeap::heap()->workers()->active_workers()), "Mismatch");
     returnChunkToFreeList(ffc);
     split(size, rem_size);
     if (is_par) _indexedFreeListParLocks[rem_size]->unlock();
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -3582,16 +3582,6 @@
        " or no bits are set in the gc_prologue before the start of the next "
        "subsequent marking phase.");
 
-  // Temporarily disabled, since pre/post-consumption closures don't
-  // care about precleaned cards
-  #if 0
-  {
-    MemRegion mr = MemRegion((HeapWord*)_virtual_space.low(),
-                             (HeapWord*)_virtual_space.high());
-    _ct->ct_bs()->preclean_dirty_cards(mr);
-  }
-  #endif
-
   // Save the end of the used_region of the constituent generations
   // to be used to limit the extent of sweep in each generation.
   save_sweep_limits();
@@ -4062,7 +4052,7 @@
   Par_ConcMarkingClosure(CMSCollector* collector, CMSConcMarkingTask* task, OopTaskQueue* work_queue,
                          CMSBitMap* bit_map, CMSMarkStack* overflow_stack,
                          CMSMarkStack* revisit_stack):
-    Par_KlassRememberingOopClosure(collector, NULL, revisit_stack),
+    Par_KlassRememberingOopClosure(collector, collector->ref_processor(), revisit_stack),
     _task(task),
     _span(collector->_span),
     _work_queue(work_queue),
@@ -4244,9 +4234,11 @@
 
 bool CMSCollector::do_marking_mt(bool asynch) {
   assert(ConcGCThreads > 0 && conc_workers() != NULL, "precondition");
-  // In the future this would be determined ergonomically, based
-  // on #cpu's, # active mutator threads (and load), and mutation rate.
-  int num_workers = ConcGCThreads;
+  int num_workers = AdaptiveSizePolicy::calc_active_conc_workers(
+                                       conc_workers()->total_workers(),
+                                       conc_workers()->active_workers(),
+                                       Threads::number_of_non_daemon_threads());
+  conc_workers()->set_active_workers(num_workers);
 
   CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
   CompactibleFreeListSpace* perm_space = _permGen->cmsSpace();
@@ -5062,6 +5054,8 @@
   ParallelTaskTerminator _term;
 
  public:
+  // A value of 0 passed to n_workers will cause the number of
+  // workers to be taken from the active workers in the work gang.
   CMSParRemarkTask(CMSCollector* collector,
                    CompactibleFreeListSpace* cms_space,
                    CompactibleFreeListSpace* perm_space,
@@ -5544,7 +5538,15 @@
   GenCollectedHeap* gch = GenCollectedHeap::heap();
   FlexibleWorkGang* workers = gch->workers();
   assert(workers != NULL, "Need parallel worker threads.");
-  int n_workers = workers->total_workers();
+  // Choose to use the number of GC workers most recently set
+  // into "active_workers".  If active_workers is not set, set it
+  // to ParallelGCThreads.
+  int n_workers = workers->active_workers();
+  if (n_workers == 0) {
+    assert(n_workers > 0, "Should have been set during scavenge");
+    n_workers = ParallelGCThreads;
+    workers->set_active_workers(n_workers);
+  }
   CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
   CompactibleFreeListSpace* perm_space = _permGen->cmsSpace();
 
@@ -5884,8 +5886,17 @@
       // and a different number of discovered lists may have Ref objects.
       // That is OK as long as the Reference lists are balanced (see
       // balance_all_queues() and balance_queues()).
-
-      rp->set_active_mt_degree(ParallelGCThreads);
+      GenCollectedHeap* gch = GenCollectedHeap::heap();
+      int active_workers = ParallelGCThreads;
+      FlexibleWorkGang* workers = gch->workers();
+      if (workers != NULL) {
+        active_workers = workers->active_workers();
+        // The expectation is that active_workers will have already
+        // been set to a reasonable value.  If it has not been set,
+        // investigate.
+        assert(active_workers > 0, "Should have been set during scavenge");
+      }
+      rp->set_active_mt_degree(active_workers);
       CMSRefProcTaskExecutor task_executor(*this);
       rp->process_discovered_references(&_is_alive_closure,
                                         &cmsKeepAliveClosure,
--- a/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -255,7 +255,18 @@
 CollectionSetChooser::
 prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize) {
   _first_par_unreserved_idx = 0;
-  size_t max_waste = ParallelGCThreads * chunkSize;
+  int n_threads = ParallelGCThreads;
+  if (UseDynamicNumberOfGCThreads) {
+    assert(G1CollectedHeap::heap()->workers()->active_workers() > 0,
+      "Should have been set earlier");
+    // This is defensive code. As the assertion above says, the number
+    // of active threads should be > 0, but in case there is some path
+    // or some improperly initialized variable with leads to no
+    // active threads, protect against that in a product build.
+    n_threads = MAX2(G1CollectedHeap::heap()->workers()->active_workers(),
+                     1);
+  }
+  size_t max_waste = n_threads * chunkSize;
   // it should be aligned with respect to chunkSize
   size_t aligned_n_regions =
                      (n_regions + (chunkSize - 1)) / chunkSize * chunkSize;
@@ -265,6 +276,11 @@
 
 jint
 CollectionSetChooser::getParMarkedHeapRegionChunk(jint n_regions) {
+  // Don't do this assert because this can be called at a point
+  // where the loop up stream will not execute again but might
+  // try to claim more chunks (loop test has not been done yet).
+  // assert(_markedRegions.length() > _first_par_unreserved_idx,
+  //  "Striding beyond the marked regions");
   jint res = Atomic::add(n_regions, &_first_par_unreserved_idx);
   assert(_markedRegions.length() > res + n_regions - 1,
          "Should already have been expanded");
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -44,7 +44,7 @@
 //
 // CMS Bit Map Wrapper
 
-CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
+CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
   _bm((uintptr_t*)NULL,0),
   _shifter(shifter) {
   _bmStartWord = (HeapWord*)(rs.base());
@@ -458,12 +458,17 @@
 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 #endif // _MSC_VER
 
+size_t ConcurrentMark::scale_parallel_threads(size_t n_par_threads) {
+  return MAX2((n_par_threads + 2) / 4, (size_t)1);
+}
+
 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                                int max_regions) :
   _markBitMap1(rs, MinObjAlignment - 1),
   _markBitMap2(rs, MinObjAlignment - 1),
 
   _parallel_marking_threads(0),
+  _max_parallel_marking_threads(0),
   _sleep_factor(0.0),
   _marking_task_overhead(1.0),
   _cleanup_sleep_factor(0.0),
@@ -554,15 +559,17 @@
   if (ParallelGCThreads == 0) {
     // if we are not running with any parallel GC threads we will not
     // spawn any marking threads either
-    _parallel_marking_threads =   0;
-    _sleep_factor             = 0.0;
-    _marking_task_overhead    = 1.0;
+    _parallel_marking_threads =       0;
+    _max_parallel_marking_threads =   0;
+    _sleep_factor             =     0.0;
+    _marking_task_overhead    =     1.0;
   } else {
     if (ConcGCThreads > 0) {
       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
       // if both are set
 
       _parallel_marking_threads = ConcGCThreads;
+      _max_parallel_marking_threads = _parallel_marking_threads;
       _sleep_factor             = 0.0;
       _marking_task_overhead    = 1.0;
     } else if (G1MarkingOverheadPercent > 0) {
@@ -583,10 +590,12 @@
                          (1.0 - marking_task_overhead) / marking_task_overhead;
 
       _parallel_marking_threads = (size_t) marking_thread_num;
+      _max_parallel_marking_threads = _parallel_marking_threads;
       _sleep_factor             = sleep_factor;
       _marking_task_overhead    = marking_task_overhead;
     } else {
-      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
+      _parallel_marking_threads = scale_parallel_threads(ParallelGCThreads);
+      _max_parallel_marking_threads = _parallel_marking_threads;
       _sleep_factor             = 0.0;
       _marking_task_overhead    = 1.0;
     }
@@ -609,7 +618,7 @@
 
     guarantee(parallel_marking_threads() > 0, "peace of mind");
     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
-         (int) _parallel_marking_threads, false, true);
+         (int) _max_parallel_marking_threads, false, true);
     if (_parallel_workers == NULL) {
       vm_exit_during_initialization("Failed necessary allocation.");
     } else {
@@ -1106,6 +1115,33 @@
   ~CMConcurrentMarkingTask() { }
 };
 
+// Calculates the number of active workers for a concurrent
+// phase.
+int ConcurrentMark::calc_parallel_marking_threads() {
+
+  size_t n_conc_workers;
+  if (!G1CollectedHeap::use_parallel_gc_threads()) {
+    n_conc_workers = 1;
+  } else {
+    if (!UseDynamicNumberOfGCThreads ||
+        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
+         !ForceDynamicNumberOfGCThreads)) {
+      n_conc_workers = max_parallel_marking_threads();
+    } else {
+      n_conc_workers =
+        AdaptiveSizePolicy::calc_default_active_workers(
+                                     max_parallel_marking_threads(),
+                                     1, /* Minimum workers */
+                                     parallel_marking_threads(),
+                                     Threads::number_of_non_daemon_threads());
+      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
+      // that scaling has already gone into "_max_parallel_marking_threads".
+    }
+  }
+  assert(n_conc_workers > 0, "Always need at least 1");
+  return (int) MAX2(n_conc_workers, (size_t) 1);
+}
+
 void ConcurrentMark::markFromRoots() {
   // we might be tempted to assert that:
   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
@@ -1116,9 +1152,20 @@
 
   _restart_for_overflow = false;
 
-  size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
+  // Parallel task terminator is set in "set_phase()".
   force_overflow_conc()->init();
-  set_phase(active_workers, true /* concurrent */);
+
+  // _g1h has _n_par_threads
+
+  _parallel_marking_threads = calc_parallel_marking_threads();
+  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
+    "Maximum number of marking threads exceeded");
+  _parallel_workers->set_active_workers((int)_parallel_marking_threads);
+  // Don't set _n_par_threads because it affects MT in proceess_strong_roots()
+  // and the decisions on that MT processing is made elsewhere.
+
+  assert( _parallel_workers->active_workers() > 0, "Should have been set");
+  set_phase(_parallel_workers->active_workers(), true /* concurrent */);
 
   CMConcurrentMarkingTask markingTask(this, cmThread());
   if (parallel_marking_threads() > 0) {
@@ -1181,6 +1228,7 @@
                                        true /* expected_active */);
 
     if (VerifyDuringGC) {
+
       HandleMark hm;  // handle scope
       gclog_or_tty->print(" VerifyDuringGC:(after)");
       Universe::heap()->prepare_for_verify();
@@ -1463,12 +1511,20 @@
   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
                       BitMap* region_bm, BitMap* card_bm)
     : AbstractGangTask("G1 final counting"), _g1h(g1h),
-      _bm(bm), _region_bm(region_bm), _card_bm(card_bm) {
-    if (ParallelGCThreads > 0) {
-      _n_workers = _g1h->workers()->total_workers();
+    _bm(bm), _region_bm(region_bm), _card_bm(card_bm),
+    _n_workers(0)
+  {
+    // Use the value already set as the number of active threads
+    // in the call to run_task().  Needed for the allocation of
+    // _live_bytes and _used_bytes.
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      assert( _g1h->workers()->active_workers() > 0,
+        "Should have been previously set");
+      _n_workers = _g1h->workers()->active_workers();
     } else {
       _n_workers = 1;
     }
+
     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
   }
@@ -1485,6 +1541,7 @@
     calccl.no_yield();
     if (G1CollectedHeap::use_parallel_gc_threads()) {
       _g1h->heap_region_par_iterate_chunked(&calccl, i,
+                                            (int) _n_workers,
                                             HeapRegion::FinalCountClaimValue);
     } else {
       _g1h->heap_region_iterate(&calccl);
@@ -1530,10 +1587,42 @@
                              FreeRegionList* local_cleanup_list,
                              OldRegionSet* old_proxy_set,
                              HumongousRegionSet* humongous_proxy_set,
-                             HRRSCleanupTask* hrrs_cleanup_task);
+                             HRRSCleanupTask* hrrs_cleanup_task) :
+    _g1(g1), _worker_num(worker_num),
+    _max_live_bytes(0), _regions_claimed(0),
+    _freed_bytes(0),
+    _claimed_region_time(0.0), _max_region_time(0.0),
+    _local_cleanup_list(local_cleanup_list),
+    _old_proxy_set(old_proxy_set),
+    _humongous_proxy_set(humongous_proxy_set),
+    _hrrs_cleanup_task(hrrs_cleanup_task) { }
+
   size_t freed_bytes() { return _freed_bytes; }
 
-  bool doHeapRegion(HeapRegion *r);
+  bool doHeapRegion(HeapRegion *hr) {
+    // We use a claim value of zero here because all regions
+    // were claimed with value 1 in the FinalCount task.
+    hr->reset_gc_time_stamp();
+    if (!hr->continuesHumongous()) {
+      double start = os::elapsedTime();
+      _regions_claimed++;
+      hr->note_end_of_marking();
+      _max_live_bytes += hr->max_live_bytes();
+      _g1->free_region_if_empty(hr,
+                                &_freed_bytes,
+                                _local_cleanup_list,
+                                _old_proxy_set,
+                                _humongous_proxy_set,
+                                _hrrs_cleanup_task,
+                                true /* par */);
+      double region_time = (os::elapsedTime() - start);
+      _claimed_region_time += region_time;
+      if (region_time > _max_region_time) {
+        _max_region_time = region_time;
+      }
+    }
+    return false;
+  }
 
   size_t max_live_bytes() { return _max_live_bytes; }
   size_t regions_claimed() { return _regions_claimed; }
@@ -1568,6 +1657,7 @@
                                            &hrrs_cleanup_task);
     if (G1CollectedHeap::use_parallel_gc_threads()) {
       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
+                                            _g1h->workers()->active_workers(),
                                             HeapRegion::NoteEndClaimValue);
     } else {
       _g1h->heap_region_iterate(&g1_note_end);
@@ -1644,47 +1734,6 @@
 
 };
 
-G1NoteEndOfConcMarkClosure::
-G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
-                           int worker_num,
-                           FreeRegionList* local_cleanup_list,
-                           OldRegionSet* old_proxy_set,
-                           HumongousRegionSet* humongous_proxy_set,
-                           HRRSCleanupTask* hrrs_cleanup_task)
-  : _g1(g1), _worker_num(worker_num),
-    _max_live_bytes(0), _regions_claimed(0),
-    _freed_bytes(0),
-    _claimed_region_time(0.0), _max_region_time(0.0),
-    _local_cleanup_list(local_cleanup_list),
-    _old_proxy_set(old_proxy_set),
-    _humongous_proxy_set(humongous_proxy_set),
-    _hrrs_cleanup_task(hrrs_cleanup_task) { }
-
-bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
-  // We use a claim value of zero here because all regions
-  // were claimed with value 1 in the FinalCount task.
-  hr->reset_gc_time_stamp();
-  if (!hr->continuesHumongous()) {
-    double start = os::elapsedTime();
-    _regions_claimed++;
-    hr->note_end_of_marking();
-    _max_live_bytes += hr->max_live_bytes();
-    _g1->free_region_if_empty(hr,
-                              &_freed_bytes,
-                              _local_cleanup_list,
-                              _old_proxy_set,
-                              _humongous_proxy_set,
-                              _hrrs_cleanup_task,
-                              true /* par */);
-    double region_time = (os::elapsedTime() - start);
-    _claimed_region_time += region_time;
-    if (region_time > _max_region_time) {
-      _max_region_time = region_time;
-    }
-  }
-  return false;
-}
-
 void ConcurrentMark::cleanup() {
   // world is stopped at this checkpoint
   assert(SafepointSynchronize::is_at_safepoint(),
@@ -1716,6 +1765,9 @@
 
   HeapRegionRemSet::reset_for_cleanup_tasks();
 
+  g1h->set_par_threads();
+  size_t n_workers = g1h->n_par_threads();
+
   // Do counting once more with the world stopped for good measure.
   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
                                         &_region_bm, &_card_bm);
@@ -1724,9 +1776,10 @@
                                                HeapRegion::InitialClaimValue),
            "sanity check");
 
-    int n_workers = g1h->workers()->total_workers();
-    g1h->set_par_threads(n_workers);
+    assert(g1h->n_par_threads() == (int) n_workers,
+      "Should not have been reset");
     g1h->workers()->run_task(&g1_par_count_task);
+    // Done with the parallel phase so reset to 0.
     g1h->set_par_threads(0);
 
     assert(g1h->check_heap_region_claim_values(
@@ -1776,8 +1829,7 @@
   double note_end_start = os::elapsedTime();
   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
   if (G1CollectedHeap::use_parallel_gc_threads()) {
-    int n_workers = g1h->workers()->total_workers();
-    g1h->set_par_threads(n_workers);
+    g1h->set_par_threads((int)n_workers);
     g1h->workers()->run_task(&g1_par_note_end_task);
     g1h->set_par_threads(0);
 
@@ -1806,8 +1858,7 @@
     double rs_scrub_start = os::elapsedTime();
     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
     if (G1CollectedHeap::use_parallel_gc_threads()) {
-      int n_workers = g1h->workers()->total_workers();
-      g1h->set_par_threads(n_workers);
+      g1h->set_par_threads((int)n_workers);
       g1h->workers()->run_task(&g1_par_scrub_rs_task);
       g1h->set_par_threads(0);
 
@@ -1825,7 +1876,7 @@
 
   // this will also free any regions totally full of garbage objects,
   // and sort the regions.
-  g1h->g1_policy()->record_concurrent_mark_cleanup_end();
+  g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
 
   // Statistics.
   double end = os::elapsedTime();
@@ -1991,16 +2042,12 @@
 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
   ConcurrentMark*  _cm;
   CMTask*          _task;
-  CMBitMap*        _bitMap;
   int              _ref_counter_limit;
   int              _ref_counter;
  public:
-  G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
-                                  CMTask* task,
-                                  CMBitMap* bitMap) :
-    _cm(cm), _task(task), _bitMap(bitMap),
-    _ref_counter_limit(G1RefProcDrainInterval)
-  {
+  G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
+    _cm(cm), _task(task),
+    _ref_counter_limit(G1RefProcDrainInterval) {
     assert(_ref_counter_limit > 0, "sanity");
     _ref_counter = _ref_counter_limit;
   }
@@ -2091,19 +2138,16 @@
 private:
   G1CollectedHeap* _g1h;
   ConcurrentMark*  _cm;
-  CMBitMap*        _bitmap;
   WorkGang*        _workers;
   int              _active_workers;
 
 public:
   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                         ConcurrentMark* cm,
-                        CMBitMap* bitmap,
                         WorkGang* workers,
                         int n_workers) :
-    _g1h(g1h), _cm(cm), _bitmap(bitmap),
-    _workers(workers), _active_workers(n_workers)
-  { }
+    _g1h(g1h), _cm(cm),
+    _workers(workers), _active_workers(n_workers) { }
 
   // Executes the given task using concurrent marking worker threads.
   virtual void execute(ProcessTask& task);
@@ -2115,21 +2159,18 @@
   ProcessTask&     _proc_task;
   G1CollectedHeap* _g1h;
   ConcurrentMark*  _cm;
-  CMBitMap*        _bitmap;
 
 public:
   G1CMRefProcTaskProxy(ProcessTask& proc_task,
                      G1CollectedHeap* g1h,
-                     ConcurrentMark* cm,
-                     CMBitMap* bitmap) :
+                     ConcurrentMark* cm) :
     AbstractGangTask("Process reference objects in parallel"),
-    _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
-  {}
+    _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
 
   virtual void work(int i) {
     CMTask* marking_task = _cm->task(i);
     G1CMIsAliveClosure g1_is_alive(_g1h);
-    G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
+    G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
 
     _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
@@ -2139,7 +2180,7 @@
 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
   assert(_workers != NULL, "Need parallel worker threads.");
 
-  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);
+  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
 
   // We need to reset the phase for each task execution so that
   // the termination protocol of CMTask::do_marking_step works.
@@ -2156,8 +2197,7 @@
 public:
   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
     AbstractGangTask("Enqueue reference objects in parallel"),
-    _enq_task(enq_task)
-  { }
+    _enq_task(enq_task) { }
 
   virtual void work(int i) {
     _enq_task.work(i);
@@ -2207,10 +2247,10 @@
 
     // We use the work gang from the G1CollectedHeap and we utilize all
     // the worker threads.
-    int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
+    int active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1;
     active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
 
-    G1CMRefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
+    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                               g1h->workers(), active_workers);
 
     if (rp->processing_is_mt()) {
@@ -2290,7 +2330,9 @@
   }
 
   CMRemarkTask(ConcurrentMark* cm) :
-    AbstractGangTask("Par Remark"), _cm(cm) { }
+    AbstractGangTask("Par Remark"), _cm(cm) {
+    _cm->terminator()->reset_for_reuse(cm->_g1h->workers()->active_workers());
+  }
 };
 
 void ConcurrentMark::checkpointRootsFinalWork() {
@@ -2302,16 +2344,21 @@
 
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     G1CollectedHeap::StrongRootsScope srs(g1h);
-    // this is remark, so we'll use up all available threads
-    int active_workers = ParallelGCThreads;
+    // this is remark, so we'll use up all active threads
+    int active_workers = g1h->workers()->active_workers();
+    if (active_workers == 0) {
+      assert(active_workers > 0, "Should have been set earlier");
+      active_workers = ParallelGCThreads;
+      g1h->workers()->set_active_workers(active_workers);
+    }
     set_phase(active_workers, false /* concurrent */);
+    // Leave _parallel_marking_threads at it's
+    // value originally calculated in the ConcurrentMark
+    // constructor and pass values of the active workers
+    // through the gang in the task.
 
     CMRemarkTask remarkTask(this);
-    // We will start all available threads, even if we decide that the
-    // active_workers will be fewer. The extra ones will just bail out
-    // immediately.
-    int n_workers = g1h->workers()->total_workers();
-    g1h->set_par_threads(n_workers);
+    g1h->set_par_threads(active_workers);
     g1h->workers()->run_task(&remarkTask);
     g1h->set_par_threads(0);
   } else {
@@ -2859,8 +2906,10 @@
   }
 }
 
-class CSMarkOopClosure: public OopClosure {
-  friend class CSMarkBitMapClosure;
+// Closures used by ConcurrentMark::complete_marking_in_collection_set().
+
+class CSetMarkOopClosure: public OopClosure {
+  friend class CSetMarkBitMapClosure;
 
   G1CollectedHeap* _g1h;
   CMBitMap*        _bm;
@@ -2870,6 +2919,7 @@
   int              _ms_size;
   int              _ms_ind;
   int              _array_increment;
+  int              _worker_i;
 
   bool push(oop obj, int arr_ind = 0) {
     if (_ms_ind == _ms_size) {
@@ -2910,7 +2960,6 @@
         for (int j = arr_ind; j < lim; j++) {
           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
         }
-
       } else {
         obj->oop_iterate(this);
       }
@@ -2920,17 +2969,17 @@
   }
 
 public:
-  CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
+  CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
     _g1h(G1CollectedHeap::heap()),
     _cm(cm),
     _bm(cm->nextMarkBitMap()),
     _ms_size(ms_size), _ms_ind(0),
     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
-    _array_increment(MAX2(ms_size/8, 16))
-  {}
-
-  ~CSMarkOopClosure() {
+    _array_increment(MAX2(ms_size/8, 16)),
+    _worker_i(worker_i) { }
+
+  ~CSetMarkOopClosure() {
     FREE_C_HEAP_ARRAY(oop, _ms);
     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
   }
@@ -2953,10 +3002,11 @@
     if (hr != NULL) {
       if (hr->in_collection_set()) {
         if (_g1h->is_obj_ill(obj)) {
-          _bm->mark((HeapWord*)obj);
-          if (!push(obj)) {
-            gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
-            set_abort();
+          if (_bm->parMark((HeapWord*)obj)) {
+            if (!push(obj)) {
+              gclog_or_tty->print_cr("Setting abort in CSetMarkOopClosure because push failed.");
+              set_abort();
+            }
           }
         }
       } else {
@@ -2967,19 +3017,19 @@
   }
 };
 
-class CSMarkBitMapClosure: public BitMapClosure {
-  G1CollectedHeap* _g1h;
-  CMBitMap*        _bitMap;
-  ConcurrentMark*  _cm;
-  CSMarkOopClosure _oop_cl;
+class CSetMarkBitMapClosure: public BitMapClosure {
+  G1CollectedHeap*   _g1h;
+  CMBitMap*          _bitMap;
+  ConcurrentMark*    _cm;
+  CSetMarkOopClosure _oop_cl;
+  int                _worker_i;
+
 public:
-  CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
+  CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
     _g1h(G1CollectedHeap::heap()),
     _bitMap(cm->nextMarkBitMap()),
-    _oop_cl(cm, ms_size)
-  {}
-
-  ~CSMarkBitMapClosure() {}
+    _oop_cl(cm, ms_size, worker_i),
+    _worker_i(worker_i) { }
 
   bool do_bit(size_t offset) {
     // convert offset into a HeapWord*
@@ -3001,53 +3051,69 @@
   }
 };
 
-
-class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
-  CMBitMap* _bm;
-  CSMarkBitMapClosure _bit_cl;
+class CompleteMarkingInCSetHRClosure: public HeapRegionClosure {
+  CMBitMap*             _bm;
+  CSetMarkBitMapClosure _bit_cl;
+  int                   _worker_i;
+
   enum SomePrivateConstants {
     MSSize = 1000
   };
-  bool _completed;
+
 public:
-  CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
+  CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, int worker_i) :
     _bm(cm->nextMarkBitMap()),
-    _bit_cl(cm, MSSize),
-    _completed(true)
-  {}
-
-  ~CompleteMarkingInCSHRClosure() {}
-
-  bool doHeapRegion(HeapRegion* r) {
-    if (!r->evacuation_failed()) {
-      MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
-      if (!mr.is_empty()) {
-        if (!_bm->iterate(&_bit_cl, mr)) {
-          _completed = false;
-          return true;
+    _bit_cl(cm, MSSize, worker_i),
+    _worker_i(worker_i) { }
+
+  bool doHeapRegion(HeapRegion* hr) {
+    if (hr->claimHeapRegion(HeapRegion::CompleteMarkCSetClaimValue)) {
+      // The current worker has successfully claimed the region.
+      if (!hr->evacuation_failed()) {
+        MemRegion mr = MemRegion(hr->bottom(), hr->next_top_at_mark_start());
+        if (!mr.is_empty()) {
+          bool done = false;
+          while (!done) {
+            done = _bm->iterate(&_bit_cl, mr);
+          }
         }
       }
     }
     return false;
   }
-
-  bool completed() { return _completed; }
 };
 
-class ClearMarksInHRClosure: public HeapRegionClosure {
-  CMBitMap* _bm;
+class SetClaimValuesInCSetHRClosure: public HeapRegionClosure {
+  jint _claim_value;
+
 public:
-  ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
-
-  bool doHeapRegion(HeapRegion* r) {
-    if (!r->used_region().is_empty() && !r->evacuation_failed()) {
-      MemRegion usedMR = r->used_region();
-      _bm->clearRange(r->used_region());
-    }
+  SetClaimValuesInCSetHRClosure(jint claim_value) :
+    _claim_value(claim_value) { }
+
+  bool doHeapRegion(HeapRegion* hr) {
+    hr->set_claim_value(_claim_value);
     return false;
   }
 };
 
+class G1ParCompleteMarkInCSetTask: public AbstractGangTask {
+protected:
+  G1CollectedHeap* _g1h;
+  ConcurrentMark*  _cm;
+
+public:
+  G1ParCompleteMarkInCSetTask(G1CollectedHeap* g1h,
+                              ConcurrentMark* cm) :
+    AbstractGangTask("Complete Mark in CSet"),
+    _g1h(g1h), _cm(cm) { }
+
+  void work(int worker_i) {
+    CompleteMarkingInCSetHRClosure cmplt(_cm, worker_i);
+    HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_i);
+    _g1h->collection_set_iterate_from(hr, &cmplt);
+  }
+};
+
 void ConcurrentMark::complete_marking_in_collection_set() {
   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
 
@@ -3056,20 +3122,32 @@
     return;
   }
 
-  int i = 1;
   double start = os::elapsedTime();
-  while (true) {
-    i++;
-    CompleteMarkingInCSHRClosure cmplt(this);
-    g1h->collection_set_iterate(&cmplt);
-    if (cmplt.completed()) break;
+  int n_workers = g1h->workers()->total_workers();
+
+  G1ParCompleteMarkInCSetTask complete_mark_task(g1h, this);
+
+  assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
+
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    g1h->set_par_threads(n_workers);
+    g1h->workers()->run_task(&complete_mark_task);
+    g1h->set_par_threads(0);
+  } else {
+    complete_mark_task.work(0);
   }
+
+  assert(g1h->check_cset_heap_region_claim_values(HeapRegion::CompleteMarkCSetClaimValue), "sanity");
+
+  // Now reset the claim values in the regions in the collection set.
+  SetClaimValuesInCSetHRClosure set_cv_cl(HeapRegion::InitialClaimValue);
+  g1h->collection_set_iterate(&set_cv_cl);
+
+  assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
+
   double end_time = os::elapsedTime();
   double elapsed_time_ms = (end_time - start) * 1000.0;
   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
-
-  ClearMarksInHRClosure clr(nextMarkBitMap());
-  g1h->collection_set_iterate(&clr);
 }
 
 // The next two methods deal with the following optimisation. Some
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -360,7 +360,7 @@
   friend class ConcurrentMarkThread;
   friend class CMTask;
   friend class CMBitMapClosure;
-  friend class CSMarkOopClosure;
+  friend class CSetMarkOopClosure;
   friend class CMGlobalObjectClosure;
   friend class CMRemarkTask;
   friend class CMConcurrentMarkingTask;
@@ -375,7 +375,9 @@
   ConcurrentMarkThread* _cmThread;   // the thread doing the work
   G1CollectedHeap*      _g1h;        // the heap.
   size_t                _parallel_marking_threads; // the number of marking
-                                                   // threads we'll use
+                                                   // threads we're use
+  size_t                _max_parallel_marking_threads; // max number of marking
+                                                   // threads we'll ever use
   double                _sleep_factor; // how much we have to sleep, with
                                        // respect to the work we just did, to
                                        // meet the marking overhead goal
@@ -473,7 +475,7 @@
 
   double*   _accum_task_vtime;   // accumulated task vtime
 
-  WorkGang* _parallel_workers;
+  FlexibleWorkGang* _parallel_workers;
 
   ForceOverflowSettings _force_overflow_conc;
   ForceOverflowSettings _force_overflow_stw;
@@ -504,6 +506,7 @@
 
   // accessor methods
   size_t parallel_marking_threads() { return _parallel_marking_threads; }
+  size_t max_parallel_marking_threads() { return _max_parallel_marking_threads;}
   double sleep_factor()             { return _sleep_factor; }
   double marking_task_overhead()    { return _marking_task_overhead;}
   double cleanup_sleep_factor()     { return _cleanup_sleep_factor; }
@@ -709,6 +712,14 @@
   CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
   CMBitMap*   nextMarkBitMap() const { return _nextMarkBitMap; }
 
+  // Returns the number of GC threads to be used in a concurrent
+  // phase based on the number of GC threads being used in a STW
+  // phase.
+  size_t scale_parallel_threads(size_t n_par_threads);
+
+  // Calculates the number of GC threads to be used in a concurrent phase.
+  int calc_parallel_marking_threads();
+
   // The following three are interaction between CM and
   // G1CollectedHeap
 
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -191,7 +191,11 @@
         VM_CGC_Operation op(&cl_cl, verbose_str);
         VMThread::execute(&op);
       } else {
+        // We don't want to update the marking status if a GC pause
+        // is already underway.
+        _sts.join();
         g1h->set_marking_complete();
+        _sts.leave();
       }
 
       // Check if cleanup set the free_regions_coming flag. If it
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -66,6 +66,18 @@
 // apply to TLAB allocation, which is not part of this interface: it
 // is done by clients of this interface.)
 
+// Notes on implementation of parallelism in different tasks.
+//
+// G1ParVerifyTask uses heap_region_par_iterate_chunked() for parallelism.
+// The number of GC workers is passed to heap_region_par_iterate_chunked().
+// It does use run_task() which sets _n_workers in the task.
+// G1ParTask executes g1_process_strong_roots() ->
+// SharedHeap::process_strong_roots() which calls eventuall to
+// CardTableModRefBS::par_non_clean_card_iterate_work() which uses
+// SequentialSubTasksDone.  SharedHeap::process_strong_roots() also
+// directly uses SubTasksDone (_process_strong_tasks field in SharedHeap).
+//
+
 // Local to this file.
 
 class RefineCardTableEntryClosure: public CardTableEntryClosure {
@@ -176,8 +188,7 @@
   hr->set_next_young_region(_head);
   _head = hr;
 
-  hr->set_young();
-  double yg_surv_rate = _g1h->g1_policy()->predict_yg_surv_rate((int)_length);
+  _g1h->g1_policy()->set_region_eden(hr, (int) _length);
   ++_length;
 }
 
@@ -190,7 +201,6 @@
     _survivor_tail = hr;
   }
   _survivor_head = hr;
-
   ++_survivor_length;
 }
 
@@ -315,16 +325,20 @@
   _g1h->g1_policy()->note_start_adding_survivor_regions();
   _g1h->g1_policy()->finished_recalculating_age_indexes(true /* is_survivors */);
 
+  int young_index_in_cset = 0;
   for (HeapRegion* curr = _survivor_head;
        curr != NULL;
        curr = curr->get_next_young_region()) {
-    _g1h->g1_policy()->set_region_survivors(curr);
+    _g1h->g1_policy()->set_region_survivor(curr, young_index_in_cset);
 
     // The region is a non-empty survivor so let's add it to
     // the incremental collection set for the next evacuation
     // pause.
     _g1h->g1_policy()->add_region_to_incremental_cset_rhs(curr);
-  }
+    young_index_in_cset += 1;
+  }
+  assert((size_t) young_index_in_cset == _survivor_length,
+         "post-condition");
   _g1h->g1_policy()->note_stop_adding_survivor_regions();
 
   _head   = _survivor_head;
@@ -1154,6 +1168,7 @@
   void work(int i) {
     RebuildRSOutOfRegionClosure rebuild_rs(_g1, i);
     _g1->heap_region_par_iterate_chunked(&rebuild_rs, i,
+                                          _g1->workers()->active_workers(),
                                          HeapRegion::RebuildRSClaimValue);
   }
 };
@@ -1358,12 +1373,32 @@
     }
 
     // Rebuild remembered sets of all regions.
-
     if (G1CollectedHeap::use_parallel_gc_threads()) {
+      int n_workers =
+        AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
+                                       workers()->active_workers(),
+                                       Threads::number_of_non_daemon_threads());
+      assert(UseDynamicNumberOfGCThreads ||
+             n_workers == workers()->total_workers(),
+             "If not dynamic should be using all the  workers");
+      workers()->set_active_workers(n_workers);
+      // Set parallel threads in the heap (_n_par_threads) only
+      // before a parallel phase and always reset it to 0 after
+      // the phase so that the number of parallel threads does
+      // no get carried forward to a serial phase where there
+      // may be code that is "possibly_parallel".
+      set_par_threads(n_workers);
+
       ParRebuildRSTask rebuild_rs_task(this);
       assert(check_heap_region_claim_values(
              HeapRegion::InitialClaimValue), "sanity check");
-      set_par_threads(workers()->total_workers());
+      assert(UseDynamicNumberOfGCThreads ||
+             workers()->active_workers() == workers()->total_workers(),
+        "Unless dynamic should use total workers");
+      // Use the most recent number of  active workers
+      assert(workers()->active_workers() > 0,
+        "Active workers not properly set");
+      set_par_threads(workers()->active_workers());
       workers()->run_task(&rebuild_rs_task);
       set_par_threads(0);
       assert(check_heap_region_claim_values(
@@ -2475,11 +2510,17 @@
 void
 G1CollectedHeap::heap_region_par_iterate_chunked(HeapRegionClosure* cl,
                                                  int worker,
+                                                 int no_of_par_workers,
                                                  jint claim_value) {
   const size_t regions = n_regions();
-  const size_t worker_num = (G1CollectedHeap::use_parallel_gc_threads() ? ParallelGCThreads : 1);
+  const size_t max_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+                             no_of_par_workers :
+                             1);
+  assert(UseDynamicNumberOfGCThreads ||
+         no_of_par_workers == workers()->total_workers(),
+         "Non dynamic should use fixed number of workers");
   // try to spread out the starting points of the workers
-  const size_t start_index = regions / worker_num * (size_t) worker;
+  const size_t start_index = regions / max_workers * (size_t) worker;
 
   // each worker will actually look at all regions
   for (size_t count = 0; count < regions; ++count) {
@@ -2576,10 +2617,10 @@
     _claim_value(claim_value), _failures(0), _sh_region(NULL) { }
   bool doHeapRegion(HeapRegion* r) {
     if (r->claim_value() != _claim_value) {
-      gclog_or_tty->print_cr("Region ["PTR_FORMAT","PTR_FORMAT"), "
+      gclog_or_tty->print_cr("Region " HR_FORMAT ", "
                              "claim value = %d, should be %d",
-                             r->bottom(), r->end(), r->claim_value(),
-                             _claim_value);
+                             HR_FORMAT_PARAMS(r),
+                             r->claim_value(), _claim_value);
       ++_failures;
     }
     if (!r->isHumongous()) {
@@ -2588,9 +2629,9 @@
       _sh_region = r;
     } else if (r->continuesHumongous()) {
       if (r->humongous_start_region() != _sh_region) {
-        gclog_or_tty->print_cr("Region ["PTR_FORMAT","PTR_FORMAT"), "
+        gclog_or_tty->print_cr("Region " HR_FORMAT ", "
                                "HS = "PTR_FORMAT", should be "PTR_FORMAT,
-                               r->bottom(), r->end(),
+                               HR_FORMAT_PARAMS(r),
                                r->humongous_start_region(),
                                _sh_region);
         ++_failures;
@@ -2608,8 +2649,63 @@
   heap_region_iterate(&cl);
   return cl.failures() == 0;
 }
+
+class CheckClaimValuesInCSetHRClosure: public HeapRegionClosure {
+  jint   _claim_value;
+  size_t _failures;
+
+public:
+  CheckClaimValuesInCSetHRClosure(jint claim_value) :
+    _claim_value(claim_value),
+    _failures(0) { }
+
+  size_t failures() {
+    return _failures;
+  }
+
+  bool doHeapRegion(HeapRegion* hr) {
+    assert(hr->in_collection_set(), "how?");
+    assert(!hr->isHumongous(), "H-region in CSet");
+    if (hr->claim_value() != _claim_value) {
+      gclog_or_tty->print_cr("CSet Region " HR_FORMAT ", "
+                             "claim value = %d, should be %d",
+                             HR_FORMAT_PARAMS(hr),
+                             hr->claim_value(), _claim_value);
+      _failures += 1;
+    }
+    return false;
+  }
+};
+
+bool G1CollectedHeap::check_cset_heap_region_claim_values(jint claim_value) {
+  CheckClaimValuesInCSetHRClosure cl(claim_value);
+  collection_set_iterate(&cl);
+  return cl.failures() == 0;
+}
 #endif // ASSERT
 
+// We want the parallel threads to start their collection
+// set iteration at different collection set regions to
+// avoid contention.
+// If we have:
+//          n collection set regions
+//          p threads
+// Then thread t will start at region t * floor (n/p)
+
+HeapRegion* G1CollectedHeap::start_cset_region_for_worker(int worker_i) {
+  HeapRegion* result = g1_policy()->collection_set();
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    size_t cs_size = g1_policy()->cset_region_length();
+    int n_workers = workers()->total_workers();
+    size_t cs_spans = cs_size / n_workers;
+    size_t ind      = cs_spans * worker_i;
+    for (size_t i = 0; i < ind; i++) {
+      result = result->next_in_collection_set();
+    }
+  }
+  return result;
+}
+
 void G1CollectedHeap::collection_set_iterate(HeapRegionClosure* cl) {
   HeapRegion* r = g1_policy()->collection_set();
   while (r != NULL) {
@@ -2918,6 +3014,7 @@
     HandleMark hm;
     VerifyRegionClosure blk(_allow_dirty, true, _vo);
     _g1h->heap_region_par_iterate_chunked(&blk, worker_i,
+                                          _g1h->workers()->active_workers(),
                                           HeapRegion::ParVerifyClaimValue);
     if (blk.failures()) {
       _failures = true;
@@ -2935,6 +3032,10 @@
   if (SafepointSynchronize::is_at_safepoint() || ! UseTLAB) {
     if (!silent) { gclog_or_tty->print("Roots (excluding permgen) "); }
     VerifyRootsClosure rootsCl(vo);
+
+    assert(Thread::current()->is_VM_thread(),
+      "Expected to be executed serially by the VM thread at this point");
+
     CodeBlobToOopClosure blobsCl(&rootsCl, /*do_marking=*/ false);
 
     // We apply the relevant closures to all the oops in the
@@ -2979,7 +3080,10 @@
              "sanity check");
 
       G1ParVerifyTask task(this, allow_dirty, vo);
-      int n_workers = workers()->total_workers();
+      assert(UseDynamicNumberOfGCThreads ||
+        workers()->active_workers() == workers()->total_workers(),
+        "If not dynamic should be using all the workers");
+      int n_workers = workers()->active_workers();
       set_par_threads(n_workers);
       workers()->run_task(&task);
       set_par_threads(0);
@@ -2987,6 +3091,8 @@
         failures = true;
       }
 
+      // Checks that the expected amount of parallel work was done.
+      // The implication is that n_workers is > 0.
       assert(check_heap_region_claim_values(HeapRegion::ParVerifyClaimValue),
              "sanity check");
 
@@ -3210,8 +3316,6 @@
   }
 }
 
-// <NEW PREDICTION>
-
 double G1CollectedHeap::predict_region_elapsed_time_ms(HeapRegion *hr,
                                                        bool young) {
   return _g1_policy->predict_region_elapsed_time_ms(hr, young);
@@ -3251,7 +3355,7 @@
 void
 G1CollectedHeap::setup_surviving_young_words() {
   guarantee( _surviving_young_words == NULL, "pre-condition" );
-  size_t array_length = g1_policy()->young_cset_length();
+  size_t array_length = g1_policy()->young_cset_region_length();
   _surviving_young_words = NEW_C_HEAP_ARRAY(size_t, array_length);
   if (_surviving_young_words == NULL) {
     vm_exit_out_of_memory(sizeof(size_t) * array_length,
@@ -3268,7 +3372,7 @@
 void
 G1CollectedHeap::update_surviving_young_words(size_t* surv_young_words) {
   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
-  size_t array_length = g1_policy()->young_cset_length();
+  size_t array_length = g1_policy()->young_cset_region_length();
   for (size_t i = 0; i < array_length; ++i)
     _surviving_young_words[i] += surv_young_words[i];
 }
@@ -3280,8 +3384,6 @@
   _surviving_young_words = NULL;
 }
 
-// </NEW PREDICTION>
-
 #ifdef ASSERT
 class VerifyCSetClosure: public HeapRegionClosure {
 public:
@@ -3404,6 +3506,10 @@
     assert(check_young_list_well_formed(),
       "young list should be well formed");
 
+    // Don't dynamically change the number of GC threads this early.  A value of
+    // 0 is used to indicate serial work.  When parallel work is done,
+    // it will be set.
+
     { // Call to jvmpi::post_class_unload_events must occur outside of active GC
       IsGCActiveMark x;
 
@@ -3617,7 +3723,8 @@
         double end_time_sec = os::elapsedTime();
         double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
         g1_policy()->record_pause_time_ms(pause_time_ms);
-        g1_policy()->record_collection_pause_end();
+        int active_gc_threads = workers()->active_workers();
+        g1_policy()->record_collection_pause_end(active_gc_threads);
 
         MemoryService::track_memory_usage();
 
@@ -4158,7 +4265,7 @@
   // non-young regions (where the age is -1)
   // We also add a few elements at the beginning and at the end in
   // an attempt to eliminate cache contention
-  size_t real_length = 1 + _g1h->g1_policy()->young_cset_length();
+  size_t real_length = 1 + _g1h->g1_policy()->young_cset_region_length();
   size_t array_length = PADDING_ELEM_NUM +
                         real_length +
                         PADDING_ELEM_NUM;
@@ -4564,13 +4671,13 @@
   }
 
 public:
-  G1ParTask(G1CollectedHeap* g1h, int workers, RefToScanQueueSet *task_queues)
+  G1ParTask(G1CollectedHeap* g1h,
+            RefToScanQueueSet *task_queues)
     : AbstractGangTask("G1 collection"),
       _g1h(g1h),
       _queues(task_queues),
-      _terminator(workers, _queues),
-      _stats_lock(Mutex::leaf, "parallel G1 stats lock", true),
-      _n_workers(workers)
+      _terminator(0, _queues),
+      _stats_lock(Mutex::leaf, "parallel G1 stats lock", true)
   {}
 
   RefToScanQueueSet* queues() { return _queues; }
@@ -4579,6 +4686,20 @@
     return queues()->queue(i);
   }
 
+  ParallelTaskTerminator* terminator() { return &_terminator; }
+
+  virtual void set_for_termination(int active_workers) {
+    // This task calls set_n_termination() in par_non_clean_card_iterate_work()
+    // in the young space (_par_seq_tasks) in the G1 heap
+    // for SequentialSubTasksDone.
+    // This task also uses SubTasksDone in SharedHeap and G1CollectedHeap
+    // both of which need setting by set_n_termination().
+    _g1h->SharedHeap::set_n_termination(active_workers);
+    _g1h->set_n_termination(active_workers);
+    terminator()->reset_for_reuse(active_workers);
+    _n_workers = active_workers;
+  }
+
   void work(int i) {
     if (i >= _n_workers) return;  // no work needed this round
 
@@ -4863,12 +4984,12 @@
 private:
   G1CollectedHeap*   _g1h;
   RefToScanQueueSet* _queues;
-  WorkGang*          _workers;
+  FlexibleWorkGang*  _workers;
   int                _active_workers;
 
 public:
   G1STWRefProcTaskExecutor(G1CollectedHeap* g1h,
-                        WorkGang* workers,
+                        FlexibleWorkGang* workers,
                         RefToScanQueueSet *task_queues,
                         int n_workers) :
     _g1h(g1h),
@@ -5124,11 +5245,13 @@
   // referents points to another object which is also referenced by an
   // object discovered by the STW ref processor.
 
-  int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
-                        workers()->total_workers() : 1);
-
-  set_par_threads(n_workers);
-  G1ParPreserveCMReferentsTask keep_cm_referents(this, n_workers, _task_queues);
+  int active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+                        workers()->active_workers() : 1);
+
+  assert(active_workers == workers()->active_workers(),
+         "Need to reset active_workers");
+  set_par_threads(active_workers);
+  G1ParPreserveCMReferentsTask keep_cm_referents(this, active_workers, _task_queues);
 
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     workers()->run_task(&keep_cm_referents);
@@ -5194,7 +5317,6 @@
                                       NULL);
   } else {
     // Parallel reference processing
-    int active_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1);
     assert(rp->num_q() == active_workers, "sanity");
     assert(active_workers <= rp->max_num_q(), "sanity");
 
@@ -5227,7 +5349,9 @@
   } else {
     // Parallel reference enqueuing
 
-    int active_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1);
+    int active_workers = (ParallelGCThreads > 0 ? workers()->active_workers() : 1);
+    assert(active_workers == workers()->active_workers(),
+           "Need to reset active_workers");
     assert(rp->num_q() == active_workers, "sanity");
     assert(active_workers <= rp->max_num_q(), "sanity");
 
@@ -5254,9 +5378,24 @@
   concurrent_g1_refine()->set_use_cache(false);
   concurrent_g1_refine()->clear_hot_cache_claimed_index();
 
-  int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1);
-  set_par_threads(n_workers);
-  G1ParTask g1_par_task(this, n_workers, _task_queues);
+  int n_workers;
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    n_workers =
+      AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
+                                     workers()->active_workers(),
+                                     Threads::number_of_non_daemon_threads());
+    assert(UseDynamicNumberOfGCThreads ||
+           n_workers == workers()->total_workers(),
+           "If not dynamic should be using all the  workers");
+    set_par_threads(n_workers);
+  } else {
+    assert(n_par_threads() == 0,
+           "Should be the original non-parallel value");
+    n_workers = 1;
+  }
+  workers()->set_active_workers(n_workers);
+
+  G1ParTask g1_par_task(this, _task_queues);
 
   init_for_evac_failure(NULL);
 
@@ -5269,6 +5408,10 @@
     // The individual threads will set their evac-failure closures.
     StrongRootsScope srs(this);
     if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr();
+    // These tasks use ShareHeap::_process_strong_tasks
+    assert(UseDynamicNumberOfGCThreads ||
+           workers()->active_workers() == workers()->total_workers(),
+           "If not dynamic should be using all the  workers");
     workers()->run_task(&g1_par_task);
   } else {
     StrongRootsScope srs(this);
@@ -5277,6 +5420,7 @@
 
   double par_time = (os::elapsedTime() - start_par) * 1000.0;
   g1_policy()->record_par_time(par_time);
+
   set_par_threads(0);
 
   // Process any discovered reference objects - we have
@@ -5304,8 +5448,11 @@
 
   finalize_for_evac_failure();
 
-  // Must do this before removing self-forwarding pointers, which clears
-  // the per-region evac-failure flags.
+  // Must do this before clearing the per-region evac-failure flags
+  // (which is currently done when we free the collection set).
+  // We also only do this if marking is actually in progress and so
+  // have to do this before we set the mark_in_progress flag at the
+  // end of an initial mark pause.
   concurrent_mark()->complete_marking_in_collection_set();
 
   if (evacuation_failed()) {
@@ -5567,7 +5714,6 @@
 
   while (cur != NULL) {
     assert(!is_on_master_free_list(cur), "sanity");
-
     if (non_young) {
       if (cur->is_young()) {
         double end_sec = os::elapsedTime();
@@ -5578,12 +5724,14 @@
         non_young = false;
       }
     } else {
-      double end_sec = os::elapsedTime();
-      double elapsed_ms = (end_sec - start_sec) * 1000.0;
-      young_time_ms += elapsed_ms;
-
-      start_sec = os::elapsedTime();
-      non_young = true;
+      if (!cur->is_young()) {
+        double end_sec = os::elapsedTime();
+        double elapsed_ms = (end_sec - start_sec) * 1000.0;
+        young_time_ms += elapsed_ms;
+
+        start_sec = os::elapsedTime();
+        non_young = true;
+      }
     }
 
     rs_lengths += cur->rem_set()->occupied();
@@ -5595,8 +5743,8 @@
 
     if (cur->is_young()) {
       int index = cur->young_index_in_cset();
-      guarantee( index != -1, "invariant" );
-      guarantee( (size_t)index < policy->young_cset_length(), "invariant" );
+      assert(index != -1, "invariant");
+      assert((size_t) index < policy->young_cset_region_length(), "invariant");
       size_t words_survived = _surviving_young_words[index];
       cur->record_surv_words_in_group(words_survived);
 
@@ -5607,7 +5755,7 @@
       cur->set_next_young_region(NULL);
     } else {
       int index = cur->young_index_in_cset();
-      guarantee( index == -1, "invariant" );
+      assert(index == -1, "invariant");
     }
 
     assert( (cur->is_young() && cur->young_index_in_cset() > -1) ||
@@ -5615,13 +5763,26 @@
             "invariant" );
 
     if (!cur->evacuation_failed()) {
+      MemRegion used_mr = cur->used_region();
+
       // And the region is empty.
-      assert(!cur->is_empty(), "Should not have empty regions in a CS.");
+      assert(!used_mr.is_empty(), "Should not have empty regions in a CS.");
+
+      // If marking is in progress then clear any objects marked in
+      // the current region. Note mark_in_progress() returns false,
+      // even during an initial mark pause, until the set_marking_started()
+      // call which takes place later in the pause.
+      if (mark_in_progress()) {
+        assert(!g1_policy()->during_initial_mark_pause(), "sanity");
+        _cm->nextMarkBitMap()->clearRange(used_mr);
+      }
+
       free_region(cur, &pre_used, &local_free_list, false /* par */);
     } else {
       cur->uninstall_surv_rate_group();
-      if (cur->is_young())
+      if (cur->is_young()) {
         cur->set_young_index_in_cset(-1);
+      }
       cur->set_not_young();
       cur->set_evacuation_failed(false);
       // The region is now considered to be old.
@@ -5635,10 +5796,12 @@
 
   double end_sec = os::elapsedTime();
   double elapsed_ms = (end_sec - start_sec) * 1000.0;
-  if (non_young)
+
+  if (non_young) {
     non_young_time_ms += elapsed_ms;
-  else
+  } else {
     young_time_ms += elapsed_ms;
+  }
 
   update_sets_after_freeing_regions(pre_used, &local_free_list,
                                     NULL /* old_proxy_set */,
@@ -5722,7 +5885,6 @@
   assert(heap_lock_held_for_gc(),
               "the heap lock should already be held by or for this thread");
   _young_list->push_region(hr);
-  g1_policy()->set_region_short_lived(hr);
 }
 
 class NoYoungRegionsClosure: public HeapRegionClosure {
@@ -5880,7 +6042,6 @@
     HeapRegion* new_alloc_region = new_region(word_size,
                                               false /* do_expand */);
     if (new_alloc_region != NULL) {
-      g1_policy()->update_region_num(true /* next_is_young */);
       set_region_short_lived_locked(new_alloc_region);
       _hr_printer.alloc(new_alloc_region, G1HRPrinter::Eden, young_list_full);
       return new_alloc_region;
@@ -5908,6 +6069,21 @@
   return _g1h->new_mutator_alloc_region(word_size, force);
 }
 
+void G1CollectedHeap::set_par_threads() {
+  // Don't change the number of workers.  Use the value previously set
+  // in the workgroup.
+  int n_workers = workers()->active_workers();
+    assert(UseDynamicNumberOfGCThreads ||
+           n_workers == workers()->total_workers(),
+      "Otherwise should be using the total number of workers");
+  if (n_workers == 0) {
+    assert(false, "Should have been set in prior evacuation pause.");
+    n_workers = ParallelGCThreads;
+    workers()->set_active_workers(n_workers);
+  }
+  set_par_threads(n_workers);
+}
+
 void MutatorAllocRegion::retire_region(HeapRegion* alloc_region,
                                        size_t allocated_bytes) {
   _g1h->retire_mutator_alloc_region(alloc_region, allocated_bytes);
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -987,6 +987,16 @@
 
   void set_par_threads(int t) {
     SharedHeap::set_par_threads(t);
+    // Done in SharedHeap but oddly there are
+    // two _process_strong_tasks's in a G1CollectedHeap
+    // so do it here too.
+    _process_strong_tasks->set_n_threads(t);
+  }
+
+  // Set _n_par_threads according to a policy TBD.
+  void set_par_threads();
+
+  void set_n_termination(int t) {
     _process_strong_tasks->set_n_threads(t);
   }
 
@@ -1276,6 +1286,7 @@
   // i.e., that a closure never attempt to abort a traversal.
   void heap_region_par_iterate_chunked(HeapRegionClosure* blk,
                                        int worker,
+                                       int no_of_par_workers,
                                        jint claim_value);
 
   // It resets all the region claim values to the default.
@@ -1283,8 +1294,17 @@
 
 #ifdef ASSERT
   bool check_heap_region_claim_values(jint claim_value);
+
+  // Same as the routine above but only checks regions in the
+  // current collection set.
+  bool check_cset_heap_region_claim_values(jint claim_value);
 #endif // ASSERT
 
+  // Given the id of a worker, calculate a suitable
+  // starting region for iterating over the current
+  // collection set.
+  HeapRegion* start_cset_region_for_worker(int worker_i);
+
   // Iterate over the regions (if any) in the current collection set.
   void collection_set_iterate(HeapRegionClosure* blk);
 
@@ -1610,16 +1630,12 @@
 public:
   void stop_conc_gc_threads();
 
-  // <NEW PREDICTION>
-
   double predict_region_elapsed_time_ms(HeapRegion* hr, bool young);
   void check_if_region_is_too_expensive(double predicted_time_ms);
   size_t pending_card_num();
   size_t max_pending_card_num();
   size_t cards_scanned();
 
-  // </NEW PREDICTION>
-
 protected:
   size_t _max_heap_capacity;
 };
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -36,10 +36,6 @@
 #include "runtime/mutexLocker.hpp"
 #include "utilities/debug.hpp"
 
-#define PREDICTIONS_VERBOSE 0
-
-// <NEW PREDICTION>
-
 // Different defaults for different number of GC threads
 // They were chosen by running GCOld and SPECjbb on debris with different
 //   numbers of GC threads and choosing them based on the results
@@ -80,8 +76,6 @@
   1.0, 0.7, 0.7, 0.5, 0.5, 0.42, 0.42, 0.30
 };
 
-// </NEW PREDICTION>
-
 // Help class for avoiding interleaved logging
 class LineBuffer: public StackObj {
 
@@ -137,10 +131,6 @@
   _parallel_gc_threads(G1CollectedHeap::use_parallel_gc_threads()
                         ? ParallelGCThreads : 1),
 
-  _n_pauses(0),
-  _recent_rs_scan_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
-  _recent_pause_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
-  _recent_rs_sizes(new TruncatedSeq(NumPrevPausesForHeuristics)),
   _recent_gc_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
   _all_pause_times_ms(new NumberSeq()),
   _stop_world_start(0.0),
@@ -148,11 +138,10 @@
   _all_yield_times_ms(new NumberSeq()),
   _using_new_ratio_calculations(false),
 
-  _all_mod_union_times_ms(new NumberSeq()),
-
   _summary(new Summary()),
 
   _cur_clear_ct_time_ms(0.0),
+  _mark_closure_time_ms(0.0),
 
   _cur_ref_proc_time_ms(0.0),
   _cur_ref_enq_time_ms(0.0),
@@ -165,11 +154,6 @@
   _num_cc_clears(0L),
 #endif
 
-  _region_num_young(0),
-  _region_num_tenured(0),
-  _prev_region_num_young(0),
-  _prev_region_num_tenured(0),
-
   _aux_num(10),
   _all_aux_times_ms(new NumberSeq[_aux_num]),
   _cur_aux_start_times_ms(new double[_aux_num]),
@@ -179,8 +163,6 @@
   _concurrent_mark_remark_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
   _concurrent_mark_cleanup_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
 
-  // <NEW PREDICTION>
-
   _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
   _prev_collection_pause_end_ms(0.0),
   _pending_card_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
@@ -199,13 +181,10 @@
                                          new TruncatedSeq(TruncatedSeqLength)),
 
   _pending_cards_seq(new TruncatedSeq(TruncatedSeqLength)),
-  _scanned_cards_seq(new TruncatedSeq(TruncatedSeqLength)),
   _rs_lengths_seq(new TruncatedSeq(TruncatedSeqLength)),
 
   _pause_time_target_ms((double) MaxGCPauseMillis),
 
-  // </NEW PREDICTION>
-
   _full_young_gcs(true),
   _full_young_pause_num(0),
   _partial_young_pause_num(0),
@@ -221,16 +200,10 @@
 
    _recent_prev_end_times_for_all_gcs_sec(new TruncatedSeq(NumPrevPausesForHeuristics)),
 
-  _recent_CS_bytes_used_before(new TruncatedSeq(NumPrevPausesForHeuristics)),
-  _recent_CS_bytes_surviving(new TruncatedSeq(NumPrevPausesForHeuristics)),
-
   _recent_avg_pause_time_ratio(0.0),
 
   _all_full_gc_times_ms(new NumberSeq()),
 
-  // G1PausesBtwnConcMark defaults to -1
-  // so the hack is to do the cast  QQQ FIXME
-  _pauses_btwn_concurrent_mark((size_t)G1PausesBtwnConcMark),
   _initiate_conc_mark_if_possible(false),
   _during_initial_mark_pause(false),
   _should_revert_to_full_young_gcs(false),
@@ -242,22 +215,21 @@
 
   _prev_collection_pause_used_at_end_bytes(0),
 
+  _eden_cset_region_length(0),
+  _survivor_cset_region_length(0),
+  _old_cset_region_length(0),
+
   _collection_set(NULL),
-  _collection_set_size(0),
   _collection_set_bytes_used_before(0),
 
   // Incremental CSet attributes
   _inc_cset_build_state(Inactive),
   _inc_cset_head(NULL),
   _inc_cset_tail(NULL),
-  _inc_cset_size(0),
-  _inc_cset_young_index(0),
   _inc_cset_bytes_used_before(0),
   _inc_cset_max_finger(NULL),
-  _inc_cset_recorded_young_bytes(0),
   _inc_cset_recorded_rs_lengths(0),
   _inc_cset_predicted_elapsed_time_ms(0.0),
-  _inc_cset_predicted_bytes_to_copy(0),
 
 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
@@ -325,8 +297,6 @@
   // start conservatively
   _expensive_region_limit_ms = 0.5 * (double) MaxGCPauseMillis;
 
-  // <NEW PREDICTION>
-
   int index;
   if (ParallelGCThreads == 0)
     index = 0;
@@ -348,8 +318,6 @@
   _non_young_other_cost_per_region_ms_seq->add(
                            non_young_other_cost_per_region_ms_defaults[index]);
 
-  // </NEW PREDICTION>
-
   // Below, we might need to calculate the pause time target based on
   // the pause interval. When we do so we are going to give G1 maximum
   // flexibility and allow it to do pauses when it needs to. So, we'll
@@ -908,9 +876,6 @@
 
   record_survivor_regions(0, NULL, NULL);
 
-  _prev_region_num_young   = _region_num_young;
-  _prev_region_num_tenured = _region_num_tenured;
-
   _free_regions_at_end_of_collection = _g1->free_regions();
   // Reset survivors SurvRateGroup.
   _survivor_surv_rate_group->reset();
@@ -982,10 +947,9 @@
     _cur_aux_times_set[i] = false;
   }
 
-  // These are initialized to zero here and they are set during
+  // This is initialized to zero here and is set during
   // the evacuation pause if marking is in progress.
   _cur_satb_drain_time_ms = 0.0;
-  _last_satb_drain_processed_buffers = 0;
 
   _last_young_gc_full = false;
 
@@ -996,10 +960,6 @@
   assert( verify_young_ages(), "region age verification" );
 }
 
-void G1CollectorPolicy::record_mark_closure_time(double mark_closure_time_ms) {
-  _mark_closure_time_ms = mark_closure_time_ms;
-}
-
 void G1CollectorPolicy::record_concurrent_mark_init_end(double
                                                    mark_init_elapsed_time_ms) {
   _during_marking = true;
@@ -1060,7 +1020,7 @@
   double total = 0.0;
   LineBuffer buf(level);
   buf.append("[%s (ms):", str);
-  for (uint i = 0; i < ParallelGCThreads; ++i) {
+  for (uint i = 0; i < no_of_gc_threads(); ++i) {
     double val = data[i];
     if (val < min)
       min = val;
@@ -1070,7 +1030,7 @@
     buf.append("  %3.1lf", val);
   }
   buf.append_and_print_cr("");
-  double avg = total / (double) ParallelGCThreads;
+  double avg = total / (double) no_of_gc_threads();
   buf.append_and_print_cr(" Avg: %5.1lf, Min: %5.1lf, Max: %5.1lf, Diff: %5.1lf]",
     avg, min, max, max - min);
 }
@@ -1082,7 +1042,7 @@
   double total = 0.0;
   LineBuffer buf(level);
   buf.append("[%s :", str);
-  for (uint i = 0; i < ParallelGCThreads; ++i) {
+  for (uint i = 0; i < no_of_gc_threads(); ++i) {
     double val = data[i];
     if (val < min)
       min = val;
@@ -1092,7 +1052,7 @@
     buf.append(" %d", (int) val);
   }
   buf.append_and_print_cr("");
-  double avg = total / (double) ParallelGCThreads;
+  double avg = total / (double) no_of_gc_threads();
   buf.append_and_print_cr(" Sum: %d, Avg: %d, Min: %d, Max: %d, Diff: %d]",
     (int)total, (int)avg, (int)min, (int)max, (int)max - (int)min);
 }
@@ -1112,10 +1072,10 @@
 double G1CollectorPolicy::avg_value(double* data) {
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     double ret = 0.0;
-    for (uint i = 0; i < ParallelGCThreads; ++i) {
+    for (uint i = 0; i < no_of_gc_threads(); ++i) {
       ret += data[i];
     }
-    return ret / (double) ParallelGCThreads;
+    return ret / (double) no_of_gc_threads();
   } else {
     return data[0];
   }
@@ -1124,7 +1084,7 @@
 double G1CollectorPolicy::max_value(double* data) {
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     double ret = data[0];
-    for (uint i = 1; i < ParallelGCThreads; ++i) {
+    for (uint i = 1; i < no_of_gc_threads(); ++i) {
       if (data[i] > ret) {
         ret = data[i];
       }
@@ -1138,7 +1098,7 @@
 double G1CollectorPolicy::sum_of_values(double* data) {
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     double sum = 0.0;
-    for (uint i = 0; i < ParallelGCThreads; i++) {
+    for (uint i = 0; i < no_of_gc_threads(); i++) {
       sum += data[i];
     }
     return sum;
@@ -1151,7 +1111,7 @@
   double ret = data1[0] + data2[0];
 
   if (G1CollectedHeap::use_parallel_gc_threads()) {
-    for (uint i = 1; i < ParallelGCThreads; ++i) {
+    for (uint i = 1; i < no_of_gc_threads(); ++i) {
       double data = data1[i] + data2[i];
       if (data > ret) {
         ret = data;
@@ -1164,16 +1124,19 @@
 // Anything below that is considered to be zero
 #define MIN_TIMER_GRANULARITY 0.0000001
 
-void G1CollectorPolicy::record_collection_pause_end() {
+void G1CollectorPolicy::record_collection_pause_end(int no_of_gc_threads) {
   double end_time_sec = os::elapsedTime();
   double elapsed_ms = _last_pause_time_ms;
   bool parallel = G1CollectedHeap::use_parallel_gc_threads();
+  assert(_cur_collection_pause_used_regions_at_start >= cset_region_length(),
+         "otherwise, the subtraction below does not make sense");
   size_t rs_size =
-    _cur_collection_pause_used_regions_at_start - collection_set_size();
+            _cur_collection_pause_used_regions_at_start - cset_region_length();
   size_t cur_used_bytes = _g1->used();
   assert(cur_used_bytes == _g1->recalculate_used(), "It should!");
   bool last_pause_included_initial_mark = false;
   bool update_stats = !_g1->evacuation_failed();
+  set_no_of_gc_threads(no_of_gc_threads);
 
 #ifndef PRODUCT
   if (G1YoungSurvRateVerbose) {
@@ -1226,10 +1189,6 @@
   _mmu_tracker->add_pause(end_time_sec - elapsed_ms/1000.0,
                           end_time_sec, false);
 
-  guarantee(_cur_collection_pause_used_regions_at_start >=
-            collection_set_size(),
-            "Negative RS size?");
-
   // This assert is exempted when we're doing parallel collection pauses,
   // because the fragmentation caused by the parallel GC allocation buffers
   // can lead to more memory being used during collection than was used
@@ -1253,8 +1212,6 @@
     (double)surviving_bytes/
     (double)_collection_set_bytes_used_before;
 
-  _n_pauses++;
-
   // These values are used to update the summary information that is
   // displayed when TraceGen0Time is enabled, and are output as part
   // of the PrintGCDetails output, in the non-parallel case.
@@ -1291,14 +1248,15 @@
   // current value of "other time"
   other_time_ms -= _cur_clear_ct_time_ms;
 
+  // Subtract the time spent completing marking in the collection
+  // set. Note if marking is not in progress during the pause
+  // the value of _mark_closure_time_ms will be zero.
+  other_time_ms -= _mark_closure_time_ms;
+
   // TraceGen0Time and TraceGen1Time summary info updating.
   _all_pause_times_ms->add(elapsed_ms);
 
   if (update_stats) {
-    _recent_rs_scan_times_ms->add(scan_rs_time);
-    _recent_pause_times_ms->add(elapsed_ms);
-    _recent_rs_sizes->add(rs_size);
-
     _summary->record_total_time_ms(elapsed_ms);
     _summary->record_other_time_ms(other_time_ms);
 
@@ -1342,9 +1300,6 @@
            || surviving_bytes <= _collection_set_bytes_used_before,
            "Or else negative collection!");
 
-    _recent_CS_bytes_used_before->add(_collection_set_bytes_used_before);
-    _recent_CS_bytes_surviving->add(surviving_bytes);
-
     // this is where we update the allocation rate of the application
     double app_time_ms =
       (_cur_collection_start_sec * 1000.0 - _prev_collection_pause_end_ms);
@@ -1354,13 +1309,17 @@
       // We'll just set it to something (arbitrarily) small.
       app_time_ms = 1.0;
     }
-    size_t regions_allocated =
-      (_region_num_young - _prev_region_num_young) +
-      (_region_num_tenured - _prev_region_num_tenured);
+    // We maintain the invariant that all objects allocated by mutator
+    // threads will be allocated out of eden regions. So, we can use
+    // the eden region number allocated since the previous GC to
+    // calculate the application's allocate rate. The only exception
+    // to that is humongous objects that are allocated separately. But
+    // given that humongous object allocations do not really affect
+    // either the pause's duration nor when the next pause will take
+    // place we can safely ignore them here.
+    size_t regions_allocated = eden_cset_region_length();
     double alloc_rate_ms = (double) regions_allocated / app_time_ms;
     _alloc_rate_ms_seq->add(alloc_rate_ms);
-    _prev_region_num_young   = _region_num_young;
-    _prev_region_num_tenured = _region_num_tenured;
 
     double interval_ms =
       (end_time_sec - _recent_prev_end_times_for_all_gcs_sec->oldest()) * 1000.0;
@@ -1398,33 +1357,6 @@
     }
   }
 
-
-  if (G1PolicyVerbose > 1) {
-    gclog_or_tty->print_cr("   Recording collection pause(%d)", _n_pauses);
-  }
-
-  if (G1PolicyVerbose > 1) {
-    gclog_or_tty->print_cr("      ET: %10.6f ms           (avg: %10.6f ms)\n"
-                           "       ET-RS:  %10.6f ms      (avg: %10.6f ms)\n"
-                           "      |RS|: " SIZE_FORMAT,
-                           elapsed_ms, recent_avg_time_for_pauses_ms(),
-                           scan_rs_time, recent_avg_time_for_rs_scan_ms(),
-                           rs_size);
-
-    gclog_or_tty->print_cr("       Used at start: " SIZE_FORMAT"K"
-                           "       At end " SIZE_FORMAT "K\n"
-                           "       garbage      : " SIZE_FORMAT "K"
-                           "       of     " SIZE_FORMAT "K\n"
-                           "       survival     : %6.2f%%  (%6.2f%% avg)",
-                           _cur_collection_pause_used_at_start_bytes/K,
-                           _g1->used()/K, freed_bytes/K,
-                           _collection_set_bytes_used_before/K,
-                           survival_fraction*100.0,
-                           recent_avg_survival_fraction()*100.0);
-    gclog_or_tty->print_cr("       Recent %% gc pause time: %6.2f",
-                           recent_avg_pause_time_ratio() * 100.0);
-  }
-
   // PrintGCDetails output
   if (PrintGCDetails) {
     bool print_marking_info =
@@ -1436,7 +1368,6 @@
 
     if (print_marking_info) {
       print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms);
-      print_stats(2, "Processed Buffers", _last_satb_drain_processed_buffers);
     }
 
     if (parallel) {
@@ -1478,6 +1409,9 @@
       print_stats(1, "Scan RS", scan_rs_time);
       print_stats(1, "Object Copying", obj_copy_time);
     }
+    if (print_marking_info) {
+      print_stats(1, "Complete CSet Marking", _mark_closure_time_ms);
+    }
     print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
 #ifndef PRODUCT
     print_stats(1, "Cur Clear CC", _cur_clear_cc_time_ms);
@@ -1489,9 +1423,14 @@
     }
 #endif
     print_stats(1, "Other", other_time_ms);
-    print_stats(2, "Choose CSet", _recorded_young_cset_choice_time_ms);
+    print_stats(2, "Choose CSet",
+                   (_recorded_young_cset_choice_time_ms +
+                    _recorded_non_young_cset_choice_time_ms));
     print_stats(2, "Ref Proc", _cur_ref_proc_time_ms);
     print_stats(2, "Ref Enq", _cur_ref_enq_time_ms);
+    print_stats(2, "Free CSet",
+                   (_recorded_young_free_cset_time_ms +
+                    _recorded_non_young_free_cset_time_ms));
 
     for (int i = 0; i < _aux_num; ++i) {
       if (_cur_aux_times_set[i]) {
@@ -1576,8 +1515,6 @@
   _short_lived_surv_rate_group->start_adding_regions();
   // do that for any other surv rate groupsx
 
-  // <NEW PREDICTION>
-
   if (update_stats) {
     double pause_time_ms = elapsed_ms;
 
@@ -1612,9 +1549,15 @@
         _partially_young_cards_per_entry_ratio_seq->add(cards_per_entry_ratio);
     }
 
-    size_t rs_length_diff = _max_rs_lengths - _recorded_rs_lengths;
-    if (rs_length_diff >= 0)
-      _rs_length_diff_seq->add((double) rs_length_diff);
+    // It turns out that, sometimes, _max_rs_lengths can get smaller
+    // than _recorded_rs_lengths which causes rs_length_diff to get
+    // very large and mess up the RSet length predictions. We'll be
+    // defensive until we work out why this happens.
+    size_t rs_length_diff = 0;
+    if (_max_rs_lengths > _recorded_rs_lengths) {
+      rs_length_diff = _max_rs_lengths - _recorded_rs_lengths;
+    }
+    _rs_length_diff_seq->add((double) rs_length_diff);
 
     size_t copied_bytes = surviving_bytes;
     double cost_per_byte_ms = 0.0;
@@ -1631,21 +1574,21 @@
        _mark_closure_time_ms + termination_time);
 
     double young_other_time_ms = 0.0;
-    if (_recorded_young_regions > 0) {
+    if (young_cset_region_length() > 0) {
       young_other_time_ms =
         _recorded_young_cset_choice_time_ms +
         _recorded_young_free_cset_time_ms;
       _young_other_cost_per_region_ms_seq->add(young_other_time_ms /
-                                             (double) _recorded_young_regions);
+                                          (double) young_cset_region_length());
     }
     double non_young_other_time_ms = 0.0;
-    if (_recorded_non_young_regions > 0) {
+    if (old_cset_region_length() > 0) {
       non_young_other_time_ms =
         _recorded_non_young_cset_choice_time_ms +
         _recorded_non_young_free_cset_time_ms;
 
       _non_young_other_cost_per_region_ms_seq->add(non_young_other_time_ms /
-                                         (double) _recorded_non_young_regions);
+                                            (double) old_cset_region_length());
     }
 
     double constant_other_time_ms = all_other_time_ms -
@@ -1659,7 +1602,6 @@
     }
 
     _pending_cards_seq->add((double) _pending_cards);
-    _scanned_cards_seq->add((double) cards_scanned);
     _rs_lengths_seq->add((double) _max_rs_lengths);
 
     double expensive_region_limit_ms =
@@ -1670,49 +1612,6 @@
       expensive_region_limit_ms = (double) MaxGCPauseMillis;
     }
     _expensive_region_limit_ms = expensive_region_limit_ms;
-
-    if (PREDICTIONS_VERBOSE) {
-      gclog_or_tty->print_cr("");
-      gclog_or_tty->print_cr("PREDICTIONS %1.4lf %d "
-                    "REGIONS %d %d %d "
-                    "PENDING_CARDS %d %d "
-                    "CARDS_SCANNED %d %d "
-                    "RS_LENGTHS %d %d "
-                    "RS_UPDATE %1.6lf %1.6lf RS_SCAN %1.6lf %1.6lf "
-                    "SURVIVAL_RATIO %1.6lf %1.6lf "
-                    "OBJECT_COPY %1.6lf %1.6lf OTHER_CONSTANT %1.6lf %1.6lf "
-                    "OTHER_YOUNG %1.6lf %1.6lf "
-                    "OTHER_NON_YOUNG %1.6lf %1.6lf "
-                    "VTIME_DIFF %1.6lf TERMINATION %1.6lf "
-                    "ELAPSED %1.6lf %1.6lf ",
-                    _cur_collection_start_sec,
-                    (!_last_young_gc_full) ? 2 :
-                    (last_pause_included_initial_mark) ? 1 : 0,
-                    _recorded_region_num,
-                    _recorded_young_regions,
-                    _recorded_non_young_regions,
-                    _predicted_pending_cards, _pending_cards,
-                    _predicted_cards_scanned, cards_scanned,
-                    _predicted_rs_lengths, _max_rs_lengths,
-                    _predicted_rs_update_time_ms, update_rs_time,
-                    _predicted_rs_scan_time_ms, scan_rs_time,
-                    _predicted_survival_ratio, survival_ratio,
-                    _predicted_object_copy_time_ms, obj_copy_time,
-                    _predicted_constant_other_time_ms, constant_other_time_ms,
-                    _predicted_young_other_time_ms, young_other_time_ms,
-                    _predicted_non_young_other_time_ms,
-                    non_young_other_time_ms,
-                    _vtime_diff_ms, termination_time,
-                    _predicted_pause_time_ms, elapsed_ms);
-    }
-
-    if (G1PolicyVerbose > 0) {
-      gclog_or_tty->print_cr("Pause Time, predicted: %1.4lfms (predicted %s), actual: %1.4lfms",
-                    _predicted_pause_time_ms,
-                    (_within_target) ? "within" : "outside",
-                    elapsed_ms);
-    }
-
   }
 
   _in_marking_window = new_in_marking_window;
@@ -1723,7 +1622,6 @@
   // Note that _mmu_tracker->max_gc_time() returns the time in seconds.
   double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
   adjust_concurrent_refinement(update_rs_time, update_rs_processed_buffers, update_rs_time_goal_ms);
-  // </NEW PREDICTION>
 
   assert(assertMarkedBytesDataOK(), "Marked regions not OK at pause end.");
 }
@@ -1768,8 +1666,6 @@
   }
 }
 
-// <NEW PREDICTION>
-
 void G1CollectorPolicy::adjust_concurrent_refinement(double update_rs_time,
                                                      double update_rs_processed_buffers,
                                                      double goal_ms) {
@@ -1905,98 +1801,17 @@
 }
 
 void
-G1CollectorPolicy::start_recording_regions() {
-  _recorded_rs_lengths            = 0;
-  _recorded_young_regions         = 0;
-  _recorded_non_young_regions     = 0;
-
-#if PREDICTIONS_VERBOSE
-  _recorded_marked_bytes          = 0;
-  _recorded_young_bytes           = 0;
-  _predicted_bytes_to_copy        = 0;
-  _predicted_rs_lengths           = 0;
-  _predicted_cards_scanned        = 0;
-#endif // PREDICTIONS_VERBOSE
-}
-
-void
-G1CollectorPolicy::record_cset_region_info(HeapRegion* hr, bool young) {
-#if PREDICTIONS_VERBOSE
-  if (!young) {
-    _recorded_marked_bytes += hr->max_live_bytes();
-  }
-  _predicted_bytes_to_copy += predict_bytes_to_copy(hr);
-#endif // PREDICTIONS_VERBOSE
-
-  size_t rs_length = hr->rem_set()->occupied();
-  _recorded_rs_lengths += rs_length;
-}
-
-void
-G1CollectorPolicy::record_non_young_cset_region(HeapRegion* hr) {
-  assert(!hr->is_young(), "should not call this");
-  ++_recorded_non_young_regions;
-  record_cset_region_info(hr, false);
-}
-
-void
-G1CollectorPolicy::set_recorded_young_regions(size_t n_regions) {
-  _recorded_young_regions = n_regions;
-}
-
-void G1CollectorPolicy::set_recorded_young_bytes(size_t bytes) {
-#if PREDICTIONS_VERBOSE
-  _recorded_young_bytes = bytes;
-#endif // PREDICTIONS_VERBOSE
+G1CollectorPolicy::init_cset_region_lengths(size_t eden_cset_region_length,
+                                          size_t survivor_cset_region_length) {
+  _eden_cset_region_length     = eden_cset_region_length;
+  _survivor_cset_region_length = survivor_cset_region_length;
+  _old_cset_region_length      = 0;
 }
 
 void G1CollectorPolicy::set_recorded_rs_lengths(size_t rs_lengths) {
   _recorded_rs_lengths = rs_lengths;
 }
 
-void G1CollectorPolicy::set_predicted_bytes_to_copy(size_t bytes) {
-  _predicted_bytes_to_copy = bytes;
-}
-
-void
-G1CollectorPolicy::end_recording_regions() {
-  // The _predicted_pause_time_ms field is referenced in code
-  // not under PREDICTIONS_VERBOSE. Let's initialize it.
-  _predicted_pause_time_ms = -1.0;
-
-#if PREDICTIONS_VERBOSE
-  _predicted_pending_cards = predict_pending_cards();
-  _predicted_rs_lengths = _recorded_rs_lengths + predict_rs_length_diff();
-  if (full_young_gcs())
-    _predicted_cards_scanned += predict_young_card_num(_predicted_rs_lengths);
-  else
-    _predicted_cards_scanned +=
-      predict_non_young_card_num(_predicted_rs_lengths);
-  _recorded_region_num = _recorded_young_regions + _recorded_non_young_regions;
-
-  _predicted_rs_update_time_ms =
-    predict_rs_update_time_ms(_g1->pending_card_num());
-  _predicted_rs_scan_time_ms =
-    predict_rs_scan_time_ms(_predicted_cards_scanned);
-  _predicted_object_copy_time_ms =
-    predict_object_copy_time_ms(_predicted_bytes_to_copy);
-  _predicted_constant_other_time_ms =
-    predict_constant_other_time_ms();
-  _predicted_young_other_time_ms =
-    predict_young_other_time_ms(_recorded_young_regions);
-  _predicted_non_young_other_time_ms =
-    predict_non_young_other_time_ms(_recorded_non_young_regions);
-
-  _predicted_pause_time_ms =
-    _predicted_rs_update_time_ms +
-    _predicted_rs_scan_time_ms +
-    _predicted_object_copy_time_ms +
-    _predicted_constant_other_time_ms +
-    _predicted_young_other_time_ms +
-    _predicted_non_young_other_time_ms;
-#endif // PREDICTIONS_VERBOSE
-}
-
 void G1CollectorPolicy::check_if_region_is_too_expensive(double
                                                            predicted_time_ms) {
   // I don't think we need to do this when in young GC mode since
@@ -2013,9 +1828,6 @@
   }
 }
 
-// </NEW PREDICTION>
-
-
 void G1CollectorPolicy::update_recent_gc_times(double end_time_sec,
                                                double elapsed_ms) {
   _recent_gc_times_ms->add(elapsed_ms);
@@ -2023,99 +1835,6 @@
   _prev_collection_pause_end_ms = end_time_sec * 1000.0;
 }
 
-double G1CollectorPolicy::recent_avg_time_for_pauses_ms() {
-  if (_recent_pause_times_ms->num() == 0) {
-    return (double) MaxGCPauseMillis;
-  }
-  return _recent_pause_times_ms->avg();
-}
-
-double G1CollectorPolicy::recent_avg_time_for_rs_scan_ms() {
-  if (_recent_rs_scan_times_ms->num() == 0) {
-    return (double)MaxGCPauseMillis/3.0;
-  }
-  return _recent_rs_scan_times_ms->avg();
-}
-
-int G1CollectorPolicy::number_of_recent_gcs() {
-  assert(_recent_rs_scan_times_ms->num() ==
-         _recent_pause_times_ms->num(), "Sequence out of sync");
-  assert(_recent_pause_times_ms->num() ==
-         _recent_CS_bytes_used_before->num(), "Sequence out of sync");
-  assert(_recent_CS_bytes_used_before->num() ==
-         _recent_CS_bytes_surviving->num(), "Sequence out of sync");
-
-  return _recent_pause_times_ms->num();
-}
-
-double G1CollectorPolicy::recent_avg_survival_fraction() {
-  return recent_avg_survival_fraction_work(_recent_CS_bytes_surviving,
-                                           _recent_CS_bytes_used_before);
-}
-
-double G1CollectorPolicy::last_survival_fraction() {
-  return last_survival_fraction_work(_recent_CS_bytes_surviving,
-                                     _recent_CS_bytes_used_before);
-}
-
-double
-G1CollectorPolicy::recent_avg_survival_fraction_work(TruncatedSeq* surviving,
-                                                     TruncatedSeq* before) {
-  assert(surviving->num() == before->num(), "Sequence out of sync");
-  if (before->sum() > 0.0) {
-      double recent_survival_rate = surviving->sum() / before->sum();
-      // We exempt parallel collection from this check because Alloc Buffer
-      // fragmentation can produce negative collections.
-      // Further, we're now always doing parallel collection.  But I'm still
-      // leaving this here as a placeholder for a more precise assertion later.
-      // (DLD, 10/05.)
-      assert((true || G1CollectedHeap::use_parallel_gc_threads()) ||
-             _g1->evacuation_failed() ||
-             recent_survival_rate <= 1.0, "Or bad frac");
-      return recent_survival_rate;
-  } else {
-    return 1.0; // Be conservative.
-  }
-}
-
-double
-G1CollectorPolicy::last_survival_fraction_work(TruncatedSeq* surviving,
-                                               TruncatedSeq* before) {
-  assert(surviving->num() == before->num(), "Sequence out of sync");
-  if (surviving->num() > 0 && before->last() > 0.0) {
-    double last_survival_rate = surviving->last() / before->last();
-    // We exempt parallel collection from this check because Alloc Buffer
-    // fragmentation can produce negative collections.
-    // Further, we're now always doing parallel collection.  But I'm still
-    // leaving this here as a placeholder for a more precise assertion later.
-    // (DLD, 10/05.)
-    assert((true || G1CollectedHeap::use_parallel_gc_threads()) ||
-           last_survival_rate <= 1.0, "Or bad frac");
-    return last_survival_rate;
-  } else {
-    return 1.0;
-  }
-}
-
-static const int survival_min_obs = 5;
-static double survival_min_obs_limits[] = { 0.9, 0.7, 0.5, 0.3, 0.1 };
-static const double min_survival_rate = 0.1;
-
-double
-G1CollectorPolicy::conservative_avg_survival_fraction_work(double avg,
-                                                           double latest) {
-  double res = avg;
-  if (number_of_recent_gcs() < survival_min_obs) {
-    res = MAX2(res, survival_min_obs_limits[number_of_recent_gcs()]);
-  }
-  res = MAX2(res, latest);
-  res = MAX2(res, min_survival_rate);
-  // In the parallel case, LAB fragmentation can produce "negative
-  // collections"; so can evac failure.  Cap at 1.0
-  res = MIN2(res, 1.0);
-  return res;
-}
-
 size_t G1CollectorPolicy::expansion_amount() {
   double recent_gc_overhead = recent_avg_pause_time_ratio() * 100.0;
   double threshold = _gc_overhead_perc;
@@ -2331,15 +2050,6 @@
         print_summary_sd(0, buffer, &_all_aux_times_ms[i]);
       }
     }
-
-    size_t all_region_num = _region_num_young + _region_num_tenured;
-    gclog_or_tty->print_cr("   New Regions %8d, Young %8d (%6.2lf%%), "
-               "Tenured %8d (%6.2lf%%)",
-               all_region_num,
-               _region_num_young,
-               (double) _region_num_young / (double) all_region_num * 100.0,
-               _region_num_tenured,
-               (double) _region_num_tenured / (double) all_region_num * 100.0);
   }
   if (TraceGen1Time) {
     if (_all_full_gc_times_ms->num() > 0) {
@@ -2361,14 +2071,6 @@
 #endif // PRODUCT
 }
 
-void G1CollectorPolicy::update_region_num(bool young) {
-  if (young) {
-    ++_region_num_young;
-  } else {
-    ++_region_num_tenured;
-  }
-}
-
 #ifndef PRODUCT
 // for debugging, bit of a hack...
 static char*
@@ -2617,6 +2319,7 @@
     ParKnownGarbageHRClosure parKnownGarbageCl(_hrSorted, _chunk_size, i);
     // Back to zero for the claim value.
     _g1->heap_region_par_iterate_chunked(&parKnownGarbageCl, i,
+                                         _g1->workers()->active_workers(),
                                          HeapRegion::InitialClaimValue);
     jint regions_added = parKnownGarbageCl.marked_regions_added();
     _hrSorted->incNumMarkedHeapRegions(regions_added);
@@ -2628,7 +2331,7 @@
 };
 
 void
-G1CollectorPolicy::record_concurrent_mark_cleanup_end() {
+G1CollectorPolicy::record_concurrent_mark_cleanup_end(int no_of_gc_threads) {
   double start_sec;
   if (G1PrintParCleanupStats) {
     start_sec = os::elapsedTime();
@@ -2644,10 +2347,27 @@
 
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     const size_t OverpartitionFactor = 4;
-    const size_t MinWorkUnit = 8;
-    const size_t WorkUnit =
-      MAX2(_g1->n_regions() / (ParallelGCThreads * OverpartitionFactor),
-           MinWorkUnit);
+    size_t WorkUnit;
+    // The use of MinChunkSize = 8 in the original code
+    // causes some assertion failures when the total number of
+    // region is less than 8.  The code here tries to fix that.
+    // Should the original code also be fixed?
+    if (no_of_gc_threads > 0) {
+      const size_t MinWorkUnit =
+        MAX2(_g1->n_regions() / no_of_gc_threads, (size_t) 1U);
+      WorkUnit =
+        MAX2(_g1->n_regions() / (no_of_gc_threads * OverpartitionFactor),
+             MinWorkUnit);
+    } else {
+      assert(no_of_gc_threads > 0,
+        "The active gc workers should be greater than 0");
+      // In a product build do something reasonable to avoid a crash.
+      const size_t MinWorkUnit =
+        MAX2(_g1->n_regions() / ParallelGCThreads, (size_t) 1U);
+      WorkUnit =
+        MAX2(_g1->n_regions() / (ParallelGCThreads * OverpartitionFactor),
+             MinWorkUnit);
+    }
     _collectionSetChooser->prepareForAddMarkedHeapRegionsPar(_g1->n_regions(),
                                                              WorkUnit);
     ParKnownGarbageTask parKnownGarbageTask(_collectionSetChooser,
@@ -2682,8 +2402,7 @@
 }
 
 // Add the heap region at the head of the non-incremental collection set
-void G1CollectorPolicy::
-add_to_collection_set(HeapRegion* hr) {
+void G1CollectorPolicy::add_old_region_to_cset(HeapRegion* hr) {
   assert(_inc_cset_build_state == Active, "Precondition");
   assert(!hr->is_young(), "non-incremental add of young region");
 
@@ -2694,9 +2413,11 @@
   hr->set_in_collection_set(true);
   hr->set_next_in_collection_set(_collection_set);
   _collection_set = hr;
-  _collection_set_size++;
   _collection_set_bytes_used_before += hr->used();
   _g1->register_region_with_in_cset_fast_test(hr);
+  size_t rs_length = hr->rem_set()->occupied();
+  _recorded_rs_lengths += rs_length;
+  _old_cset_region_length += 1;
 }
 
 // Initialize the per-collection-set information
@@ -2705,16 +2426,11 @@
 
   _inc_cset_head = NULL;
   _inc_cset_tail = NULL;
-  _inc_cset_size = 0;
   _inc_cset_bytes_used_before = 0;
 
-  _inc_cset_young_index = 0;
-
   _inc_cset_max_finger = 0;
-  _inc_cset_recorded_young_bytes = 0;
   _inc_cset_recorded_rs_lengths = 0;
   _inc_cset_predicted_elapsed_time_ms = 0;
-  _inc_cset_predicted_bytes_to_copy = 0;
   _inc_cset_build_state = Active;
 }
 
@@ -2745,20 +2461,6 @@
   // rset sampling code
   hr->set_recorded_rs_length(rs_length);
   hr->set_predicted_elapsed_time_ms(region_elapsed_time_ms);
-
-#if PREDICTIONS_VERBOSE
-  size_t bytes_to_copy = predict_bytes_to_copy(hr);
-  _inc_cset_predicted_bytes_to_copy += bytes_to_copy;
-
-  // Record the number of bytes used in this region
-  _inc_cset_recorded_young_bytes += used_bytes;
-
-  // Cache the values we have added to the aggregated informtion
-  // in the heap region in case we have to remove this region from
-  // the incremental collection set, or it is updated by the
-  // rset sampling code
-  hr->set_predicted_bytes_to_copy(bytes_to_copy);
-#endif // PREDICTIONS_VERBOSE
 }
 
 void G1CollectorPolicy::remove_from_incremental_cset_info(HeapRegion* hr) {
@@ -2784,17 +2486,6 @@
   // Clear the values cached in the heap region
   hr->set_recorded_rs_length(0);
   hr->set_predicted_elapsed_time_ms(0);
-
-#if PREDICTIONS_VERBOSE
-  size_t old_predicted_bytes_to_copy = hr->predicted_bytes_to_copy();
-  _inc_cset_predicted_bytes_to_copy -= old_predicted_bytes_to_copy;
-
-  // Subtract the number of bytes used in this region
-  _inc_cset_recorded_young_bytes -= used_bytes;
-
-  // Clear the values cached in the heap region
-  hr->set_predicted_bytes_to_copy(0);
-#endif // PREDICTIONS_VERBOSE
 }
 
 void G1CollectorPolicy::update_incremental_cset_info(HeapRegion* hr, size_t new_rs_length) {
@@ -2806,8 +2497,8 @@
 }
 
 void G1CollectorPolicy::add_region_to_incremental_cset_common(HeapRegion* hr) {
-  assert( hr->is_young(), "invariant");
-  assert( hr->young_index_in_cset() == -1, "invariant" );
+  assert(hr->is_young(), "invariant");
+  assert(hr->young_index_in_cset() > -1, "should have already been set");
   assert(_inc_cset_build_state == Active, "Precondition");
 
   // We need to clear and set the cached recorded/cached collection set
@@ -2827,11 +2518,7 @@
   hr->set_in_collection_set(true);
   assert( hr->next_in_collection_set() == NULL, "invariant");
 
-  _inc_cset_size++;
   _g1->register_region_with_in_cset_fast_test(hr);
-
-  hr->set_young_index_in_cset((int) _inc_cset_young_index);
-  ++_inc_cset_young_index;
 }
 
 // Add the region at the RHS of the incremental cset
@@ -2899,8 +2586,6 @@
 
   YoungList* young_list = _g1->young_list();
 
-  start_recording_regions();
-
   guarantee(target_pause_time_ms > 0.0,
             err_msg("target_pause_time_ms = %1.6lf should be positive",
                     target_pause_time_ms));
@@ -2923,7 +2608,6 @@
   if (time_remaining_ms < threshold) {
     double prev_time_remaining_ms = time_remaining_ms;
     time_remaining_ms = 0.50 * target_pause_time_ms;
-    _within_target = false;
     ergo_verbose3(ErgoCSetConstruction,
                   "adjust remaining time",
                   ergo_format_reason("remaining time lower than threshold")
@@ -2931,8 +2615,6 @@
                   ergo_format_ms("threshold")
                   ergo_format_ms("adjusted remaining time"),
                   prev_time_remaining_ms, threshold, time_remaining_ms);
-  } else {
-    _within_target = true;
   }
 
   size_t expansion_bytes = _g1->expansion_regions() * HeapRegion::GrainBytes;
@@ -2941,8 +2623,6 @@
   double young_start_time_sec = os::elapsedTime();
 
   _collection_set_bytes_used_before = 0;
-  _collection_set_size = 0;
-  _young_cset_length  = 0;
   _last_young_gc_full = full_young_gcs() ? true : false;
 
   if (_last_young_gc_full) {
@@ -2955,9 +2635,9 @@
   // pause are appended to the RHS of the young list, i.e.
   //   [Newly Young Regions ++ Survivors from last pause].
 
-  size_t survivor_region_num = young_list->survivor_length();
-  size_t eden_region_num = young_list->length() - survivor_region_num;
-  size_t old_region_num = 0;
+  size_t survivor_region_length = young_list->survivor_length();
+  size_t eden_region_length = young_list->length() - survivor_region_length;
+  init_cset_region_lengths(eden_region_length, survivor_region_length);
   hr = young_list->first_survivor_region();
   while (hr != NULL) {
     assert(hr->is_survivor(), "badly formed young list");
@@ -2971,9 +2651,7 @@
   if (_g1->mark_in_progress())
     _g1->concurrent_mark()->register_collection_set_finger(_inc_cset_max_finger);
 
-  _young_cset_length = _inc_cset_young_index;
   _collection_set = _inc_cset_head;
-  _collection_set_size = _inc_cset_size;
   _collection_set_bytes_used_before = _inc_cset_bytes_used_before;
   time_remaining_ms -= _inc_cset_predicted_elapsed_time_ms;
   predicted_pause_time_ms += _inc_cset_predicted_elapsed_time_ms;
@@ -2983,19 +2661,12 @@
                 ergo_format_region("eden")
                 ergo_format_region("survivors")
                 ergo_format_ms("predicted young region time"),
-                eden_region_num, survivor_region_num,
+                eden_region_length, survivor_region_length,
                 _inc_cset_predicted_elapsed_time_ms);
 
   // The number of recorded young regions is the incremental
   // collection set's current size
-  set_recorded_young_regions(_inc_cset_size);
   set_recorded_rs_lengths(_inc_cset_recorded_rs_lengths);
-  set_recorded_young_bytes(_inc_cset_recorded_young_bytes);
-#if PREDICTIONS_VERBOSE
-  set_predicted_bytes_to_copy(_inc_cset_predicted_bytes_to_copy);
-#endif // PREDICTIONS_VERBOSE
-
-  assert(_inc_cset_size == young_list->length(), "Invariant");
 
   double young_end_time_sec = os::elapsedTime();
   _recorded_young_cset_choice_time_ms =
@@ -3009,9 +2680,16 @@
     NumberSeq seq;
     double avg_prediction = 100000000000000000.0; // something very large
 
-    size_t prev_collection_set_size = _collection_set_size;
     double prev_predicted_pause_time_ms = predicted_pause_time_ms;
     do {
+      // Note that add_old_region_to_cset() increments the
+      // _old_cset_region_length field and cset_region_length() returns the
+      // sum of _eden_cset_region_length, _survivor_cset_region_length, and
+      // _old_cset_region_length. So, as old regions are added to the
+      // CSet, _old_cset_region_length will be incremented and
+      // cset_region_length(), which is used below, will always reflect
+      // the the total number of regions added up to this point to the CSet.
+
       hr = _collectionSetChooser->getNextMarkedRegion(time_remaining_ms,
                                                       avg_prediction);
       if (hr != NULL) {
@@ -3019,8 +2697,7 @@
         double predicted_time_ms = predict_region_elapsed_time_ms(hr, false);
         time_remaining_ms -= predicted_time_ms;
         predicted_pause_time_ms += predicted_time_ms;
-        add_to_collection_set(hr);
-        record_non_young_cset_region(hr);
+        add_old_region_to_cset(hr);
         seq.add(predicted_time_ms);
         avg_prediction = seq.avg() + seq.sd();
       }
@@ -3041,13 +2718,13 @@
             should_continue = false;
           }
         } else {
-          if (_collection_set_size >= _young_list_fixed_length) {
+          if (cset_region_length() >= _young_list_fixed_length) {
             ergo_verbose2(ErgoCSetConstruction,
                           "stop adding old regions to CSet",
                           ergo_format_reason("CSet length reached target")
                           ergo_format_region("CSet")
                           ergo_format_region("young target"),
-                          _collection_set_size, _young_list_fixed_length);
+                          cset_region_length(), _young_list_fixed_length);
             should_continue = false;
           }
         }
@@ -3055,23 +2732,21 @@
     } while (should_continue);
 
     if (!adaptive_young_list_length() &&
-        _collection_set_size < _young_list_fixed_length) {
+                             cset_region_length() < _young_list_fixed_length) {
       ergo_verbose2(ErgoCSetConstruction,
                     "request partially-young GCs end",
                     ergo_format_reason("CSet length lower than target")
                     ergo_format_region("CSet")
                     ergo_format_region("young target"),
-                    _collection_set_size, _young_list_fixed_length);
+                    cset_region_length(), _young_list_fixed_length);
       _should_revert_to_full_young_gcs  = true;
     }
 
-    old_region_num = _collection_set_size - prev_collection_set_size;
-
     ergo_verbose2(ErgoCSetConstruction | ErgoHigh,
                   "add old regions to CSet",
                   ergo_format_region("old")
                   ergo_format_ms("predicted old region time"),
-                  old_region_num,
+                  old_cset_region_length(),
                   predicted_pause_time_ms - prev_predicted_pause_time_ms);
   }
 
@@ -3079,8 +2754,6 @@
 
   count_CS_bytes_used();
 
-  end_recording_regions();
-
   ergo_verbose5(ErgoCSetConstruction,
                 "finish choosing CSet",
                 ergo_format_region("eden")
@@ -3088,7 +2761,8 @@
                 ergo_format_region("old")
                 ergo_format_ms("predicted pause time")
                 ergo_format_ms("target pause time"),
-                eden_region_num, survivor_region_num, old_region_num,
+                eden_region_length, survivor_region_length,
+                old_cset_region_length(),
                 predicted_pause_time_ms, target_pause_time_ms);
 
   double non_young_end_time_sec = os::elapsedTime();
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -85,13 +85,13 @@
 
 class G1CollectorPolicy: public CollectorPolicy {
 private:
-  // The number of pauses during the execution.
-  long _n_pauses;
-
   // either equal to the number of parallel threads, if ParallelGCThreads
   // has been set, or 1 otherwise
   int _parallel_gc_threads;
 
+  // The number of GC threads currently active.
+  uintx _no_of_gc_threads;
+
   enum SomePrivateConstants {
     NumPrevPausesForHeuristics = 10
   };
@@ -127,18 +127,9 @@
   jlong  _num_cc_clears;                // number of times the card count cache has been cleared
 #endif
 
-  // Statistics for recent GC pauses.  See below for how indexed.
-  TruncatedSeq* _recent_rs_scan_times_ms;
-
   // These exclude marking times.
-  TruncatedSeq* _recent_pause_times_ms;
   TruncatedSeq* _recent_gc_times_ms;
 
-  TruncatedSeq* _recent_CS_bytes_used_before;
-  TruncatedSeq* _recent_CS_bytes_surviving;
-
-  TruncatedSeq* _recent_rs_sizes;
-
   TruncatedSeq* _concurrent_mark_remark_times_ms;
   TruncatedSeq* _concurrent_mark_cleanup_times_ms;
 
@@ -150,13 +141,6 @@
   NumberSeq* _all_stop_world_times_ms;
   NumberSeq* _all_yield_times_ms;
 
-  size_t     _region_num_young;
-  size_t     _region_num_tenured;
-  size_t     _prev_region_num_young;
-  size_t     _prev_region_num_tenured;
-
-  NumberSeq* _all_mod_union_times_ms;
-
   int        _aux_num;
   NumberSeq* _all_aux_times_ms;
   double*    _cur_aux_start_times_ms;
@@ -194,7 +178,6 @@
   // locker is active. This should be >= _young_list_target_length;
   size_t _young_list_max_length;
 
-  size_t _young_cset_length;
   bool   _last_young_gc_full;
 
   unsigned              _full_young_pause_num;
@@ -217,8 +200,6 @@
     return _during_marking;
   }
 
-  // <NEW PREDICTION>
-
 private:
   enum PredictionConstants {
     TruncatedSeqLength = 10
@@ -240,47 +221,32 @@
   TruncatedSeq* _non_young_other_cost_per_region_ms_seq;
 
   TruncatedSeq* _pending_cards_seq;
-  TruncatedSeq* _scanned_cards_seq;
   TruncatedSeq* _rs_lengths_seq;
 
   TruncatedSeq* _cost_per_byte_ms_during_cm_seq;
 
   TruncatedSeq* _young_gc_eff_seq;
 
-  TruncatedSeq* _max_conc_overhead_seq;
-
   bool   _using_new_ratio_calculations;
   size_t _min_desired_young_length; // as set on the command line or default calculations
   size_t _max_desired_young_length; // as set on the command line or default calculations
 
-  size_t _recorded_young_regions;
-  size_t _recorded_non_young_regions;
-  size_t _recorded_region_num;
+  size_t _eden_cset_region_length;
+  size_t _survivor_cset_region_length;
+  size_t _old_cset_region_length;
+
+  void init_cset_region_lengths(size_t eden_cset_region_length,
+                                size_t survivor_cset_region_length);
+
+  size_t eden_cset_region_length()     { return _eden_cset_region_length;     }
+  size_t survivor_cset_region_length() { return _survivor_cset_region_length; }
+  size_t old_cset_region_length()      { return _old_cset_region_length;      }
 
   size_t _free_regions_at_end_of_collection;
 
   size_t _recorded_rs_lengths;
   size_t _max_rs_lengths;
 
-  size_t _recorded_marked_bytes;
-  size_t _recorded_young_bytes;
-
-  size_t _predicted_pending_cards;
-  size_t _predicted_cards_scanned;
-  size_t _predicted_rs_lengths;
-  size_t _predicted_bytes_to_copy;
-
-  double _predicted_survival_ratio;
-  double _predicted_rs_update_time_ms;
-  double _predicted_rs_scan_time_ms;
-  double _predicted_object_copy_time_ms;
-  double _predicted_constant_other_time_ms;
-  double _predicted_young_other_time_ms;
-  double _predicted_non_young_other_time_ms;
-  double _predicted_pause_time_ms;
-
-  double _vtime_diff_ms;
-
   double _recorded_young_free_cset_time_ms;
   double _recorded_non_young_free_cset_time_ms;
 
@@ -317,21 +283,28 @@
                                     double update_rs_processed_buffers,
                                     double goal_ms);
 
+  uintx no_of_gc_threads() { return _no_of_gc_threads; }
+  void set_no_of_gc_threads(uintx v) { _no_of_gc_threads = v; }
+
   double _pause_time_target_ms;
   double _recorded_young_cset_choice_time_ms;
   double _recorded_non_young_cset_choice_time_ms;
-  bool   _within_target;
   size_t _pending_cards;
   size_t _max_pending_cards;
 
 public:
+  // Accessors
 
-  void set_region_short_lived(HeapRegion* hr) {
+  void set_region_eden(HeapRegion* hr, int young_index_in_cset) {
+    hr->set_young();
     hr->install_surv_rate_group(_short_lived_surv_rate_group);
+    hr->set_young_index_in_cset(young_index_in_cset);
   }
 
-  void set_region_survivors(HeapRegion* hr) {
+  void set_region_survivor(HeapRegion* hr, int young_index_in_cset) {
+    assert(hr->is_young() && hr->is_survivor(), "pre-condition");
     hr->install_surv_rate_group(_survivor_surv_rate_group);
+    hr->set_young_index_in_cset(young_index_in_cset);
   }
 
 #ifndef PRODUCT
@@ -343,10 +316,6 @@
                 seq->davg() * confidence_factor(seq->num()));
   }
 
-  size_t young_cset_length() {
-    return _young_cset_length;
-  }
-
   void record_max_rs_lengths(size_t rs_lengths) {
     _max_rs_lengths = rs_lengths;
   }
@@ -465,20 +434,12 @@
   size_t predict_bytes_to_copy(HeapRegion* hr);
   double predict_region_elapsed_time_ms(HeapRegion* hr, bool young);
 
-  void start_recording_regions();
-  void record_cset_region_info(HeapRegion* hr, bool young);
-  void record_non_young_cset_region(HeapRegion* hr);
+  void set_recorded_rs_lengths(size_t rs_lengths);
 
-  void set_recorded_young_regions(size_t n_regions);
-  void set_recorded_young_bytes(size_t bytes);
-  void set_recorded_rs_lengths(size_t rs_lengths);
-  void set_predicted_bytes_to_copy(size_t bytes);
-
-  void end_recording_regions();
-
-  void record_vtime_diff_ms(double vtime_diff_ms) {
-    _vtime_diff_ms = vtime_diff_ms;
-  }
+  size_t cset_region_length()       { return young_cset_region_length() +
+                                             old_cset_region_length(); }
+  size_t young_cset_region_length() { return eden_cset_region_length() +
+                                             survivor_cset_region_length(); }
 
   void record_young_free_cset_time_ms(double time_ms) {
     _recorded_young_free_cset_time_ms = time_ms;
@@ -494,8 +455,6 @@
 
   double predict_survivor_regions_evac_time();
 
-  // </NEW PREDICTION>
-
   void cset_regions_freed() {
     bool propagate = _last_young_gc_full && !_in_marking_window;
     _short_lived_surv_rate_group->all_surviving_words_recorded(propagate);
@@ -575,8 +534,6 @@
   double sum_of_values (double* data);
   double max_sum (double* data1, double* data2);
 
-  int _last_satb_drain_processed_buffers;
-  int _last_update_rs_processed_buffers;
   double _last_pause_time_ms;
 
   size_t _bytes_in_collection_set_before_gc;
@@ -596,10 +553,6 @@
   // set at the start of the pause.
   HeapRegion* _collection_set;
 
-  // The number of regions in the collection set. Set from the incrementally
-  // built collection set at the start of an evacuation pause.
-  size_t _collection_set_size;
-
   // The number of bytes in the collection set before the pause. Set from
   // the incrementally built collection set at the start of an evacuation
   // pause.
@@ -622,16 +575,6 @@
   // The tail of the incrementally built collection set.
   HeapRegion* _inc_cset_tail;
 
-  // The number of regions in the incrementally built collection set.
-  // Used to set _collection_set_size at the start of an evacuation
-  // pause.
-  size_t _inc_cset_size;
-
-  // Used as the index in the surving young words structure
-  // which tracks the amount of space, for each young region,
-  // that survives the pause.
-  size_t _inc_cset_young_index;
-
   // The number of bytes in the incrementally built collection set.
   // Used to set _collection_set_bytes_used_before at the start of
   // an evacuation pause.
@@ -640,11 +583,6 @@
   // Used to record the highest end of heap region in collection set
   HeapWord* _inc_cset_max_finger;
 
-  // The number of recorded used bytes in the young regions
-  // of the collection set. This is the sum of the used() bytes
-  // of retired young regions in the collection set.
-  size_t _inc_cset_recorded_young_bytes;
-
   // The RSet lengths recorded for regions in the collection set
   // (updated by the periodic sampling of the regions in the
   // young list/collection set).
@@ -655,68 +593,9 @@
   // regions in the young list/collection set).
   double _inc_cset_predicted_elapsed_time_ms;
 
-  // The predicted bytes to copy for the regions in the collection
-  // set (updated by the periodic sampling of the regions in the
-  // young list/collection set).
-  size_t _inc_cset_predicted_bytes_to_copy;
-
   // Stash a pointer to the g1 heap.
   G1CollectedHeap* _g1;
 
-  // The average time in ms per collection pause, averaged over recent pauses.
-  double recent_avg_time_for_pauses_ms();
-
-  // The average time in ms for RS scanning, per pause, averaged
-  // over recent pauses. (Note the RS scanning time for a pause
-  // is itself an average of the RS scanning time for each worker
-  // thread.)
-  double recent_avg_time_for_rs_scan_ms();
-
-  // The number of "recent" GCs recorded in the number sequences
-  int number_of_recent_gcs();
-
-  // The average survival ratio, computed by the total number of bytes
-  // suriviving / total number of bytes before collection over the last
-  // several recent pauses.
-  double recent_avg_survival_fraction();
-  // The survival fraction of the most recent pause; if there have been no
-  // pauses, returns 1.0.
-  double last_survival_fraction();
-
-  // Returns a "conservative" estimate of the recent survival rate, i.e.,
-  // one that may be higher than "recent_avg_survival_fraction".
-  // This is conservative in several ways:
-  //   If there have been few pauses, it will assume a potential high
-  //     variance, and err on the side of caution.
-  //   It puts a lower bound (currently 0.1) on the value it will return.
-  //   To try to detect phase changes, if the most recent pause ("latest") has a
-  //     higher-than average ("avg") survival rate, it returns that rate.
-  // "work" version is a utility function; young is restricted to young regions.
-  double conservative_avg_survival_fraction_work(double avg,
-                                                 double latest);
-
-  // The arguments are the two sequences that keep track of the number of bytes
-  //   surviving and the total number of bytes before collection, resp.,
-  //   over the last evereal recent pauses
-  // Returns the survival rate for the category in the most recent pause.
-  // If there have been no pauses, returns 1.0.
-  double last_survival_fraction_work(TruncatedSeq* surviving,
-                                     TruncatedSeq* before);
-
-  // The arguments are the two sequences that keep track of the number of bytes
-  //   surviving and the total number of bytes before collection, resp.,
-  //   over the last several recent pauses
-  // Returns the average survival ration over the last several recent pauses
-  // If there have been no pauses, return 1.0
-  double recent_avg_survival_fraction_work(TruncatedSeq* surviving,
-                                           TruncatedSeq* before);
-
-  double conservative_avg_survival_fraction() {
-    double avg = recent_avg_survival_fraction();
-    double latest = last_survival_fraction();
-    return conservative_avg_survival_fraction_work(avg, latest);
-  }
-
   // The ratio of gc time to elapsed time, computed over recent pauses.
   double _recent_avg_pause_time_ratio;
 
@@ -724,9 +603,6 @@
     return _recent_avg_pause_time_ratio;
   }
 
-  // Number of pauses between concurrent marking.
-  size_t _pauses_btwn_concurrent_mark;
-
   // At the end of a pause we check the heap occupancy and we decide
   // whether we will start a marking cycle during the next pause. If
   // we decide that we want to do that, we will set this parameter to
@@ -849,9 +725,6 @@
 
   GenRemSet::Name  rem_set_name()     { return GenRemSet::CardTable; }
 
-  // The number of collection pauses so far.
-  long n_pauses() const { return _n_pauses; }
-
   // Update the heuristic info to record a collection pause of the given
   // start time, where the given number of bytes were used at the start.
   // This may involve changing the desired size of a collection set.
@@ -864,19 +737,21 @@
   void record_concurrent_mark_init_end(double
                                            mark_init_elapsed_time_ms);
 
-  void record_mark_closure_time(double mark_closure_time_ms);
+  void record_mark_closure_time(double mark_closure_time_ms) {
+    _mark_closure_time_ms = mark_closure_time_ms;
+  }
 
   void record_concurrent_mark_remark_start();
   void record_concurrent_mark_remark_end();
 
   void record_concurrent_mark_cleanup_start();
-  void record_concurrent_mark_cleanup_end();
+  void record_concurrent_mark_cleanup_end(int no_of_gc_threads);
   void record_concurrent_mark_cleanup_completed();
 
   void record_concurrent_pause();
   void record_concurrent_pause_end();
 
-  void record_collection_pause_end();
+  void record_collection_pause_end(int no_of_gc_threads);
   void print_heap_transition();
 
   // Record the fact that a full collection occurred.
@@ -900,15 +775,6 @@
     _cur_satb_drain_time_ms = ms;
   }
 
-  void record_satb_drain_processed_buffers(int processed_buffers) {
-    assert(_g1->mark_in_progress(), "shouldn't be here otherwise");
-    _last_satb_drain_processed_buffers = processed_buffers;
-  }
-
-  void record_mod_union_time(double ms) {
-    _all_mod_union_times_ms->add(ms);
-  }
-
   void record_update_rs_time(int thread, double ms) {
     _par_last_update_rs_times_ms[thread] = ms;
   }
@@ -1009,11 +875,8 @@
 
   void clear_collection_set() { _collection_set = NULL; }
 
-  // The number of elements in the current collection set.
-  size_t collection_set_size() { return _collection_set_size; }
-
-  // Add "hr" to the CS.
-  void add_to_collection_set(HeapRegion* hr);
+  // Add old region "hr" to the CSet.
+  void add_old_region_to_cset(HeapRegion* hr);
 
   // Incremental CSet Support
 
@@ -1023,9 +886,6 @@
   // The tail of the incrementally built collection set.
   HeapRegion* inc_set_tail() { return _inc_cset_tail; }
 
-  // The number of elements in the incrementally built collection set.
-  size_t inc_cset_size() { return _inc_cset_size; }
-
   // Initialize incremental collection set info.
   void start_incremental_cset_building();
 
@@ -1125,8 +985,6 @@
     return _young_list_max_length;
   }
 
-  void update_region_num(bool young);
-
   bool full_young_gcs() {
     return _full_young_gcs;
   }
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -84,8 +84,11 @@
       // slightly paranoid test; I'm trying to catch potential
       // problems before we go into push_on_queue to know where the
       // problem is coming from
-      assert(obj == oopDesc::load_decode_heap_oop(p),
-             "p should still be pointing to obj");
+      assert((obj == oopDesc::load_decode_heap_oop(p)) ||
+             (obj->is_forwarded() &&
+                 obj->forwardee() == oopDesc::load_decode_heap_oop(p)),
+             "p should still be pointing to obj or to its forwardee");
+
       _par_scan_state->push_on_queue(p);
     } else {
       _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -209,29 +209,9 @@
   size_t cards_looked_up() { return _cards;}
 };
 
-// We want the parallel threads to start their scanning at
-// different collection set regions to avoid contention.
-// If we have:
-//          n collection set regions
-//          p threads
-// Then thread t will start at region t * floor (n/p)
-
-HeapRegion* G1RemSet::calculateStartRegion(int worker_i) {
-  HeapRegion* result = _g1p->collection_set();
-  if (ParallelGCThreads > 0) {
-    size_t cs_size = _g1p->collection_set_size();
-    int n_workers = _g1->workers()->total_workers();
-    size_t cs_spans = cs_size / n_workers;
-    size_t ind      = cs_spans * worker_i;
-    for (size_t i = 0; i < ind; i++)
-      result = result->next_in_collection_set();
-  }
-  return result;
-}
-
 void G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) {
   double rs_time_start = os::elapsedTime();
-  HeapRegion *startRegion = calculateStartRegion(worker_i);
+  HeapRegion *startRegion = _g1->start_cset_region_for_worker(worker_i);
 
   ScanRSClosure scanRScl(oc, worker_i);
 
@@ -430,8 +410,10 @@
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   dcqs.concatenate_logs();
 
-  if (ParallelGCThreads > 0) {
-    _seq_task->set_n_threads((int)n_workers());
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    // Don't set the number of workers here.  It will be set
+    // when the task is run
+    // _seq_task->set_n_termination((int)n_workers());
   }
   guarantee( _cards_scanned == NULL, "invariant" );
   _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers());
@@ -578,7 +560,10 @@
 void G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm,
                                 int worker_num, int claim_val) {
   ScrubRSClosure scrub_cl(region_bm, card_bm);
-  _g1->heap_region_par_iterate_chunked(&scrub_cl, worker_num, claim_val);
+  _g1->heap_region_par_iterate_chunked(&scrub_cl,
+                                       worker_num,
+                                       (int) n_workers(),
+                                       claim_val);
 }
 
 
--- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -104,8 +104,6 @@
   void scanRS(OopsInHeapRegionClosure* oc, int worker_i);
   void updateRS(DirtyCardQueue* into_cset_dcq, int worker_i);
 
-  HeapRegion* calculateStartRegion(int i);
-
   CardTableModRefBS* ct_bs() { return _ct_bs; }
   size_t cardsScanned() { return _total_cards_scanned; }
 
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -39,10 +39,6 @@
   develop(intx, G1MarkingOverheadPercent, 0,                                \
           "Overhead of concurrent marking")                                 \
                                                                             \
-                                                                            \
-  develop(intx, G1PolicyVerbose, 0,                                         \
-          "The verbosity level on G1 policy decisions")                     \
-                                                                            \
   develop(intx, G1MarkingVerboseLevel, 0,                                   \
           "Level (0-4) of verboseness of the marking code")                 \
                                                                             \
@@ -58,9 +54,6 @@
   develop(bool, G1TraceMarkStackOverflow, false,                            \
           "If true, extra debugging code for CM restart for ovflw.")        \
                                                                             \
-  develop(intx, G1PausesBtwnConcMark, -1,                                   \
-          "If positive, fixed number of pauses between conc markings")      \
-                                                                            \
   diagnostic(bool, G1SummarizeConcMark, false,                              \
           "Summarize concurrent mark info")                                 \
                                                                             \
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -367,12 +367,13 @@
   static void setup_heap_region_size(uintx min_heap_size);
 
   enum ClaimValues {
-    InitialClaimValue     = 0,
-    FinalCountClaimValue  = 1,
-    NoteEndClaimValue     = 2,
-    ScrubRemSetClaimValue = 3,
-    ParVerifyClaimValue   = 4,
-    RebuildRSClaimValue   = 5
+    InitialClaimValue          = 0,
+    FinalCountClaimValue       = 1,
+    NoteEndClaimValue          = 2,
+    ScrubRemSetClaimValue      = 3,
+    ParVerifyClaimValue        = 4,
+    RebuildRSClaimValue        = 5,
+    CompleteMarkCSetClaimValue = 6
   };
 
   inline HeapWord* par_allocate_no_bot_updates(size_t word_size) {
@@ -416,7 +417,7 @@
 
   void add_to_marked_bytes(size_t incr_bytes) {
     _next_marked_bytes = _next_marked_bytes + incr_bytes;
-    guarantee( _next_marked_bytes <= used(), "invariant" );
+    assert(_next_marked_bytes <= used(), "invariant" );
   }
 
   void zero_marked_bytes()      {
--- a/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -33,6 +33,7 @@
 #include "runtime/java.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/virtualspace.hpp"
+#include "runtime/vmThread.hpp"
 
 void CardTableModRefBS::non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
                                                              OopsInGenClosure* cl,
@@ -42,6 +43,11 @@
   assert((n_threads == 1 && ParallelGCThreads == 0) ||
          n_threads <= (int)ParallelGCThreads,
          "# worker threads != # requested!");
+  assert(!Thread::current()->is_VM_thread() || (n_threads == 1), "There is only 1 VM thread");
+  assert(UseDynamicNumberOfGCThreads ||
+         !FLAG_IS_DEFAULT(ParallelGCThreads) ||
+         n_threads == (int)ParallelGCThreads,
+         "# worker threads != # requested!");
   // Make sure the LNC array is valid for the space.
   jbyte**   lowest_non_clean;
   uintptr_t lowest_non_clean_base_chunk_index;
@@ -52,6 +58,8 @@
 
   int n_strides = n_threads * ParGCStridesPerThread;
   SequentialSubTasksDone* pst = sp->par_seq_tasks();
+  // Sets the condition for completion of the subtask (how many threads
+  // need to finish in order to be done).
   pst->set_n_threads(n_threads);
   pst->set_n_tasks(n_strides);
 
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -305,7 +305,7 @@
 
   inline ParScanThreadState& thread_state(int i);
 
-  void reset(bool promotion_failed);
+  void reset(int active_workers, bool promotion_failed);
   void flush();
 
   #if TASKQUEUE_STATS
@@ -322,6 +322,9 @@
   ParallelTaskTerminator& _term;
   ParNewGeneration&       _gen;
   Generation&             _next_gen;
+ public:
+  bool is_valid(int id) const { return id < length(); }
+  ParallelTaskTerminator* terminator() { return &_term; }
 };
 
 
@@ -351,9 +354,9 @@
 }
 
 
-void ParScanThreadStateSet::reset(bool promotion_failed)
+void ParScanThreadStateSet::reset(int active_threads, bool promotion_failed)
 {
-  _term.reset_for_reuse();
+  _term.reset_for_reuse(active_threads);
   if (promotion_failed) {
     for (int i = 0; i < length(); ++i) {
       thread_state(i).print_and_clear_promotion_failure_size();
@@ -569,6 +572,24 @@
     _state_set(state_set)
   {}
 
+// Reset the terminator for the given number of
+// active threads.
+void ParNewGenTask::set_for_termination(int active_workers) {
+  _state_set->reset(active_workers, _gen->promotion_failed());
+  // Should the heap be passed in?  There's only 1 for now so
+  // grab it instead.
+  GenCollectedHeap* gch = GenCollectedHeap::heap();
+  gch->set_n_termination(active_workers);
+}
+
+// The "i" passed to this method is the part of the work for
+// this thread.  It is not the worker ID.  The "i" is derived
+// from _started_workers which is incremented in internal_note_start()
+// called in GangWorker loop() and which is called under the
+// which is  called under the protection of the gang monitor and is
+// called after a task is started.  So "i" is based on
+// first-come-first-served.
+
 void ParNewGenTask::work(int i) {
   GenCollectedHeap* gch = GenCollectedHeap::heap();
   // Since this is being done in a separate thread, need new resource
@@ -581,6 +602,8 @@
   Generation* old_gen = gch->next_gen(_gen);
 
   ParScanThreadState& par_scan_state = _state_set->thread_state(i);
+  assert(_state_set->is_valid(i), "Should not have been called");
+
   par_scan_state.set_young_old_boundary(_young_old_boundary);
 
   par_scan_state.start_strong_roots();
@@ -733,7 +756,9 @@
 
 private:
   virtual void work(int i);
-
+  virtual void set_for_termination(int active_workers) {
+    _state_set.terminator()->reset_for_reuse(active_workers);
+  }
 private:
   ParNewGeneration&      _gen;
   ProcessTask&           _task;
@@ -789,18 +814,20 @@
   GenCollectedHeap* gch = GenCollectedHeap::heap();
   assert(gch->kind() == CollectedHeap::GenCollectedHeap,
          "not a generational heap");
-  WorkGang* workers = gch->workers();
+  FlexibleWorkGang* workers = gch->workers();
   assert(workers != NULL, "Need parallel worker threads.");
+  _state_set.reset(workers->active_workers(), _generation.promotion_failed());
   ParNewRefProcTaskProxy rp_task(task, _generation, *_generation.next_gen(),
                                  _generation.reserved().end(), _state_set);
   workers->run_task(&rp_task);
-  _state_set.reset(_generation.promotion_failed());
+  _state_set.reset(0 /* bad value in debug if not reset */,
+                   _generation.promotion_failed());
 }
 
 void ParNewRefProcTaskExecutor::execute(EnqueueTask& task)
 {
   GenCollectedHeap* gch = GenCollectedHeap::heap();
-  WorkGang* workers = gch->workers();
+  FlexibleWorkGang* workers = gch->workers();
   assert(workers != NULL, "Need parallel worker threads.");
   ParNewRefEnqueueTaskProxy enq_task(task);
   workers->run_task(&enq_task);
@@ -856,7 +883,13 @@
   assert(gch->kind() == CollectedHeap::GenCollectedHeap,
     "not a CMS generational heap");
   AdaptiveSizePolicy* size_policy = gch->gen_policy()->size_policy();
-  WorkGang* workers = gch->workers();
+  FlexibleWorkGang* workers = gch->workers();
+  assert(workers != NULL, "Need workgang for parallel work");
+  int active_workers =
+      AdaptiveSizePolicy::calc_active_workers(workers->total_workers(),
+                                   workers->active_workers(),
+                                   Threads::number_of_non_daemon_threads());
+  workers->set_active_workers(active_workers);
   _next_gen = gch->next_gen(this);
   assert(_next_gen != NULL,
     "This must be the youngest gen, and not the only gen");
@@ -894,13 +927,19 @@
 
   gch->save_marks();
   assert(workers != NULL, "Need parallel worker threads.");
-  ParallelTaskTerminator _term(workers->total_workers(), task_queues());
-  ParScanThreadStateSet thread_state_set(workers->total_workers(),
+  int n_workers = active_workers;
+
+  // Set the correct parallelism (number of queues) in the reference processor
+  ref_processor()->set_active_mt_degree(n_workers);
+
+  // Always set the terminator for the active number of workers
+  // because only those workers go through the termination protocol.
+  ParallelTaskTerminator _term(n_workers, task_queues());
+  ParScanThreadStateSet thread_state_set(workers->active_workers(),
                                          *to(), *this, *_next_gen, *task_queues(),
                                          _overflow_stacks, desired_plab_sz(), _term);
 
   ParNewGenTask tsk(this, _next_gen, reserved().end(), &thread_state_set);
-  int n_workers = workers->total_workers();
   gch->set_par_threads(n_workers);
   gch->rem_set()->prepare_for_younger_refs_iterate(true);
   // It turns out that even when we're using 1 thread, doing the work in a
@@ -914,7 +953,8 @@
     GenCollectedHeap::StrongRootsScope srs(gch);
     tsk.work(0);
   }
-  thread_state_set.reset(promotion_failed());
+  thread_state_set.reset(0 /* Bad value in debug if not reset */,
+                         promotion_failed());
 
   // Process (weak) reference objects found during scavenge.
   ReferenceProcessor* rp = ref_processor();
@@ -927,6 +967,8 @@
   EvacuateFollowersClosureGeneral evacuate_followers(gch, _level,
     &scan_without_gc_barrier, &scan_with_gc_barrier);
   rp->setup_policy(clear_all_soft_refs);
+  // Can  the mt_degree be set later (at run_task() time would be best)?
+  rp->set_active_mt_degree(active_workers);
   if (rp->processing_is_mt()) {
     ParNewRefProcTaskExecutor task_executor(*this, thread_state_set);
     rp->process_discovered_references(&is_alive, &keep_alive,
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -240,6 +240,10 @@
   HeapWord* young_old_boundary() { return _young_old_boundary; }
 
   void work(int i);
+
+  // Reset the terminator in ParScanThreadStateSet for
+  // "active_workers" threads.
+  virtual void set_for_termination(int active_workers);
 };
 
 class KeepAliveClosure: public DefNewGeneration::KeepAliveClosure {
--- a/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -223,7 +223,8 @@
                                                     MutableSpace* sp,
                                                     HeapWord* space_top,
                                                     PSPromotionManager* pm,
-                                                    uint stripe_number) {
+                                                    uint stripe_number,
+                                                    uint stripe_total) {
   int ssize = 128; // Naked constant!  Work unit = 64k.
   int dirty_card_count = 0;
 
@@ -231,7 +232,11 @@
   jbyte* start_card = byte_for(sp->bottom());
   jbyte* end_card   = byte_for(sp_top - 1) + 1;
   oop* last_scanned = NULL; // Prevent scanning objects more than once
-  for (jbyte* slice = start_card; slice < end_card; slice += ssize*ParallelGCThreads) {
+  // The width of the stripe ssize*stripe_total must be
+  // consistent with the number of stripes so that the complete slice
+  // is covered.
+  size_t slice_width = ssize * stripe_total;
+  for (jbyte* slice = start_card; slice < end_card; slice += slice_width) {
     jbyte* worker_start_card = slice + stripe_number * ssize;
     if (worker_start_card >= end_card)
       return; // We're done.
--- a/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -69,7 +69,8 @@
                                   MutableSpace* sp,
                                   HeapWord* space_top,
                                   PSPromotionManager* pm,
-                                  uint stripe_number);
+                                  uint stripe_number,
+                                  uint stripe_total);
 
   // Verification
   static void verify_all_young_refs_imprecise();
--- a/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "gc_implementation/parallelScavenge/gcTaskManager.hpp"
 #include "gc_implementation/parallelScavenge/gcTaskThread.hpp"
+#include "gc_implementation/shared/adaptiveSizePolicy.hpp"
 #include "memory/allocation.hpp"
 #include "memory/allocation.inline.hpp"
 #include "runtime/mutex.hpp"
@@ -52,6 +53,9 @@
   case noop_task:
     result = "noop task";
     break;
+  case idle_task:
+    result = "idle task";
+    break;
   }
   return result;
 };
@@ -181,6 +185,7 @@
   }
   set_insert_end(task);
   increment_length();
+  verify_length();
   if (TraceGCTaskQueue) {
     print("after:");
   }
@@ -192,7 +197,7 @@
     tty->print_cr("[" INTPTR_FORMAT "]"
                   " GCTaskQueue::enqueue(list: "
                   INTPTR_FORMAT ")",
-                  this);
+                  this, list);
     print("before:");
     list->print("list:");
   }
@@ -211,14 +216,15 @@
     list->remove_end()->set_older(insert_end());
     insert_end()->set_newer(list->remove_end());
     set_insert_end(list->insert_end());
+    set_length(length() + list_length);
     // empty the argument list.
   }
-  set_length(length() + list_length);
   list->initialize();
   if (TraceGCTaskQueue) {
     print("after:");
     list->print("list:");
   }
+  verify_length();
 }
 
 // Dequeue one task.
@@ -288,6 +294,7 @@
   decrement_length();
   assert(result->newer() == NULL, "shouldn't be on queue");
   assert(result->older() == NULL, "shouldn't be on queue");
+  verify_length();
   return result;
 }
 
@@ -311,22 +318,40 @@
   result->set_newer(NULL);
   result->set_older(NULL);
   decrement_length();
+  verify_length();
   return result;
 }
 
 NOT_PRODUCT(
+// Count the elements in the queue and verify the length against
+// that count.
+void GCTaskQueue::verify_length() const {
+  uint count = 0;
+  for (GCTask* element = insert_end();
+       element != NULL;
+       element = element->older()) {
+
+    count++;
+  }
+  assert(count == length(), "Length does not match queue");
+}
+
 void GCTaskQueue::print(const char* message) const {
   tty->print_cr("[" INTPTR_FORMAT "] GCTaskQueue:"
                 "  insert_end: " INTPTR_FORMAT
                 "  remove_end: " INTPTR_FORMAT
+                "  length:       %d"
                 "  %s",
-                this, insert_end(), remove_end(), message);
+                this, insert_end(), remove_end(), length(), message);
+  uint count = 0;
   for (GCTask* element = insert_end();
        element != NULL;
        element = element->older()) {
     element->print("    ");
+    count++;
     tty->cr();
   }
+  tty->print("Total tasks: %d", count);
 }
 )
 
@@ -351,12 +376,16 @@
 //
 GCTaskManager::GCTaskManager(uint workers) :
   _workers(workers),
+  _active_workers(0),
+  _idle_workers(0),
   _ndc(NULL) {
   initialize();
 }
 
 GCTaskManager::GCTaskManager(uint workers, NotifyDoneClosure* ndc) :
   _workers(workers),
+  _active_workers(0),
+  _idle_workers(0),
   _ndc(ndc) {
   initialize();
 }
@@ -373,6 +402,7 @@
   GCTaskQueue* unsynchronized_queue = GCTaskQueue::create_on_c_heap();
   _queue = SynchronizedGCTaskQueue::create(unsynchronized_queue, lock());
   _noop_task = NoopGCTask::create_on_c_heap();
+  _idle_inactive_task = WaitForBarrierGCTask::create_on_c_heap();
   _resource_flag = NEW_C_HEAP_ARRAY(bool, workers());
   {
     // Set up worker threads.
@@ -418,6 +448,8 @@
   assert(queue()->is_empty(), "still have queued work");
   NoopGCTask::destroy(_noop_task);
   _noop_task = NULL;
+  WaitForBarrierGCTask::destroy(_idle_inactive_task);
+  _idle_inactive_task = NULL;
   if (_thread != NULL) {
     for (uint i = 0; i < workers(); i += 1) {
       GCTaskThread::destroy(thread(i));
@@ -442,6 +474,86 @@
   }
 }
 
+void GCTaskManager::set_active_gang() {
+  _active_workers =
+    AdaptiveSizePolicy::calc_active_workers(workers(),
+                                 active_workers(),
+                                 Threads::number_of_non_daemon_threads());
+
+  assert(!all_workers_active() || active_workers() == ParallelGCThreads,
+         err_msg("all_workers_active() is  incorrect: "
+                 "active %d  ParallelGCThreads %d", active_workers(),
+                 ParallelGCThreads));
+  if (TraceDynamicGCThreads) {
+    gclog_or_tty->print_cr("GCTaskManager::set_active_gang(): "
+                           "all_workers_active()  %d  workers %d  "
+                           "active  %d  ParallelGCThreads %d ",
+                           all_workers_active(), workers(),  active_workers(),
+                           ParallelGCThreads);
+  }
+}
+
+// Create IdleGCTasks for inactive workers.
+// Creates tasks in a ResourceArea and assumes
+// an appropriate ResourceMark.
+void GCTaskManager::task_idle_workers() {
+  {
+    int more_inactive_workers = 0;
+    {
+      // Stop any idle tasks from exiting their IdleGCTask's
+      // and get the count for additional IdleGCTask's under
+      // the GCTaskManager's monitor so that the "more_inactive_workers"
+      // count is correct.
+      MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
+      _idle_inactive_task->set_should_wait(true);
+      // active_workers are a number being requested.  idle_workers
+      // are the number currently idle.  If all the workers are being
+      // requested to be active but some are already idle, reduce
+      // the number of active_workers to be consistent with the
+      // number of idle_workers.  The idle_workers are stuck in
+      // idle tasks and will no longer be release (since a new GC
+      // is starting).  Try later to release enough idle_workers
+      // to allow the desired number of active_workers.
+      more_inactive_workers =
+        workers() - active_workers() - idle_workers();
+      if (more_inactive_workers < 0) {
+        int reduced_active_workers = active_workers() + more_inactive_workers;
+        set_active_workers(reduced_active_workers);
+        more_inactive_workers = 0;
+      }
+      if (TraceDynamicGCThreads) {
+        gclog_or_tty->print_cr("JT: %d  workers %d  active  %d  "
+                                "idle %d  more %d",
+                                Threads::number_of_non_daemon_threads(),
+                                workers(),
+                                active_workers(),
+                                idle_workers(),
+                                more_inactive_workers);
+      }
+    }
+    GCTaskQueue* q = GCTaskQueue::create();
+    for(uint i = 0; i < (uint) more_inactive_workers; i++) {
+      q->enqueue(IdleGCTask::create_on_c_heap());
+      increment_idle_workers();
+    }
+    assert(workers() == active_workers() + idle_workers(),
+      "total workers should equal active + inactive");
+    add_list(q);
+    // GCTaskQueue* q was created in a ResourceArea so a
+    // destroy() call is not needed.
+  }
+}
+
+void  GCTaskManager::release_idle_workers() {
+  {
+    MutexLockerEx ml(monitor(),
+      Mutex::_no_safepoint_check_flag);
+    _idle_inactive_task->set_should_wait(false);
+    monitor()->notify_all();
+  // Release monitor
+  }
+}
+
 void GCTaskManager::print_task_time_stamps() {
   for(uint i=0; i<ParallelGCThreads; i++) {
     GCTaskThread* t = thread(i);
@@ -510,6 +622,13 @@
   // Release monitor().
 }
 
+// GC workers wait in get_task() for new work to be added
+// to the GCTaskManager's queue.  When new work is added,
+// a notify is sent to the waiting GC workers which then
+// compete to get tasks.  If a GC worker wakes up and there
+// is no work on the queue, it is given a noop_task to execute
+// and then loops to find more work.
+
 GCTask* GCTaskManager::get_task(uint which) {
   GCTask* result = NULL;
   // Grab the queue lock.
@@ -558,8 +677,10 @@
                   which, result, GCTask::Kind::to_string(result->kind()));
     tty->print_cr("     %s", result->name());
   }
-  increment_busy_workers();
-  increment_delivered_tasks();
+  if (!result->is_idle_task()) {
+    increment_busy_workers();
+    increment_delivered_tasks();
+  }
   return result;
   // Release monitor().
 }
@@ -622,6 +743,7 @@
 
 uint GCTaskManager::decrement_busy_workers() {
   assert(queue()->own_lock(), "don't own the lock");
+  assert(_busy_workers > 0, "About to make a mistake");
   _busy_workers -= 1;
   return _busy_workers;
 }
@@ -643,11 +765,34 @@
   set_resource_flag(which, false);
 }
 
+// "list" contains tasks that are ready to execute.  Those
+// tasks are added to the GCTaskManager's queue of tasks and
+// then the GC workers are notified that there is new work to
+// do.
+//
+// Typically different types of tasks can be added to the "list".
+// For example in PSScavenge OldToYoungRootsTask, SerialOldToYoungRootsTask,
+// ScavengeRootsTask, and StealTask tasks are all added to the list
+// and then the GC workers are notified of new work.  The tasks are
+// handed out in the order in which they are added to the list
+// (although execution is not necessarily in that order).  As long
+// as any tasks are running the GCTaskManager will wait for execution
+// to complete.  GC workers that execute a stealing task remain in
+// the stealing task until all stealing tasks have completed.  The load
+// balancing afforded by the stealing tasks work best if the stealing
+// tasks are added last to the list.
+
 void GCTaskManager::execute_and_wait(GCTaskQueue* list) {
   WaitForBarrierGCTask* fin = WaitForBarrierGCTask::create();
   list->enqueue(fin);
+  // The barrier task will be read by one of the GC
+  // workers once it is added to the list of tasks.
+  // Be sure that is globally visible before the
+  // GC worker reads it (which is after the task is added
+  // to the list of tasks below).
+  OrderAccess::storestore();
   add_list(list);
-  fin->wait_for();
+  fin->wait_for(true /* reset */);
   // We have to release the barrier tasks!
   WaitForBarrierGCTask::destroy(fin);
 }
@@ -692,6 +837,76 @@
 }
 
 //
+// IdleGCTask
+//
+
+IdleGCTask* IdleGCTask::create() {
+  IdleGCTask* result = new IdleGCTask(false);
+  assert(UseDynamicNumberOfGCThreads,
+    "Should only be used with dynamic GC thread");
+  return result;
+}
+
+IdleGCTask* IdleGCTask::create_on_c_heap() {
+  IdleGCTask* result = new(ResourceObj::C_HEAP) IdleGCTask(true);
+  assert(UseDynamicNumberOfGCThreads,
+    "Should only be used with dynamic GC thread");
+  return result;
+}
+
+void IdleGCTask::do_it(GCTaskManager* manager, uint which) {
+  WaitForBarrierGCTask* wait_for_task = manager->idle_inactive_task();
+  if (TraceGCTaskManager) {
+    tty->print_cr("[" INTPTR_FORMAT "]"
+                  " IdleGCTask:::do_it()"
+      "  should_wait: %s",
+      this, wait_for_task->should_wait() ? "true" : "false");
+  }
+  MutexLockerEx ml(manager->monitor(), Mutex::_no_safepoint_check_flag);
+  if (TraceDynamicGCThreads) {
+    gclog_or_tty->print_cr("--- idle %d", which);
+  }
+  // Increment has to be done when the idle tasks are created.
+  // manager->increment_idle_workers();
+  manager->monitor()->notify_all();
+  while (wait_for_task->should_wait()) {
+    if (TraceGCTaskManager) {
+      tty->print_cr("[" INTPTR_FORMAT "]"
+                    " IdleGCTask::do_it()"
+        "  [" INTPTR_FORMAT "] (%s)->wait()",
+        this, manager->monitor(), manager->monitor()->name());
+    }
+    manager->monitor()->wait(Mutex::_no_safepoint_check_flag, 0);
+  }
+  manager->decrement_idle_workers();
+  if (TraceDynamicGCThreads) {
+    gclog_or_tty->print_cr("--- release %d", which);
+  }
+  if (TraceGCTaskManager) {
+    tty->print_cr("[" INTPTR_FORMAT "]"
+                  " IdleGCTask::do_it() returns"
+      "  should_wait: %s",
+      this, wait_for_task->should_wait() ? "true" : "false");
+  }
+  // Release monitor().
+}
+
+void IdleGCTask::destroy(IdleGCTask* that) {
+  if (that != NULL) {
+    that->destruct();
+    if (that->is_c_heap_obj()) {
+      FreeHeap(that);
+    }
+  }
+}
+
+void IdleGCTask::destruct() {
+  // This has to know it's superclass structure, just like the constructor.
+  this->GCTask::destruct();
+  // Nothing else to do.
+}
+
+//
 // BarrierGCTask
 //
 
@@ -768,7 +983,8 @@
 }
 
 WaitForBarrierGCTask* WaitForBarrierGCTask::create_on_c_heap() {
-  WaitForBarrierGCTask* result = new WaitForBarrierGCTask(true);
+  WaitForBarrierGCTask* result =
+    new (ResourceObj::C_HEAP) WaitForBarrierGCTask(true);
   return result;
 }
 
@@ -849,7 +1065,7 @@
   }
 }
 
-void WaitForBarrierGCTask::wait_for() {
+void WaitForBarrierGCTask::wait_for(bool reset) {
   if (TraceGCTaskManager) {
     tty->print_cr("[" INTPTR_FORMAT "]"
                   " WaitForBarrierGCTask::wait_for()"
@@ -869,7 +1085,9 @@
       monitor()->wait(Mutex::_no_safepoint_check_flag, 0);
     }
     // Reset the flag in case someone reuses this task.
-    set_should_wait(true);
+    if (reset) {
+      set_should_wait(true);
+    }
     if (TraceGCTaskManager) {
       tty->print_cr("[" INTPTR_FORMAT "]"
                     " WaitForBarrierGCTask::wait_for() returns"
--- a/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -45,6 +45,7 @@
 class ReleasingBarrierGCTask;
 class NotifyingBarrierGCTask;
 class WaitForBarrierGCTask;
+class IdleGCTask;
 // A free list of Monitor*'s.
 class MonitorSupply;
 
@@ -64,7 +65,8 @@
       unknown_task,
       ordinary_task,
       barrier_task,
-      noop_task
+      noop_task,
+      idle_task
     };
     static const char* to_string(kind value);
   };
@@ -108,6 +110,9 @@
   bool is_noop_task() const {
     return kind()==Kind::noop_task;
   }
+  bool is_idle_task() const {
+    return kind()==Kind::idle_task;
+  }
   void print(const char* message) const PRODUCT_RETURN;
 protected:
   // Constructors: Only create subclasses.
@@ -153,6 +158,7 @@
     assert(((insert_end() == NULL && remove_end() == NULL) ||
             (insert_end() != NULL && remove_end() != NULL)),
            "insert_end and remove_end don't match");
+    assert((insert_end() != NULL) || (_length == 0), "Not empty");
     return insert_end() == NULL;
   }
   uint length() const {
@@ -204,6 +210,8 @@
   GCTask* remove();                     // Remove from remove end.
   GCTask* remove(GCTask* task);         // Remove from the middle.
   void print(const char* message) const PRODUCT_RETURN;
+  // Debug support
+  void verify_length() const PRODUCT_RETURN;
 };
 
 // A GCTaskQueue that can be synchronized.
@@ -285,12 +293,76 @@
   }
 };
 
+// Dynamic number of GC threads
+//
+//  GC threads wait in get_task() for work (i.e., a task) to perform.
+// When the number of GC threads was static, the number of tasks
+// created to do a job was equal to or greater than the maximum
+// number of GC threads (ParallelGCThreads).  The job might be divided
+// into a number of tasks greater than the number of GC threads for
+// load balancing (i.e., over partitioning).  The last task to be
+// executed by a GC thread in a job is a work stealing task.  A
+// GC  thread that gets a work stealing task continues to execute
+// that task until the job is done.  In the static number of GC theads
+// case, tasks are added to a queue (FIFO).  The work stealing tasks are
+// the last to be added.  Once the tasks are added, the GC threads grab
+// a task and go.  A single thread can do all the non-work stealing tasks
+// and then execute a work stealing and wait for all the other GC threads
+// to execute their work stealing task.
+//  In the dynamic number of GC threads implementation, idle-tasks are
+// created to occupy the non-participating or "inactive" threads.  An
+// idle-task makes the GC thread wait on a barrier that is part of the
+// GCTaskManager.  The GC threads that have been "idled" in a IdleGCTask
+// are released once all the active GC threads have finished their work
+// stealing tasks.  The GCTaskManager does not wait for all the "idled"
+// GC threads to resume execution. When those GC threads do resume
+// execution in the course of the thread scheduling, they call get_tasks()
+// as all the other GC threads do.  Because all the "idled" threads are
+// not required to execute in order to finish a job, it is possible for
+// a GC thread to still be "idled" when the next job is started.  Such
+// a thread stays "idled" for the next job.  This can result in a new
+// job not having all the expected active workers.  For example if on
+// job requests 4 active workers out of a total of 10 workers so the
+// remaining 6 are "idled", if the next job requests 6 active workers
+// but all 6 of the "idled" workers are still idle, then the next job
+// will only get 4 active workers.
+//  The implementation for the parallel old compaction phase has an
+// added complication.  In the static case parold partitions the chunks
+// ready to be filled into stacks, one for each GC thread.  A GC thread
+// executing a draining task (drains the stack of ready chunks)
+// claims a stack according to it's id (the unique ordinal value assigned
+// to each GC thread).  In the dynamic case not all GC threads will
+// actively participate so stacks with ready to fill chunks can only be
+// given to the active threads.  An initial implementation chose stacks
+// number 1-n to get the ready chunks and required that GC threads
+// 1-n be the active workers.  This was undesirable because it required
+// certain threads to participate.  In the final implementation a
+// list of stacks equal in number to the active workers are filled
+// with ready chunks.  GC threads that participate get a stack from
+// the task (DrainStacksCompactionTask), empty the stack, and then add it to a
+// recycling list at the end of the task.  If the same GC thread gets
+// a second task, it gets a second stack to drain and returns it.  The
+// stacks are added to a recycling list so that later stealing tasks
+// for this tasks can get a stack from the recycling list.  Stealing tasks
+// use the stacks in its work in a way similar to the draining tasks.
+// A thread is not guaranteed to get anything but a stealing task and
+// a thread that only gets a stealing task has to get a stack. A failed
+// implementation tried to have the GC threads keep the stack they used
+// during a draining task for later use in the stealing task but that didn't
+// work because as noted a thread is not guaranteed to get a draining task.
+//
+// For PSScavenge and ParCompactionManager the GC threads are
+// held in the GCTaskThread** _thread array in GCTaskManager.
+
+
 class GCTaskManager : public CHeapObj {
  friend class ParCompactionManager;
  friend class PSParallelCompact;
  friend class PSScavenge;
  friend class PSRefProcTaskExecutor;
  friend class RefProcTaskExecutor;
+ friend class GCTaskThread;
+ friend class IdleGCTask;
 private:
   // Instance state.
   NotifyDoneClosure*        _ndc;               // Notify on completion.
@@ -298,6 +370,7 @@
   Monitor*                  _monitor;           // Notification of changes.
   SynchronizedGCTaskQueue*  _queue;             // Queue of tasks.
   GCTaskThread**            _thread;            // Array of worker threads.
+  uint                      _active_workers;    // Number of active workers.
   uint                      _busy_workers;      // Number of busy workers.
   uint                      _blocking_worker;   // The worker that's blocking.
   bool*                     _resource_flag;     // Array of flag per threads.
@@ -307,6 +380,8 @@
   uint                      _emptied_queue;     // Times we emptied the queue.
   NoopGCTask*               _noop_task;         // The NoopGCTask instance.
   uint                      _noop_tasks;        // Count of noop tasks.
+  WaitForBarrierGCTask*     _idle_inactive_task;// Task for inactive workers
+  volatile uint             _idle_workers;      // Number of idled workers
 public:
   // Factory create and destroy methods.
   static GCTaskManager* create(uint workers) {
@@ -324,6 +399,9 @@
   uint busy_workers() const {
     return _busy_workers;
   }
+  volatile uint idle_workers() const {
+    return _idle_workers;
+  }
   //     Pun between Monitor* and Mutex*
   Monitor* monitor() const {
     return _monitor;
@@ -331,6 +409,9 @@
   Monitor * lock() const {
     return _monitor;
   }
+  WaitForBarrierGCTask* idle_inactive_task() {
+    return _idle_inactive_task;
+  }
   // Methods.
   //     Add the argument task to be run.
   void add_task(GCTask* task);
@@ -350,6 +431,10 @@
   bool should_release_resources(uint which); // Predicate.
   //     Note the release of resources by the argument worker.
   void note_release(uint which);
+  //     Create IdleGCTasks for inactive workers and start workers
+  void task_idle_workers();
+  //     Release the workers in IdleGCTasks
+  void release_idle_workers();
   // Constants.
   //     A sentinel worker identifier.
   static uint sentinel_worker() {
@@ -375,6 +460,15 @@
   uint workers() const {
     return _workers;
   }
+  void set_active_workers(uint v) {
+    assert(v <= _workers, "Trying to set more workers active than there are");
+    _active_workers = MIN2(v, _workers);
+    assert(v != 0, "Trying to set active workers to 0");
+    _active_workers = MAX2(1U, _active_workers);
+  }
+  // Sets the number of threads that will be used in a collection
+  void set_active_gang();
+
   NotifyDoneClosure* notify_done_closure() const {
     return _ndc;
   }
@@ -457,8 +551,21 @@
   void reset_noop_tasks() {
     _noop_tasks = 0;
   }
+  void increment_idle_workers() {
+    _idle_workers++;
+  }
+  void decrement_idle_workers() {
+    _idle_workers--;
+  }
   // Other methods.
   void initialize();
+
+ public:
+  // Return true if all workers are currently active.
+  bool all_workers_active() { return workers() == active_workers(); }
+  uint active_workers() const {
+    return _active_workers;
+  }
 };
 
 //
@@ -475,6 +582,8 @@
   static NoopGCTask* create();
   static NoopGCTask* create_on_c_heap();
   static void destroy(NoopGCTask* that);
+
+  virtual char* name() { return (char *)"noop task"; }
   // Methods from GCTask.
   void do_it(GCTaskManager* manager, uint which) {
     // Nothing to do.
@@ -518,6 +627,8 @@
   }
   // Destructor-like method.
   void destruct();
+
+  virtual char* name() { return (char *)"barrier task"; }
   // Methods.
   //     Wait for this to be the only task running.
   void do_it_internal(GCTaskManager* manager, uint which);
@@ -586,11 +697,13 @@
 // the BarrierGCTask is done.
 // This may cover many of the uses of NotifyingBarrierGCTasks.
 class WaitForBarrierGCTask : public BarrierGCTask {
+  friend class GCTaskManager;
+  friend class IdleGCTask;
 private:
   // Instance state.
-  Monitor*   _monitor;                  // Guard and notify changes.
-  bool       _should_wait;              // true=>wait, false=>proceed.
-  const bool _is_c_heap_obj;            // Was allocated on the heap.
+  Monitor*      _monitor;                  // Guard and notify changes.
+  volatile bool _should_wait;              // true=>wait, false=>proceed.
+  const bool    _is_c_heap_obj;            // Was allocated on the heap.
 public:
   virtual char* name() { return (char *) "waitfor-barrier-task"; }
 
@@ -600,7 +713,10 @@
   static void destroy(WaitForBarrierGCTask* that);
   // Methods.
   void     do_it(GCTaskManager* manager, uint which);
-  void     wait_for();
+  void     wait_for(bool reset);
+  void set_should_wait(bool value) {
+    _should_wait = value;
+  }
 protected:
   // Constructor.  Clients use factory, but there might be subclasses.
   WaitForBarrierGCTask(bool on_c_heap);
@@ -613,14 +729,38 @@
   bool should_wait() const {
     return _should_wait;
   }
-  void set_should_wait(bool value) {
-    _should_wait = value;
-  }
   bool is_c_heap_obj() {
     return _is_c_heap_obj;
   }
 };
 
+// Task that is used to idle a GC task when fewer than
+// the maximum workers are wanted.
+class IdleGCTask : public GCTask {
+  const bool    _is_c_heap_obj;            // Was allocated on the heap.
+ public:
+  bool is_c_heap_obj() {
+    return _is_c_heap_obj;
+  }
+  // Factory create and destroy methods.
+  static IdleGCTask* create();
+  static IdleGCTask* create_on_c_heap();
+  static void destroy(IdleGCTask* that);
+
+  virtual char* name() { return (char *)"idle task"; }
+  // Methods from GCTask.
+  virtual void do_it(GCTaskManager* manager, uint which);
+protected:
+  // Constructor.
+  IdleGCTask(bool on_c_heap) :
+    GCTask(GCTask::Kind::idle_task),
+    _is_c_heap_obj(on_c_heap) {
+    // Nothing to do.
+  }
+  // Destructor-like method.
+  void destruct();
+};
+
 class MonitorSupply : public AllStatic {
 private:
   // State.
--- a/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -1,6 +1,6 @@
 
 /*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -93,6 +93,11 @@
   st->cr();
 }
 
+// GC workers get tasks from the GCTaskManager and execute
+// them in this method.  If there are no tasks to execute,
+// the GC workers wait in the GCTaskManager's get_task()
+// for tasks to be enqueued for execution.
+
 void GCTaskThread::run() {
   // Set up the thread for stack overflow support
   this->record_stack_base_and_size();
@@ -124,7 +129,8 @@
     for (; /* break */; ) {
       // This will block until there is a task to be gotten.
       GCTask* task = manager()->get_task(which());
-
+      // Record if this is an idle task for later use.
+      bool is_idle_task = task->is_idle_task();
       // In case the update is costly
       if (PrintGCTaskTimeStamps) {
         timer.update();
@@ -133,19 +139,33 @@
       jlong entry_time = timer.ticks();
       char* name = task->name();
 
+      // If this is the barrier task, it can be destroyed
+      // by the GC task manager once the do_it() executes.
       task->do_it(manager(), which());
-      manager()->note_completion(which());
 
-      if (PrintGCTaskTimeStamps) {
-        assert(_time_stamps != NULL, "Sanity (PrintGCTaskTimeStamps set late?)");
+      // Use the saved value of is_idle_task because references
+      // using "task" are not reliable for the barrier task.
+      if (!is_idle_task) {
+        manager()->note_completion(which());
 
-        timer.update();
+        if (PrintGCTaskTimeStamps) {
+          assert(_time_stamps != NULL,
+            "Sanity (PrintGCTaskTimeStamps set late?)");
 
-        GCTaskTimeStamp* time_stamp = time_stamp_at(_time_stamp_index++);
+          timer.update();
 
-        time_stamp->set_name(name);
-        time_stamp->set_entry_time(entry_time);
-        time_stamp->set_exit_time(timer.ticks());
+          GCTaskTimeStamp* time_stamp = time_stamp_at(_time_stamp_index++);
+
+          time_stamp->set_name(name);
+          time_stamp->set_entry_time(entry_time);
+          time_stamp->set_exit_time(timer.ticks());
+        }
+      } else {
+        // idle tasks complete outside the normal accounting
+        // so that a task can complete without waiting for idle tasks.
+        // They have to be terminated separately.
+        IdleGCTask::destroy((IdleGCTask*)task);
+        set_is_working(true);
       }
 
       // Check if we should release our inner resources.
--- a/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -35,6 +35,7 @@
 class GCTaskManager;
 
 class GCTaskThread : public WorkerThread {
+  friend class GCTaskManager;
 private:
   // Instance state.
   GCTaskManager* _manager;              // Manager for worker.
@@ -45,6 +46,8 @@
 
   GCTaskTimeStamp* time_stamp_at(uint index);
 
+  bool _is_working;                     // True if participating in GC tasks
+
  public:
   // Factory create and destroy methods.
   static GCTaskThread* create(GCTaskManager* manager,
@@ -84,6 +87,7 @@
   uint processor_id() const {
     return _processor_id;
   }
+  void set_is_working(bool v) { _is_working = v; }
 };
 
 class GCTaskTimeStamp : public CHeapObj
--- a/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -152,15 +152,16 @@
 {
   ParallelScavengeHeap* heap = PSParallelCompact::gc_heap();
   uint parallel_gc_threads = heap->gc_task_manager()->workers();
+  uint active_gc_threads = heap->gc_task_manager()->active_workers();
   RegionTaskQueueSet* qset = ParCompactionManager::region_array();
-  ParallelTaskTerminator terminator(parallel_gc_threads, qset);
+  ParallelTaskTerminator terminator(active_gc_threads, qset);
   GCTaskQueue* q = GCTaskQueue::create();
   for(uint i=0; i<parallel_gc_threads; i++) {
     q->enqueue(new RefProcTaskProxy(task, i));
   }
   if (task.marks_oops_alive()) {
     if (parallel_gc_threads>1) {
-      for (uint j=0; j<parallel_gc_threads; j++) {
+      for (uint j=0; j<active_gc_threads; j++) {
         q->enqueue(new StealMarkingTask(&terminator));
       }
     }
@@ -216,7 +217,6 @@
 // StealRegionCompactionTask
 //
 
-
 StealRegionCompactionTask::StealRegionCompactionTask(ParallelTaskTerminator* t):
   _terminator(t) {}
 
@@ -229,6 +229,32 @@
   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);
 
+
+  // If not all threads are active, get a draining stack
+  // from the list.  Else, just use this threads draining stack.
+  uint which_stack_index;
+  bool use_all_workers = manager->all_workers_active();
+  if (use_all_workers) {
+    which_stack_index = which;
+    assert(manager->active_workers() == ParallelGCThreads,
+           err_msg("all_workers_active has been incorrectly set: "
+                   " active %d  ParallelGCThreads %d", manager->active_workers(),
+                   ParallelGCThreads));
+  } else {
+    which_stack_index = ParCompactionManager::pop_recycled_stack_index();
+  }
+
+  cm->set_region_stack_index(which_stack_index);
+  cm->set_region_stack(ParCompactionManager::region_list(which_stack_index));
+  if (TraceDynamicGCThreads) {
+    gclog_or_tty->print_cr("StealRegionCompactionTask::do_it "
+                           "region_stack_index %d region_stack = 0x%x "
+                           " empty (%d) use all workers %d",
+    which_stack_index, ParCompactionManager::region_list(which_stack_index),
+    cm->region_stack()->is_empty(),
+    use_all_workers);
+  }
+
   // Has to drain stacks first because there may be regions on
   // preloaded onto the stack and this thread may never have
   // done a draining task.  Are the draining tasks needed?
@@ -285,6 +311,50 @@
   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);
 
+  uint which_stack_index;
+  bool use_all_workers = manager->all_workers_active();
+  if (use_all_workers) {
+    which_stack_index = which;
+    assert(manager->active_workers() == ParallelGCThreads,
+           err_msg("all_workers_active has been incorrectly set: "
+                   " active %d  ParallelGCThreads %d", manager->active_workers(),
+                   ParallelGCThreads));
+  } else {
+    which_stack_index = stack_index();
+  }
+
+  cm->set_region_stack(ParCompactionManager::region_list(which_stack_index));
+  if (TraceDynamicGCThreads) {
+    gclog_or_tty->print_cr("DrainStacksCompactionTask::do_it which = %d "
+                           "which_stack_index = %d/empty(%d) "
+                           "use all workers %d",
+                           which, which_stack_index,
+                           cm->region_stack()->is_empty(),
+                           use_all_workers);
+  }
+
+  cm->set_region_stack_index(which_stack_index);
+
   // Process any regions already in the compaction managers stacks.
   cm->drain_region_stacks();
+
+  assert(cm->region_stack()->is_empty(), "Not empty");
+
+  if (!use_all_workers) {
+    // Always give up the region stack.
+    assert(cm->region_stack() ==
+           ParCompactionManager::region_list(cm->region_stack_index()),
+           "region_stack and region_stack_index are inconsistent");
+    ParCompactionManager::push_recycled_stack_index(cm->region_stack_index());
+
+    if (TraceDynamicGCThreads) {
+      void* old_region_stack = (void*) cm->region_stack();
+      int old_region_stack_index = cm->region_stack_index();
+      gclog_or_tty->print_cr("Pushing region stack 0x%x/%d",
+        old_region_stack, old_region_stack_index);
+    }
+
+    cm->set_region_stack(NULL);
+    cm->set_region_stack_index((uint)max_uintx);
+  }
 }
--- a/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,6 +39,9 @@
 
 PSOldGen*            ParCompactionManager::_old_gen = NULL;
 ParCompactionManager**  ParCompactionManager::_manager_array = NULL;
+
+RegionTaskQueue**              ParCompactionManager::_region_list = NULL;
+
 OopTaskQueueSet*     ParCompactionManager::_stack_array = NULL;
 ParCompactionManager::ObjArrayTaskQueueSet*
   ParCompactionManager::_objarray_queues = NULL;
@@ -46,8 +49,14 @@
 ParMarkBitMap*       ParCompactionManager::_mark_bitmap = NULL;
 RegionTaskQueueSet*  ParCompactionManager::_region_array = NULL;
 
+uint*                 ParCompactionManager::_recycled_stack_index = NULL;
+int                   ParCompactionManager::_recycled_top = -1;
+int                   ParCompactionManager::_recycled_bottom = -1;
+
 ParCompactionManager::ParCompactionManager() :
-    _action(CopyAndUpdate) {
+    _action(CopyAndUpdate),
+    _region_stack(NULL),
+    _region_stack_index((uint)max_uintx) {
 
   ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
   assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");
@@ -57,7 +66,10 @@
 
   marking_stack()->initialize();
   _objarray_stack.initialize();
-  region_stack()->initialize();
+}
+
+ParCompactionManager::~ParCompactionManager() {
+  delete _recycled_stack_index;
 }
 
 void ParCompactionManager::initialize(ParMarkBitMap* mbm) {
@@ -72,6 +84,19 @@
   _manager_array = NEW_C_HEAP_ARRAY(ParCompactionManager*, parallel_gc_threads+1 );
   guarantee(_manager_array != NULL, "Could not allocate manager_array");
 
+  _region_list = NEW_C_HEAP_ARRAY(RegionTaskQueue*,
+                                         parallel_gc_threads+1);
+  guarantee(_region_list != NULL, "Could not initialize promotion manager");
+
+  _recycled_stack_index = NEW_C_HEAP_ARRAY(uint, parallel_gc_threads);
+
+  // parallel_gc-threads + 1 to be consistent with the number of
+  // compaction managers.
+  for(uint i=0; i<parallel_gc_threads + 1; i++) {
+    _region_list[i] = new RegionTaskQueue();
+    region_list(i)->initialize();
+  }
+
   _stack_array = new OopTaskQueueSet(parallel_gc_threads);
   guarantee(_stack_array != NULL, "Could not allocate stack_array");
   _objarray_queues = new ObjArrayTaskQueueSet(parallel_gc_threads);
@@ -85,7 +110,7 @@
     guarantee(_manager_array[i] != NULL, "Could not create ParCompactionManager");
     stack_array()->register_queue(i, _manager_array[i]->marking_stack());
     _objarray_queues->register_queue(i, &_manager_array[i]->_objarray_stack);
-    region_array()->register_queue(i, _manager_array[i]->region_stack());
+    region_array()->register_queue(i, region_list(i));
   }
 
   // The VMThread gets its own ParCompactionManager, which is not available
@@ -97,6 +122,29 @@
     "Not initialized?");
 }
 
+int ParCompactionManager::pop_recycled_stack_index() {
+  assert(_recycled_bottom <= _recycled_top, "list is empty");
+  // Get the next available index
+  if (_recycled_bottom < _recycled_top) {
+    uint cur, next, last;
+    do {
+      cur = _recycled_bottom;
+      next = cur + 1;
+      last = Atomic::cmpxchg(next, &_recycled_bottom, cur);
+    } while (cur != last);
+    return _recycled_stack_index[next];
+  } else {
+    return -1;
+  }
+}
+
+void ParCompactionManager::push_recycled_stack_index(uint v) {
+  // Get the next available index
+  int cur = Atomic::add(1, &_recycled_top);
+  _recycled_stack_index[cur] = v;
+  assert(_recycled_bottom <= _recycled_top, "list top and bottom are wrong");
+}
+
 bool ParCompactionManager::should_update() {
   assert(action() != NotValid, "Action is not set");
   return (action() == ParCompactionManager::Update) ||
@@ -111,14 +159,13 @@
          (action() == ParCompactionManager::UpdateAndCopy);
 }
 
-bool ParCompactionManager::should_verify_only() {
-  assert(action() != NotValid, "Action is not set");
-  return action() == ParCompactionManager::VerifyUpdate;
+void ParCompactionManager::region_list_push(uint list_index,
+                                            size_t region_index) {
+  region_list(list_index)->push(region_index);
 }
 
-bool ParCompactionManager::should_reset_only() {
-  assert(action() != NotValid, "Action is not set");
-  return action() == ParCompactionManager::ResetObjects;
+void ParCompactionManager::verify_region_list_empty(uint list_index) {
+  assert(region_list(list_index)->is_empty(), "Not empty");
 }
 
 ParCompactionManager*
--- a/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -48,6 +48,7 @@
   friend class StealRegionCompactionTask;
   friend class UpdateAndFillClosure;
   friend class RefProcTaskExecutor;
+  friend class IdleGCTask;
 
  public:
 
@@ -58,8 +59,6 @@
     Copy,
     UpdateAndCopy,
     CopyAndUpdate,
-    VerifyUpdate,
-    ResetObjects,
     NotValid
   };
 // ------------------------  End don't putback if not needed
@@ -85,7 +84,31 @@
   // Is there a way to reuse the _marking_stack for the
   // saving empty regions?  For now just create a different
   // type of TaskQueue.
-  RegionTaskQueue               _region_stack;
+  RegionTaskQueue*             _region_stack;
+
+  static RegionTaskQueue**     _region_list;
+  // Index in _region_list for current _region_stack.
+  uint _region_stack_index;
+
+  // Indexes of recycled region stacks/overflow stacks
+  // Stacks of regions to be compacted are embedded in the tasks doing
+  // the compaction.  A thread that executes the task extracts the
+  // region stack and drains it.  These threads keep these region
+  // stacks for use during compaction task stealing.  If a thread
+  // gets a second draining task, it pushed its current region stack
+  // index into the array _recycled_stack_index and gets a new
+  // region stack from the task.  A thread that is executing a
+  // compaction stealing task without ever having executing a
+  // draining task, will get a region stack from _recycled_stack_index.
+  //
+  // Array of indexes into the array of region stacks.
+  static uint*                    _recycled_stack_index;
+  // The index into _recycled_stack_index of the last region stack index
+  // pushed.  If -1, there are no entries into _recycled_stack_index.
+  static int                      _recycled_top;
+  // The index into _recycled_stack_index of the last region stack index
+  // popped.  If -1, there has not been any entry popped.
+  static int                      _recycled_bottom;
 
   Stack<Klass*>                 _revisit_klass_stack;
   Stack<DataLayout*>            _revisit_mdo_stack;
@@ -104,7 +127,6 @@
   // Array of tasks.  Needed by the ParallelTaskTerminator.
   static RegionTaskQueueSet* region_array()      { return _region_array; }
   OverflowTaskQueue<oop>*  marking_stack()       { return &_marking_stack; }
-  RegionTaskQueue* region_stack()                { return &_region_stack; }
 
   // Pushes onto the marking stack.  If the marking stack is full,
   // pushes onto the overflow stack.
@@ -116,10 +138,33 @@
   Action action() { return _action; }
   void set_action(Action v) { _action = v; }
 
+  RegionTaskQueue* region_stack()                { return _region_stack; }
+  void set_region_stack(RegionTaskQueue* v)       { _region_stack = v; }
+
   inline static ParCompactionManager* manager_array(int index);
 
+  inline static RegionTaskQueue* region_list(int index) {
+    return _region_list[index];
+  }
+
+  uint region_stack_index() { return _region_stack_index; }
+  void set_region_stack_index(uint v) { _region_stack_index = v; }
+
+  // Pop and push unique reusable stack index
+  static int pop_recycled_stack_index();
+  static void push_recycled_stack_index(uint v);
+  static void reset_recycled_stack_index() {
+    _recycled_bottom = _recycled_top = -1;
+  }
+
   ParCompactionManager();
+  ~ParCompactionManager();
 
+  // Pushes onto the region stack at the given index.  If the
+  // region stack is full,
+  // pushes onto the region overflow stack.
+  static void region_list_push(uint stack_index, size_t region_index);
+  static void verify_region_list_empty(uint stack_index);
   ParMarkBitMap* mark_bitmap() { return _mark_bitmap; }
 
   // Take actions in preparation for a compaction.
@@ -129,8 +174,6 @@
 
   bool should_update();
   bool should_copy();
-  bool should_verify_only();
-  bool should_reset_only();
 
   Stack<Klass*>* revisit_klass_stack() { return &_revisit_klass_stack; }
   Stack<DataLayout*>* revisit_mdo_stack() { return &_revisit_mdo_stack; }
--- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -96,7 +96,8 @@
    * by the MarkSweepAlwaysCompactCount parameter. This is a significant
    * performance improvement!
    */
-  bool skip_dead = ((PSMarkSweep::total_invocations() % MarkSweepAlwaysCompactCount) != 0);
+  bool skip_dead = (MarkSweepAlwaysCompactCount < 1)
+    || ((PSMarkSweep::total_invocations() % MarkSweepAlwaysCompactCount) != 0);
 
   size_t allowed_deadspace = 0;
   if (skip_dead) {
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -2045,6 +2045,11 @@
     ResourceMark rm;
     HandleMark hm;
 
+    // Set the number of GC threads to be used in this collection
+    gc_task_manager()->set_active_gang();
+    gc_task_manager()->task_idle_workers();
+    heap->set_par_threads(gc_task_manager()->active_workers());
+
     const bool is_system_gc = gc_cause == GCCause::_java_lang_system_gc;
 
     // This is useful for debugging but don't change the output the
@@ -2197,6 +2202,7 @@
     // Track memory usage and detect low memory
     MemoryService::track_memory_usage();
     heap->update_counters();
+    gc_task_manager()->release_idle_workers();
   }
 
 #ifdef ASSERT
@@ -2204,7 +2210,7 @@
     ParCompactionManager* const cm =
       ParCompactionManager::manager_array(int(i));
     assert(cm->marking_stack()->is_empty(),       "should be empty");
-    assert(cm->region_stack()->is_empty(),        "should be empty");
+    assert(ParCompactionManager::region_list(int(i))->is_empty(), "should be empty");
     assert(cm->revisit_klass_stack()->is_empty(), "should be empty");
   }
 #endif // ASSERT
@@ -2351,8 +2357,9 @@
 
   ParallelScavengeHeap* heap = gc_heap();
   uint parallel_gc_threads = heap->gc_task_manager()->workers();
+  uint active_gc_threads = heap->gc_task_manager()->active_workers();
   TaskQueueSetSuper* qset = ParCompactionManager::region_array();
-  ParallelTaskTerminator terminator(parallel_gc_threads, qset);
+  ParallelTaskTerminator terminator(active_gc_threads, qset);
 
   PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
   PSParallelCompact::FollowStackClosure follow_stack_closure(cm);
@@ -2374,21 +2381,13 @@
     q->enqueue(new MarkFromRootsTask(MarkFromRootsTask::jvmti));
     q->enqueue(new MarkFromRootsTask(MarkFromRootsTask::code_cache));
 
-    if (parallel_gc_threads > 1) {
-      for (uint j = 0; j < parallel_gc_threads; j++) {
+    if (active_gc_threads > 1) {
+      for (uint j = 0; j < active_gc_threads; j++) {
         q->enqueue(new StealMarkingTask(&terminator));
       }
     }
 
-    WaitForBarrierGCTask* fin = WaitForBarrierGCTask::create();
-    q->enqueue(fin);
-
-    gc_task_manager()->add_list(q);
-
-    fin->wait_for();
-
-    // We have to release the barrier tasks!
-    WaitForBarrierGCTask::destroy(fin);
+    gc_task_manager()->execute_and_wait(q);
   }
 
   // Process reference objects found during marking
@@ -2483,10 +2482,22 @@
 {
   TraceTime tm("drain task setup", print_phases(), true, gclog_or_tty);
 
-  const unsigned int task_count = MAX2(parallel_gc_threads, 1U);
-  for (unsigned int j = 0; j < task_count; j++) {
+  // Find the threads that are active
+  unsigned int which = 0;
+
+  const uint task_count = MAX2(parallel_gc_threads, 1U);
+  for (uint j = 0; j < task_count; j++) {
     q->enqueue(new DrainStacksCompactionTask(j));
+    ParCompactionManager::verify_region_list_empty(j);
+    // Set the region stacks variables to "no" region stack values
+    // so that they will be recognized and needing a region stack
+    // in the stealing tasks if they do not get one by executing
+    // a draining stack.
+    ParCompactionManager* cm = ParCompactionManager::manager_array(j);
+    cm->set_region_stack(NULL);
+    cm->set_region_stack_index((uint)max_uintx);
   }
+  ParCompactionManager::reset_recycled_stack_index();
 
   // Find all regions that are available (can be filled immediately) and
   // distribute them to the thread stacks.  The iteration is done in reverse
@@ -2495,8 +2506,10 @@
   const ParallelCompactData& sd = PSParallelCompact::summary_data();
 
   size_t fillable_regions = 0;   // A count for diagnostic purposes.
-  unsigned int which = 0;       // The worker thread number.
-
+  // A region index which corresponds to the tasks created above.
+  // "which" must be 0 <= which < task_count
+
+  which = 0;
   for (unsigned int id = to_space_id; id > perm_space_id; --id) {
     SpaceInfo* const space_info = _space_info + id;
     MutableSpace* const space = space_info->space();
@@ -2509,8 +2522,7 @@
 
     for (size_t cur = end_region - 1; cur >= beg_region; --cur) {
       if (sd.region(cur)->claim_unsafe()) {
-        ParCompactionManager* cm = ParCompactionManager::manager_array(which);
-        cm->push_region(cur);
+        ParCompactionManager::region_list_push(which, cur);
 
         if (TraceParallelOldGCCompactionPhase && Verbose) {
           const size_t count_mod_8 = fillable_regions & 7;
@@ -2521,8 +2533,10 @@
 
         NOT_PRODUCT(++fillable_regions;)
 
-        // Assign regions to threads in round-robin fashion.
+        // Assign regions to tasks in round-robin fashion.
         if (++which == task_count) {
+          assert(which <= parallel_gc_threads,
+            "Inconsistent number of workers");
           which = 0;
         }
       }
@@ -2642,26 +2656,19 @@
   PSOldGen* old_gen = heap->old_gen();
   old_gen->start_array()->reset();
   uint parallel_gc_threads = heap->gc_task_manager()->workers();
+  uint active_gc_threads = heap->gc_task_manager()->active_workers();
   TaskQueueSetSuper* qset = ParCompactionManager::region_array();
-  ParallelTaskTerminator terminator(parallel_gc_threads, qset);
+  ParallelTaskTerminator terminator(active_gc_threads, qset);
 
   GCTaskQueue* q = GCTaskQueue::create();
-  enqueue_region_draining_tasks(q, parallel_gc_threads);
-  enqueue_dense_prefix_tasks(q, parallel_gc_threads);
-  enqueue_region_stealing_tasks(q, &terminator, parallel_gc_threads);
+  enqueue_region_draining_tasks(q, active_gc_threads);
+  enqueue_dense_prefix_tasks(q, active_gc_threads);
+  enqueue_region_stealing_tasks(q, &terminator, active_gc_threads);
 
   {
     TraceTime tm_pc("par compact", print_phases(), true, gclog_or_tty);
 
-    WaitForBarrierGCTask* fin = WaitForBarrierGCTask::create();
-    q->enqueue(fin);
-
-    gc_task_manager()->add_list(q);
-
-    fin->wait_for();
-
-    // We have to release the barrier tasks!
-    WaitForBarrierGCTask::destroy(fin);
+    gc_task_manager()->execute_and_wait(q);
 
 #ifdef  ASSERT
     // Verify that all regions have been processed before the deferred updates.
@@ -2729,6 +2736,9 @@
 PSParallelCompact::follow_weak_klass_links() {
   // All klasses on the revisit stack are marked at this point.
   // Update and follow all subklass, sibling and implementor links.
+  // Check all the stacks here even if not all the workers are active.
+  // There is no accounting which indicates which stacks might have
+  // contents to be followed.
   if (PrintRevisitStats) {
     gclog_or_tty->print_cr("#classes in system dictionary = %d",
                            SystemDictionary::number_of_classes());
@@ -3360,20 +3370,7 @@
   HeapWord* beg_addr = sp->bottom();
   HeapWord* end_addr = sp->top();
 
-#ifdef ASSERT
   assert(beg_addr <= dp_addr && dp_addr <= end_addr, "bad dense prefix");
-  if (cm->should_verify_only()) {
-    VerifyUpdateClosure verify_update(cm, sp);
-    bitmap->iterate(&verify_update, beg_addr, end_addr);
-    return;
-  }
-
-  if (cm->should_reset_only()) {
-    ResetObjectsClosure reset_objects(cm);
-    bitmap->iterate(&reset_objects, beg_addr, end_addr);
-    return;
-  }
-#endif
 
   const size_t beg_region = sd.addr_to_region_idx(beg_addr);
   const size_t dp_region = sd.addr_to_region_idx(dp_addr);
@@ -3492,35 +3489,6 @@
   return ParMarkBitMap::incomplete;
 }
 
-// Verify the new location using the forwarding pointer
-// from MarkSweep::mark_sweep_phase2().  Set the mark_word
-// to the initial value.
-ParMarkBitMapClosure::IterationStatus
-PSParallelCompact::VerifyUpdateClosure::do_addr(HeapWord* addr, size_t words) {
-  // The second arg (words) is not used.
-  oop obj = (oop) addr;
-  HeapWord* forwarding_ptr = (HeapWord*) obj->mark()->decode_pointer();
-  HeapWord* new_pointer = summary_data().calc_new_pointer(obj);
-  if (forwarding_ptr == NULL) {
-    // The object is dead or not moving.
-    assert(bitmap()->is_unmarked(obj) || (new_pointer == (HeapWord*) obj),
-           "Object liveness is wrong.");
-    return ParMarkBitMap::incomplete;
-  }
-  assert(HeapMaximumCompactionInterval > 1 || MarkSweepAlwaysCompactCount > 1 ||
-         forwarding_ptr == new_pointer, "new location is incorrect");
-  return ParMarkBitMap::incomplete;
-}
-
-// Reset objects modified for debug checking.
-ParMarkBitMapClosure::IterationStatus
-PSParallelCompact::ResetObjectsClosure::do_addr(HeapWord* addr, size_t words) {
-  // The second arg (words) is not used.
-  oop obj = (oop) addr;
-  obj->init_mark();
-  return ParMarkBitMap::incomplete;
-}
-
 // Prepare for compaction.  This method is executed once
 // (i.e., by a single thread) before compaction.
 // Save the updated location of the intArrayKlassObj for
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -832,31 +832,6 @@
     virtual void do_code_blob(CodeBlob* cb) const { }
   };
 
-  // Closure for verifying update of pointers.  Does not
-  // have any side effects.
-  class VerifyUpdateClosure: public ParMarkBitMapClosure {
-    const MutableSpace* _space; // Is this ever used?
-
-   public:
-    VerifyUpdateClosure(ParCompactionManager* cm, const MutableSpace* sp) :
-      ParMarkBitMapClosure(PSParallelCompact::mark_bitmap(), cm), _space(sp)
-    { }
-
-    virtual IterationStatus do_addr(HeapWord* addr, size_t words);
-
-    const MutableSpace* space() { return _space; }
-  };
-
-  // Closure for updating objects altered for debug checking
-  class ResetObjectsClosure: public ParMarkBitMapClosure {
-   public:
-    ResetObjectsClosure(ParCompactionManager* cm):
-      ParMarkBitMapClosure(PSParallelCompact::mark_bitmap(), cm)
-    { }
-
-    virtual IterationStatus do_addr(HeapWord* addr, size_t words);
-  };
-
   friend class KeepAliveClosure;
   friend class FollowStackClosure;
   friend class AdjustPointerClosure;
@@ -1183,10 +1158,6 @@
   // Update the deferred objects in the space.
   static void update_deferred_objects(ParCompactionManager* cm, SpaceId id);
 
-  // Mark pointer and follow contents.
-  template <class T>
-  static inline void mark_and_follow(ParCompactionManager* cm, T* p);
-
   static ParMarkBitMap* mark_bitmap() { return &_mark_bitmap; }
   static ParallelCompactData& summary_data() { return _summary_data; }
 
@@ -1283,20 +1254,6 @@
 }
 
 template <class T>
-inline void PSParallelCompact::mark_and_follow(ParCompactionManager* cm,
-                                               T* p) {
-  T heap_oop = oopDesc::load_heap_oop(p);
-  if (!oopDesc::is_null(heap_oop)) {
-    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-    if (mark_bitmap()->is_unmarked(obj)) {
-      if (mark_obj(obj)) {
-        obj->follow_contents(cm);
-      }
-    }
-  }
-}
-
-template <class T>
 inline void PSParallelCompact::mark_and_push(ParCompactionManager* cm, T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
   if (!oopDesc::is_null(heap_oop)) {
--- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -181,28 +181,29 @@
 void PSRefProcTaskExecutor::execute(ProcessTask& task)
 {
   GCTaskQueue* q = GCTaskQueue::create();
-  for(uint i=0; i<ParallelGCThreads; i++) {
+  GCTaskManager* manager = ParallelScavengeHeap::gc_task_manager();
+  for(uint i=0; i < manager->active_workers(); i++) {
     q->enqueue(new PSRefProcTaskProxy(task, i));
   }
-  ParallelTaskTerminator terminator(
-                 ParallelScavengeHeap::gc_task_manager()->workers(),
+  ParallelTaskTerminator terminator(manager->active_workers(),
                  (TaskQueueSetSuper*) PSPromotionManager::stack_array_depth());
-  if (task.marks_oops_alive() && ParallelGCThreads > 1) {
-    for (uint j=0; j<ParallelGCThreads; j++) {
+  if (task.marks_oops_alive() && manager->active_workers() > 1) {
+    for (uint j = 0; j < manager->active_workers(); j++) {
       q->enqueue(new StealTask(&terminator));
     }
   }
-  ParallelScavengeHeap::gc_task_manager()->execute_and_wait(q);
+  manager->execute_and_wait(q);
 }
 
 
 void PSRefProcTaskExecutor::execute(EnqueueTask& task)
 {
   GCTaskQueue* q = GCTaskQueue::create();
-  for(uint i=0; i<ParallelGCThreads; i++) {
+  GCTaskManager* manager = ParallelScavengeHeap::gc_task_manager();
+  for(uint i=0; i < manager->active_workers(); i++) {
     q->enqueue(new PSRefEnqueueTaskProxy(task, i));
   }
-  ParallelScavengeHeap::gc_task_manager()->execute_and_wait(q);
+  manager->execute_and_wait(q);
 }
 
 // This method contains all heap specific policy for invoking scavenge.
@@ -375,6 +376,14 @@
     // Release all previously held resources
     gc_task_manager()->release_all_resources();
 
+    // Set the number of GC threads to be used in this collection
+    gc_task_manager()->set_active_gang();
+    gc_task_manager()->task_idle_workers();
+    // Get the active number of workers here and use that value
+    // throughout the methods.
+    uint active_workers = gc_task_manager()->active_workers();
+    heap->set_par_threads(active_workers);
+
     PSPromotionManager::pre_scavenge();
 
     // We'll use the promotion manager again later.
@@ -385,8 +394,9 @@
 
       GCTaskQueue* q = GCTaskQueue::create();
 
-      for(uint i=0; i<ParallelGCThreads; i++) {
-        q->enqueue(new OldToYoungRootsTask(old_gen, old_top, i));
+      uint stripe_total = active_workers;
+      for(uint i=0; i < stripe_total; i++) {
+        q->enqueue(new OldToYoungRootsTask(old_gen, old_top, i, stripe_total));
       }
 
       q->enqueue(new SerialOldToYoungRootsTask(perm_gen, perm_top));
@@ -403,10 +413,10 @@
       q->enqueue(new ScavengeRootsTask(ScavengeRootsTask::code_cache));
 
       ParallelTaskTerminator terminator(
-                  gc_task_manager()->workers(),
+        active_workers,
                   (TaskQueueSetSuper*) promotion_manager->stack_array_depth());
-      if (ParallelGCThreads>1) {
-        for (uint j=0; j<ParallelGCThreads; j++) {
+      if (active_workers > 1) {
+        for (uint j = 0; j < active_workers; j++) {
           q->enqueue(new StealTask(&terminator));
         }
       }
@@ -419,6 +429,7 @@
     // Process reference objects discovered during scavenge
     {
       reference_processor()->setup_policy(false); // not always_clear
+      reference_processor()->set_active_mt_degree(active_workers);
       PSKeepAliveClosure keep_alive(promotion_manager);
       PSEvacuateFollowersClosure evac_followers(promotion_manager);
       if (reference_processor()->processing_is_mt()) {
@@ -622,6 +633,8 @@
     // Track memory usage and detect low memory
     MemoryService::track_memory_usage();
     heap->update_counters();
+
+    gc_task_manager()->release_idle_workers();
   }
 
   if (VerifyAfterGC && heap->total_collections() >= VerifyGCStartAt) {
@@ -804,6 +817,7 @@
 
   // Initialize ref handling object for scavenging.
   MemRegion mr = young_gen->reserved();
+
   _ref_processor =
     new ReferenceProcessor(mr,                         // span
                            ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing
--- a/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -202,7 +202,8 @@
                                            _gen->object_space(),
                                            _gen_top,
                                            pm,
-                                           _stripe_number);
+                                           _stripe_number,
+                                           _stripe_total);
 
     // Do the real work
     pm->drain_stacks(false);
--- a/src/share/vm/gc_implementation/parallelScavenge/psTasks.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psTasks.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -135,16 +135,63 @@
 // OldToYoungRootsTask
 //
 // This task is used to scan old to young roots in parallel
+//
+// A GC thread executing this tasks divides the generation (old gen)
+// into slices and takes a stripe in the slice as its part of the
+// work.
+//
+//      +===============+        slice 0
+//      |  stripe 0     |
+//      +---------------+
+//      |  stripe 1     |
+//      +---------------+
+//      |  stripe 2     |
+//      +---------------+
+//      |  stripe 3     |
+//      +===============+        slice 1
+//      |  stripe 0     |
+//      +---------------+
+//      |  stripe 1     |
+//      +---------------+
+//      |  stripe 2     |
+//      +---------------+
+//      |  stripe 3     |
+//      +===============+        slice 2
+//      ...
+//
+// A task is created for each stripe.  In this case there are 4 tasks
+// created.  A GC thread first works on its stripe within slice 0
+// and then moves to its stripe in the next slice until all stripes
+// exceed the top of the generation.  Note that having fewer GC threads
+// than stripes works because all the tasks are executed so all stripes
+// will be covered.  In this example if 4 tasks have been created to cover
+// all the stripes and there are only 3 threads, one of the threads will
+// get the tasks with the 4th stripe.  However, there is a dependence in
+// CardTableExtension::scavenge_contents_parallel() on the number
+// of tasks created.  In scavenge_contents_parallel the distance
+// to the next stripe is calculated based on the number of tasks.
+// If the stripe width is ssize, a task's next stripe is at
+// ssize * number_of_tasks (= slice_stride).  In this case after
+// finishing stripe 0 in slice 0, the thread finds the stripe 0 in slice1
+// by adding slice_stride to the start of stripe 0 in slice 0 to get
+// to the start of stride 0 in slice 1.
 
 class OldToYoungRootsTask : public GCTask {
  private:
   PSOldGen* _gen;
   HeapWord* _gen_top;
   uint _stripe_number;
+  uint _stripe_total;
 
  public:
-  OldToYoungRootsTask(PSOldGen *gen, HeapWord* gen_top, uint stripe_number) :
-    _gen(gen), _gen_top(gen_top), _stripe_number(stripe_number) { }
+  OldToYoungRootsTask(PSOldGen *gen,
+                      HeapWord* gen_top,
+                      uint stripe_number,
+                      uint stripe_total) :
+    _gen(gen),
+    _gen_top(gen_top),
+    _stripe_number(stripe_number),
+    _stripe_total(stripe_total) { }
 
   char* name() { return (char *)"old-to-young-roots-task"; }
 
--- a/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -28,8 +28,10 @@
 #include "memory/collectorPolicy.hpp"
 #include "runtime/timer.hpp"
 #include "utilities/ostream.hpp"
+#include "utilities/workgroup.hpp"
 elapsedTimer AdaptiveSizePolicy::_minor_timer;
 elapsedTimer AdaptiveSizePolicy::_major_timer;
+bool AdaptiveSizePolicy::_debug_perturbation = false;
 
 // The throughput goal is implemented as
 //      _throughput_goal = 1 - ( 1 / (1 + gc_cost_ratio))
@@ -88,6 +90,134 @@
   _young_gen_policy_is_ready = false;
 }
 
+//  If the number of GC threads was set on the command line,
+// use it.
+//  Else
+//    Calculate the number of GC threads based on the number of Java threads.
+//    Calculate the number of GC threads based on the size of the heap.
+//    Use the larger.
+
+int AdaptiveSizePolicy::calc_default_active_workers(uintx total_workers,
+                                            const uintx min_workers,
+                                            uintx active_workers,
+                                            uintx application_workers) {
+  // If the user has specifically set the number of
+  // GC threads, use them.
+
+  // If the user has turned off using a dynamic number of GC threads
+  // or the users has requested a specific number, set the active
+  // number of workers to all the workers.
+
+  uintx new_active_workers = total_workers;
+  uintx prev_active_workers = active_workers;
+  uintx active_workers_by_JT = 0;
+  uintx active_workers_by_heap_size = 0;
+
+  // Always use at least min_workers but use up to
+  // GCThreadsPerJavaThreads * application threads.
+  active_workers_by_JT =
+    MAX2((uintx) GCWorkersPerJavaThread * application_workers,
+         min_workers);
+
+  // Choose a number of GC threads based on the current size
+  // of the heap.  This may be complicated because the size of
+  // the heap depends on factors such as the thoughput goal.
+  // Still a large heap should be collected by more GC threads.
+  active_workers_by_heap_size =
+      MAX2((size_t) 2U, Universe::heap()->capacity() / HeapSizePerGCThread);
+
+  uintx max_active_workers =
+    MAX2(active_workers_by_JT, active_workers_by_heap_size);
+
+  // Limit the number of workers to the the number created,
+  // (workers()).
+  new_active_workers = MIN2(max_active_workers,
+                                (uintx) total_workers);
+
+  // Increase GC workers instantly but decrease them more
+  // slowly.
+  if (new_active_workers < prev_active_workers) {
+    new_active_workers =
+      MAX2(min_workers, (prev_active_workers + new_active_workers) / 2);
+  }
+
+  // Check once more that the number of workers is within the limits.
+  assert(min_workers <= total_workers, "Minimum workers not consistent with total workers");
+  assert(new_active_workers >= min_workers, "Minimum workers not observed");
+  assert(new_active_workers <= total_workers, "Total workers not observed");
+
+  if (ForceDynamicNumberOfGCThreads) {
+    // Assume this is debugging and jiggle the number of GC threads.
+    if (new_active_workers == prev_active_workers) {
+      if (new_active_workers < total_workers) {
+        new_active_workers++;
+      } else if (new_active_workers > min_workers) {
+        new_active_workers--;
+      }
+    }
+    if (new_active_workers == total_workers) {
+      if (_debug_perturbation) {
+        new_active_workers =  min_workers;
+      }
+      _debug_perturbation = !_debug_perturbation;
+    }
+    assert((new_active_workers <= (uintx) ParallelGCThreads) &&
+           (new_active_workers >= min_workers),
+      "Jiggled active workers too much");
+  }
+
+  if (TraceDynamicGCThreads) {
+     gclog_or_tty->print_cr("GCTaskManager::calc_default_active_workers() : "
+       "active_workers(): %d  new_acitve_workers: %d  "
+       "prev_active_workers: %d\n"
+       " active_workers_by_JT: %d  active_workers_by_heap_size: %d",
+       active_workers, new_active_workers, prev_active_workers,
+       active_workers_by_JT, active_workers_by_heap_size);
+  }
+  assert(new_active_workers > 0, "Always need at least 1");
+  return new_active_workers;
+}
+
+int AdaptiveSizePolicy::calc_active_workers(uintx total_workers,
+                                            uintx active_workers,
+                                            uintx application_workers) {
+  // If the user has specifically set the number of
+  // GC threads, use them.
+
+  // If the user has turned off using a dynamic number of GC threads
+  // or the users has requested a specific number, set the active
+  // number of workers to all the workers.
+
+  int new_active_workers;
+  if (!UseDynamicNumberOfGCThreads ||
+     (!FLAG_IS_DEFAULT(ParallelGCThreads) && !ForceDynamicNumberOfGCThreads)) {
+    new_active_workers = total_workers;
+  } else {
+    new_active_workers = calc_default_active_workers(total_workers,
+                                                     2, /* Minimum number of workers */
+                                                     active_workers,
+                                                     application_workers);
+  }
+  assert(new_active_workers > 0, "Always need at least 1");
+  return new_active_workers;
+}
+
+int AdaptiveSizePolicy::calc_active_conc_workers(uintx total_workers,
+                                                 uintx active_workers,
+                                                 uintx application_workers) {
+  if (!UseDynamicNumberOfGCThreads ||
+     (!FLAG_IS_DEFAULT(ConcGCThreads) && !ForceDynamicNumberOfGCThreads)) {
+    return ConcGCThreads;
+  } else {
+    int no_of_gc_threads = calc_default_active_workers(
+                             total_workers,
+                             1, /* Minimum number of workers */
+                             active_workers,
+                             application_workers);
+    return no_of_gc_threads;
+  }
+}
+
 bool AdaptiveSizePolicy::tenuring_threshold_change() const {
   return decrement_tenuring_threshold_for_gc_cost() ||
          increment_tenuring_threshold_for_gc_cost() ||
--- a/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -187,6 +187,8 @@
   julong _young_gen_change_for_minor_throughput;
   julong _old_gen_change_for_major_throughput;
 
+  static const uint GCWorkersPerJavaThread  = 2;
+
   // Accessors
 
   double gc_pause_goal_sec() const { return _gc_pause_goal_sec; }
@@ -331,6 +333,8 @@
   // Return true if the policy suggested a change.
   bool tenuring_threshold_change() const;
 
+  static bool _debug_perturbation;
+
  public:
   AdaptiveSizePolicy(size_t init_eden_size,
                      size_t init_promo_size,
@@ -338,6 +342,31 @@
                      double gc_pause_goal_sec,
                      uint gc_cost_ratio);
 
+  // Return number default  GC threads to use in the next GC.
+  static int calc_default_active_workers(uintx total_workers,
+                                         const uintx min_workers,
+                                         uintx active_workers,
+                                         uintx application_workers);
+
+  // Return number of GC threads to use in the next GC.
+  // This is called sparingly so as not to change the
+  // number of GC workers gratuitously.
+  //   For ParNew collections
+  //   For PS scavenge and ParOld collections
+  //   For G1 evacuation pauses (subject to update)
+  // Other collection phases inherit the number of
+  // GC workers from the calls above.  For example,
+  // a CMS parallel remark uses the same number of GC
+  // workers as the most recent ParNew collection.
+  static int calc_active_workers(uintx total_workers,
+                                 uintx active_workers,
+                                 uintx application_workers);
+
+  // Return number of GC threads to use in the next concurrent GC phase.
+  static int calc_active_conc_workers(uintx total_workers,
+                                      uintx active_workers,
+                                      uintx application_workers);
+
   bool is_gc_cms_adaptive_size_policy() {
     return kind() == _gc_cms_adaptive_size_policy;
   }
--- a/src/share/vm/gc_implementation/shared/markSweep.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/shared/markSweep.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -196,8 +196,6 @@
   static void mark_object(oop obj);
   // Mark pointer and follow contents.  Empty marking stack afterwards.
   template <class T> static inline void follow_root(T* p);
-  // Mark pointer and follow contents.
-  template <class T> static inline void mark_and_follow(T* p);
   // Check mark and maybe push on marking stack
   template <class T> static inline void mark_and_push(T* p);
   static inline void push_objarray(oop obj, size_t index);
--- a/src/share/vm/gc_implementation/shared/markSweep.inline.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/gc_implementation/shared/markSweep.inline.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -63,18 +63,6 @@
   follow_stack();
 }
 
-template <class T> inline void MarkSweep::mark_and_follow(T* p) {
-//  assert(Universe::heap()->is_in_reserved(p), "should be in object space");
-  T heap_oop = oopDesc::load_heap_oop(p);
-  if (!oopDesc::is_null(heap_oop)) {
-    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-    if (!obj->mark()->is_marked()) {
-      mark_object(obj);
-      obj->follow_contents();
-    }
-  }
-}
-
 template <class T> inline void MarkSweep::mark_and_push(T* p) {
 //  assert(Universe::heap()->is_in_reserved(p), "should be in object space");
   T heap_oop = oopDesc::load_heap_oop(p);
--- a/src/share/vm/memory/cardTableModRefBS.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/memory/cardTableModRefBS.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -460,9 +460,43 @@
                                                                  OopsInGenClosure* cl,
                                                                  CardTableRS* ct) {
   if (!mr.is_empty()) {
-    int n_threads = SharedHeap::heap()->n_par_threads();
-    if (n_threads > 0) {
+    // Caller (process_strong_roots()) claims that all GC threads
+    // execute this call.  With UseDynamicNumberOfGCThreads now all
+    // active GC threads execute this call.  The number of active GC
+    // threads needs to be passed to par_non_clean_card_iterate_work()
+    // to get proper partitioning and termination.
+    //
+    // This is an example of where n_par_threads() is used instead
+    // of workers()->active_workers().  n_par_threads can be set to 0 to
+    // turn off parallelism.  For example when this code is called as
+    // part of verification and SharedHeap::process_strong_roots() is being
+    // used, then n_par_threads() may have been set to 0.  active_workers
+    // is not overloaded with the meaning that it is a switch to disable
+    // parallelism and so keeps the meaning of the number of
+    // active gc workers.  If parallelism has not been shut off by
+    // setting n_par_threads to 0, then n_par_threads should be
+    // equal to active_workers.  When a different mechanism for shutting
+    // off parallelism is used, then active_workers can be used in
+    // place of n_par_threads.
+    //  This is an example of a path where n_par_threads is
+    // set to 0 to turn off parallism.
+    //  [7] CardTableModRefBS::non_clean_card_iterate()
+    //  [8] CardTableRS::younger_refs_in_space_iterate()
+    //  [9] Generation::younger_refs_in_space_iterate()
+    //  [10] OneContigSpaceCardGeneration::younger_refs_iterate()
+    //  [11] CompactingPermGenGen::younger_refs_iterate()
+    //  [12] CardTableRS::younger_refs_iterate()
+    //  [13] SharedHeap::process_strong_roots()
+    //  [14] G1CollectedHeap::verify()
+    //  [15] Universe::verify()
+    //  [16] G1CollectedHeap::do_collection_pause_at_safepoint()
+    //
+    int n_threads =  SharedHeap::heap()->n_par_threads();
+    bool is_par = n_threads > 0;
+    if (is_par) {
 #ifndef SERIALGC
+      assert(SharedHeap::heap()->n_par_threads() ==
+             SharedHeap::heap()->workers()->active_workers(), "Mismatch");
       non_clean_card_iterate_parallel_work(sp, mr, cl, ct, n_threads);
 #else  // SERIALGC
       fatal("Parallel gc not supported here.");
@@ -489,6 +523,10 @@
 // change their values in any manner.
 void CardTableModRefBS::non_clean_card_iterate_serial(MemRegion mr,
                                                       MemRegionClosure* cl) {
+  bool is_par = (SharedHeap::heap()->n_par_threads() > 0);
+  assert(!is_par ||
+          (SharedHeap::heap()->n_par_threads() ==
+          SharedHeap::heap()->workers()->active_workers()), "Mismatch");
   for (int i = 0; i < _cur_covered_regions; i++) {
     MemRegion mri = mr.intersection(_covered[i]);
     if (mri.word_size() > 0) {
@@ -624,23 +662,6 @@
   return MemRegion(mr.end(), mr.end());
 }
 
-// Set all the dirty cards in the given region to "precleaned" state.
-void CardTableModRefBS::preclean_dirty_cards(MemRegion mr) {
-  for (int i = 0; i < _cur_covered_regions; i++) {
-    MemRegion mri = mr.intersection(_covered[i]);
-    if (!mri.is_empty()) {
-      jbyte *cur_entry, *limit;
-      for (cur_entry = byte_for(mri.start()), limit = byte_for(mri.last());
-           cur_entry <= limit;
-           cur_entry++) {
-        if (*cur_entry == dirty_card) {
-          *cur_entry = precleaned_card;
-        }
-      }
-    }
-  }
-}
-
 uintx CardTableModRefBS::ct_max_alignment_constraint() {
   return card_size * os::vm_page_size();
 }
--- a/src/share/vm/memory/cardTableModRefBS.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/memory/cardTableModRefBS.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -435,9 +435,6 @@
   MemRegion dirty_card_range_after_reset(MemRegion mr, bool reset,
                                          int reset_val);
 
-  // Set all the dirty cards in the given region to precleaned state.
-  void preclean_dirty_cards(MemRegion mr);
-
   // Provide read-only access to the card table array.
   const jbyte* byte_for_const(const void* p) const {
     return byte_for(p);
--- a/src/share/vm/memory/cardTableRS.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/memory/cardTableRS.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -164,7 +164,13 @@
 ClearNoncleanCardWrapper::ClearNoncleanCardWrapper(
   DirtyCardToOopClosure* dirty_card_closure, CardTableRS* ct) :
     _dirty_card_closure(dirty_card_closure), _ct(ct) {
+    // Cannot yet substitute active_workers for n_par_threads
+    // in the case where parallelism is being turned off by
+    // setting n_par_threads to 0.
     _is_par = (SharedHeap::heap()->n_par_threads() > 0);
+    assert(!_is_par ||
+           (SharedHeap::heap()->n_par_threads() ==
+            SharedHeap::heap()->workers()->active_workers()), "Mismatch");
 }
 
 void ClearNoncleanCardWrapper::do_MemRegion(MemRegion mr) {
--- a/src/share/vm/memory/sharedHeap.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/memory/sharedHeap.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -58,7 +58,6 @@
   _perm_gen(NULL), _rem_set(NULL),
   _strong_roots_parity(0),
   _process_strong_tasks(new SubTasksDone(SH_PS_NumElements)),
-  _n_par_threads(0),
   _workers(NULL)
 {
   if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) {
@@ -80,6 +79,14 @@
   }
 }
 
+int SharedHeap::n_termination() {
+  return _process_strong_tasks->n_threads();
+}
+
+void SharedHeap::set_n_termination(int t) {
+  _process_strong_tasks->set_n_threads(t);
+}
+
 bool SharedHeap::heap_lock_held_for_gc() {
   Thread* t = Thread::current();
   return    Heap_lock->owned_by_self()
@@ -144,6 +151,10 @@
   StrongRootsScope srs(this, activate_scope);
   // General strong roots.
   assert(_strong_roots_parity != 0, "must have called prologue code");
+  // _n_termination for _process_strong_tasks should be set up stream
+  // in a method not running in a GC worker.  Otherwise the GC worker
+  // could be trying to change the termination condition while the task
+  // is executing in another GC worker.
   if (!_process_strong_tasks->is_task_claimed(SH_PS_Universe_oops_do)) {
     Universe::oops_do(roots);
     // Consider perm-gen discovered lists to be strong.
--- a/src/share/vm/memory/sharedHeap.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/memory/sharedHeap.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -49,6 +49,62 @@
 class CollectorPolicy;
 class KlassHandle;
 
+// Note on use of FlexibleWorkGang's for GC.
+// There are three places where task completion is determined.
+// In
+//    1) ParallelTaskTerminator::offer_termination() where _n_threads
+//    must be set to the correct value so that count of workers that
+//    have offered termination will exactly match the number
+//    working on the task.  Tasks such as those derived from GCTask
+//    use ParallelTaskTerminator's.  Tasks that want load balancing
+//    by work stealing use this method to gauge completion.
+//    2) SubTasksDone has a variable _n_threads that is used in
+//    all_tasks_completed() to determine completion.  all_tasks_complete()
+//    counts the number of tasks that have been done and then reset
+//    the SubTasksDone so that it can be used again.  When the number of
+//    tasks is set to the number of GC workers, then _n_threads must
+//    be set to the number of active GC workers. G1CollectedHeap,
+//    HRInto_G1RemSet, GenCollectedHeap and SharedHeap have SubTasksDone.
+//    This seems too many.
+//    3) SequentialSubTasksDone has an _n_threads that is used in
+//    a way similar to SubTasksDone and has the same dependency on the
+//    number of active GC workers.  CompactibleFreeListSpace and Space
+//    have SequentialSubTasksDone's.
+// Example of using SubTasksDone and SequentialSubTasksDone
+// G1CollectedHeap::g1_process_strong_roots() calls
+//  process_strong_roots(false, // no scoping; this is parallel code
+//                       collecting_perm_gen, so,
+//                       &buf_scan_non_heap_roots,
+//                       &eager_scan_code_roots,
+//                       &buf_scan_perm);
+//  which delegates to SharedHeap::process_strong_roots() and uses
+//  SubTasksDone* _process_strong_tasks to claim tasks.
+//  process_strong_roots() calls
+//      rem_set()->younger_refs_iterate(perm_gen(), perm_blk);
+//  to scan the card table and which eventually calls down into
+//  CardTableModRefBS::par_non_clean_card_iterate_work().  This method
+//  uses SequentialSubTasksDone* _pst to claim tasks.
+//  Both SubTasksDone and SequentialSubTasksDone call their method
+//  all_tasks_completed() to count the number of GC workers that have
+//  finished their work.  That logic is "when all the workers are
+//  finished the tasks are finished".
+//
+//  The pattern that appears  in the code is to set _n_threads
+//  to a value > 1 before a task that you would like executed in parallel
+//  and then to set it to 0 after that task has completed.  A value of
+//  0 is a "special" value in set_n_threads() which translates to
+//  setting _n_threads to 1.
+//
+//  Some code uses _n_terminiation to decide if work should be done in
+//  parallel.  The notorious possibly_parallel_oops_do() in threads.cpp
+//  is an example of such code.  Look for variable "is_par" for other
+//  examples.
+//
+//  The active_workers is not reset to 0 after a parallel phase.  It's
+//  value may be used in later phases and in one instance at least
+//  (the parallel remark) it has to be used (the parallel remark depends
+//  on the partitioning done in the previous parallel scavenge).
+
 class SharedHeap : public CollectedHeap {
   friend class VMStructs;
 
@@ -84,11 +140,6 @@
   // If we're doing parallel GC, use this gang of threads.
   FlexibleWorkGang* _workers;
 
-  // Number of parallel threads currently working on GC tasks.
-  // O indicates use sequential code; 1 means use parallel code even with
-  // only one thread, for performance testing purposes.
-  int _n_par_threads;
-
   // Full initialization is done in a concrete subtype's "initialize"
   // function.
   SharedHeap(CollectorPolicy* policy_);
@@ -107,6 +158,7 @@
   CollectorPolicy *collector_policy() const { return _collector_policy; }
 
   void set_barrier_set(BarrierSet* bs);
+  SubTasksDone* process_strong_tasks() { return _process_strong_tasks; }
 
   // Does operations required after initialization has been done.
   virtual void post_initialize();
@@ -198,13 +250,6 @@
 
   FlexibleWorkGang* workers() const { return _workers; }
 
-  // Sets the number of parallel threads that will be doing tasks
-  // (such as process strong roots) subsequently.
-  virtual void set_par_threads(int t);
-
-  // Number of threads currently working on GC tasks.
-  int n_par_threads() { return _n_par_threads; }
-
   // Invoke the "do_oop" method the closure "roots" on all root locations.
   // If "collecting_perm_gen" is false, then roots that may only contain
   // references to permGen objects are not scanned; instead, in that case,
@@ -240,6 +285,13 @@
   virtual void gc_prologue(bool full) = 0;
   virtual void gc_epilogue(bool full) = 0;
 
+  // Sets the number of parallel threads that will be doing tasks
+  // (such as process strong roots) subsequently.
+  virtual void set_par_threads(int t);
+
+  int n_termination();
+  void set_n_termination(int t);
+
   //
   // New methods from CollectedHeap
   //
--- a/src/share/vm/memory/space.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/memory/space.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -533,7 +533,8 @@
    * by the MarkSweepAlwaysCompactCount parameter.                           \
    */                                                                        \
   int invocations = SharedHeap::heap()->perm_gen()->stat_record()->invocations;\
-  bool skip_dead = ((invocations % MarkSweepAlwaysCompactCount) != 0);       \
+  bool skip_dead = (MarkSweepAlwaysCompactCount < 1)                         \
+    ||((invocations % MarkSweepAlwaysCompactCount) != 0);                    \
                                                                              \
   size_t allowed_deadspace = 0;                                              \
   if (skip_dead) {                                                           \
--- a/src/share/vm/oops/objArrayOop.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/oops/objArrayOop.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -34,7 +34,7 @@
   friend class objArrayKlass;
   friend class Runtime1;
   friend class psPromotionManager;
-  friend class CSMarkOopClosure;
+  friend class CSetMarkOopClosure;
   friend class G1ParScanPartialArrayClosure;
 
   template <class T> T* obj_at_addr(int index) const {
--- a/src/share/vm/opto/block.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/block.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -898,45 +898,41 @@
 void PhaseCFG::verify( ) const {
 #ifdef ASSERT
   // Verify sane CFG
-  for( uint i = 0; i < _num_blocks; i++ ) {
+  for (uint i = 0; i < _num_blocks; i++) {
     Block *b = _blocks[i];
     uint cnt = b->_nodes.size();
     uint j;
-    for( j = 0; j < cnt; j++ ) {
+    for (j = 0; j < cnt; j++)  {
       Node *n = b->_nodes[j];
       assert( _bbs[n->_idx] == b, "" );
-      if( j >= 1 && n->is_Mach() &&
-          n->as_Mach()->ideal_Opcode() == Op_CreateEx ) {
-        assert( j == 1 || b->_nodes[j-1]->is_Phi(),
-                "CreateEx must be first instruction in block" );
+      if (j >= 1 && n->is_Mach() &&
+          n->as_Mach()->ideal_Opcode() == Op_CreateEx) {
+        assert(j == 1 || b->_nodes[j-1]->is_Phi(),
+               "CreateEx must be first instruction in block");
       }
-      for( uint k = 0; k < n->req(); k++ ) {
+      for (uint k = 0; k < n->req(); k++) {
         Node *def = n->in(k);
-        if( def && def != n ) {
-          assert( _bbs[def->_idx] || def->is_Con(),
-                  "must have block; constants for debug info ok" );
+        if (def && def != n) {
+          assert(_bbs[def->_idx] || def->is_Con(),
+                 "must have block; constants for debug info ok");
           // Verify that instructions in the block is in correct order.
           // Uses must follow their definition if they are at the same block.
           // Mostly done to check that MachSpillCopy nodes are placed correctly
           // when CreateEx node is moved in build_ifg_physical().
-          if( _bbs[def->_idx] == b &&
+          if (_bbs[def->_idx] == b &&
               !(b->head()->is_Loop() && n->is_Phi()) &&
               // See (+++) comment in reg_split.cpp
-              !(n->jvms() != NULL && n->jvms()->is_monitor_use(k)) ) {
+              !(n->jvms() != NULL && n->jvms()->is_monitor_use(k))) {
             bool is_loop = false;
             if (n->is_Phi()) {
-              for( uint l = 1; l < def->req(); l++ ) {
+              for (uint l = 1; l < def->req(); l++) {
                 if (n == def->in(l)) {
                   is_loop = true;
                   break; // Some kind of loop
                 }
               }
             }
-            assert( is_loop || b->find_node(def) < j, "uses must follow definitions" );
-          }
-          if( def->is_SafePointScalarObject() ) {
-            assert(_bbs[def->_idx] == b, "SafePointScalarObject Node should be at the same block as its SafePoint node");
-            assert(_bbs[def->_idx] == _bbs[def->in(0)->_idx], "SafePointScalarObject Node should be at the same block as its control edge");
+            assert(is_loop || b->find_node(def) < j, "uses must follow definitions");
           }
         }
       }
@@ -946,12 +942,11 @@
     Node *bp = (Node*)b->_nodes[b->_nodes.size()-1]->is_block_proj();
     assert( bp, "last instruction must be a block proj" );
     assert( bp == b->_nodes[j], "wrong number of successors for this block" );
-    if( bp->is_Catch() ) {
-      while( b->_nodes[--j]->is_MachProj() ) ;
-      assert( b->_nodes[j]->is_MachCall(), "CatchProj must follow call" );
-    }
-    else if( bp->is_Mach() && bp->as_Mach()->ideal_Opcode() == Op_If ) {
-      assert( b->_num_succs == 2, "Conditional branch must have two targets");
+    if (bp->is_Catch()) {
+      while (b->_nodes[--j]->is_MachProj()) ;
+      assert(b->_nodes[j]->is_MachCall(), "CatchProj must follow call");
+    } else if (bp->is_Mach() && bp->as_Mach()->ideal_Opcode() == Op_If) {
+      assert(b->_num_succs == 2, "Conditional branch must have two targets");
     }
   }
 #endif
--- a/src/share/vm/opto/block.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/block.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -281,6 +281,8 @@
   // Find and remove n from block list
   void find_remove( const Node *n );
 
+  // helper function that adds caller save registers to MachProjNode
+  void add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe);
   // Schedule a call next in the block
   uint sched_call(Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call);
 
--- a/src/share/vm/opto/c2_globals.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/c2_globals.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -456,6 +456,12 @@
   product(intx, EliminateAllocationArraySizeLimit, 64,                      \
           "Array size (number of elements) limit for scalar replacement")   \
                                                                             \
+  product(bool, OptimizePtrCompare, true,                                   \
+          "Use escape analysis to optimize pointers compare")               \
+                                                                            \
+  notproduct(bool, PrintOptimizePtrCompare, false,                          \
+          "Print information about optimized pointers compare")             \
+                                                                            \
   product(bool, UseOptoBiasInlining, true,                                  \
           "Generate biased locking code in C2 ideal graph")                 \
                                                                             \
--- a/src/share/vm/opto/callGenerator.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/callGenerator.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -318,17 +318,17 @@
   return new DirectCallGenerator(m, separate_io_proj);
 }
 
-CallGenerator* CallGenerator::for_dynamic_call(ciMethod* m) {
-  assert(m->is_method_handle_invoke() || m->is_method_handle_adapter(), "for_dynamic_call mismatch");
-  return new DynamicCallGenerator(m);
-}
-
 CallGenerator* CallGenerator::for_virtual_call(ciMethod* m, int vtable_index) {
   assert(!m->is_static(), "for_virtual_call mismatch");
   assert(!m->is_method_handle_invoke(), "should be a direct call");
   return new VirtualCallGenerator(m, vtable_index);
 }
 
+CallGenerator* CallGenerator::for_dynamic_call(ciMethod* m) {
+  assert(m->is_method_handle_invoke() || m->is_method_handle_adapter(), "for_dynamic_call mismatch");
+  return new DynamicCallGenerator(m);
+}
+
 // Allow inlining decisions to be delayed
 class LateInlineCallGenerator : public DirectCallGenerator {
   CallGenerator* _inline_cg;
@@ -576,7 +576,9 @@
     kit.set_control(slow_ctl);
     if (!kit.stopped()) {
       slow_jvms = _if_missed->generate(kit.sync_jvms());
-      assert(slow_jvms != NULL, "miss path must not fail to generate");
+      if (kit.failing())
+        return NULL;  // might happen because of NodeCountInliningCutoff
+      assert(slow_jvms != NULL, "must be");
       kit.add_exception_states_from(slow_jvms);
       kit.set_map(slow_jvms->map());
       if (!kit.stopped())
@@ -682,6 +684,15 @@
 }
 
 
+CallGenerator* CallGenerator::for_method_handle_call(Node* method_handle, JVMState* jvms,
+                                                     ciMethod* caller, ciMethod* callee, ciCallProfile profile) {
+  assert(callee->is_method_handle_invoke() || callee->is_method_handle_adapter(), "for_method_handle_call mismatch");
+  CallGenerator* cg = CallGenerator::for_method_handle_inline(method_handle, jvms, caller, callee, profile);
+  if (cg != NULL)
+    return cg;
+  return CallGenerator::for_direct_call(callee);
+}
+
 CallGenerator* CallGenerator::for_method_handle_inline(Node* method_handle, JVMState* jvms,
                                                        ciMethod* caller, ciMethod* callee, ciCallProfile profile) {
   if (method_handle->Opcode() == Op_ConP) {
@@ -721,8 +732,8 @@
     // Generate a guard so that each can be inlined.  We might want to
     // do more inputs at later point but this gets the most common
     // case.
-    CallGenerator* cg1 = for_method_handle_inline(method_handle->in(1), jvms, caller, callee, profile.rescale(1.0 - prob));
-    CallGenerator* cg2 = for_method_handle_inline(method_handle->in(2), jvms, caller, callee, profile.rescale(prob));
+    CallGenerator* cg1 = for_method_handle_call(method_handle->in(1), jvms, caller, callee, profile.rescale(1.0 - prob));
+    CallGenerator* cg2 = for_method_handle_call(method_handle->in(2), jvms, caller, callee, profile.rescale(prob));
     if (cg1 != NULL && cg2 != NULL) {
       const TypeOopPtr* oop_ptr = method_handle->in(1)->bottom_type()->is_oopptr();
       ciObject* const_oop = oop_ptr->const_oop();
@@ -733,6 +744,17 @@
   return NULL;
 }
 
+CallGenerator* CallGenerator::for_invokedynamic_call(JVMState* jvms, ciMethod* caller, ciMethod* callee, ciCallProfile profile) {
+  assert(callee->is_method_handle_invoke() || callee->is_method_handle_adapter(), "for_invokedynamic_call mismatch");
+  // Get the CallSite object.
+  ciBytecodeStream str(caller);
+  str.force_bci(jvms->bci());  // Set the stream to the invokedynamic bci.
+  ciCallSite* call_site = str.get_call_site();
+  CallGenerator* cg = CallGenerator::for_invokedynamic_inline(call_site, jvms, caller, callee, profile);
+  if (cg != NULL)
+    return cg;
+  return CallGenerator::for_dynamic_call(callee);
+}
 
 CallGenerator* CallGenerator::for_invokedynamic_inline(ciCallSite* call_site, JVMState* jvms,
                                                        ciMethod* caller, ciMethod* callee, ciCallProfile profile) {
@@ -819,7 +841,9 @@
     kit.set_control(slow_ctl);
     if (!kit.stopped()) {
       slow_jvms = _if_missed->generate(kit.sync_jvms());
-      assert(slow_jvms != NULL, "miss path must not fail to generate");
+      if (kit.failing())
+        return NULL;  // might happen because of NodeCountInliningCutoff
+      assert(slow_jvms != NULL, "must be");
       kit.add_exception_states_from(slow_jvms);
       kit.set_map(slow_jvms->map());
       if (!kit.stopped())
--- a/src/share/vm/opto/callGenerator.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/callGenerator.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -108,8 +108,11 @@
 
   // How to generate vanilla out-of-line call sites:
   static CallGenerator* for_direct_call(ciMethod* m, bool separate_io_projs = false);   // static, special
+  static CallGenerator* for_virtual_call(ciMethod* m, int vtable_index);  // virtual, interface
   static CallGenerator* for_dynamic_call(ciMethod* m);   // invokedynamic
-  static CallGenerator* for_virtual_call(ciMethod* m, int vtable_index);  // virtual, interface
+
+  static CallGenerator* for_method_handle_call(Node* method_handle, JVMState* jvms, ciMethod* caller, ciMethod* callee, ciCallProfile profile);
+  static CallGenerator* for_invokedynamic_call(                     JVMState* jvms, ciMethod* caller, ciMethod* callee, ciCallProfile profile);
 
   static CallGenerator* for_method_handle_inline(Node* method_handle,   JVMState* jvms, ciMethod* caller, ciMethod* callee, ciCallProfile profile);
   static CallGenerator* for_invokedynamic_inline(ciCallSite* call_site, JVMState* jvms, ciMethod* caller, ciMethod* callee, ciCallProfile profile);
--- a/src/share/vm/opto/callnode.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/callnode.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -1071,8 +1071,11 @@
   init_class_id(Class_SafePointScalarObject);
 }
 
-bool SafePointScalarObjectNode::pinned() const { return true; }
-bool SafePointScalarObjectNode::depends_only_on_test() const { return false; }
+// Do not allow value-numbering for SafePointScalarObject node.
+uint SafePointScalarObjectNode::hash() const { return NO_HASH; }
+uint SafePointScalarObjectNode::cmp( const Node &n ) const {
+  return (&n == this); // Always fail except on self
+}
 
 uint SafePointScalarObjectNode::ideal_reg() const {
   return 0; // No matching to machine instruction
@@ -1096,7 +1099,6 @@
   if (cached != NULL) {
     return (SafePointScalarObjectNode*)cached;
   }
-  Compile* C = Compile::current();
   SafePointScalarObjectNode* res = (SafePointScalarObjectNode*)Node::clone();
   res->_first_index += jvms_adj;
   sosn_map->Insert((void*)this, (void*)res);
@@ -1142,6 +1144,8 @@
 
 Node* AllocateArrayNode::Ideal(PhaseGVN *phase, bool can_reshape) {
   if (remove_dead_region(phase, can_reshape))  return this;
+  // Don't bother trying to transform a dead node
+  if (in(0) && in(0)->is_top())  return NULL;
 
   const Type* type = phase->type(Ideal_length());
   if (type->isa_int() && type->is_int()->_hi < 0) {
@@ -1522,13 +1526,16 @@
 
   // perform any generic optimizations first (returns 'this' or NULL)
   Node *result = SafePointNode::Ideal(phase, can_reshape);
+  if (result != NULL)  return result;
+  // Don't bother trying to transform a dead node
+  if (in(0) && in(0)->is_top())  return NULL;
 
   // Now see if we can optimize away this lock.  We don't actually
   // remove the locking here, we simply set the _eliminate flag which
   // prevents macro expansion from expanding the lock.  Since we don't
   // modify the graph, the value returned from this function is the
   // one computed above.
-  if (result == NULL && can_reshape && EliminateLocks && !is_eliminated()) {
+  if (can_reshape && EliminateLocks && (!is_eliminated() || is_coarsened())) {
     //
     // If we are locking an unescaped object, the lock/unlock is unnecessary
     //
@@ -1537,8 +1544,16 @@
     if (cgr != NULL)
       es = cgr->escape_state(obj_node());
     if (es != PointsToNode::UnknownEscape && es != PointsToNode::GlobalEscape) {
-      // Mark it eliminated to update any counters
-      this->set_eliminated();
+      if (!is_eliminated()) {
+        // Mark it eliminated to update any counters
+        this->set_eliminated();
+      } else {
+        assert(is_coarsened(), "sanity");
+        // The lock could be marked eliminated by lock coarsening
+        // code during first IGVN before EA. Clear coarsened flag
+        // to eliminate all associated locks/unlocks.
+        this->clear_coarsened();
+      }
       return result;
     }
 
@@ -1546,7 +1561,7 @@
     // Try lock coarsening
     //
     PhaseIterGVN* iter = phase->is_IterGVN();
-    if (iter != NULL) {
+    if (iter != NULL && !is_eliminated()) {
 
       GrowableArray<AbstractLockNode*>   lock_ops;
 
@@ -1602,7 +1617,7 @@
           lock->set_eliminated();
           lock->set_coarsened();
         }
-      } else if (result != NULL && ctrl->is_Region() &&
+      } else if (ctrl->is_Region() &&
                  iter->_worklist.member(ctrl)) {
         // We weren't able to find any opportunities but the region this
         // lock is control dependent on hasn't been processed yet so put
@@ -1623,7 +1638,10 @@
 Node *UnlockNode::Ideal(PhaseGVN *phase, bool can_reshape) {
 
   // perform any generic optimizations first (returns 'this' or NULL)
-  Node * result = SafePointNode::Ideal(phase, can_reshape);
+  Node *result = SafePointNode::Ideal(phase, can_reshape);
+  if (result != NULL)  return result;
+  // Don't bother trying to transform a dead node
+  if (in(0) && in(0)->is_top())  return NULL;
 
   // Now see if we can optimize away this unlock.  We don't actually
   // remove the unlocking here, we simply set the _eliminate flag which
@@ -1631,7 +1649,7 @@
   // modify the graph, the value returned from this function is the
   // one computed above.
   // Escape state is defined after Parse phase.
-  if (result == NULL && can_reshape && EliminateLocks && !is_eliminated()) {
+  if (can_reshape && EliminateLocks && (!is_eliminated() || is_coarsened())) {
     //
     // If we are unlocking an unescaped object, the lock/unlock is unnecessary.
     //
@@ -1640,8 +1658,16 @@
     if (cgr != NULL)
       es = cgr->escape_state(obj_node());
     if (es != PointsToNode::UnknownEscape && es != PointsToNode::GlobalEscape) {
-      // Mark it eliminated to update any counters
-      this->set_eliminated();
+      if (!is_eliminated()) {
+        // Mark it eliminated to update any counters
+        this->set_eliminated();
+      } else {
+        assert(is_coarsened(), "sanity");
+        // The lock could be marked eliminated by lock coarsening
+        // code during first IGVN before EA. Clear coarsened flag
+        // to eliminate all associated locks/unlocks.
+        this->clear_coarsened();
+      }
     }
   }
   return result;
--- a/src/share/vm/opto/callnode.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/callnode.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -440,6 +440,10 @@
                      // states of the scalarized object fields are collected.
   uint _n_fields;    // Number of non-static fields of the scalarized object.
   DEBUG_ONLY(AllocateNode* _alloc;)
+
+  virtual uint hash() const ; // { return NO_HASH; }
+  virtual uint cmp( const Node &n ) const;
+
 public:
   SafePointScalarObjectNode(const TypeOopPtr* tp,
 #ifdef ASSERT
@@ -454,15 +458,10 @@
 
   uint first_index() const { return _first_index; }
   uint n_fields()    const { return _n_fields; }
-  DEBUG_ONLY(AllocateNode* alloc() const { return _alloc; })
 
-  // SafePointScalarObject should be always pinned to the control edge
-  // of the SafePoint node for which it was generated.
-  virtual bool pinned() const; // { return true; }
-
-  // SafePointScalarObject depends on the SafePoint node
-  // for which it was generated.
-  virtual bool depends_only_on_test() const; // { return false; }
+#ifdef ASSERT
+  AllocateNode* alloc() const { return _alloc; }
+#endif
 
   virtual uint size_of() const { return sizeof(*this); }
 
@@ -880,6 +879,7 @@
 
   bool is_coarsened()  { return _coarsened; }
   void set_coarsened() { _coarsened = true; }
+  void clear_coarsened() { _coarsened = false; }
 
   // locking does not modify its arguments
   virtual bool        may_modify(const TypePtr *addr_t, PhaseTransform *phase){ return false;}
--- a/src/share/vm/opto/cfgnode.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/cfgnode.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -460,8 +460,11 @@
     // Is it dead loop?
     // If it is LoopNopde it had 2 (+1 itself) inputs and
     // one of them was cut. The loop is dead if it was EntryContol.
-    assert(!this->is_Loop() || cnt_orig == 3, "Loop node should have 3 inputs");
-    if (this->is_Loop() && del_it == LoopNode::EntryControl ||
+    // Loop node may have only one input because entry path
+    // is removed in PhaseIdealLoop::Dominators().
+    assert(!this->is_Loop() || cnt_orig <= 3, "Loop node should have 3 or less inputs");
+    if (this->is_Loop() && (del_it == LoopNode::EntryControl ||
+                            del_it == 0 && is_unreachable_region(phase)) ||
        !this->is_Loop() && has_phis && is_unreachable_region(phase)) {
       // Yes,  the region will be removed during the next step below.
       // Cut the backedge input and remove phis since no data paths left.
@@ -1585,14 +1588,17 @@
     // Only one not-NULL unique input path is left.
     // Determine if this input is backedge of a loop.
     // (Skip new phis which have no uses and dead regions).
-    if( outcnt() > 0 && r->in(0) != NULL ) {
+    if (outcnt() > 0 && r->in(0) != NULL) {
       // First, take the short cut when we know it is a loop and
       // the EntryControl data path is dead.
-      assert(!r->is_Loop() || r->req() == 3, "Loop node should have 3 inputs");
+      // Loop node may have only one input because entry path
+      // is removed in PhaseIdealLoop::Dominators().
+      assert(!r->is_Loop() || r->req() <= 3, "Loop node should have 3 or less inputs");
+      bool is_loop = (r->is_Loop() && r->req() == 3);
       // Then, check if there is a data loop when phi references itself directly
       // or through other data nodes.
-      if( r->is_Loop() && !phase->eqv_uncast(uin, in(LoopNode::EntryControl)) ||
-         !r->is_Loop() && is_unsafe_data_reference(uin) ) {
+      if (is_loop && !phase->eqv_uncast(uin, in(LoopNode::EntryControl)) ||
+         !is_loop && is_unsafe_data_reference(uin)) {
         // Break this data loop to avoid creation of a dead loop.
         if (can_reshape) {
           return top;
--- a/src/share/vm/opto/compile.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/compile.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -1711,11 +1711,22 @@
 
     if (failing())  return;
 
+    // Optimize out fields loads from scalar replaceable allocations.
     igvn.optimize();
     print_method("Iter GVN after EA", 2);
 
     if (failing())  return;
 
+    if (congraph() != NULL && macro_count() > 0) {
+      PhaseMacroExpand mexp(igvn);
+      mexp.eliminate_macro_nodes();
+      igvn.set_delay_transform(false);
+
+      igvn.optimize();
+      print_method("Iter GVN after eliminating allocations and locks", 2);
+
+      if (failing())  return;
+    }
   }
 
   // Loop transforms on the ideal graph.  Range Check Elimination,
@@ -3052,24 +3063,13 @@
   return false;
 }
 
-// Emit constants grouped in the following order:
-static BasicType type_order[] = {
-  T_FLOAT,    // 32-bit
-  T_OBJECT,   // 32 or 64-bit
-  T_ADDRESS,  // 32 or 64-bit
-  T_DOUBLE,   // 64-bit
-  T_LONG,     // 64-bit
-  T_VOID,     // 32 or 64-bit (jump-tables are at the end of the constant table for code emission reasons)
-  T_ILLEGAL
-};
-
 static int type_to_size_in_bytes(BasicType t) {
   switch (t) {
   case T_LONG:    return sizeof(jlong  );
   case T_FLOAT:   return sizeof(jfloat );
   case T_DOUBLE:  return sizeof(jdouble);
     // We use T_VOID as marker for jump-table entries (labels) which
-    // need an interal word relocation.
+    // need an internal word relocation.
   case T_VOID:
   case T_ADDRESS:
   case T_OBJECT:  return sizeof(jobject);
@@ -3079,87 +3079,92 @@
   return -1;
 }
 
+int Compile::ConstantTable::qsort_comparator(Constant* a, Constant* b) {
+  // sort descending
+  if (a->freq() > b->freq())  return -1;
+  if (a->freq() < b->freq())  return  1;
+  return 0;
+}
+
 void Compile::ConstantTable::calculate_offsets_and_size() {
-  int size = 0;
-  for (int t = 0; type_order[t] != T_ILLEGAL; t++) {
-    BasicType type = type_order[t];
-
-    for (int i = 0; i < _constants.length(); i++) {
-      Constant con = _constants.at(i);
-      if (con.type() != type)  continue;  // Skip other types.
-
-      // Align size for type.
-      int typesize = type_to_size_in_bytes(con.type());
-      size = align_size_up(size, typesize);
-
-      // Set offset.
-      con.set_offset(size);
-      _constants.at_put(i, con);
-
-      // Add type size.
-      size = size + typesize;
+  // First, sort the array by frequencies.
+  _constants.sort(qsort_comparator);
+
+#ifdef ASSERT
+  // Make sure all jump-table entries were sorted to the end of the
+  // array (they have a negative frequency).
+  bool found_void = false;
+  for (int i = 0; i < _constants.length(); i++) {
+    Constant con = _constants.at(i);
+    if (con.type() == T_VOID)
+      found_void = true;  // jump-tables
+    else
+      assert(!found_void, "wrong sorting");
+  }
+#endif
+
+  int offset = 0;
+  for (int i = 0; i < _constants.length(); i++) {
+    Constant* con = _constants.adr_at(i);
+
+    // Align offset for type.
+    int typesize = type_to_size_in_bytes(con->type());
+    offset = align_size_up(offset, typesize);
+    con->set_offset(offset);   // set constant's offset
+
+    if (con->type() == T_VOID) {
+      MachConstantNode* n = (MachConstantNode*) con->get_jobject();
+      offset = offset + typesize * n->outcnt();  // expand jump-table
+    } else {
+      offset = offset + typesize;
     }
   }
 
   // Align size up to the next section start (which is insts; see
   // CodeBuffer::align_at_start).
   assert(_size == -1, "already set?");
-  _size = align_size_up(size, CodeEntryAlignment);
-
-  if (Matcher::constant_table_absolute_addressing) {
-    set_table_base_offset(0);  // No table base offset required
-  } else {
-    if (UseRDPCForConstantTableBase) {
-      // table base offset is set in MachConstantBaseNode::emit
-    } else {
-      // When RDPC is not used, the table base is set into the middle of
-      // the constant table.
-      int half_size = _size / 2;
-      assert(half_size * 2 == _size, "sanity");
-      set_table_base_offset(-half_size);
-    }
-  }
+  _size = align_size_up(offset, CodeEntryAlignment);
 }
 
 void Compile::ConstantTable::emit(CodeBuffer& cb) {
   MacroAssembler _masm(&cb);
-  for (int t = 0; type_order[t] != T_ILLEGAL; t++) {
-    BasicType type = type_order[t];
-
-    for (int i = 0; i < _constants.length(); i++) {
-      Constant con = _constants.at(i);
-      if (con.type() != type)  continue;  // Skip other types.
-
-      address constant_addr;
-      switch (con.type()) {
-      case T_LONG:   constant_addr = _masm.long_constant(  con.get_jlong()  ); break;
-      case T_FLOAT:  constant_addr = _masm.float_constant( con.get_jfloat() ); break;
-      case T_DOUBLE: constant_addr = _masm.double_constant(con.get_jdouble()); break;
-      case T_OBJECT: {
-        jobject obj = con.get_jobject();
-        int oop_index = _masm.oop_recorder()->find_index(obj);
-        constant_addr = _masm.address_constant((address) obj, oop_Relocation::spec(oop_index));
-        break;
+  for (int i = 0; i < _constants.length(); i++) {
+    Constant con = _constants.at(i);
+    address constant_addr;
+    switch (con.type()) {
+    case T_LONG:   constant_addr = _masm.long_constant(  con.get_jlong()  ); break;
+    case T_FLOAT:  constant_addr = _masm.float_constant( con.get_jfloat() ); break;
+    case T_DOUBLE: constant_addr = _masm.double_constant(con.get_jdouble()); break;
+    case T_OBJECT: {
+      jobject obj = con.get_jobject();
+      int oop_index = _masm.oop_recorder()->find_index(obj);
+      constant_addr = _masm.address_constant((address) obj, oop_Relocation::spec(oop_index));
+      break;
+    }
+    case T_ADDRESS: {
+      address addr = (address) con.get_jobject();
+      constant_addr = _masm.address_constant(addr);
+      break;
+    }
+    // We use T_VOID as marker for jump-table entries (labels) which
+    // need an internal word relocation.
+    case T_VOID: {
+      MachConstantNode* n = (MachConstantNode*) con.get_jobject();
+      // Fill the jump-table with a dummy word.  The real value is
+      // filled in later in fill_jump_table.
+      address dummy = (address) n;
+      constant_addr = _masm.address_constant(dummy);
+      // Expand jump-table
+      for (uint i = 1; i < n->outcnt(); i++) {
+        address temp_addr = _masm.address_constant(dummy + i);
+        assert(temp_addr, "consts section too small");
       }
-      case T_ADDRESS: {
-        address addr = (address) con.get_jobject();
-        constant_addr = _masm.address_constant(addr);
-        break;
-      }
-      // We use T_VOID as marker for jump-table entries (labels) which
-      // need an interal word relocation.
-      case T_VOID: {
-        // Write a dummy word.  The real value is filled in later
-        // in fill_jump_table_in_constant_table.
-        address addr = (address) con.get_jobject();
-        constant_addr = _masm.address_constant(addr);
-        break;
-      }
-      default: ShouldNotReachHere();
-      }
-      assert(constant_addr != NULL, "consts section too small");
-      assert((constant_addr - _masm.code()->consts()->start()) == con.offset(), err_msg("must be: %d == %d", constant_addr - _masm.code()->consts()->start(), con.offset()));
+      break;
     }
+    default: ShouldNotReachHere();
+    }
+    assert(constant_addr, "consts section too small");
+    assert((constant_addr - _masm.code()->consts()->start()) == con.offset(), err_msg("must be: %d == %d", constant_addr - _masm.code()->consts()->start(), con.offset()));
   }
 }
 
@@ -3175,19 +3180,21 @@
   if (con.can_be_reused()) {
     int idx = _constants.find(con);
     if (idx != -1 && _constants.at(idx).can_be_reused()) {
+      _constants.adr_at(idx)->inc_freq(con.freq());  // increase the frequency by the current value
       return;
     }
   }
   (void) _constants.append(con);
 }
 
-Compile::Constant Compile::ConstantTable::add(BasicType type, jvalue value) {
-  Constant con(type, value);
+Compile::Constant Compile::ConstantTable::add(MachConstantNode* n, BasicType type, jvalue value) {
+  Block* b = Compile::current()->cfg()->_bbs[n->_idx];
+  Constant con(type, value, b->_freq);
   add(con);
   return con;
 }
 
-Compile::Constant Compile::ConstantTable::add(MachOper* oper) {
+Compile::Constant Compile::ConstantTable::add(MachConstantNode* n, MachOper* oper) {
   jvalue value;
   BasicType type = oper->type()->basic_type();
   switch (type) {
@@ -3198,20 +3205,18 @@
   case T_ADDRESS: value.l = (jobject) oper->constant(); break;
   default: ShouldNotReachHere();
   }
-  return add(type, value);
+  return add(n, type, value);
 }
 
-Compile::Constant Compile::ConstantTable::allocate_jump_table(MachConstantNode* n) {
+Compile::Constant Compile::ConstantTable::add_jump_table(MachConstantNode* n) {
   jvalue value;
   // We can use the node pointer here to identify the right jump-table
   // as this method is called from Compile::Fill_buffer right before
   // the MachNodes are emitted and the jump-table is filled (means the
   // MachNode pointers do not change anymore).
   value.l = (jobject) n;
-  Constant con(T_VOID, value, false);  // Labels of a jump-table cannot be reused.
-  for (uint i = 0; i < n->outcnt(); i++) {
-    add(con);
-  }
+  Constant con(T_VOID, value, next_jump_table_freq(), false);  // Labels of a jump-table cannot be reused.
+  add(con);
   return con;
 }
 
@@ -3230,9 +3235,9 @@
   MacroAssembler _masm(&cb);
   address* jump_table_base = (address*) (_masm.code()->consts()->start() + offset);
 
-  for (int i = 0; i < labels.length(); i++) {
+  for (uint i = 0; i < n->outcnt(); i++) {
     address* constant_addr = &jump_table_base[i];
-    assert(*constant_addr == (address) n, "all jump-table entries must contain node pointer");
+    assert(*constant_addr == (((address) n) + i), err_msg("all jump-table entries must contain adjusted node pointer: " INTPTR_FORMAT " == " INTPTR_FORMAT, *constant_addr, (((address) n) + i)));
     *constant_addr = cb.consts()->target(*labels.at(i), (address) constant_addr);
     cb.consts()->relocate((address) constant_addr, relocInfo::internal_word_type);
   }
--- a/src/share/vm/opto/compile.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/compile.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -150,14 +150,16 @@
     BasicType _type;
     jvalue    _value;
     int       _offset;         // offset of this constant (in bytes) relative to the constant table base.
+    float     _freq;
     bool      _can_be_reused;  // true (default) if the value can be shared with other users.
 
   public:
-    Constant() : _type(T_ILLEGAL), _offset(-1), _can_be_reused(true) { _value.l = 0; }
-    Constant(BasicType type, jvalue value, bool can_be_reused = true) :
+    Constant() : _type(T_ILLEGAL), _offset(-1), _freq(0.0f), _can_be_reused(true) { _value.l = 0; }
+    Constant(BasicType type, jvalue value, float freq = 0.0f, bool can_be_reused = true) :
       _type(type),
       _value(value),
       _offset(-1),
+      _freq(freq),
       _can_be_reused(can_be_reused)
     {}
 
@@ -173,6 +175,9 @@
     int         offset()  const    { return _offset; }
     void    set_offset(int offset) {        _offset = offset; }
 
+    float       freq()    const    { return _freq;         }
+    void    inc_freq(float freq)   {        _freq += freq; }
+
     bool    can_be_reused() const  { return _can_be_reused; }
   };
 
@@ -182,41 +187,51 @@
     GrowableArray<Constant> _constants;          // Constants of this table.
     int                     _size;               // Size in bytes the emitted constant table takes (including padding).
     int                     _table_base_offset;  // Offset of the table base that gets added to the constant offsets.
+    int                     _nof_jump_tables;    // Number of jump-tables in this constant table.
+
+    static int qsort_comparator(Constant* a, Constant* b);
+
+    // We use negative frequencies to keep the order of the
+    // jump-tables in which they were added.  Otherwise we get into
+    // trouble with relocation.
+    float next_jump_table_freq() { return -1.0f * (++_nof_jump_tables); }
 
   public:
     ConstantTable() :
       _size(-1),
-      _table_base_offset(-1)  // We can use -1 here since the constant table is always bigger than 2 bytes (-(size / 2), see MachConstantBaseNode::emit).
+      _table_base_offset(-1),  // We can use -1 here since the constant table is always bigger than 2 bytes (-(size / 2), see MachConstantBaseNode::emit).
+      _nof_jump_tables(0)
     {}
 
-    int size() const { assert(_size != -1, "size not yet calculated"); return _size; }
+    int size() const { assert(_size != -1, "not calculated yet"); return _size; }
 
-    void set_table_base_offset(int x)  { assert(_table_base_offset == -1, "set only once");                        _table_base_offset = x; }
-    int      table_base_offset() const { assert(_table_base_offset != -1, "table base offset not yet set"); return _table_base_offset; }
+    int calculate_table_base_offset() const;  // AD specific
+    void set_table_base_offset(int x)  { assert(_table_base_offset == -1 || x == _table_base_offset, "can't change"); _table_base_offset = x; }
+    int      table_base_offset() const { assert(_table_base_offset != -1, "not set yet");                      return _table_base_offset; }
 
     void emit(CodeBuffer& cb);
 
     // Returns the offset of the last entry (the top) of the constant table.
-    int  top_offset() const { assert(_constants.top().offset() != -1, "constant not yet bound"); return _constants.top().offset(); }
+    int  top_offset() const { assert(_constants.top().offset() != -1, "not bound yet"); return _constants.top().offset(); }
 
     void calculate_offsets_and_size();
     int  find_offset(Constant& con) const;
 
     void     add(Constant& con);
-    Constant add(BasicType type, jvalue value);
-    Constant add(MachOper* oper);
-    Constant add(jfloat f) {
+    Constant add(MachConstantNode* n, BasicType type, jvalue value);
+    Constant add(MachConstantNode* n, MachOper* oper);
+    Constant add(MachConstantNode* n, jfloat f) {
       jvalue value; value.f = f;
-      return add(T_FLOAT, value);
+      return add(n, T_FLOAT, value);
     }
-    Constant add(jdouble d) {
+    Constant add(MachConstantNode* n, jdouble d) {
       jvalue value; value.d = d;
-      return add(T_DOUBLE, value);
+      return add(n, T_DOUBLE, value);
     }
 
-    // Jump table
-    Constant allocate_jump_table(MachConstantNode* n);
-    void         fill_jump_table(CodeBuffer& cb, MachConstantNode* n, GrowableArray<Label*> labels) const;
+    // Jump-table
+    Constant  add_jump_table(MachConstantNode* n);
+    void     fill_jump_table(CodeBuffer& cb, MachConstantNode* n, GrowableArray<Label*> labels) const;
   };
 
  private:
--- a/src/share/vm/opto/doCall.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/doCall.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -62,7 +62,6 @@
 CallGenerator* Compile::call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual,
                                        JVMState* jvms, bool allow_inline,
                                        float prof_factor) {
-  CallGenerator*  cg;
   ciMethod*       caller   = jvms->method();
   int             bci      = jvms->bci();
   Bytecodes::Code bytecode = caller->java_code_at_bci(bci);
@@ -110,7 +109,7 @@
   // We do this before the strict f.p. check below because the
   // intrinsics handle strict f.p. correctly.
   if (allow_inline) {
-    cg = find_intrinsic(call_method, call_is_virtual);
+    CallGenerator* cg = find_intrinsic(call_method, call_is_virtual);
     if (cg != NULL)  return cg;
   }
 
@@ -121,33 +120,16 @@
   if (call_method->is_method_handle_invoke()) {
     if (bytecode != Bytecodes::_invokedynamic) {
       GraphKit kit(jvms);
-      Node* n = kit.argument(0);
-
-      CallGenerator* cg = CallGenerator::for_method_handle_inline(n, jvms, caller, call_method, profile);
-      if (cg != NULL) {
-        return cg;
-      }
-      return CallGenerator::for_direct_call(call_method);
+      Node* method_handle = kit.argument(0);
+      return CallGenerator::for_method_handle_call(method_handle, jvms, caller, call_method, profile);
     }
     else {
-      // Get the CallSite object.
-      ciMethod* caller_method = jvms->method();
-      ciBytecodeStream str(caller_method);
-      str.force_bci(jvms->bci());  // Set the stream to the invokedynamic bci.
-      ciCallSite* call_site = str.get_call_site();
-
-      CallGenerator* cg = CallGenerator::for_invokedynamic_inline(call_site, jvms, caller, call_method, profile);
-      if (cg != NULL) {
-        return cg;
-      }
-      // If something failed, generate a normal dynamic call.
-      return CallGenerator::for_dynamic_call(call_method);
+      return CallGenerator::for_invokedynamic_call(jvms, caller, call_method, profile);
     }
   }
 
   // Do not inline strict fp into non-strict code, or the reverse
-  bool caller_method_is_strict = jvms->method()->is_strict();
-  if( caller_method_is_strict ^ call_method->is_strict() ) {
+  if (caller->is_strict() ^ call_method->is_strict()) {
     allow_inline = false;
   }
 
@@ -258,7 +240,7 @@
             }
             if (miss_cg != NULL) {
               NOT_PRODUCT(trace_type_profile(jvms->method(), jvms->depth() - 1, jvms->bci(), receiver_method, profile.receiver(0), site_count, receiver_count));
-              cg = CallGenerator::for_predicted_call(profile.receiver(0), miss_cg, hit_cg, profile.receiver_prob(0));
+              CallGenerator* cg = CallGenerator::for_predicted_call(profile.receiver(0), miss_cg, hit_cg, profile.receiver_prob(0));
               if (cg != NULL)  return cg;
             }
           }
--- a/src/share/vm/opto/escape.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/escape.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -119,6 +119,8 @@
   } else {
     _noop_null = _oop_null; // Should be initialized
   }
+  _pcmp_neq = NULL; // Should be initialized
+  _pcmp_eq  = NULL;
 }
 
 void ConnectionGraph::add_pointsto_edge(uint from_i, uint to_i) {
@@ -128,6 +130,13 @@
   assert(f->node_type() != PointsToNode::UnknownType && t->node_type() != PointsToNode::UnknownType, "node types must be set");
   assert(f->node_type() == PointsToNode::LocalVar || f->node_type() == PointsToNode::Field, "invalid source of PointsTo edge");
   assert(t->node_type() == PointsToNode::JavaObject, "invalid destination of PointsTo edge");
+  if (to_i == _phantom_object) { // Quick test for most common object
+    if (f->has_unknown_ptr()) {
+      return;
+    } else {
+      f->set_has_unknown_ptr();
+    }
+  }
   add_edge(f, to_i, PointsToNode::PointsToEdge);
 }
 
@@ -163,6 +172,9 @@
 }
 
 void ConnectionGraph::add_field_edge(uint from_i, uint to_i, int offset) {
+  // Don't add fields to NULL pointer.
+  if (is_null_ptr(from_i))
+    return;
   PointsToNode *f = ptnode_adr(from_i);
   PointsToNode *t = ptnode_adr(to_i);
 
@@ -177,7 +189,7 @@
 
 void ConnectionGraph::set_escape_state(uint ni, PointsToNode::EscapeState es) {
   // Don't change non-escaping state of NULL pointer.
-  if (ni == _noop_null || ni == _oop_null)
+  if (is_null_ptr(ni))
     return;
   PointsToNode *npt = ptnode_adr(ni);
   PointsToNode::EscapeState old_es = npt->escape_state();
@@ -309,6 +321,9 @@
 
   visited->set(ni);
   PointsToNode *ptn = ptnode_adr(ni);
+  assert(ptn->node_type() == PointsToNode::LocalVar ||
+         ptn->node_type() == PointsToNode::Field, "sanity");
+  assert(ptn->edge_count() != 0, "should have at least phantom_object");
 
   // Mark current edges as visited and move deferred edges to separate array.
   for (uint i = 0; i < ptn->edge_count(); ) {
@@ -329,6 +344,7 @@
     uint t = deferred_edges->at(next);
     PointsToNode *ptt = ptnode_adr(t);
     uint e_cnt = ptt->edge_count();
+    assert(e_cnt != 0, "should have at least phantom_object");
     for (uint e = 0; e < e_cnt; e++) {
       uint etgt = ptt->edge_target(e);
       if (visited->test_set(etgt))
@@ -337,10 +353,6 @@
       PointsToNode::EdgeType et = ptt->edge_type(e);
       if (et == PointsToNode::PointsToEdge) {
         add_pointsto_edge(ni, etgt);
-        if(etgt == _phantom_object) {
-          // Special case - field set outside (globally escaping).
-          set_escape_state(ni, PointsToNode::GlobalEscape);
-        }
       } else if (et == PointsToNode::DeferredEdge) {
         deferred_edges->append(etgt);
       } else {
@@ -348,6 +360,20 @@
       }
     }
   }
+  if (ptn->edge_count() == 0) {
+    // No pointsto edges found after deferred edges are removed.
+    // For example, in the next case where call is replaced
+    // with uncommon trap and as result array's load references
+    // itself through deferred edges:
+    //
+    // A a = b[i];
+    // if (c!=null) a = c.foo();
+    // b[i] = a;
+    //
+    // Assume the value was set outside this method and
+    // add edge to phantom object.
+    add_pointsto_edge(ni, _phantom_object);
+  }
 }
 
 
@@ -356,13 +382,25 @@
 //  a pointsto edge is added if it is a JavaObject
 
 void ConnectionGraph::add_edge_from_fields(uint adr_i, uint to_i, int offs) {
+  // No fields for NULL pointer.
+  if (is_null_ptr(adr_i)) {
+    return;
+  }
   PointsToNode* an = ptnode_adr(adr_i);
   PointsToNode* to = ptnode_adr(to_i);
   bool deferred = (to->node_type() == PointsToNode::LocalVar);
-
+  bool escaped  = (to_i == _phantom_object) && (offs == Type::OffsetTop);
+  if (escaped) {
+    // Values in fields escaped during call.
+    assert(an->escape_state() >= PointsToNode::ArgEscape, "sanity");
+    offs = Type::OffsetBot;
+  }
   for (uint fe = 0; fe < an->edge_count(); fe++) {
     assert(an->edge_type(fe) == PointsToNode::FieldEdge, "expecting a field edge");
     int fi = an->edge_target(fe);
+    if (escaped) {
+      set_escape_state(fi, PointsToNode::GlobalEscape);
+    }
     PointsToNode* pf = ptnode_adr(fi);
     int po = pf->offset();
     if (po == offs || po == Type::OffsetBot || offs == Type::OffsetBot) {
@@ -377,6 +415,15 @@
 // Add a deferred  edge from node given by "from_i" to any field of adr_i
 // whose offset matches "offset".
 void ConnectionGraph::add_deferred_edge_to_fields(uint from_i, uint adr_i, int offs) {
+  // No fields for NULL pointer.
+  if (is_null_ptr(adr_i)) {
+    return;
+  }
+  if (adr_i == _phantom_object) {
+    // Add only one edge for unknown object.
+    add_pointsto_edge(from_i, _phantom_object);
+    return;
+  }
   PointsToNode* an = ptnode_adr(adr_i);
   bool is_alloc = an->_node->is_Allocate();
   for (uint fe = 0; fe < an->edge_count(); fe++) {
@@ -392,6 +439,13 @@
       add_deferred_edge(from_i, fi);
     }
   }
+  // Some fields references (AddP) may still be missing
+  // until Connection Graph construction is complete.
+  // For example, loads from RAW pointers with offset 0
+  // which don't have AddP.
+  // A reference to phantom_object will be added if
+  // a field reference is still missing after completing
+  // Connection Graph (see remove_deferred()).
 }
 
 // Helper functions
@@ -1540,8 +1594,8 @@
 
   GrowableArray<Node*> alloc_worklist;
   GrowableArray<Node*> addp_worklist;
+  GrowableArray<Node*> ptr_cmp_worklist;
   PhaseGVN* igvn = _igvn;
-  bool has_allocations = false;
 
   // Push all useful nodes onto CG list and set their type.
   for( uint next = 0; next < worklist_init.size(); ++next ) {
@@ -1551,11 +1605,8 @@
     // for an escape status. See process_call_result() below.
     if (n->is_Allocate() || n->is_CallStaticJava() &&
         ptnode_adr(n->_idx)->node_type() == PointsToNode::JavaObject) {
-      has_allocations = true;
-      if (n->is_Allocate())
-        alloc_worklist.append(n);
-    }
-    if(n->is_AddP()) {
+      alloc_worklist.append(n);
+    } else if(n->is_AddP()) {
       // Collect address nodes. Use them during stage 3 below
       // to build initial connection graph field edges.
       addp_worklist.append(n);
@@ -1563,6 +1614,10 @@
       // Collect all MergeMem nodes to add memory slices for
       // scalar replaceable objects in split_unique_types().
       _mergemem_worklist.append(n->as_MergeMem());
+    } else if (OptimizePtrCompare && n->is_Cmp() &&
+               (n->Opcode() == Op_CmpP || n->Opcode() == Op_CmpN)) {
+      // Compare pointers nodes
+      ptr_cmp_worklist.append(n);
     }
     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
       Node* m = n->fast_out(i);   // Get user
@@ -1570,7 +1625,7 @@
     }
   }
 
-  if (!has_allocations) {
+  if (alloc_worklist.length() == 0) {
     _collecting = false;
     return false; // Nothing to do.
   }
@@ -1588,7 +1643,7 @@
   for( uint next = 0; next < addp_length; ++next ) {
     Node* n = addp_worklist.at(next);
     Node* base = get_addp_base(n);
-    if (base->is_Proj())
+    if (base->is_Proj() && base->in(0)->is_Call())
       base = base->in(0);
     PointsToNode::NodeType nt = ptnode_adr(base->_idx)->node_type();
     if (nt == PointsToNode::JavaObject) {
@@ -1653,39 +1708,7 @@
   }
 #undef CG_BUILD_ITER_LIMIT
 
-  Arena* arena = Thread::current()->resource_area();
-  VectorSet visited(arena);
-
-  // 5. Find fields initializing values for not escaped allocations
-  uint alloc_length = alloc_worklist.length();
-  for (uint next = 0; next < alloc_length; ++next) {
-    Node* n = alloc_worklist.at(next);
-    if (ptnode_adr(n->_idx)->escape_state() == PointsToNode::NoEscape) {
-      find_init_values(n, &visited, igvn);
-    }
-  }
-
-  worklist.clear();
-
-  // 6. Remove deferred edges from the graph.
-  uint cg_length = cg_worklist.length();
-  for (uint next = 0; next < cg_length; ++next) {
-    int ni = cg_worklist.at(next);
-    PointsToNode* ptn = ptnode_adr(ni);
-    PointsToNode::NodeType nt = ptn->node_type();
-    if (nt == PointsToNode::LocalVar || nt == PointsToNode::Field) {
-      remove_deferred(ni, &worklist, &visited);
-      Node *n = ptn->_node;
-    }
-  }
-
-  // 7. Adjust escape state of nonescaping objects.
-  for (uint next = 0; next < addp_length; ++next) {
-    Node* n = addp_worklist.at(next);
-    adjust_escape_state(n);
-  }
-
-  // 8. Propagate escape states.
+  // 5. Propagate escaped states.
   worklist.clear();
 
   // mark all nodes reachable from GlobalEscape nodes
@@ -1694,20 +1717,72 @@
   // mark all nodes reachable from ArgEscape nodes
   bool has_non_escaping_obj = propagate_escape_state(&cg_worklist, &worklist, PointsToNode::ArgEscape);
 
+  Arena* arena = Thread::current()->resource_area();
+  VectorSet visited(arena);
+
+  // 6. Find fields initializing values for not escaped allocations
+  uint alloc_length = alloc_worklist.length();
+  for (uint next = 0; next < alloc_length; ++next) {
+    Node* n = alloc_worklist.at(next);
+    if (ptnode_adr(n->_idx)->escape_state() == PointsToNode::NoEscape) {
+      has_non_escaping_obj = true;
+      if (n->is_Allocate()) {
+        find_init_values(n, &visited, igvn);
+      }
+    }
+  }
+
+  uint cg_length = cg_worklist.length();
+
+  // Skip the rest of code if all objects escaped.
+  if (!has_non_escaping_obj) {
+    cg_length = 0;
+    addp_length = 0;
+  }
+
+  for (uint next = 0; next < cg_length; ++next) {
+    int ni = cg_worklist.at(next);
+    PointsToNode* ptn = ptnode_adr(ni);
+    PointsToNode::NodeType nt = ptn->node_type();
+    if (nt == PointsToNode::LocalVar || nt == PointsToNode::Field) {
+      if (ptn->edge_count() == 0) {
+        // No values were found. Assume the value was set
+        // outside this method - add edge to phantom object.
+        add_pointsto_edge(ni, _phantom_object);
+      }
+    }
+  }
+
+  // 7. Remove deferred edges from the graph.
+  for (uint next = 0; next < cg_length; ++next) {
+    int ni = cg_worklist.at(next);
+    PointsToNode* ptn = ptnode_adr(ni);
+    PointsToNode::NodeType nt = ptn->node_type();
+    if (nt == PointsToNode::LocalVar || nt == PointsToNode::Field) {
+      remove_deferred(ni, &worklist, &visited);
+    }
+  }
+
+  // 8. Adjust escape state of nonescaping objects.
+  for (uint next = 0; next < addp_length; ++next) {
+    Node* n = addp_worklist.at(next);
+    adjust_escape_state(n);
+  }
+
   // push all NoEscape nodes on the worklist
+  worklist.clear();
   for( uint next = 0; next < cg_length; ++next ) {
     int nk = cg_worklist.at(next);
-    if (ptnode_adr(nk)->escape_state() == PointsToNode::NoEscape)
+    if (ptnode_adr(nk)->escape_state() == PointsToNode::NoEscape &&
+        !is_null_ptr(nk))
       worklist.push(nk);
   }
+
   alloc_worklist.clear();
-  // mark all nodes reachable from NoEscape nodes
+  // Propagate scalar_replaceable value.
   while(worklist.length() > 0) {
     uint nk = worklist.pop();
     PointsToNode* ptn = ptnode_adr(nk);
-    if (ptn->node_type() == PointsToNode::JavaObject &&
-        !(nk == _noop_null || nk == _oop_null))
-      has_non_escaping_obj = true; // Non Escape
     Node* n = ptn->_node;
     bool scalar_replaceable = ptn->scalar_replaceable();
     if (n->is_Allocate() && scalar_replaceable) {
@@ -1719,6 +1794,8 @@
     uint e_cnt = ptn->edge_count();
     for (uint ei = 0; ei < e_cnt; ei++) {
       uint npi = ptn->edge_target(ei);
+      if (is_null_ptr(npi))
+        continue;
       PointsToNode *np = ptnode_adr(npi);
       if (np->escape_state() < PointsToNode::NoEscape) {
         set_escape_state(npi, PointsToNode::NoEscape);
@@ -1727,7 +1804,6 @@
         }
         worklist.push(npi);
       } else if (np->scalar_replaceable() && !scalar_replaceable) {
-        // Propagate scalar_replaceable value.
         np->set_scalar_replaceable(false);
         worklist.push(npi);
       }
@@ -1737,9 +1813,11 @@
   _collecting = false;
   assert(C->unique() == nodes_size(), "there should be no new ideal nodes during ConnectionGraph build");
 
-  assert(ptnode_adr(_oop_null)->escape_state() == PointsToNode::NoEscape, "sanity");
+  assert(ptnode_adr(_oop_null)->escape_state() == PointsToNode::NoEscape &&
+         ptnode_adr(_oop_null)->edge_count() == 0, "sanity");
   if (UseCompressedOops) {
-    assert(ptnode_adr(_noop_null)->escape_state() == PointsToNode::NoEscape, "sanity");
+    assert(ptnode_adr(_noop_null)->escape_state() == PointsToNode::NoEscape &&
+           ptnode_adr(_noop_null)->edge_count() == 0, "sanity");
   }
 
   if (EliminateLocks && has_non_escaping_obj) {
@@ -1749,18 +1827,53 @@
       Node *n = C->macro_node(i);
       if (n->is_AbstractLock()) { // Lock and Unlock nodes
         AbstractLockNode* alock = n->as_AbstractLock();
-        if (!alock->is_eliminated()) {
+        if (!alock->is_eliminated() || alock->is_coarsened()) {
           PointsToNode::EscapeState es = escape_state(alock->obj_node());
           assert(es != PointsToNode::UnknownEscape, "should know");
           if (es != PointsToNode::UnknownEscape && es != PointsToNode::GlobalEscape) {
-            // Mark it eliminated
-            alock->set_eliminated();
+            if (!alock->is_eliminated()) {
+              // Mark it eliminated to update any counters
+              alock->set_eliminated();
+            } else {
+              // The lock could be marked eliminated by lock coarsening
+              // code during first IGVN before EA. Clear coarsened flag
+              // to eliminate all associated locks/unlocks and relock
+              // during deoptimization.
+              alock->clear_coarsened();
+            }
           }
         }
       }
     }
   }
 
+  if (OptimizePtrCompare && has_non_escaping_obj) {
+    // Add ConI(#CC_GT) and ConI(#CC_EQ).
+    _pcmp_neq = igvn->makecon(TypeInt::CC_GT);
+    _pcmp_eq = igvn->makecon(TypeInt::CC_EQ);
+    // Optimize objects compare.
+    while (ptr_cmp_worklist.length() != 0) {
+      Node *n = ptr_cmp_worklist.pop();
+      Node *res = optimize_ptr_compare(n);
+      if (res != NULL) {
+#ifndef PRODUCT
+        if (PrintOptimizePtrCompare) {
+          tty->print_cr("++++ Replaced: %d %s(%d,%d) --> %s", n->_idx, (n->Opcode() == Op_CmpP ? "CmpP" : "CmpN"), n->in(1)->_idx, n->in(2)->_idx, (res == _pcmp_eq ? "EQ" : "NotEQ"));
+          if (Verbose) {
+            n->dump(1);
+          }
+        }
+#endif
+        _igvn->replace_node(n, res);
+      }
+    }
+    // cleanup
+    if (_pcmp_neq->outcnt() == 0)
+      igvn->hash_delete(_pcmp_neq);
+    if (_pcmp_eq->outcnt()  == 0)
+      igvn->hash_delete(_pcmp_eq);
+  }
+
 #ifndef PRODUCT
   if (PrintEscapeAnalysis) {
     dump(); // Dump ConnectionGraph
@@ -1821,15 +1934,30 @@
   // Connection Graph does not record a default initialization by NULL
   // captured by Initialize node.
   //
+  uint null_idx = UseCompressedOops ? _noop_null : _oop_null;
   uint ae_cnt = pta->edge_count();
+  bool visited_bottom_offset = false;
   for (uint ei = 0; ei < ae_cnt; ei++) {
     uint nidx = pta->edge_target(ei); // Field (AddP)
     PointsToNode* ptn = ptnode_adr(nidx);
     assert(ptn->_node->is_AddP(), "Should be AddP nodes only");
     int offset = ptn->offset();
-    if (offset != Type::OffsetBot &&
-        offset != oopDesc::klass_offset_in_bytes() &&
-        !visited->test_set(offset)) {
+    if (offset == Type::OffsetBot) {
+      if (!visited_bottom_offset) {
+        visited_bottom_offset = true;
+        // Check only oop fields.
+        const Type* adr_type = ptn->_node->as_AddP()->bottom_type();
+        if (!adr_type->isa_aryptr() ||
+            (adr_type->isa_aryptr()->klass() == NULL) ||
+             adr_type->isa_aryptr()->klass()->is_obj_array_klass()) {
+          // OffsetBot is used to reference array's element,
+          // always add reference to NULL since we don't
+          // known which element is referenced.
+          add_edge_from_fields(alloc->_idx, null_idx, offset);
+        }
+      }
+    } else if (offset != oopDesc::klass_offset_in_bytes() &&
+               !visited->test_set(offset)) {
 
       // Check only oop fields.
       const Type* adr_type = ptn->_node->as_AddP()->bottom_type();
@@ -1904,7 +2032,6 @@
         }
         if (value == NULL || value != ptnode_adr(value->_idx)->_node) {
           // A field's initializing value was not recorded. Add NULL.
-          uint null_idx = UseCompressedOops ? _noop_null : _oop_null;
           add_edge_from_fields(alloc->_idx, null_idx, offset);
         }
       }
@@ -1990,13 +2117,21 @@
   }
   // mark all reachable nodes
   while (worklist->length() > 0) {
-    PointsToNode* ptn = ptnode_adr(worklist->pop());
-    if (ptn->node_type() == PointsToNode::JavaObject) {
+    int pt = worklist->pop();
+    PointsToNode* ptn = ptnode_adr(pt);
+    if (ptn->node_type() == PointsToNode::JavaObject &&
+        !is_null_ptr(pt)) {
       has_java_obj = true;
+      if (esc_state > PointsToNode::NoEscape) {
+        // fields values are unknown if object escapes
+        add_edge_from_fields(pt, _phantom_object, Type::OffsetBot);
+      }
     }
     uint e_cnt = ptn->edge_count();
     for (uint ei = 0; ei < e_cnt; ei++) {
       uint npi = ptn->edge_target(ei);
+      if (is_null_ptr(npi))
+        continue;
       PointsToNode *np = ptnode_adr(npi);
       if (np->escape_state() < esc_state) {
         set_escape_state(npi, esc_state);
@@ -2008,8 +2143,100 @@
   return has_java_obj && (esc_state < PointsToNode::GlobalEscape);
 }
 
+// Optimize objects compare.
+Node* ConnectionGraph::optimize_ptr_compare(Node* n) {
+  assert(OptimizePtrCompare, "sanity");
+  // Clone returned Set since PointsTo() returns pointer
+  // to the same structure ConnectionGraph.pt_ptset.
+  VectorSet ptset1 = *PointsTo(n->in(1));
+  VectorSet ptset2 = *PointsTo(n->in(2));
+
+  // Check simple cases first.
+  if (ptset1.Size() == 1) {
+    uint pt1 = ptset1.getelem();
+    PointsToNode* ptn1 = ptnode_adr(pt1);
+    if (ptn1->escape_state() == PointsToNode::NoEscape) {
+      if (ptset2.Size() == 1 && ptset2.getelem() == pt1) {
+        // Comparing the same not escaping object.
+        return _pcmp_eq;
+      }
+      Node* obj = ptn1->_node;
+      // Comparing not escaping allocation.
+      if ((obj->is_Allocate() || obj->is_CallStaticJava()) &&
+          !ptset2.test(pt1)) {
+        return _pcmp_neq; // This includes nullness check.
+      }
+    }
+  } else if (ptset2.Size() == 1) {
+    uint pt2 = ptset2.getelem();
+    PointsToNode* ptn2 = ptnode_adr(pt2);
+    if (ptn2->escape_state() == PointsToNode::NoEscape) {
+      Node* obj = ptn2->_node;
+      // Comparing not escaping allocation.
+      if ((obj->is_Allocate() || obj->is_CallStaticJava()) &&
+          !ptset1.test(pt2)) {
+        return _pcmp_neq; // This includes nullness check.
+      }
+    }
+  }
+
+  if (!ptset1.disjoint(ptset2)) {
+    return NULL; // Sets are not disjoint
+  }
+
+  // Sets are disjoint.
+  bool set1_has_unknown_ptr = ptset1.test(_phantom_object) != 0;
+  bool set2_has_unknown_ptr = ptset2.test(_phantom_object) != 0;
+  bool set1_has_null_ptr   = (ptset1.test(_oop_null) | ptset1.test(_noop_null)) != 0;
+  bool set2_has_null_ptr   = (ptset2.test(_oop_null) | ptset2.test(_noop_null)) != 0;
+
+  if (set1_has_unknown_ptr && set2_has_null_ptr ||
+      set2_has_unknown_ptr && set1_has_null_ptr) {
+    // Check nullness of unknown object.
+    return NULL;
+  }
+
+  // Disjointness by itself is not sufficient since
+  // alias analysis is not complete for escaped objects.
+  // Disjoint sets are definitely unrelated only when
+  // at least one set has only not escaping objects.
+  if (!set1_has_unknown_ptr && !set1_has_null_ptr) {
+    bool has_only_non_escaping_alloc = true;
+    for (VectorSetI i(&ptset1); i.test(); ++i) {
+      uint pt = i.elem;
+      PointsToNode* ptn = ptnode_adr(pt);
+      Node* obj = ptn->_node;
+      if (ptn->escape_state() != PointsToNode::NoEscape ||
+          !(obj->is_Allocate() || obj->is_CallStaticJava())) {
+        has_only_non_escaping_alloc = false;
+        break;
+      }
+    }
+    if (has_only_non_escaping_alloc) {
+      return _pcmp_neq;
+    }
+  }
+  if (!set2_has_unknown_ptr && !set2_has_null_ptr) {
+    bool has_only_non_escaping_alloc = true;
+    for (VectorSetI i(&ptset2); i.test(); ++i) {
+      uint pt = i.elem;
+      PointsToNode* ptn = ptnode_adr(pt);
+      Node* obj = ptn->_node;
+      if (ptn->escape_state() != PointsToNode::NoEscape ||
+          !(obj->is_Allocate() || obj->is_CallStaticJava())) {
+        has_only_non_escaping_alloc = false;
+        break;
+      }
+    }
+    if (has_only_non_escaping_alloc) {
+      return _pcmp_neq;
+    }
+  }
+  return NULL;
+}
+
 void ConnectionGraph::process_call_arguments(CallNode *call, PhaseTransform *phase) {
-
+    bool is_arraycopy = false;
     switch (call->Opcode()) {
 #ifdef ASSERT
     case Op_Allocate:
@@ -2019,25 +2246,44 @@
       assert(false, "should be done already");
       break;
 #endif
+    case Op_CallLeafNoFP:
+      is_arraycopy = (call->as_CallLeaf()->_name != NULL &&
+                      strstr(call->as_CallLeaf()->_name, "arraycopy") != 0);
+      // fall through
     case Op_CallLeaf:
-    case Op_CallLeafNoFP:
     {
       // Stub calls, objects do not escape but they are not scale replaceable.
       // Adjust escape state for outgoing arguments.
       const TypeTuple * d = call->tf()->domain();
+      bool src_has_oops = false;
       for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
         const Type* at = d->field_at(i);
         Node *arg = call->in(i)->uncast();
         const Type *aat = phase->type(arg);
+        PointsToNode::EscapeState arg_esc = ptnode_adr(arg->_idx)->escape_state();
         if (!arg->is_top() && at->isa_ptr() && aat->isa_ptr() &&
-            ptnode_adr(arg->_idx)->escape_state() < PointsToNode::ArgEscape) {
+            (is_arraycopy || arg_esc < PointsToNode::ArgEscape)) {
 
           assert(aat == Type::TOP || aat == TypePtr::NULL_PTR ||
                  aat->isa_ptr() != NULL, "expecting an Ptr");
+          bool arg_has_oops = aat->isa_oopptr() &&
+                              (aat->isa_oopptr()->klass() == NULL || aat->isa_instptr() ||
+                               (aat->isa_aryptr() && aat->isa_aryptr()->klass()->is_obj_array_klass()));
+          if (i == TypeFunc::Parms) {
+            src_has_oops = arg_has_oops;
+          }
+          //
+          // src or dst could be j.l.Object when other is basic type array:
+          //
+          //   arraycopy(char[],0,Object*,0,size);
+          //   arraycopy(Object*,0,char[],0,size);
+          //
+          // Don't add edges from dst's fields in such cases.
+          //
+          bool arg_is_arraycopy_dest = src_has_oops && is_arraycopy &&
+                                       arg_has_oops && (i > TypeFunc::Parms);
 #ifdef ASSERT
-          if (!(call->Opcode() == Op_CallLeafNoFP &&
-                call->as_CallLeaf()->_name != NULL &&
-                (strstr(call->as_CallLeaf()->_name, "arraycopy")  != 0) ||
+          if (!(is_arraycopy ||
                 call->as_CallLeaf()->_name != NULL &&
                 (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre")  == 0 ||
                  strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ))
@@ -2046,20 +2292,72 @@
             assert(false, "EA: unexpected CallLeaf");
           }
 #endif
+          // Always process arraycopy's destination object since
+          // we need to add all possible edges to references in
+          // source object.
+          if (arg_esc >= PointsToNode::ArgEscape &&
+              !arg_is_arraycopy_dest) {
+            continue;
+          }
           set_escape_state(arg->_idx, PointsToNode::ArgEscape);
+          Node* arg_base = arg;
           if (arg->is_AddP()) {
             //
             // The inline_native_clone() case when the arraycopy stub is called
             // after the allocation before Initialize and CheckCastPP nodes.
+            // Or normal arraycopy for object arrays case.
             //
             // Set AddP's base (Allocate) as not scalar replaceable since
             // pointer to the base (with offset) is passed as argument.
             //
-            arg = get_addp_base(arg);
+            arg_base = get_addp_base(arg);
           }
-          for( VectorSetI j(PointsTo(arg)); j.test(); ++j ) {
-            uint pt = j.elem;
-            set_escape_state(pt, PointsToNode::ArgEscape);
+          VectorSet argset = *PointsTo(arg_base); // Clone set
+          for( VectorSetI j(&argset); j.test(); ++j ) {
+            uint pd = j.elem; // Destination object
+            set_escape_state(pd, PointsToNode::ArgEscape);
+
+            if (arg_is_arraycopy_dest) {
+              PointsToNode* ptd = ptnode_adr(pd);
+              // Conservatively reference an unknown object since
+              // not all source's fields/elements may be known.
+              add_edge_from_fields(pd, _phantom_object, Type::OffsetBot);
+
+              Node *src = call->in(TypeFunc::Parms)->uncast();
+              Node* src_base = src;
+              if (src->is_AddP()) {
+                src_base  = get_addp_base(src);
+              }
+              // Create edges from destination's fields to
+              // everything known source's fields could point to.
+              for( VectorSetI s(PointsTo(src_base)); s.test(); ++s ) {
+                uint ps = s.elem;
+                bool has_bottom_offset = false;
+                for (uint fd = 0; fd < ptd->edge_count(); fd++) {
+                  assert(ptd->edge_type(fd) == PointsToNode::FieldEdge, "expecting a field edge");
+                  int fdi = ptd->edge_target(fd);
+                  PointsToNode* pfd = ptnode_adr(fdi);
+                  int offset = pfd->offset();
+                  if (offset == Type::OffsetBot)
+                    has_bottom_offset = true;
+                  assert(offset != -1, "offset should be set");
+                  add_deferred_edge_to_fields(fdi, ps, offset);
+                }
+                // Destination object may not have access (no field edge)
+                // to fields which are accessed in source object.
+                // As result no edges will be created to those source's
+                // fields and escape state of destination object will
+                // not be propagated to those fields.
+                //
+                // Mark source object as global escape except in
+                // the case with Type::OffsetBot field (which is
+                // common case for array elements access) when
+                // edges are created to all source's fields.
+                if (!has_bottom_offset) {
+                  set_escape_state(ps, PointsToNode::GlobalEscape);
+                }
+              }
+            }
           }
         }
       }
@@ -2102,14 +2400,16 @@
             for( VectorSetI j(PointsTo(arg)); j.test(); ++j ) {
               uint pt = j.elem;
               if (global_escapes) {
-                //The argument global escapes, mark everything it could point to
+                // The argument global escapes, mark everything it could point to
                 set_escape_state(pt, PointsToNode::GlobalEscape);
+                add_edge_from_fields(pt, _phantom_object, Type::OffsetBot);
               } else {
+                set_escape_state(pt, PointsToNode::ArgEscape);
                 if (fields_escapes) {
-                  // The argument itself doesn't escape, but any fields might
-                  add_edge_from_fields(pt, _phantom_object, Type::OffsetBot);
+                  // The argument itself doesn't escape, but any fields might.
+                  // Use OffsetTop to indicate such case.
+                  add_edge_from_fields(pt, _phantom_object, Type::OffsetTop);
                 }
-                set_escape_state(pt, PointsToNode::ArgEscape);
               }
             }
           }
@@ -2135,6 +2435,7 @@
           for( VectorSetI j(PointsTo(arg)); j.test(); ++j ) {
             uint pt = j.elem;
             set_escape_state(pt, PointsToNode::GlobalEscape);
+            add_edge_from_fields(pt, _phantom_object, Type::OffsetBot);
           }
         }
       }
@@ -2235,15 +2536,16 @@
           // it's fields will be marked as NoEscape at least.
           set_escape_state(call_idx, PointsToNode::NoEscape);
           ptnode_adr(call_idx)->set_scalar_replaceable(false);
+          // Fields values are unknown
+          add_edge_from_fields(call_idx, _phantom_object, Type::OffsetBot);
           add_pointsto_edge(resproj_idx, call_idx);
           copy_dependencies = true;
-        } else if (call_analyzer->is_return_local()) {
+        } else {
           // determine whether any arguments are returned
           set_escape_state(call_idx, PointsToNode::ArgEscape);
           bool ret_arg = false;
           for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
             const Type* at = d->field_at(i);
-
             if (at->isa_oopptr() != NULL) {
               Node *arg = call->in(i)->uncast();
 
@@ -2259,17 +2561,14 @@
               }
             }
           }
-          if (done && !ret_arg) {
-            // Returns unknown object.
-            set_escape_state(call_idx, PointsToNode::GlobalEscape);
-            add_pointsto_edge(resproj_idx, _phantom_object);
-          }
           if (done) {
             copy_dependencies = true;
+            // is_return_local() is true when only arguments are returned.
+            if (!ret_arg || !call_analyzer->is_return_local()) {
+              // Returns unknown object.
+              add_pointsto_edge(resproj_idx, _phantom_object);
+            }
           }
-        } else {
-          set_escape_state(call_idx, PointsToNode::GlobalEscape);
-          add_pointsto_edge(resproj_idx, _phantom_object);
         }
         if (copy_dependencies)
           call_analyzer->copy_dependencies(_compile->dependencies());
@@ -2431,6 +2730,11 @@
       add_node(n, PointsToNode::JavaObject, PointsToNode::GlobalEscape, false);
       break;
     }
+    case Op_PartialSubtypeCheck:
+    { // Produces Null or notNull and is used in CmpP.
+      add_node(n, PointsToNode::JavaObject, PointsToNode::ArgEscape, true);
+      break;
+    }
     case Op_Phi:
     {
       const Type *t = n->as_Phi()->type();
@@ -2589,10 +2893,11 @@
     case Op_AddP:
     {
       Node *base = get_addp_base(n);
+      int offset = address_offset(n, phase);
       // Create a field edge to this node from everything base could point to.
       for( VectorSetI i(PointsTo(base)); i.test(); ++i ) {
         uint pt = i.elem;
-        add_field_edge(pt, n_idx, address_offset(n, phase));
+        add_field_edge(pt, n_idx, offset);
       }
       break;
     }
@@ -2659,6 +2964,10 @@
       int offset = address_offset(adr, phase);
       for( VectorSetI i(PointsTo(adr_base)); i.test(); ++i ) {
         uint pt = i.elem;
+        if (adr->is_AddP()) {
+          // Add field edge if it is missing.
+          add_field_edge(pt, adr->_idx, offset);
+        }
         add_deferred_edge_to_fields(n_idx, pt, offset);
       }
       break;
@@ -2668,6 +2977,11 @@
       assert(false, "Op_Parm");
       break;
     }
+    case Op_PartialSubtypeCheck:
+    {
+      assert(false, "Op_PartialSubtypeCheck");
+      break;
+    }
     case Op_Phi:
     {
 #ifdef ASSERT
@@ -2745,11 +3059,14 @@
       assert(adr->is_AddP(), "expecting an AddP");
       Node *adr_base = get_addp_base(adr);
       Node *val = n->in(MemNode::ValueIn)->uncast();
+      int offset = address_offset(adr, phase);
       // For everything "adr_base" could point to, create a deferred edge
       // to "val" from each field with the same offset.
       for( VectorSetI i(PointsTo(adr_base)); i.test(); ++i ) {
         uint pt = i.elem;
-        add_edge_from_fields(pt, val->_idx, address_offset(adr, phase));
+        // Add field edge if it is missing.
+        add_field_edge(pt, adr->_idx, offset);
+        add_edge_from_fields(pt, val->_idx, offset);
       }
       break;
     }
--- a/src/share/vm/opto/escape.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/escape.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -160,6 +160,7 @@
   Node* _node;                 // Ideal node corresponding to this PointsTo node.
   int   _offset;               // Object fields offsets.
   bool  _scalar_replaceable;   // Not escaped object could be replaced with scalar
+  bool  _has_unknown_ptr;      // Has edge to phantom_object
 
 public:
   PointsToNode():
@@ -168,6 +169,7 @@
     _edges(NULL),
     _node(NULL),
     _offset(-1),
+    _has_unknown_ptr(false),
     _scalar_replaceable(true) {}
 
 
@@ -175,6 +177,7 @@
   NodeType node_type() const { return _type;}
   int offset() { return _offset;}
   bool scalar_replaceable() { return _scalar_replaceable;}
+  bool has_unknown_ptr()    { return _has_unknown_ptr;}
 
   void set_offset(int offs) { _offset = offs;}
   void set_escape_state(EscapeState state) { _escape = state; }
@@ -183,6 +186,7 @@
     _type = ntype;
   }
   void set_scalar_replaceable(bool v) { _scalar_replaceable = v; }
+  void set_has_unknown_ptr()          { _has_unknown_ptr = true; }
 
   // count of outgoing edges
   uint edge_count() const { return (_edges == NULL) ? 0 : _edges->length(); }
@@ -236,6 +240,8 @@
                                        // are assumed to point to.
   uint                      _oop_null; // ConP(#NULL)->_idx
   uint                     _noop_null; // ConN(#NULL)->_idx
+  Node*                     _pcmp_neq; // ConI(#CC_GT)
+  Node*                      _pcmp_eq; // ConI(#CC_EQ)
 
   Compile *                  _compile; // Compile object for current compilation
   PhaseIterGVN *                _igvn; // Value numbering
@@ -248,6 +254,8 @@
   }
   uint nodes_size() const { return _nodes.length(); }
 
+  bool is_null_ptr(uint idx) const { return (idx == _noop_null || idx == _oop_null); }
+
   // Add node to ConnectionGraph.
   void add_node(Node *n, PointsToNode::NodeType nt, PointsToNode::EscapeState es, bool done);
 
@@ -331,10 +339,9 @@
   }
 
   // Notify optimizer that a node has been modified
-  // Node:  This assumes that escape analysis is run before
-  //        PhaseIterGVN creation
   void record_for_optimizer(Node *n) {
     _igvn->_worklist.push(n);
+    _igvn->add_users_to_worklist(n);
   }
 
   // Set the escape state of a node
@@ -351,6 +358,9 @@
                               GrowableArray<uint>* worklist,
                               PointsToNode::EscapeState esc_state);
 
+  // Optimize objects compare.
+  Node* optimize_ptr_compare(Node* n);
+
   // Compute the escape information
   bool compute_escape();
 
--- a/src/share/vm/opto/gcm.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/gcm.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -95,7 +95,7 @@
   assert(in0 != NULL, "Only control-dependent");
   const Node *p = in0->is_block_proj();
   if (p != NULL && p != n) {    // Control from a block projection?
-    assert(!n->pinned() || n->is_MachConstantBase() || n->is_SafePointScalarObject(), "only pinned MachConstantBase or SafePointScalarObject node is expected here");
+    assert(!n->pinned() || n->is_MachConstantBase(), "only pinned MachConstantBase node is expected here");
     // Find trailing Region
     Block *pb = _bbs[in0->_idx]; // Block-projection already has basic block
     uint j = 0;
--- a/src/share/vm/opto/idealGraphPrinter.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/idealGraphPrinter.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -447,6 +447,9 @@
     if (flags & Node::Flag_may_be_short_branch) {
       print_prop("may_be_short_branch", "true");
     }
+    if (flags & Node::Flag_has_call) {
+      print_prop("has_call", "true");
+    }
 
     if (C->matcher() != NULL) {
       if (C->matcher()->is_shared(node)) {
--- a/src/share/vm/opto/lcm.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/lcm.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -548,6 +548,22 @@
   set_next_call(call, next_call, bbs);
 }
 
+//------------------------------add_call_kills-------------------------------------
+void Block::add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe) {
+  // Fill in the kill mask for the call
+  for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) {
+    if( !regs.Member(r) ) {     // Not already defined by the call
+      // Save-on-call register?
+      if ((save_policy[r] == 'C') ||
+          (save_policy[r] == 'A') ||
+          ((save_policy[r] == 'E') && exclude_soe)) {
+        proj->_rout.Insert(r);
+      }
+    }
+  }
+}
+
+
 //------------------------------sched_call-------------------------------------
 uint Block::sched_call( Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call ) {
   RegMask regs;
@@ -631,17 +647,7 @@
       proj->_rout.OR(Matcher::method_handle_invoke_SP_save_mask());
   }
 
-  // Fill in the kill mask for the call
-  for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) {
-    if( !regs.Member(r) ) {     // Not already defined by the call
-      // Save-on-call register?
-      if ((save_policy[r] == 'C') ||
-          (save_policy[r] == 'A') ||
-          ((save_policy[r] == 'E') && exclude_soe)) {
-        proj->_rout.Insert(r);
-      }
-    }
-  }
+  add_call_kills(proj, regs, save_policy, exclude_soe);
 
   return node_cnt;
 }
@@ -776,6 +782,7 @@
     }
 #endif
 
+  uint max_idx = matcher.C->unique();
   // Pull from worklist and schedule
   while( worklist.size() ) {    // Worklist is not ready
 
@@ -815,11 +822,28 @@
       phi_cnt = sched_call(matcher, cfg->_bbs, phi_cnt, worklist, ready_cnt, mcall, next_call);
       continue;
     }
+
+    if (n->is_Mach() && n->as_Mach()->has_call()) {
+      RegMask regs;
+      regs.Insert(matcher.c_frame_pointer());
+      regs.OR(n->out_RegMask());
+
+      MachProjNode *proj = new (matcher.C, 1) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj );
+      cfg->_bbs.map(proj->_idx,this);
+      _nodes.insert(phi_cnt++, proj);
+
+      add_call_kills(proj, regs, matcher._c_reg_save_policy, false);
+    }
+
     // Children are now all ready
     for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
       Node* m = n->fast_out(i5); // Get user
       if( cfg->_bbs[m->_idx] != this ) continue;
       if( m->is_Phi() ) continue;
+      if (m->_idx > max_idx) { // new node, skip it
+        assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
+        continue;
+      }
       if( !--ready_cnt[m->_idx] )
         worklist.push(m);
     }
--- a/src/share/vm/opto/loopnode.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/loopnode.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -715,7 +715,6 @@
     long limit_con = cl->limit()->get_int();
     julong trip_cnt = cl->trip_count();
     long final_con = init_con + trip_cnt*stride_con;
-    final_con -= stride_con;
     int final_int = (int)final_con;
     // The final value should be in integer range since the loop
     // is counted and the limit was checked for overflow.
@@ -1947,7 +1946,7 @@
   }
 
   // Nothing to do, so get out
-  if( !C->has_loops() && !do_split_ifs && !_verify_me && !_verify_only ) {
+  if( !C->has_loops() && !skip_loop_opts && !do_split_ifs && !_verify_me && !_verify_only ) {
     _igvn.optimize();           // Cleanup NeverBranches
     return;
   }
--- a/src/share/vm/opto/machnode.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/machnode.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -480,21 +480,20 @@
 
 //=============================================================================
 int MachConstantNode::constant_offset() {
-  int offset = _constant.offset();
   // Bind the offset lazily.
-  if (offset == -1) {
+  if (_constant.offset() == -1) {
     Compile::ConstantTable& constant_table = Compile::current()->constant_table();
-    // If called from Compile::scratch_emit_size assume the worst-case
-    // for load offsets: half the constant table size.
-    // NOTE: Don't return or calculate the actual offset (which might
-    // be zero) because that leads to problems with e.g. jumpXtnd on
-    // some architectures (cf. add-optimization in SPARC jumpXtnd).
-    if (Compile::current()->in_scratch_emit_size())
-      return constant_table.size() / 2;
-    offset = constant_table.table_base_offset() + constant_table.find_offset(_constant);
-    _constant.set_offset(offset);
+    int offset = constant_table.find_offset(_constant);
+    // If called from Compile::scratch_emit_size return the
+    // pre-calculated offset.
+    // NOTE: If the AD file does some table base offset optimizations
+    // later the AD file needs to take care of this fact.
+    if (Compile::current()->in_scratch_emit_size()) {
+      return constant_table.calculate_table_base_offset() + offset;
+    }
+    _constant.set_offset(constant_table.table_base_offset() + offset);
   }
-  return offset;
+  return _constant.offset();
 }
 
 
--- a/src/share/vm/opto/machnode.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/machnode.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -190,6 +190,9 @@
   // Avoid back to back some instructions on some CPUs.
   bool avoid_back_to_back() const { return (flags() & Flag_avoid_back_to_back) != 0; }
 
+  // instruction implemented with a call
+  bool has_call() const { return (flags() & Flag_has_call) != 0; }
+
   // First index in _in[] corresponding to operand, or -1 if there is none
   int  operand_index(uint operand) const;
 
--- a/src/share/vm/opto/macro.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/macro.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -81,7 +81,7 @@
       uint old_unique = C->unique();
       Node* new_in = old_sosn->clone(jvms_adj, sosn_map);
       if (old_unique != C->unique()) {
-        new_in->set_req(0, newcall->in(0)); // reset control edge
+        new_in->set_req(0, C->root()); // reset control edge
         new_in = transform_later(new_in); // Register new node.
       }
       old_in = new_in;
@@ -565,7 +565,6 @@
   if (res == NULL) {
     // All users were eliminated.
   } else if (!res->is_CheckCastPP()) {
-    alloc->_is_scalar_replaceable = false;  // don't try again
     NOT_PRODUCT(fail_eliminate = "Allocation does not have unique CheckCastPP";)
     can_eliminate = false;
   } else {
@@ -719,7 +718,7 @@
                                                  alloc,
 #endif
                                                  first_ind, nfields);
-    sobj->init_req(0, sfpt->in(TypeFunc::Control));
+    sobj->init_req(0, C->root());
     transform_later(sobj);
 
     // Scan object's fields adding an input to the safepoint for each field.
@@ -762,10 +761,10 @@
 
       Node *field_val = value_from_mem(mem, basic_elem_type, field_type, field_addr_type, alloc);
       if (field_val == NULL) {
-        // we weren't able to find a value for this field,
-        // give up on eliminating this allocation
-        alloc->_is_scalar_replaceable = false;  // don't try again
-        // remove any extra entries we added to the safepoint
+        // We weren't able to find a value for this field,
+        // give up on eliminating this allocation.
+
+        // Remove any extra entries we added to the safepoint.
         uint last = sfpt->req() - 1;
         for (int k = 0;  k < j; k++) {
           sfpt->del_req(last--);
@@ -1804,9 +1803,9 @@
   #ifndef PRODUCT
   if (PrintEliminateLocks) {
     if (alock->is_Lock()) {
-      tty->print_cr("++++ Eliminating: %d Lock", alock->_idx);
+      tty->print_cr("++++ Eliminated: %d Lock", alock->_idx);
     } else {
-      tty->print_cr("++++ Eliminating: %d Unlock", alock->_idx);
+      tty->print_cr("++++ Eliminated: %d Unlock", alock->_idx);
     }
   }
   #endif
@@ -2165,11 +2164,12 @@
   _igvn.replace_node(_memproj_fallthrough, mem_phi);
 }
 
-//------------------------------expand_macro_nodes----------------------
-//  Returns true if a failure occurred.
-bool PhaseMacroExpand::expand_macro_nodes() {
+//---------------------------eliminate_macro_nodes----------------------
+// Eliminate scalar replaced allocations and associated locks.
+void PhaseMacroExpand::eliminate_macro_nodes() {
   if (C->macro_count() == 0)
-    return false;
+    return;
+
   // First, attempt to eliminate locks
   int cnt = C->macro_count();
   for (int i=0; i < cnt; i++) {
@@ -2189,14 +2189,6 @@
       debug_only(int old_macro_count = C->macro_count(););
       if (n->is_AbstractLock()) {
         success = eliminate_locking_node(n->as_AbstractLock());
-      } else if (n->Opcode() == Op_LoopLimit) {
-        // Remove it from macro list and put on IGVN worklist to optimize.
-        C->remove_macro_node(n);
-        _igvn._worklist.push(n);
-        success = true;
-      } else if (n->Opcode() == Op_Opaque1 || n->Opcode() == Op_Opaque2) {
-        _igvn.replace_node(n, n->in(1));
-        success = true;
       }
       assert(success == (C->macro_count() < old_macro_count), "elimination reduces macro count");
       progress = progress || success;
@@ -2220,18 +2212,50 @@
         assert(!n->as_AbstractLock()->is_eliminated(), "sanity");
         break;
       default:
-        assert(false, "unknown node type in macro list");
+        assert(n->Opcode() == Op_LoopLimit ||
+               n->Opcode() == Op_Opaque1   ||
+               n->Opcode() == Op_Opaque2, "unknown node type in macro list");
       }
       assert(success == (C->macro_count() < old_macro_count), "elimination reduces macro count");
       progress = progress || success;
     }
   }
+}
+
+//------------------------------expand_macro_nodes----------------------
+//  Returns true if a failure occurred.
+bool PhaseMacroExpand::expand_macro_nodes() {
+  // Last attempt to eliminate macro nodes.
+  eliminate_macro_nodes();
+
   // Make sure expansion will not cause node limit to be exceeded.
   // Worst case is a macro node gets expanded into about 50 nodes.
   // Allow 50% more for optimization.
   if (C->check_node_count(C->macro_count() * 75, "out of nodes before macro expansion" ) )
     return true;
 
+  // Eliminate Opaque and LoopLimit nodes. Do it after all loop optimizations.
+  bool progress = true;
+  while (progress) {
+    progress = false;
+    for (int i = C->macro_count(); i > 0; i--) {
+      Node * n = C->macro_node(i-1);
+      bool success = false;
+      debug_only(int old_macro_count = C->macro_count(););
+      if (n->Opcode() == Op_LoopLimit) {
+        // Remove it from macro list and put on IGVN worklist to optimize.
+        C->remove_macro_node(n);
+        _igvn._worklist.push(n);
+        success = true;
+      } else if (n->Opcode() == Op_Opaque1 || n->Opcode() == Op_Opaque2) {
+        _igvn.replace_node(n, n->in(1));
+        success = true;
+      }
+      assert(success == (C->macro_count() < old_macro_count), "elimination reduces macro count");
+      progress = progress || success;
+    }
+  }
+
   // expand "macro" nodes
   // nodes are removed from the macro list as they are processed
   while (C->macro_count() > 0) {
@@ -2265,5 +2289,6 @@
 
   _igvn.set_delay_transform(false);
   _igvn.optimize();
+  if (C->failing())  return true;
   return false;
 }
--- a/src/share/vm/opto/macro.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/macro.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -119,6 +119,7 @@
   PhaseMacroExpand(PhaseIterGVN &igvn) : Phase(Macro_Expand), _igvn(igvn) {
     _igvn.set_delay_transform(true);
   }
+  void eliminate_macro_nodes();
   bool expand_macro_nodes();
 
 };
--- a/src/share/vm/opto/matcher.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/matcher.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -294,7 +294,6 @@
   RegMask                     _return_value_mask;
   // Inline Cache Register
   static OptoReg::Name  inline_cache_reg();
-  static const RegMask &inline_cache_reg_mask();
   static int            inline_cache_reg_encode();
 
   // Register for DIVI projection of divmodI
@@ -324,7 +323,6 @@
   // and then expanded into the inline_cache_reg and a method_oop register
 
   static OptoReg::Name  interpreter_method_oop_reg();
-  static const RegMask &interpreter_method_oop_reg_mask();
   static int            interpreter_method_oop_reg_encode();
 
   static OptoReg::Name  compiler_method_oop_reg();
@@ -333,7 +331,6 @@
 
   // Interpreter's Frame Pointer Register
   static OptoReg::Name  interpreter_frame_pointer_reg();
-  static const RegMask &interpreter_frame_pointer_reg_mask();
 
   // Java-Native calling convention
   // (what you use when intercalling between Java and C++ code)
@@ -371,10 +368,6 @@
   // registers?  True for Intel but false for most RISCs
   static const bool clone_shift_expressions;
 
-  // Should constant table entries be accessed with loads using
-  // absolute addressing?  True for x86 but false for most RISCs.
-  static const bool constant_table_absolute_addressing;
-
   static bool narrow_oop_use_complex_address();
 
   // Generate implicit null check for narrow oops if it can fold
--- a/src/share/vm/opto/memnode.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/memnode.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -265,6 +265,13 @@
   if( phase->type( mem ) == Type::TOP ) return NodeSentinel; // caller will return NULL
   assert( mem != this, "dead loop in MemNode::Ideal" );
 
+  if (can_reshape && igvn != NULL && igvn->_worklist.member(mem)) {
+    // This memory slice may be dead.
+    // Delay this mem node transformation until the memory is processed.
+    phase->is_IterGVN()->_worklist.push(this);
+    return NodeSentinel; // caller will return NULL
+  }
+
   Node *address = in(MemNode::Address);
   const Type *t_adr = phase->type( address );
   if( t_adr == Type::TOP )              return NodeSentinel; // caller will return NULL
@@ -2661,6 +2668,8 @@
 // control copies
 Node *StrIntrinsicNode::Ideal(PhaseGVN *phase, bool can_reshape) {
   if (remove_dead_region(phase, can_reshape)) return this;
+  // Don't bother trying to transform a dead node
+  if (in(0) && in(0)->is_top())  return NULL;
 
   if (can_reshape) {
     Node* mem = phase->transform(in(MemNode::Memory));
@@ -2675,6 +2684,12 @@
   return NULL;
 }
 
+//------------------------------Value------------------------------------------
+const Type *StrIntrinsicNode::Value( PhaseTransform *phase ) const {
+  if (in(0) && phase->type(in(0)) == Type::TOP) return Type::TOP;
+  return bottom_type();
+}
+
 //=============================================================================
 MemBarNode::MemBarNode(Compile* C, int alias_idx, Node* precedent)
   : MultiNode(TypeFunc::Parms + (precedent == NULL? 0: 1)),
@@ -2715,6 +2730,8 @@
 // control copies
 Node *MemBarNode::Ideal(PhaseGVN *phase, bool can_reshape) {
   if (remove_dead_region(phase, can_reshape)) return this;
+  // Don't bother trying to transform a dead node
+  if (in(0) && in(0)->is_top())  return NULL;
 
   // Eliminate volatile MemBars for scalar replaced objects.
   if (can_reshape && req() == (Precedent+1) &&
--- a/src/share/vm/opto/memnode.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/memnode.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -800,6 +800,7 @@
   virtual uint match_edge(uint idx) const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+  virtual const Type *Value(PhaseTransform *phase) const;
 };
 
 //------------------------------StrComp-------------------------------------
--- a/src/share/vm/opto/node.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/opto/node.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -641,7 +641,8 @@
     Flag_is_dead_loop_safe   = Flag_is_cisc_alternate << 1,
     Flag_may_be_short_branch = Flag_is_dead_loop_safe << 1,
     Flag_avoid_back_to_back  = Flag_may_be_short_branch << 1,
-    _max_flags = (Flag_avoid_back_to_back << 1) - 1 // allow flags combination
+    Flag_has_call            = Flag_avoid_back_to_back << 1,
+    _max_flags = (Flag_has_call << 1) - 1 // allow flags combination
   };
 
 private:
--- a/src/share/vm/runtime/arguments.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/runtime/arguments.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -1359,9 +1359,12 @@
   // by ergonomics.
   if (MaxHeapSize <= max_heap_for_compressed_oops()) {
 #if !defined(COMPILER1) || defined(TIERED)
+// disable UseCompressedOops by default on MacOS X until 7118647 is fixed
+#ifndef __APPLE__
     if (FLAG_IS_DEFAULT(UseCompressedOops)) {
       FLAG_SET_ERGO(bool, UseCompressedOops, true);
     }
+#endif // !__APPLE__
 #endif
 #ifdef _WIN64
     if (UseLargePages && UseCompressedOops) {
@@ -1394,8 +1397,8 @@
   // If no heap maximum was requested explicitly, use some reasonable fraction
   // of the physical memory, up to a maximum of 1GB.
   if (UseParallelGC) {
-    FLAG_SET_ERGO(uintx, ParallelGCThreads,
-                  Abstract_VM_Version::parallel_worker_threads());
+    FLAG_SET_DEFAULT(ParallelGCThreads,
+                     Abstract_VM_Version::parallel_worker_threads());
 
     // If InitialSurvivorRatio or MinSurvivorRatio were not specified, but the
     // SurvivorRatio has been set, reset their default values to SurvivorRatio +
--- a/src/share/vm/runtime/globals.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/runtime/globals.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -55,10 +55,13 @@
                  MATERIALIZE_PRODUCT_FLAG, MATERIALIZE_PD_PRODUCT_FLAG, \
                  MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_NOTPRODUCT_FLAG)
 
+MATERIALIZE_FLAGS_EXT
+
+
 bool Flag::is_unlocker() const {
   return strcmp(name, "UnlockDiagnosticVMOptions") == 0     ||
-         strcmp(name, "UnlockExperimentalVMOptions") == 0;
-
+         strcmp(name, "UnlockExperimentalVMOptions") == 0   ||
+         is_unlocker_ext();
 }
 
 bool Flag::is_unlocked() const {
@@ -74,7 +77,7 @@
              strcmp(kind, "{C2 experimental}") == 0) {
     return UnlockExperimentalVMOptions;
   } else {
-    return true;
+    return is_unlocked_ext();
   }
 }
 
@@ -241,6 +244,7 @@
 #ifdef SHARK
  SHARK_FLAGS(SHARK_DEVELOP_FLAG_STRUCT, SHARK_PD_DEVELOP_FLAG_STRUCT, SHARK_PRODUCT_FLAG_STRUCT, SHARK_PD_PRODUCT_FLAG_STRUCT, SHARK_DIAGNOSTIC_FLAG_STRUCT, SHARK_NOTPRODUCT_FLAG_STRUCT)
 #endif
+ FLAGTABLE_EXT
  {0, NULL, NULL}
 };
 
--- a/src/share/vm/runtime/globals.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/runtime/globals.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -243,6 +243,9 @@
   bool is_writeable() const;
   bool is_external() const;
 
+  bool is_unlocker_ext() const;
+  bool is_unlocked_ext() const;
+
   void print_on(outputStream* st, bool withComments = false );
   void print_as_flag(outputStream* st);
 };
@@ -887,7 +890,7 @@
   diagnostic(bool, TraceNMethodInstalls, false,                             \
              "Trace nmethod intallation")                                   \
                                                                             \
-  diagnostic(intx, ScavengeRootsInCode, 1,                                  \
+  diagnostic(intx, ScavengeRootsInCode, 2,                                  \
              "0: do not allow scavengable oops in the code cache; "         \
              "1: allow scavenging from the code cache; "                    \
              "2: emit as many constants as the compiler can see")           \
@@ -1416,6 +1419,21 @@
   product(uintx, ParallelGCThreads, 0,                                      \
           "Number of parallel threads parallel gc will use")                \
                                                                             \
+  product(bool, UseDynamicNumberOfGCThreads, false,                         \
+          "Dynamically choose the number of parallel threads "              \
+          "parallel gc will use")                                           \
+                                                                            \
+  diagnostic(bool, ForceDynamicNumberOfGCThreads, false,                    \
+          "Force dynamic selection of the number of"                        \
+          "parallel threads parallel gc will use to aid debugging")         \
+                                                                            \
+  product(uintx, HeapSizePerGCThread, ScaleForWordSize(64*M),               \
+          "Size of heap (bytes) per GC thread used in calculating the "     \
+          "number of GC threads")                                           \
+                                                                            \
+  product(bool, TraceDynamicGCThreads, false,                               \
+          "Trace the dynamic GC thread usage")                              \
+                                                                            \
   develop(bool, ParallelOldGCSplitALot, false,                              \
           "Provoke splitting (copying data from a young gen space to"       \
           "multiple destination spaces)")                                   \
@@ -2357,7 +2375,7 @@
   develop(bool, TraceGCTaskQueue, false,                                    \
           "Trace actions of the GC task queues")                            \
                                                                             \
-  develop(bool, TraceGCTaskThread, false,                                   \
+  diagnostic(bool, TraceGCTaskThread, false,                                   \
           "Trace actions of the GC task threads")                           \
                                                                             \
   product(bool, PrintParallelOldGCPhaseTimes, false,                        \
@@ -3907,4 +3925,8 @@
 
 RUNTIME_OS_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG)
 
+// Extensions
+
+#include "runtime/globals_ext.hpp"
+
 #endif // SHARE_VM_RUNTIME_GLOBALS_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/runtime/globals_ext.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_RUNTIME_GLOBALS_EXT_HPP
+#define SHARE_VM_RUNTIME_GLOBALS_EXT_HPP
+
+// globals_extension.hpp extension
+
+// Additional CommandLineFlags enum values
+#define COMMANDLINEFLAG_EXT
+
+// Additional CommandLineFlagsWithType enum values
+#define COMMANDLINEFLAGWITHTYPE_EXT
+
+
+// globals.cpp extension
+
+// Additional flag definitions
+#define MATERIALIZE_FLAGS_EXT
+
+// Additional flag descriptors: see flagTable definition
+#define FLAGTABLE_EXT
+
+
+// Default method implementations
+
+inline bool Flag::is_unlocker_ext() const {
+  return false;
+}
+
+inline bool Flag::is_unlocked_ext() const {
+  return true;
+}
+
+#endif // SHARE_VM_RUNTIME_GLOBALS_EXT_HPP
--- a/src/share/vm/runtime/globals_extension.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/runtime/globals_extension.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -93,6 +93,7 @@
 #ifdef COMPILER2
  C2_FLAGS(C2_DEVELOP_FLAG_MEMBER, C2_PD_DEVELOP_FLAG_MEMBER, C2_PRODUCT_FLAG_MEMBER, C2_PD_PRODUCT_FLAG_MEMBER, C2_DIAGNOSTIC_FLAG_MEMBER, C2_EXPERIMENTAL_FLAG_MEMBER, C2_NOTPRODUCT_FLAG_MEMBER)
 #endif
+ COMMANDLINEFLAG_EXT
  NUM_CommandLineFlag
 } CommandLineFlag;
 
@@ -192,6 +193,7 @@
           C2_EXPERIMENTAL_FLAG_MEMBER_WITH_TYPE,
           C2_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE)
 #endif
+ COMMANDLINEFLAGWITHTYPE_EXT
  NUM_CommandLineFlagWithType
 } CommandLineFlagWithType;
 
--- a/src/share/vm/runtime/simpleThresholdPolicy.cpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/runtime/simpleThresholdPolicy.cpp	Fri Dec 16 15:07:10 2011 -0800
@@ -30,6 +30,27 @@
 #include "runtime/simpleThresholdPolicy.inline.hpp"
 #include "code/scopeDesc.hpp"
 
+
+void SimpleThresholdPolicy::print_counters(const char* prefix, methodHandle mh) {
+  int invocation_count = mh->invocation_count();
+  int backedge_count = mh->backedge_count();
+  methodDataHandle mdh = mh->method_data();
+  int mdo_invocations = 0, mdo_backedges = 0;
+  int mdo_invocations_start = 0, mdo_backedges_start = 0;
+  if (mdh() != NULL) {
+    mdo_invocations = mdh->invocation_count();
+    mdo_backedges = mdh->backedge_count();
+    mdo_invocations_start = mdh->invocation_count_start();
+    mdo_backedges_start = mdh->backedge_count_start();
+  }
+  tty->print(" %stotal: %d,%d %smdo: %d(%d),%d(%d)", prefix,
+      invocation_count, backedge_count, prefix,
+      mdo_invocations, mdo_invocations_start,
+      mdo_backedges, mdo_backedges_start);
+  tty->print(" %smax levels: %d,%d", prefix,
+      mh->highest_comp_level(), mh->highest_osr_comp_level());
+}
+
 // Print an event.
 void SimpleThresholdPolicy::print_event(EventType type, methodHandle mh, methodHandle imh,
                                         int bci, CompLevel level) {
@@ -38,8 +59,6 @@
   ttyLocker tty_lock;
   tty->print("%lf: [", os::elapsedTime());
 
-  int invocation_count = mh->invocation_count();
-  int backedge_count = mh->backedge_count();
   switch(type) {
   case CALL:
     tty->print("call");
@@ -82,23 +101,9 @@
   print_specific(type, mh, imh, bci, level);
 
   if (type != COMPILE) {
-    methodDataHandle mdh = mh->method_data();
-    int mdo_invocations = 0, mdo_backedges = 0;
-    int mdo_invocations_start = 0, mdo_backedges_start = 0;
-    if (mdh() != NULL) {
-      mdo_invocations = mdh->invocation_count();
-      mdo_backedges = mdh->backedge_count();
-      mdo_invocations_start = mdh->invocation_count_start();
-      mdo_backedges_start = mdh->backedge_count_start();
-    }
-    tty->print(" total: %d,%d mdo: %d(%d),%d(%d)",
-               invocation_count, backedge_count,
-               mdo_invocations, mdo_invocations_start,
-               mdo_backedges, mdo_backedges_start);
-    tty->print(" max levels: %d,%d",
-                mh->highest_comp_level(), mh->highest_osr_comp_level());
+    print_counters("", mh);
     if (inlinee_event) {
-      tty->print(" inlinee max levels: %d,%d", imh->highest_comp_level(), imh->highest_osr_comp_level());
+      print_counters("inlinee ", imh);
     }
     tty->print(" compilable: ");
     bool need_comma = false;
--- a/src/share/vm/runtime/simpleThresholdPolicy.hpp	Thu Dec 15 12:57:57 2011 -0800
+++ b/src/share/vm/runtime/simpleThresholdPolicy.hpp	Fri Dec 16 15:07:10 2011 -0800
@@ -55,7 +55,7 @@
   // loop_event checks if a method should be OSR compiled at a different
   // level.
   CompLevel loop_event(methodOop method, CompLevel cur_level);
-
+  void print_counters(const char* prefix, methodHandle mh);
 protected:
   int c1_count() const     { return _c1_count; }
   int c2_count() const     { return _c2_count; }
--- a/src/share/vm/runtime/thread.cpp	Thu Dec 15 12