changeset 9872:9e494b42984b

Merge
author asaha
date Thu, 05 May 2011 22:28:31 -0700
parents 2510e81b3d25 92bf0655022d
children 420381b7095e
files hotspot/make/linux/makefiles/cscope.make hotspot/make/solaris/makefiles/cscope.make jdk/src/share/classes/sun/security/util/SignatureFileManifest.java jdk/test/java/beans/XMLEncoder/java_io_File.java langtools/test/tools/javac/generics/diamond/7030150/Neg01.java langtools/test/tools/javac/generics/diamond/7030150/Neg01.out langtools/test/tools/javac/generics/diamond/7030150/Neg02.java langtools/test/tools/javac/generics/diamond/7030150/Neg02.out langtools/test/tools/javac/generics/diamond/7030150/Neg03.java langtools/test/tools/javac/generics/diamond/7030150/Neg03.out langtools/test/tools/javac/generics/diamond/7030150/Pos01.java langtools/test/tools/javac/generics/diamond/7030150/Pos02.java
diffstat 876 files changed, 52218 insertions(+), 33475 deletions(-) [+]
line wrap: on
line diff
--- a/.hgignore	Wed May 04 12:00:14 2011 -0700
+++ b/.hgignore	Thu May 05 22:28:31 2011 -0700
@@ -2,3 +2,4 @@
 ^dist/
 /nbproject/private/
 ^webrev
+^.hgtip
--- a/.hgtags	Wed May 04 12:00:14 2011 -0700
+++ b/.hgtags	Thu May 05 22:28:31 2011 -0700
@@ -114,3 +114,5 @@
 d1cf7d4ee16c341f5b8c7e7f1d68a8c412b6c693 jdk7-b137
 62b8e328f8c8c66c14b0713222116f2add473f3f jdk7-b138
 955488f34ca418f6cdab843d61c20d2c615637d9 jdk7-b139
+f4298bc3f4b6baa315643be06966f09684290068 jdk7-b140
+5d86d0c7692e8f4a58d430d68c03594e2d3403b3 jdk7-b141
--- a/.hgtags-top-repo	Wed May 04 12:00:14 2011 -0700
+++ b/.hgtags-top-repo	Thu May 05 22:28:31 2011 -0700
@@ -114,3 +114,5 @@
 7654afc6a29e43cb0a1343ce7f1287bf690d5e5f jdk7-b137
 fc47c97bbbd91b1f774d855c48a7e285eb1a351a jdk7-b138
 7ed6d0b9aaa12320832a7ddadb88d6d8d0dda4c1 jdk7-b139
+dcfe74f1c6553c556e7d361c30b0b614eb5e40f6 jdk7-b140
+c6569c5585851dfd39b8de8e021c3c312f51af12 jdk7-b141
--- a/Makefile	Wed May 04 12:00:14 2011 -0700
+++ b/Makefile	Thu May 05 22:28:31 2011 -0700
@@ -97,7 +97,7 @@
 endef
 
 # Generic build of basic repo series
-generic_build_repo_series::
+generic_build_repo_series:: $(SOURCE_TIPS)
 	$(MKDIR) -p $(OUTPUTDIR)
 	$(MKDIR) -p $(OUTPUTDIR)/j2sdk-image
 	@$(call StartTimer)
@@ -243,6 +243,14 @@
 debug_build:: build_debug_image
 fastdebug_build:: build_fastdebug_image
 
+# The source tips are stored with the relative path to the repo.
+#   This file will be used when constructing the jdk image.
+source_tips: $(SOURCE_TIPS)
+	$(CAT) $<
+$(SOURCE_TIPS): FRC
+	@$(prep-target)
+	@$(call GetSourceTips)
+
 clobber:: REPORT_BUILD_TIMES=
 clobber:: 
 	$(RM) -r $(OUTPUTDIR)/*
--- a/corba/.hgignore	Wed May 04 12:00:14 2011 -0700
+++ b/corba/.hgignore	Thu May 05 22:28:31 2011 -0700
@@ -1,3 +1,4 @@
 ^build/
 ^dist/
 /nbproject/private/
+^.hgtip
--- a/corba/.hgtags	Wed May 04 12:00:14 2011 -0700
+++ b/corba/.hgtags	Thu May 05 22:28:31 2011 -0700
@@ -114,3 +114,5 @@
 a66c01d8bf895261715955df0b95545c000ed6a8 jdk7-b137
 78d8cf04697e9df54f7f11e195b7da29b8e345a2 jdk7-b138
 60b074ec6fcf5cdf9efce22fdfb02326ed8fa2d3 jdk7-b139
+cdf5d19ec142424489549025e9c42e51f32cf688 jdk7-b140
+a58635cdd921bafef353f4864184a0481353197b jdk7-b141
--- a/corba/src/share/classes/com/sun/corba/se/impl/orbutil/resources/sunorb_pt_BR.properties	Wed May 04 12:00:14 2011 -0700
+++ b/corba/src/share/classes/com/sun/corba/se/impl/orbutil/resources/sunorb_pt_BR.properties	Thu May 05 22:28:31 2011 -0700
@@ -23,9 +23,9 @@
 # questions.
 #
 
-orbd.usage=Uso: {0} <options> \n\nem que <options> inclui:\n  -port                  porta de ativa\u00E7\u00E3o na qual o ORBD deve ser iniciado, default 1049 (opcional)\n  -defaultdb             diret\u00F3rio dos arquivos ORBD, default "./orb.db" (opcional)\n  -serverid              id do servidor para ORBD, default 1 (opcional)\n  -ORBInitialPort        porta inicial (necess\u00E1rio)\n  -ORBInitialHost        nome de host inicial (necess\u00E1rio)\n
+orbd.usage=Uso: {0} <op\u00E7\u00F5es> \n\nem que <op\u00E7\u00F5es> inclui:\n  -port                  porta de ativa\u00E7\u00E3o na qual o ORBD deve ser iniciado, default 1049 (opcional)\n  -defaultdb             diret\u00F3rio dos arquivos ORBD, default "./orb.db" (opcional)\n  -serverid              id do servidor para ORBD, default 1 (opcional)\n  -ORBInitialPort        porta inicial (obrigat\u00F3rio)\n  -ORBInitialHost        nome de host inicial (obrigat\u00F3rio)\n
 
-servertool.usage=Uso: {0} <options> \n\nem que <options> inclui:\n  -ORBInitialPort        porta inicial (necess\u00E1rio)\n  -ORBInitialHost        nome de host inicial (necess\u00E1rio)\n
+servertool.usage=Uso: {0} <op\u00E7\u00F5es> \n\nem que <op\u00E7\u00F5es> inclui:\n  -ORBInitialPort        porta inicial (obrigat\u00F3rio)\n  -ORBInitialHost        nome de host inicial (obrigat\u00F3rio)\n
 servertool.banner=\n\nBem-vindo \u00E0 Ferramenta de Servidor IDL Java \ninsira os comandos no prompt \n
 servertool.shorthelp=\n\n\tComandos Dispon\u00EDveis: \n\t------------------- \n
 servertool.baddef=Defini\u00E7\u00E3o do servidor inv\u00E1lida: {0}
@@ -40,23 +40,23 @@
 servertool.vmargs=\tvmargs    - {0}
 servertool.serverid=\tserver id - {0}
 servertool.servernotrunning=\to servidor n\u00E3o est\u00E1 em execu\u00E7\u00E3o.
-servertool.register=\n\n\tregister -server <server class name> \n\t         -applicationName <alternate server name> \n\t         -classpath <classpath to server> \n\t         -args <args to server> \n\t         -vmargs <args to server Java VM>\n
+servertool.register=\n\n\tregister -server <nome da classe do servidor> \n\t         -applicationName <nome do servidor alternativo> \n\t         -classpath <classpath para o servidor> \n\t         -args <args para o servidor> \n\t         -vmargs <args para a VM Java do servidor>\n
 servertool.register1=registra um servidor ativ\u00E1vel
 servertool.register2=\tservidor registrado (serverid = {0}).
 servertool.register3=\tservidor registrado, mas em espera (serverid = {0}).
 servertool.register4=\tservidor j\u00E1 registrado (serverid = {0}).
 
-servertool.unregister=\n\tunregister [ -serverid <server id> | -applicationName <name> ] \n
+servertool.unregister=\n\tunregister [ -serverid <id do servidor> | -applicationName <nome> ] \n
 servertool.unregister1=cancela o registro de um servidor registrado
 servertool.unregister2=\tservidor n\u00E3o registrado.
 
-servertool.locate=\n\tlocate [ -serverid <server id> | -applicationName <name> ] [ <-endpointType <endpointType> ] \n
+servertool.locate=\n\tlocate [ -serverid <id do servidor> | -applicationName <nome> ] [ <-endpointType <endpointType> ] \n
 servertool.locate1=localiza portas de tipo espec\u00EDfico para um servidor registrado
 servertool.locate2=\n\n\tNome do Host {0} \n\n\t\tPorta\t\tTipo de Porta\t\tId do ORB\n\t\t----\t\t---------\t\t------\n
-servertool.locateorb=\n\tlocateperorb [ -serverid <server id> | -applicationName <name> ] [ -orbid <ORB name> ]\n
+servertool.locateorb=\n\tlocateperorb [ -serverid <id do servidor> | -applicationName <nome> ] [ -orbid <nome ORB> ]\n
 servertool.locateorb1=localiza portas para um orb espec\u00EDfico de servidor registrado
 servertool.locateorb2=\n\n\tNome do Host {0} \n\n\t\tPorta\t\tTipo de Porta\t\tId do ORB\n\t\t----\t\t--------\t\t------\n
-servertool.getserverid=\n\tgetserverid [ -applicationName <name> ] \n
+servertool.getserverid=\n\tgetserverid [ -applicationName <nome> ] \n
 servertool.getserverid1=retorna o id do servidor de um applicationName
 servertool.getserverid2=\tID do Servidor de applicationName {0} \u00E9 {1}
 
@@ -69,33 +69,33 @@
 servertool.listappnames1=lista os applicationNames atualmente definidos
 servertool.listappnames2=applicationNames do servidor definidos atualmente:
 
-servertool.shutdown=\n\tshutdown [ -serverid <server id> | -applicationName <name> ]\n
+servertool.shutdown=\n\tshutdown [ -serverid <id do servidor> | -applicationName <nome> ]\n
 servertool.shutdown1=faz shutdown de um servidor registrado
 servertool.shutdown2=\tshutdown do servidor bem-sucedido.
-servertool.startserver=\n\tstartup [ -serverid <server id> | -applicationName <name> ]\n
+servertool.startserver=\n\tstartup [ -serverid <id do servidor> | -applicationName <nome> ]\n
 servertool.startserver1=inicia um servidor registrado
 servertool.startserver2=\tservidor iniciado com \u00EAxito.
 
 servertool.quit=\n\tquit\n
 servertool.quit1=sai desta ferramenta
 
-servertool.help=\thelp\n\tOR\n\thelp <command name>\n
+servertool.help=\thelp\n\tOR\n\thelp <nome do comando>\n
 servertool.help1=obt\u00E9m ajuda
 
-servertool.orbidmap=\tUso: orblist [ -serverid <server id> | -applicationName <name> ]\n
+servertool.orbidmap=\tUso: orblist [ -serverid <id do servidor> | -applicationName <nome> ]\n
 servertool.orbidmap1=lista de nomes de orb e seus mapeamentos
 servertool.orbidmap2=\n\tId de ORB\t\tNome de ORB\n\t------\t\t--------\n
 pnameserv.success=NameServer Persistente Iniciado com \u00CAxito
 
 
-bootstrap.usage=Uso: {0} <options> \n\nem que <options> inclui:\n  -ORBInitialPort        porta inicial (necess\u00E1rio)\n  -InitialServicesFile   arquivo que cont\u00E9m a lista de servi\u00E7os iniciais (necess\u00E1rio)\n
+bootstrap.usage=Uso: {0} <op\u00E7\u00F5es> \n\nem que <op\u00E7\u00F5es> inclui:\n  -ORBInitialPort        porta inicial (obrigat\u00F3rio)\n  -InitialServicesFile   arquivo que cont\u00E9m a lista de servi\u00E7os iniciais (obrigat\u00F3rio)\n
 bootstrap.success=definindo porta para {0} e lendo servi\u00E7os de {1}
 bootstrap.filenotreadable=o arquivo {0} n\u00E3o \u00E9 leg\u00EDvel
 bootstrap.filenotfound=arquivo {0} n\u00E3o encontrado
 bootstrap.exception=exce\u00E7\u00E3o capturada ao salvar as propriedades no Arquivo {0}: exce\u00E7\u00E3o {1}
 
 tnameserv.exception=uma exce\u00E7\u00E3o capturada ao iniciar o servi\u00E7o de inicializa\u00E7\u00E3o na porta {0}
-tnameserv.usage=tente usar outra porta com os argumentos de linha de comandos -ORBInitialPort <portno>
+tnameserv.usage=tente usar outra porta com os argumentos de linha de comandos -ORBInitialPort <n\u00BA da porta>
 tnameserv.invalidhostoption=ORBInitialHost n\u00E3o \u00E9 uma op\u00E7\u00E3o v\u00E1lida para NameService
 tnameserv.orbinitialport0=ORBInitialPort 0 n\u00E3o \u00E9 uma op\u00E7\u00E3o v\u00E1lida para NameService
 tnameserv.hs1=Contexto de Nomea\u00E7\u00E3o Inicial:\n{0}
--- a/hotspot/.hgignore	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/.hgignore	Thu May 05 22:28:31 2011 -0700
@@ -5,3 +5,4 @@
 ^src/share/tools/IdealGraphVisualizer/[a-zA-Z0-9]*/build/
 ^src/share/tools/IdealGraphVisualizer/build/
 ^src/share/tools/IdealGraphVisualizer/dist/
+^.hgtip
--- a/hotspot/.hgtags	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/.hgtags	Thu May 05 22:28:31 2011 -0700
@@ -166,3 +166,7 @@
 0930dc920c185afbf40fed9a655290b8e5b16783 hs21-b08
 611e19a16519d6fb5deea9ab565336e6e6ee475d jdk7-b139
 611e19a16519d6fb5deea9ab565336e6e6ee475d hs21-b09
+d283b82966712b353fa307845a1316da42a355f4 jdk7-b140
+d283b82966712b353fa307845a1316da42a355f4 hs21-b10
+5d07913abd59261c77f24cc04a759cb75d804099 jdk7-b141
+3aea9e9feb073f5500e031be6186666bcae89aa2 hs21-b11
--- a/hotspot/agent/make/Makefile	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/agent/make/Makefile	Thu May 05 22:28:31 2011 -0700
@@ -257,7 +257,7 @@
 all: filelist
 	@mkdir -p $(OUTPUT_DIR)
 	@echo "$(SA_BUILD_VERSION_PROP)" > $(SA_PROPERTIES)
-	$(JAVAC) -source 1.4 -classpath $(CLASSPATH) -deprecation -sourcepath $(SRC_DIR) -g -d $(OUTPUT_DIR) @filelist
+	$(JAVAC) -classpath $(CLASSPATH) -deprecation -sourcepath $(SRC_DIR) -g -d $(OUTPUT_DIR) @filelist
 	$(RMIC) -classpath $(OUTPUT_DIR) -d $(OUTPUT_DIR) sun.jvm.hotspot.debugger.remote.RemoteDebuggerServer
 	rm -f $(OUTPUT_DIR)/sun/jvm/hotspot/utilities/soql/sa.js
 	cp $(SRC_DIR)/sun/jvm/hotspot/utilities/soql/sa.js $(OUTPUT_DIR)/sun/jvm/hotspot/utilities/soql
@@ -269,7 +269,7 @@
 allprof: filelist
 	@mkdir -p $(OUTPUT_DIR)
 	@echo "$(SA_BUILD_VERSION_PROP)" > $(SA_PROPERTIES)
-	$(JAVAC) -source 1.4 -J-Xprof -classpath $(CLASSPATH) -deprecation -sourcepath $(SRC_DIR) -g -d $(OUTPUT_DIR) @filelist
+	$(JAVAC) -J-Xprof -classpath $(CLASSPATH) -deprecation -sourcepath $(SRC_DIR) -g -d $(OUTPUT_DIR) @filelist
 	$(RMIC) -classpath $(OUTPUT_DIR) -d $(OUTPUT_DIR) sun.jvm.hotspot.debugger.remote.RemoteDebuggerServer
 	rm -f $(OUTPUT_DIR)/sun/jvm/hotspot/utilities/soql/sa.js
 	cp $(SRC_DIR)/sun/jvm/hotspot/utilities/soql/sa.js $(OUTPUT_DIR)/sun/jvm/hotspot/utilities/soql
--- a/hotspot/agent/src/os/solaris/proc/libproc.h	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/agent/src/os/solaris/proc/libproc.h	Thu May 05 22:28:31 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -420,7 +420,22 @@
 /*
  * Stack frame iteration interface.
  */
+#ifdef SOLARIS_11_B159_OR_LATER
+/* building on Nevada-B159 or later so define the new callback */
+typedef int proc_stack_f(
+    void *,             /* the cookie given to Pstack_iter() */
+    const prgregset_t,  /* the frame's registers */
+    uint_t,             /* argc for the frame's function */
+    const long *,       /* argv for the frame's function */
+    int,                /* bitwise flags describing the frame (see below) */
+    int);               /* a signal number */
+
+#define PR_SIGNAL_FRAME    1    /* called by a signal handler */
+#define PR_FOUND_SIGNAL    2    /* we found the corresponding signal number */
+#else
+/* building on Nevada-B158 or earlier so define the old callback */
 typedef int proc_stack_f(void *, const prgregset_t, uint_t, const long *);
+#endif
 
 extern int Pstack_iter(struct ps_prochandle *,
     const prgregset_t, proc_stack_f *, void *);
--- a/hotspot/agent/src/os/solaris/proc/salibproc.h	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/agent/src/os/solaris/proc/salibproc.h	Thu May 05 22:28:31 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2005, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -101,7 +101,23 @@
 /*
  * Stack frame iteration interface.
  */
+#ifdef SOLARIS_11_B159_OR_LATER
+/* building on Nevada-B159 or later so define the new callback */
+typedef int proc_stack_f(
+    void *,             /* the cookie given to Pstack_iter() */
+    const prgregset_t,  /* the frame's registers */
+    uint_t,             /* argc for the frame's function */
+    const long *,       /* argv for the frame's function */
+    int,                /* bitwise flags describing the frame (see below) */
+    int);               /* a signal number */
+
+#define PR_SIGNAL_FRAME    1    /* called by a signal handler */
+#define PR_FOUND_SIGNAL    2    /* we found the corresponding signal number */
+#else
+/* building on Nevada-B158 or earlier so define the old callback */
 typedef int proc_stack_f(void *, const prgregset_t, uint_t, const long *);
+#endif
+
 extern int Pstack_iter(struct ps_prochandle *,
     const prgregset_t, proc_stack_f *, void *);
 
--- a/hotspot/agent/src/os/solaris/proc/saproc.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/agent/src/os/solaris/proc/saproc.cpp	Thu May 05 22:28:31 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,6 +24,9 @@
 
 #include "salibproc.h"
 #include "sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal.h"
+#ifndef SOLARIS_11_B159_OR_LATER
+#include <sys/utsname.h>
+#endif
 #include <thread_db.h>
 #include <strings.h>
 #include <limits.h>
@@ -40,8 +43,22 @@
 #define SYMBOL_BUF_SIZE  256
 #define ERR_MSG_SIZE     (PATH_MAX + 256)
 
-// debug mode
+// debug modes
 static int _libsaproc_debug = 0;
+#ifndef SOLARIS_11_B159_OR_LATER
+static bool _Pstack_iter_debug = false;
+
+static void dprintf_2(const char* format,...) {
+  if (_Pstack_iter_debug) {
+    va_list alist;
+
+    va_start(alist, format);
+    fputs("Pstack_iter DEBUG: ", stderr);
+    vfprintf(stderr, format, alist);
+    va_end(alist);
+  }
+}
+#endif // !SOLARIS_11_B159_OR_LATER
 
 static void print_debug(const char* format,...) {
   if (_libsaproc_debug) {
@@ -450,6 +467,7 @@
   return 0;
 }
 
+// Pstack_iter() proc_stack_f callback prior to Nevada-B159
 static int
 fill_cframe_list(void *cd, const prgregset_t regs, uint_t argc, const long *argv) {
   DebuggerWith2Objects* dbgo2 = (DebuggerWith2Objects*) cd;
@@ -472,6 +490,14 @@
   return 0;
 }
 
+// Pstack_iter() proc_stack_f callback in Nevada-B159 or later
+/*ARGSUSED*/
+static int
+wrapper_fill_cframe_list(void *cd, const prgregset_t regs, uint_t argc,
+                         const long *argv, int frame_flags, int sig) {
+  return(fill_cframe_list(cd, regs, argc, argv));
+}
+
 // part of the class sharing workaround
 
 // FIXME: !!HACK ALERT!!
@@ -970,6 +996,11 @@
                    TD_THR_ANY_STATE, TD_THR_LOWEST_PRIORITY, TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS);
 }
 
+#ifndef SOLARIS_11_B159_OR_LATER
+// building on Nevada-B158 or earlier so more hoops to jump through
+static bool has_newer_Pstack_iter = false;  // older version by default
+#endif
+
 /*
  * Class:       sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal
  * Method:      fillCFrameList0
@@ -997,7 +1028,24 @@
 
   env->ReleaseLongArrayElements(regsArray, ptr, JNI_ABORT);
   CHECK_EXCEPTION_(0);
-  Pstack_iter((struct ps_prochandle*) p_ps_prochandle, gregs, fill_cframe_list, &dbgo2);
+
+#ifdef SOLARIS_11_B159_OR_LATER
+  // building on Nevada-B159 or later so use the new callback
+  Pstack_iter((struct ps_prochandle*) p_ps_prochandle, gregs,
+              wrapper_fill_cframe_list, &dbgo2);
+#else
+  // building on Nevada-B158 or earlier so figure out which callback to use
+
+  if (has_newer_Pstack_iter) {
+    // Since we're building on Nevada-B158 or earlier, we have to
+    // cast wrapper_fill_cframe_list to make the compiler happy.
+    Pstack_iter((struct ps_prochandle*) p_ps_prochandle, gregs,
+                (proc_stack_f *)wrapper_fill_cframe_list, &dbgo2);
+  } else {
+    Pstack_iter((struct ps_prochandle*) p_ps_prochandle, gregs,
+                fill_cframe_list, &dbgo2);
+  }
+#endif // SOLARIS_11_B159_OR_LATER
   return dbgo2.obj;
 }
 
@@ -1218,6 +1266,102 @@
   return res;
 }
 
+#ifndef SOLARIS_11_B159_OR_LATER
+// Determine if the OS we're running on has the newer version
+// of libproc's Pstack_iter.
+//
+// Set env var PSTACK_ITER_DEBUG=true to debug this logic.
+// Set env var PSTACK_ITER_DEBUG_RELEASE to simulate a 'release' value.
+// Set env var PSTACK_ITER_DEBUG_VERSION to simulate a 'version' value.
+//
+// frankenputer 'uname -r -v': 5.10 Generic_141445-09
+// jurassic 'uname -r -v':     5.11 snv_164
+// lonepeak 'uname -r -v':     5.11 snv_127
+//
+static void set_has_newer_Pstack_iter(JNIEnv *env) {
+  static bool done_set = false;
+
+  if (done_set) {
+    // already set has_newer_Pstack_iter
+    return;
+  }
+
+  struct utsname name;
+  if (uname(&name) == -1) {
+    THROW_NEW_DEBUGGER_EXCEPTION("uname() failed!");
+  }
+  dprintf_2("release='%s'  version='%s'\n", name.release, name.version);
+
+  if (_Pstack_iter_debug) {
+    char *override = getenv("PSTACK_ITER_DEBUG_RELEASE");
+    if (override != NULL) {
+      strncpy(name.release, override, SYS_NMLN - 1);
+      name.release[SYS_NMLN - 2] = '\0';
+      dprintf_2("overriding with release='%s'\n", name.release);
+    }
+    override = getenv("PSTACK_ITER_DEBUG_VERSION");
+    if (override != NULL) {
+      strncpy(name.version, override, SYS_NMLN - 1);
+      name.version[SYS_NMLN - 2] = '\0';
+      dprintf_2("overriding with version='%s'\n", name.version);
+    }
+  }
+
+  // the major number corresponds to the old SunOS major number
+  int major = atoi(name.release);
+  if (major >= 6) {
+    dprintf_2("release is SunOS 6 or later\n");
+    has_newer_Pstack_iter = true;
+    done_set = true;
+    return;
+  }
+  if (major < 5) {
+    dprintf_2("release is SunOS 4 or earlier\n");
+    done_set = true;
+    return;
+  }
+
+  // some SunOS 5.* build so now check for Solaris versions
+  char *dot = strchr(name.release, '.');
+  int minor = 0;
+  if (dot != NULL) {
+    // release is major.minor format
+    *dot = NULL;
+    minor = atoi(dot + 1);
+  }
+
+  if (minor <= 10) {
+    dprintf_2("release is Solaris 10 or earlier\n");
+    done_set = true;
+    return;
+  } else if (minor >= 12) {
+    dprintf_2("release is Solaris 12 or later\n");
+    has_newer_Pstack_iter = true;
+    done_set = true;
+    return;
+  }
+
+  // some Solaris 11 build so now check for internal build numbers
+  if (strncmp(name.version, "snv_", 4) != 0) {
+    dprintf_2("release is Solaris 11 post-GA or later\n");
+    has_newer_Pstack_iter = true;
+    done_set = true;
+    return;
+  }
+
+  // version begins with "snv_" so a pre-GA build of Solaris 11
+  int build = atoi(&name.version[4]);
+  if (build >= 159) {
+    dprintf_2("release is Nevada-B159 or later\n");
+    has_newer_Pstack_iter = true;
+  } else {
+    dprintf_2("release is Nevada-B158 or earlier\n");
+  }
+
+  done_set = true;
+}
+#endif // !SOLARIS_11_B159_OR_LATER
+
 /*
  * Class:       sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal
  * Method:      initIDs
@@ -1237,6 +1381,14 @@
   if (libproc_handle == 0)
      THROW_NEW_DEBUGGER_EXCEPTION("can't load libproc.so, if you are using Solaris 5.7 or below, copy libproc.so from 5.8!");
 
+#ifndef SOLARIS_11_B159_OR_LATER
+  _Pstack_iter_debug = getenv("PSTACK_ITER_DEBUG") != NULL;
+
+  set_has_newer_Pstack_iter(env);
+  CHECK_EXCEPTION;
+  dprintf_2("has_newer_Pstack_iter=%d\n", has_newer_Pstack_iter);
+#endif
+
   p_ps_prochandle_ID = env->GetFieldID(clazz, "p_ps_prochandle", "J");
   CHECK_EXCEPTION;
 
--- a/hotspot/make/altsrc.make	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/make/altsrc.make	Thu May 05 22:28:31 2011 -0700
@@ -24,7 +24,8 @@
 
 # This file defines variables and macros which are used in the makefiles to 
 # allow distributions to augment or replace common hotspot code with 
-# distribution-specific source files.
+# distribution-specific source files. This capability is disabled when
+# an OPENJDK build is requested, unless HS_ALT_SRC_REL has been set externally.
 
 # Requires: GAMMADIR
 # Provides:
@@ -33,14 +34,17 @@
 
 HS_COMMON_SRC_REL=src
 
-# This needs to be changed to a more generic location, but we keep it as this 
-# for now for compatibility
-HS_ALT_SRC_REL=src/closed
+ifneq ($(OPENJDK),true)
+  # This needs to be changed to a more generic location, but we keep it 
+  # as this for now for compatibility
+  HS_ALT_SRC_REL=src/closed
+else
+  HS_ALT_SRC_REL=NO_SUCH_PATH
+endif
 
 HS_COMMON_SRC=$(GAMMADIR)/$(HS_COMMON_SRC_REL)
 HS_ALT_SRC=$(GAMMADIR)/$(HS_ALT_SRC_REL)
 
-
 ## altsrc-equiv 
 # 
 # Convert a common source path to an alternative source path
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/make/cscope.make	Thu May 05 22:28:31 2011 -0700
@@ -0,0 +1,141 @@
+#
+# Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#  
+#
+
+# The cscope.out file is generated in the current directory.  The old cscope.out
+# file is *not* removed because cscope is smart enough to only build what has
+# changed.  cscope can be confused if files are renamed or removed, so it may be
+# necessary to remove cscope.out (gmake cscope.clean) if a lot of reorganization
+# has occurred.
+
+include $(GAMMADIR)/make/scm.make
+
+RM	= rm -f
+HG	= hg
+CS_TOP	= $(GAMMADIR)
+
+CSDIRS	= $(CS_TOP)/src $(CS_TOP)/make
+CSINCS	= $(CSDIRS:%=-I%)
+
+CSCOPE		= cscope
+CSCOPE_OUT	= cscope.out
+CSCOPE_FLAGS	= -b
+
+# Allow .java files to be added from the environment (CSCLASSES=yes).
+ifdef	CSCLASSES
+ADDCLASSES=	-o -name '*.java'
+endif
+
+# Adding CClassHeaders also pushes the file count of a full workspace up about
+# 200 files (these files also don't exist in a new workspace, and thus will
+# cause the recreation of the database as they get created, which might seem
+# a little confusing).  Thus allow these files to be added from the environment
+# (CSHEADERS=yes).
+ifndef	CSHEADERS
+RMCCHEADERS=	-o -name CClassHeaders
+endif
+
+# Ignore build products.
+CS_PRUNE_GENERATED	= -o -name '${OSNAME}_*_core' -o \
+			     -name '${OSNAME}_*_compiler?'
+
+# O/S-specific files for all systems are included by default.  Set CS_OS to a
+# space-separated list of identifiers to include only those systems.
+ifdef	CS_OS
+CS_PRUNE_OS	= $(patsubst %,-o -name '*%*',\
+		    $(filter-out ${CS_OS},linux macos solaris windows))
+endif
+
+# CPU-specific files for all processors are included by default.  Set CS_CPU 
+# space-separated list identifiers to include only those CPUs.
+ifdef	CS_CPU
+CS_PRUNE_CPU	= $(patsubst %,-o -name '*%*',\
+		    $(filter-out ${CS_CPU},arm ppc sparc x86 zero))
+endif
+
+# What files should we include?  A simple rule might be just those files under
+# SCCS control, however this would miss files we create like the opcodes and
+# CClassHeaders.  The following attempts to find everything that is *useful*.
+# (.del files are created by sccsrm, demo directories contain many .java files
+# that probably aren't useful for development, and the pkgarchive may contain
+# duplicates of files within the source hierarchy).
+
+# Directories to exclude.
+CS_PRUNE_STD	= $(SCM_DIRS) \
+		  -o -name '.del-*' \
+		  -o -name '*demo' \
+		  -o -name pkgarchive
+
+# Placeholder for user-defined excludes.
+CS_PRUNE_EX	=
+
+CS_PRUNE	= $(CS_PRUNE_STD) \
+		  $(CS_PRUNE_OS) \
+		  $(CS_PRUNE_CPU) \
+		  $(CS_PRUNE_GENERATED) \
+		  $(CS_PRUNE_EX) \
+		  $(RMCCHEADERS)
+
+# File names to include.
+CSFILENAMES	= -name '*.[ch]pp' \
+		  -o -name '*.[Ccshlxy]' \
+		  $(CS_ADD_GENERATED) \
+		  -o -name '*.d' \
+		  -o -name '*.il' \
+		  -o -name '*.cc' \
+		  -o -name '*[Mm]akefile*' \
+		  -o -name '*.gmk' \
+		  -o -name '*.make' \
+		  -o -name '*.ad' \
+		  $(ADDCLASSES)
+
+.PHONY:		cscope cscope.clean cscope.scratch TAGS.clean FORCE
+.PRECIOUS:	cscope.out
+
+cscope $(CSCOPE_OUT): cscope.files FORCE
+	$(CSCOPE) -f $(CSCOPE_OUT) $(CSCOPE_FLAGS)
+
+cscope.clean:
+	$(QUIETLY) $(RM) $(CSCOPE_OUT) cscope.files
+
+cscope.scratch:  cscope.clean cscope
+
+# The raw list is reordered so cscope displays the most relevant files first.
+cscope.files:
+	$(QUIETLY)						\
+	raw=cscope.$$$$;					\
+	find $(CSDIRS) -type d \( $(CS_PRUNE) \) -prune -o	\
+	    -type f \( $(CSFILENAMES) \) -print > $$raw;	\
+	{							\
+	echo "$(CSINCS)";					\
+	egrep -v "\.java|/make/" $$raw;				\
+	fgrep ".java" $$raw;					\
+	fgrep "/make/" $$raw;					\
+	} > $@;							\
+	rm -f $$raw
+
+TAGS:  cscope.files FORCE
+	egrep -v '^-|^$$' $< | etags --members -
+
+TAGS.clean:
+	$(RM) TAGS
--- a/hotspot/make/hotspot_version	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/make/hotspot_version	Thu May 05 22:28:31 2011 -0700
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=21
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=10
+HS_BUILD_NUMBER=12
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=7
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/make/jdk6_hotspot_distro	Thu May 05 22:28:31 2011 -0700
@@ -0,0 +1,32 @@
+# 
+# Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+# 
+
+#
+# This file format must remain compatible with both
+# GNU Makefile and Microsoft nmake formats.
+#
+
+# Don't put quotes (fail windows build).
+HOTSPOT_VM_DISTRO=Java HotSpot(TM)
+COMPANY_NAME=Sun Microsystems, Inc.
+PRODUCT_NAME=Java(TM) Platform SE
--- a/hotspot/make/linux/Makefile	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/make/linux/Makefile	Thu May 05 22:28:31 2011 -0700
@@ -359,7 +359,7 @@
 
 clean:  clean_compiler2 clean_compiler1 clean_core clean_zero clean_shark clean_docs
 
-include $(GAMMADIR)/make/$(OSNAME)/makefiles/cscope.make
+include $(GAMMADIR)/make/cscope.make
 
 #-------------------------------------------------------------------------------
 
--- a/hotspot/make/linux/makefiles/cscope.make	Wed May 04 12:00:14 2011 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,160 +0,0 @@
-#
-# Copyright (c) 2005, 2008, Oracle and/or its affiliates. All rights reserved.
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This code is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License version 2 only, as
-# published by the Free Software Foundation.
-#
-# This code is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-# version 2 for more details (a copy is included in the LICENSE file that
-# accompanied this code).
-#
-# You should have received a copy of the GNU General Public License version
-# 2 along with this work; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-# or visit www.oracle.com if you need additional information or have any
-# questions.
-#  
-#
-
-#
-# The cscope.out file is made in the current directory and spans the entire
-# source tree.
-#
-# Things to note:
-#	1. We use relative names for cscope.
-#	2. We *don't* remove the old cscope.out file, because cscope is smart
-#	   enough to only build what has changed.  It can be confused, however,
-#	   if files are renamed or removed, so it may be necessary to manually
-#	   remove cscope.out if a lot of reorganization has occurred.
-#
-
-include $(GAMMADIR)/make/scm.make
-
-NAWK	= awk
-RM	= rm -f
-HG	= hg
-CS_TOP	= ../..
-
-CSDIRS	= $(CS_TOP)/src $(CS_TOP)/build
-CSINCS	= $(CSDIRS:%=-I%)
-
-CSCOPE		= cscope
-CSCOPE_FLAGS	= -b
-
-# Allow .java files to be added from the environment (CSCLASSES=yes).
-ifdef	CSCLASSES
-ADDCLASSES=	-o -name '*.java'
-endif
-
-# Adding CClassHeaders also pushes the file count of a full workspace up about
-# 200 files (these files also don't exist in a new workspace, and thus will
-# cause the recreation of the database as they get created, which might seem
-# a little confusing).  Thus allow these files to be added from the environment
-# (CSHEADERS=yes).
-ifndef	CSHEADERS
-RMCCHEADERS=	-o -name CClassHeaders
-endif
-
-# Use CS_GENERATED=x to include auto-generated files in the build directories.
-ifdef	CS_GENERATED
-CS_ADD_GENERATED	= -o -name '*.incl'
-else
-CS_PRUNE_GENERATED	= -o -name '${OS}_*_core' -o -name '${OS}_*_compiler?'
-endif
-
-# OS-specific files for other systems are excluded by default.  Use CS_OS=yes
-# to include platform-specific files for other platforms.
-ifndef	CS_OS
-CS_OS		= linux macos solaris win32
-CS_PRUNE_OS	= $(patsubst %,-o -name '*%*',$(filter-out ${OS},${CS_OS}))
-endif
-
-# Processor-specific files for other processors are excluded by default.  Use
-# CS_CPU=x to include platform-specific files for other platforms.
-ifndef	CS_CPU
-CS_CPU		= i486 sparc amd64 ia64
-CS_PRUNE_CPU	= $(patsubst %,-o -name '*%*',$(filter-out ${SRCARCH},${CS_CPU}))
-endif
-
-# What files should we include?  A simple rule might be just those files under
-# SCCS control, however this would miss files we create like the opcodes and
-# CClassHeaders.  The following attempts to find everything that is *useful*.
-# (.del files are created by sccsrm, demo directories contain many .java files
-# that probably aren't useful for development, and the pkgarchive may contain
-# duplicates of files within the source hierarchy).
-
-# Directories to exclude.
-CS_PRUNE_STD	= $(SCM_DIRS) \
-		  -o -name '.del-*' \
-		  -o -name '*demo' \
-		  -o -name pkgarchive
-
-CS_PRUNE	= $(CS_PRUNE_STD) \
-		  $(CS_PRUNE_OS) \
-		  $(CS_PRUNE_CPU) \
-		  $(CS_PRUNE_GENERATED) \
-		  $(RMCCHEADERS)
-
-# File names to include.
-CSFILENAMES	= -name '*.[ch]pp' \
-		  -o -name '*.[Ccshlxy]' \
-		  $(CS_ADD_GENERATED) \
-		  -o -name '*.il' \
-		  -o -name '*.cc' \
-		  -o -name '*[Mm]akefile*' \
-		  -o -name '*.gmk' \
-		  -o -name '*.make' \
-		  -o -name '*.ad' \
-		  $(ADDCLASSES)
-
-.PRECIOUS:	cscope.out
-
-cscope cscope.out: cscope.files FORCE
-	$(CSCOPE) $(CSCOPE_FLAGS)
-
-# The .raw file is reordered here in an attempt to make cscope display the most
-# relevant files first.
-cscope.files: .cscope.files.raw
-	echo "$(CSINCS)" > $@
-	-egrep -v "\.java|\/make\/"	$< >> $@
-	-fgrep ".java"			$< >> $@
-	-fgrep "/make/"		$< >> $@
-
-.cscope.files.raw:  .nametable.files
-	-find $(CSDIRS) -type d \( $(CS_PRUNE) \) -prune -o \
-	    -type f \( $(CSFILENAMES) \) -print > $@
-
-cscope.clean:  nametable.clean
-	-$(RM) cscope.out cscope.files .cscope.files.raw
-
-TAGS:  cscope.files FORCE
-	egrep -v '^-|^$$' $< | etags --members -
-
-TAGS.clean:  nametable.clean
-	-$(RM) TAGS
-
-# .nametable.files and .nametable.files.tmp are used to determine if any files
-# were added to/deleted from/renamed in the workspace.  If not, then there's
-# normally no need to rebuild the cscope database. To force a rebuild of
-# the cscope database: gmake nametable.clean.
-.nametable.files:  .nametable.files.tmp
-	( cmp -s $@ $< ) || ( cp $< $@ )
-	-$(RM) $<
-
-# `hg status' is slightly faster than `hg fstatus'. Both are
-# quite a bit slower on an NFS mounted file system, so this is
-# really geared towards repos on local file systems.
-.nametable.files.tmp:
-	-$(HG) fstatus -acmn > $@
-nametable.clean:
-	-$(RM) .nametable.files .nametable.files.tmp
-
-FORCE:
-
-.PHONY:		cscope cscope.clean TAGS.clean nametable.clean FORCE
--- a/hotspot/make/linux/makefiles/gcc.make	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/make/linux/makefiles/gcc.make	Thu May 05 22:28:31 2011 -0700
@@ -205,7 +205,7 @@
 SHARED_FLAG = -shared
 
 # Keep symbols even they are not used
-AOUT_FLAGS += -export-dynamic
+AOUT_FLAGS += -Xlinker -export-dynamic
 
 #------------------------------------------------------------------------
 # Debug flags
--- a/hotspot/make/linux/makefiles/vm.make	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/make/linux/makefiles/vm.make	Thu May 05 22:28:31 2011 -0700
@@ -102,6 +102,10 @@
 CFLAGS += $(EXTRA_CFLAGS)
 LFLAGS += $(EXTRA_CFLAGS)
 
+# Don't set excutable bit on stack segment
+# the same could be done by separate execstack command
+LFLAGS += -Xlinker -z -Xlinker noexecstack
+
 LIBS += -lm -ldl -lpthread
 
 # By default, link the *.o into the library, not the executable.
--- a/hotspot/make/solaris/Makefile	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/make/solaris/Makefile	Thu May 05 22:28:31 2011 -0700
@@ -296,7 +296,7 @@
 
 clean:  clean_compiler2 clean_compiler1 clean_core clean_docs clean_kernel
 
-include $(GAMMADIR)/make/$(OSNAME)/makefiles/cscope.make
+include $(GAMMADIR)/make/cscope.make
 
 #-------------------------------------------------------------------------------
 
--- a/hotspot/make/solaris/makefiles/cscope.make	Wed May 04 12:00:14 2011 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,162 +0,0 @@
-#
-# Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved.
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This code is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License version 2 only, as
-# published by the Free Software Foundation.
-#
-# This code is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-# version 2 for more details (a copy is included in the LICENSE file that
-# accompanied this code).
-#
-# You should have received a copy of the GNU General Public License version
-# 2 along with this work; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-# or visit www.oracle.com if you need additional information or have any
-# questions.
-#  
-#
-
-#
-# The cscope.out file is made in the current directory and spans the entire
-# source tree.
-#
-# Things to note:
-#	1. We use relative names for cscope.
-#	2. We *don't* remove the old cscope.out file, because cscope is smart
-#	   enough to only build what has changed.  It can be confused, however,
-#	   if files are renamed or removed, so it may be necessary to manually
-#	   remove cscope.out if a lot of reorganization has occurred.
-#
-
-include $(GAMMADIR)/make/scm.make
-
-NAWK	= /usr/xpg4/bin/awk
-RM	= rm -f
-HG	= hg
-CS_TOP	= ../..
-
-CSDIRS	= $(CS_TOP)/src $(CS_TOP)/make
-CSINCS	= $(CSDIRS:%=-I%)
-
-CSCOPE		= cscope
-CSCOPE_FLAGS	= -b
-
-# Allow .java files to be added from the environment (CSCLASSES=yes).
-ifdef	CSCLASSES
-ADDCLASSES=	-o -name '*.java'
-endif
-
-# Adding CClassHeaders also pushes the file count of a full workspace up about
-# 200 files (these files also don't exist in a new workspace, and thus will
-# cause the recreation of the database as they get created, which might seem
-# a little confusing).  Thus allow these files to be added from the environment
-# (CSHEADERS=yes).
-ifndef	CSHEADERS
-RMCCHEADERS=	-o -name CClassHeaders
-endif
-
-# Use CS_GENERATED=x to include auto-generated files in the make directories.
-ifdef	CS_GENERATED
-CS_ADD_GENERATED	= -o -name '*.incl'
-else
-CS_PRUNE_GENERATED	= -o -name '${OS}_*_core' -o -name '${OS}_*_compiler?'
-endif
-
-# OS-specific files for other systems are excluded by default.  Use CS_OS=yes
-# to include platform-specific files for other platforms.
-ifndef	CS_OS
-CS_OS		= linux macos solaris win32
-CS_PRUNE_OS	= $(patsubst %,-o -name '*%*',$(filter-out ${OS},${CS_OS}))
-endif
-
-# Processor-specific files for other processors are excluded by default.  Use
-# CS_CPU=x to include platform-specific files for other platforms.
-ifndef	CS_CPU
-CS_CPU		= i486 sparc amd64 ia64
-CS_PRUNE_CPU	= $(patsubst %,-o -name '*%*',$(filter-out ${SRCARCH},${CS_CPU}))
-endif
-
-# What files should we include?  A simple rule might be just those files under
-# SCCS control, however this would miss files we create like the opcodes and
-# CClassHeaders.  The following attempts to find everything that is *useful*.
-# (.del files are created by sccsrm, demo directories contain many .java files
-# that probably aren't useful for development, and the pkgarchive may contain
-# duplicates of files within the source hierarchy).
-
-# Directories to exclude.
-CS_PRUNE_STD	= $(SCM_DIRS) \
-		  -o -name '.del-*' \
-		  -o -name '*demo' \
-		  -o -name pkgarchive
-
-CS_PRUNE	= $(CS_PRUNE_STD) \
-		  $(CS_PRUNE_OS) \
-		  $(CS_PRUNE_CPU) \
-		  $(CS_PRUNE_GENERATED) \
-		  $(RMCCHEADERS)
-
-# File names to include.
-CSFILENAMES	= -name '*.[ch]pp' \
-		  -o -name '*.[Ccshlxy]' \
-		  $(CS_ADD_GENERATED) \
-		  -o -name '*.d' \
-		  -o -name '*.il' \
-		  -o -name '*.cc' \
-		  -o -name '*[Mm]akefile*' \
-		  -o -name '*.gmk' \
-		  -o -name '*.make' \
-		  -o -name '*.ad' \
-		  $(ADDCLASSES)
-
-.PRECIOUS:	cscope.out
-
-cscope cscope.out: cscope.files FORCE
-	$(CSCOPE) $(CSCOPE_FLAGS)
-
-# The .raw file is reordered here in an attempt to make cscope display the most
-# relevant files first.
-cscope.files: .cscope.files.raw
-	echo "$(CSINCS)" > $@
-	-egrep -v "\.java|\/make\/"	$< >> $@
-	-fgrep ".java"			$< >> $@
-	-fgrep "/make/"		$< >> $@
-
-.cscope.files.raw:  .nametable.files
-	-find $(CSDIRS) -type d \( $(CS_PRUNE) \) -prune -o \
-	    -type f \( $(CSFILENAMES) \) -print > $@
-
-cscope.clean:  nametable.clean
-	-$(RM) cscope.out cscope.files .cscope.files.raw
-
-TAGS:  cscope.files FORCE
-	egrep -v '^-|^$$' $< | etags --members -
-
-TAGS.clean:  nametable.clean
-	-$(RM) TAGS
-
-# .nametable.files and .nametable.files.tmp are used to determine if any files
-# were added to/deleted from/renamed in the workspace.  If not, then there's
-# normally no need to rebuild the cscope database. To force a rebuild of
-# the cscope database: gmake nametable.clean.
-.nametable.files:  .nametable.files.tmp
-	( cmp -s $@ $< ) || ( cp $< $@ )
-	-$(RM) $<
-
-# `hg status' is slightly faster than `hg fstatus'. Both are
-# quite a bit slower on an NFS mounted file system, so this is
-# really geared towards repos on local file systems.
-.nametable.files.tmp:
-	-$(HG) fstatus -acmn > $@
-
-nametable.clean:
-	-$(RM) .nametable.files .nametable.files.tmp
-
-FORCE:
-
-.PHONY:		cscope cscope.clean TAGS.clean nametable.clean FORCE
--- a/hotspot/make/solaris/makefiles/saproc.make	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/make/solaris/makefiles/saproc.make	Thu May 05 22:28:31 2011 -0700
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -56,6 +56,30 @@
 SA_LFLAGS += -mt -xnolib -norunpath
 endif
 
+# The libproc Pstack_iter() interface changed in Nevada-B159.
+# This logic needs to match
+# agent/src/os/solaris/proc/saproc.cpp: set_has_newer_Pstack_iter():
+#   - skip SunOS 4 or older
+#   - skip Solaris 10 or older
+#   - skip two digit Nevada builds
+#   - skip three digit Nevada builds thru 149
+#   - skip Nevada builds 150-158
+SOLARIS_11_B159_OR_LATER := \
+$(shell uname -r -v \
+    | sed -n ' \
+          /^[0-3]\. /b \
+          /^5\.[0-9] /b \
+          /^5\.10 /b \
+          / snv_[0-9][0-9]$/b \
+          / snv_[01][0-4][0-9]$/b \
+          / snv_15[0-8]$/b \
+          s/.*/-DSOLARIS_11_B159_OR_LATER/p \
+          ')
+
+# Uncomment the following to simulate building on Nevada-B159 or later
+# when actually building on Nevada-B158 or earlier:
+#SOLARIS_11_B159_OR_LATER=-DSOLARIS_11_B159_OR_LATER
+
 $(LIBSAPROC): $(SASRCFILES) $(SAMAPFILE)
 	$(QUIETLY) if [ "$(BOOT_JAVA_HOME)" = "" ]; then \
 	  echo "ALT_BOOTDIR, BOOTDIR or JAVA_HOME needs to be defined to build SA"; \
@@ -68,6 +92,7 @@
 	           -I$(GENERATED)                                       \
 	           -I$(BOOT_JAVA_HOME)/include                          \
 	           -I$(BOOT_JAVA_HOME)/include/$(Platform_os_family)    \
+	           $(SOLARIS_11_B159_OR_LATER)                          \
 	           $(SASRCFILES)                                        \
 	           $(SA_LFLAGS)                                         \
 	           -o $@                                                \
--- a/hotspot/make/solaris/makefiles/sparcWorks.make	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/make/solaris/makefiles/sparcWorks.make	Thu May 05 22:28:31 2011 -0700
@@ -100,11 +100,6 @@
 
 LINK_LIB.CC/PRE_HOOK += $(JVM_CHECK_SYMBOLS) || exit 1;
 
-# Some interfaces (_lwp_create) changed with LP64 and Solaris 7
-SOLARIS_7_OR_LATER := \
-$(shell uname -r | awk -F. '{ if ($$2 >= 7) print "-DSOLARIS_7_OR_LATER"; }')
-CFLAGS += ${SOLARIS_7_OR_LATER}
-
 # New architecture options started in SS12 (5.9), we need both styles to build.
 #   The older arch options for SS11 (5.8) or older and also for /usr/ccs/bin/as.
 #   Note: default for 32bit sparc is now the same as v8plus, so the
--- a/hotspot/make/windows/build.make	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/make/windows/build.make	Thu May 05 22:28:31 2011 -0700
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -125,7 +125,25 @@
 # or make/hotspot_distro.
 !ifndef HOTSPOT_VM_DISTRO
 !if exists($(WorkSpace)\src\closed)
+
+# if the build is for JDK6 or earlier version, it should include jdk6_hotspot_distro,
+# instead of hotspot_distro.
+JDK6_OR_EARLIER=0
+!if "$(JDK_MAJOR_VERSION)" != "" && "$(JDK_MINOR_VERSION)" != "" && "$(JDK_MICRO_VERSION)" != ""
+!if $(JDK_MAJOR_VERSION) == 1 && $(JDK_MINOR_VERSION) < 7
+JDK6_OR_EARLIER=1
+!endif
+!else
+!if $(JDK_MAJOR_VER) == 1 && $(JDK_MINOR_VER) < 7
+JDK6_OR_EARLIER=1
+!endif
+!endif
+
+!if $(JDK6_OR_EARLIER) == 1
+!include $(WorkSpace)\make\jdk6_hotspot_distro
+!else
 !include $(WorkSpace)\make\hotspot_distro
+!endif
 !else
 !include $(WorkSpace)\make\openjdk_distro
 !endif
@@ -260,7 +278,7 @@
 	@ echo Variant=$(realVariant)				>> $@
 	@ echo WorkSpace=$(WorkSpace)				>> $@
 	@ echo BootStrapDir=$(BootStrapDir)			>> $@
-        @ if "$(USERNAME)" NEQ "" echo BuildUser=$(USERNAME)	>> $@
+	@ if "$(USERNAME)" NEQ "" echo BuildUser=$(USERNAME)	>> $@
 	@ echo HS_VER=$(HS_VER)					>> $@
 	@ echo HS_DOTVER=$(HS_DOTVER)				>> $@
 	@ echo HS_COMPANY=$(COMPANY_NAME)			>> $@
--- a/hotspot/src/cpu/sparc/vm/frame_sparc.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/cpu/sparc/vm/frame_sparc.cpp	Thu May 05 22:28:31 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -806,3 +806,34 @@
   int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize) - 1;
   return &interpreter_frame_tos_address()[index];
 }
+
+
+#ifdef ASSERT
+
+#define DESCRIBE_FP_OFFSET(name) \
+  values.describe(-1, fp() + frame::name##_offset, #name)
+
+void frame::describe_pd(FrameValues& values, int frame_no) {
+  for (int w = 0; w < frame::register_save_words; w++) {
+    values.describe(frame_no, sp() + w, err_msg("register save area word %d", w), 1);
+  }
+
+  if (is_interpreted_frame()) {
+    DESCRIBE_FP_OFFSET(interpreter_frame_d_scratch_fp);
+    DESCRIBE_FP_OFFSET(interpreter_frame_l_scratch_fp);
+    DESCRIBE_FP_OFFSET(interpreter_frame_padding);
+    DESCRIBE_FP_OFFSET(interpreter_frame_oop_temp);
+  }
+
+  if (!is_compiled_frame()) {
+    if (frame::callee_aggregate_return_pointer_words != 0) {
+      values.describe(frame_no, sp() + frame::callee_aggregate_return_pointer_sp_offset, "callee_aggregate_return_pointer_word");
+    }
+    for (int w = 0; w < frame::callee_register_argument_save_area_words; w++) {
+      values.describe(frame_no, sp() + frame::callee_register_argument_save_area_sp_offset + w,
+                      err_msg("callee_register_argument_save_area_words %d", w));
+    }
+  }
+}
+
+#endif
--- a/hotspot/src/cpu/sparc/vm/methodHandles_sparc.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/cpu/sparc/vm/methodHandles_sparc.cpp	Thu May 05 22:28:31 2011 -0700
@@ -350,8 +350,9 @@
 #ifndef PRODUCT
 extern "C" void print_method_handle(oop mh);
 void trace_method_handle_stub(const char* adaptername,
-                              oopDesc* mh) {
-  printf("MH %s mh="INTPTR_FORMAT"\n", adaptername, (intptr_t) mh);
+                              oopDesc* mh,
+                              intptr_t* saved_sp) {
+  tty->print_cr("MH %s mh="INTPTR_FORMAT " saved_sp=" INTPTR_FORMAT, adaptername, (intptr_t) mh, saved_sp);
   print_method_handle(mh);
 }
 void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {
@@ -361,6 +362,7 @@
   __ save_frame(16);
   __ set((intptr_t) adaptername, O0);
   __ mov(G3_method_handle, O1);
+  __ mov(I5_savedSP, O2);
   __ mov(G3_method_handle, L3);
   __ mov(Gargs, L4);
   __ mov(G5_method_type, L5);
@@ -486,7 +488,7 @@
       if (ek == _invokespecial_mh) {
         // Must load & check the first argument before entering the target method.
         __ load_method_handle_vmslots(O0_argslot, G3_method_handle, O1_scratch);
-        __ ld_ptr(__ argument_address(O0_argslot), G3_method_handle);
+        __ ld_ptr(__ argument_address(O0_argslot, -1), G3_method_handle);
         __ null_check(G3_method_handle);
         __ verify_oop(G3_method_handle);
       }
@@ -643,9 +645,10 @@
 
       // Live at this point:
       // - G5_klass        :  klass required by the target method
+      // - O0_argslot      :  argslot index in vmarg; may be required in the failing path
       // - O1_scratch      :  argument klass to test
       // - G3_method_handle:  adapter method handle
-      __ check_klass_subtype(O1_scratch, G5_klass, O0_argslot, O2_scratch, done);
+      __ check_klass_subtype(O1_scratch, G5_klass, O2_scratch, O3_scratch, done);
 
       // If we get here, the type check failed!
       __ load_heap_oop(G3_amh_argument,        O2_required);  // required class
--- a/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Thu May 05 22:28:31 2011 -0700
@@ -1698,35 +1698,21 @@
                      popframe_extra_args;
 
     int local_words = method->max_locals() * Interpreter::stackElementWords;
-    int parm_words  = method->size_of_parameters() * Interpreter::stackElementWords;
-    NEEDS_CLEANUP;
     intptr_t* locals;
-    if (caller->is_interpreted_frame()) {
-      // Can force the locals area to end up properly overlapping the top of the expression stack.
-      intptr_t* Lesp_ptr = caller->interpreter_frame_tos_address() - 1;
-      // Note that this computation means we replace size_of_parameters() values from the caller
-      // interpreter frame's expression stack with our argument locals
-      locals = Lesp_ptr + parm_words;
-      int delta = local_words - parm_words;
-      int computed_sp_adjustment = (delta > 0) ? round_to(delta, WordsPerLong) : 0;
-      *interpreter_frame->register_addr(I5_savedSP)    = (intptr_t) (fp + computed_sp_adjustment) - STACK_BIAS;
+    if (caller->is_compiled_frame()) {
+      // Compiled frames do not allocate a varargs area so place them
+      // next to the register save area.
+      locals = fp + frame::register_save_words + local_words - 1;
+      // Caller wants his own SP back
+      int caller_frame_size = caller->cb()->frame_size();
+      *interpreter_frame->register_addr(I5_savedSP) = (intptr_t)(caller->fp() - caller_frame_size) - STACK_BIAS;
     } else {
-      assert(caller->is_compiled_frame() || caller->is_entry_frame(), "only possible cases");
-      // Don't have Lesp available; lay out locals block in the caller
-      // adjacent to the register window save area.
-      //
-      // Compiled frames do not allocate a varargs area which is why this if
-      // statement is needed.
-      //
-      if (caller->is_compiled_frame()) {
-        locals = fp + frame::register_save_words + local_words - 1;
-      } else {
-        locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1;
-      }
-      if (!caller->is_entry_frame()) {
-        // Caller wants his own SP back
-        int caller_frame_size = caller->cb()->frame_size();
-        *interpreter_frame->register_addr(I5_savedSP) = (intptr_t)(caller->fp() - caller_frame_size) - STACK_BIAS;
+      assert(caller->is_interpreted_frame() || caller->is_entry_frame(), "only possible cases");
+      // The entry and interpreter frames are laid out like normal C
+      // frames so place the locals adjacent to the varargs area.
+      locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1;
+      if (caller->is_interpreted_frame()) {
+        *interpreter_frame->register_addr(I5_savedSP)    = (intptr_t) (fp + rounded_cls) - STACK_BIAS;
       }
     }
     if (TraceDeoptimization) {
--- a/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp	Thu May 05 22:28:31 2011 -0700
@@ -3293,8 +3293,6 @@
                              /*virtual*/ false, /*vfinal*/ false, /*indy*/ true);
   __ mov(SP, O5_savedSP);  // record SP that we wanted the callee to restore
 
-  __ verify_oop(G5_callsite);
-
   // profile this call
   __ profile_call(O4);
 
@@ -3307,8 +3305,10 @@
   __ sll(Rret, LogBytesPerWord, Rret);
   __ ld_ptr(Rtemp, Rret, Rret);  // get return address
 
+  __ verify_oop(G5_callsite);
   __ load_heap_oop(G5_callsite, __ delayed_value(java_lang_invoke_CallSite::target_offset_in_bytes, Rscratch), G3_method_handle);
   __ null_check(G3_method_handle);
+  __ verify_oop(G3_method_handle);
 
   // Adjust Rret first so Llast_SP can be same as Rret
   __ add(Rret, -frame::pc_return_offset, O7);
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Thu May 05 22:28:31 2011 -0700
@@ -6039,6 +6039,43 @@
   call_VM_leaf(entry_point, 3);
 }
 
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
+  pass_arg0(this, arg_0);
+  MacroAssembler::call_VM_leaf_base(entry_point, 1);
+}
+
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
+
+  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
+  pass_arg1(this, arg_1);
+  pass_arg0(this, arg_0);
+  MacroAssembler::call_VM_leaf_base(entry_point, 2);
+}
+
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
+  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
+  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
+  pass_arg2(this, arg_2);
+  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
+  pass_arg1(this, arg_1);
+  pass_arg0(this, arg_0);
+  MacroAssembler::call_VM_leaf_base(entry_point, 3);
+}
+
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
+  LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
+  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
+  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
+  pass_arg3(this, arg_3);
+  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
+  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
+  pass_arg2(this, arg_2);
+  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
+  pass_arg1(this, arg_1);
+  pass_arg0(this, arg_0);
+  MacroAssembler::call_VM_leaf_base(entry_point, 4);
+}
+
 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
 }
 
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Thu May 05 22:28:31 2011 -0700
@@ -1655,6 +1655,14 @@
   void call_VM_leaf(address entry_point,
                     Register arg_1, Register arg_2, Register arg_3);
 
+  // These always tightly bind to MacroAssembler::call_VM_leaf_base
+  // bypassing the virtual implementation
+  void super_call_VM_leaf(address entry_point);
+  void super_call_VM_leaf(address entry_point, Register arg_1);
+  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
+  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
+  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4);
+
   // last Java Frame (fills frame anchor)
   void set_last_Java_frame(Register thread,
                            Register last_java_sp,
--- a/hotspot/src/cpu/x86/vm/frame_x86.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/cpu/x86/vm/frame_x86.cpp	Thu May 05 22:28:31 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -669,3 +669,23 @@
   int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize);
   return &interpreter_frame_tos_address()[index];
 }
+
+#ifdef ASSERT
+
+#define DESCRIBE_FP_OFFSET(name) \
+  values.describe(-1, fp() + frame::name##_offset, #name)
+
+void frame::describe_pd(FrameValues& values, int frame_no) {
+  if (is_interpreted_frame()) {
+    DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp);
+    DESCRIBE_FP_OFFSET(interpreter_frame_last_sp);
+    DESCRIBE_FP_OFFSET(interpreter_frame_method);
+    DESCRIBE_FP_OFFSET(interpreter_frame_mdx);
+    DESCRIBE_FP_OFFSET(interpreter_frame_cache);
+    DESCRIBE_FP_OFFSET(interpreter_frame_locals);
+    DESCRIBE_FP_OFFSET(interpreter_frame_bcx);
+    DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp);
+  }
+
+}
+#endif
--- a/hotspot/src/cpu/x86/vm/interp_masm_x86_32.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/cpu/x86/vm/interp_masm_x86_32.cpp	Thu May 05 22:28:31 2011 -0700
@@ -383,32 +383,6 @@
   movptr(Address(rsp, Interpreter::expr_offset_in_bytes(n)), val);
 }
 
-void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point) {
-  MacroAssembler::call_VM_leaf_base(entry_point, 0);
-}
-
-
-void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point, Register arg_1) {
-  push(arg_1);
-  MacroAssembler::call_VM_leaf_base(entry_point, 1);
-}
-
-
-void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
-  push(arg_2);
-  push(arg_1);
-  MacroAssembler::call_VM_leaf_base(entry_point, 2);
-}
-
-
-void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
-  push(arg_3);
-  push(arg_2);
-  push(arg_1);
-  MacroAssembler::call_VM_leaf_base(entry_point, 3);
-}
-
-
 void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() {
   // set sender sp
   lea(rsi, Address(rsp, wordSize));
--- a/hotspot/src/cpu/x86/vm/interp_masm_x86_32.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/cpu/x86/vm/interp_masm_x86_32.hpp	Thu May 05 22:28:31 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -124,12 +124,6 @@
   void load_ptr(int n, Register val);
   void store_ptr(int n, Register val);
 
-  // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls
-  void super_call_VM_leaf(address entry_point);
-  void super_call_VM_leaf(address entry_point, Register arg_1);
-  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
-  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
-
   // Generate a subtype check: branch to ok_is_subtype if sub_klass is
   // a subtype of super_klass.  EAX holds the super_klass.  Blows ECX
   // and EDI.  Register sub_klass cannot be any of the above.
--- a/hotspot/src/cpu/x86/vm/interp_masm_x86_64.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/cpu/x86/vm/interp_masm_x86_64.cpp	Thu May 05 22:28:31 2011 -0700
@@ -381,56 +381,6 @@
 }
 
 
-void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point) {
-  MacroAssembler::call_VM_leaf_base(entry_point, 0);
-}
-
-
-void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point,
-                                                   Register arg_1) {
-  if (c_rarg0 != arg_1) {
-    mov(c_rarg0, arg_1);
-  }
-  MacroAssembler::call_VM_leaf_base(entry_point, 1);
-}
-
-
-void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point,
-                                                   Register arg_1,
-                                                   Register arg_2) {
-  assert(c_rarg0 != arg_2, "smashed argument");
-  assert(c_rarg1 != arg_1, "smashed argument");
-  if (c_rarg0 != arg_1) {
-    mov(c_rarg0, arg_1);
-  }
-  if (c_rarg1 != arg_2) {
-    mov(c_rarg1, arg_2);
-  }
-  MacroAssembler::call_VM_leaf_base(entry_point, 2);
-}
-
-void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point,
-                                                   Register arg_1,
-                                                   Register arg_2,
-                                                   Register arg_3) {
-  assert(c_rarg0 != arg_2, "smashed argument");
-  assert(c_rarg0 != arg_3, "smashed argument");
-  assert(c_rarg1 != arg_1, "smashed argument");
-  assert(c_rarg1 != arg_3, "smashed argument");
-  assert(c_rarg2 != arg_1, "smashed argument");
-  assert(c_rarg2 != arg_2, "smashed argument");
-  if (c_rarg0 != arg_1) {
-    mov(c_rarg0, arg_1);
-  }
-  if (c_rarg1 != arg_2) {
-    mov(c_rarg1, arg_2);
-  }
-  if (c_rarg2 != arg_3) {
-    mov(c_rarg2, arg_3);
-  }
-  MacroAssembler::call_VM_leaf_base(entry_point, 3);
-}
-
 void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() {
   // set sender sp
   lea(r13, Address(rsp, wordSize));
--- a/hotspot/src/cpu/x86/vm/interp_masm_x86_64.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/cpu/x86/vm/interp_masm_x86_64.hpp	Thu May 05 22:28:31 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -136,13 +136,6 @@
   void load_ptr(int n, Register val);
   void store_ptr(int n, Register val);
 
-  // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls
-  void super_call_VM_leaf(address entry_point);
-  void super_call_VM_leaf(address entry_point, Register arg_1);
-  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
-  void super_call_VM_leaf(address entry_point,
-                          Register arg_1, Register arg_2, Register arg_3);
-
   // Generate a subtype check: branch to ok_is_subtype if sub_klass is
   // a subtype of super_klass.
   void gen_subtype_check( Register sub_klass, Label &ok_is_subtype );
--- a/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp	Thu May 05 22:28:31 2011 -0700
@@ -315,56 +315,38 @@
 #ifndef PRODUCT
 extern "C" void print_method_handle(oop mh);
 void trace_method_handle_stub(const char* adaptername,
+                              intptr_t* saved_sp,
                               oop mh,
-                              intptr_t* saved_regs,
-                              intptr_t* entry_sp,
-                              intptr_t* saved_sp,
-                              intptr_t* saved_bp) {
+                              intptr_t* sp) {
   // called as a leaf from native code: do not block the JVM!
-  intptr_t* last_sp = (intptr_t*) saved_bp[frame::interpreter_frame_last_sp_offset];
-  intptr_t* base_sp = (intptr_t*) saved_bp[frame::interpreter_frame_monitor_block_top_offset];
-  printf("MH %s mh="INTPTR_FORMAT" sp=("INTPTR_FORMAT"+"INTX_FORMAT") stack_size="INTX_FORMAT" bp="INTPTR_FORMAT"\n",
-         adaptername, (intptr_t)mh, (intptr_t)entry_sp, (intptr_t)(saved_sp - entry_sp), (intptr_t)(base_sp - last_sp), (intptr_t)saved_bp);
-  if (last_sp != saved_sp && last_sp != NULL)
-    printf("*** last_sp="INTPTR_FORMAT"\n", (intptr_t)last_sp);
+  intptr_t* entry_sp = sp + LP64_ONLY(16) NOT_LP64(8);
+  tty->print_cr("MH %s mh="INTPTR_FORMAT" sp="INTPTR_FORMAT" saved_sp="INTPTR_FORMAT")",
+                adaptername, (intptr_t)mh, (intptr_t)entry_sp, saved_sp);
   if (Verbose) {
-    printf(" reg dump: ");
-    int saved_regs_count = (entry_sp-1) - saved_regs;
-    // 32 bit: rdi rsi rbp rsp; rbx rdx rcx (*) rax
-    int i;
-    for (i = 0; i <= saved_regs_count; i++) {
-      if (i > 0 && i % 4 == 0 && i != saved_regs_count)
-        printf("\n   + dump: ");
-      printf(" %d: "INTPTR_FORMAT, i, saved_regs[i]);
-    }
-    printf("\n");
-    int stack_dump_count = 16;
-    if (stack_dump_count < (int)(saved_bp + 2 - saved_sp))
-      stack_dump_count = (int)(saved_bp + 2 - saved_sp);
-    if (stack_dump_count > 64)  stack_dump_count = 48;
-    for (i = 0; i < stack_dump_count; i += 4) {
-      printf(" dump at SP[%d] "INTPTR_FORMAT": "INTPTR_FORMAT" "INTPTR_FORMAT" "INTPTR_FORMAT" "INTPTR_FORMAT"\n",
-             i, (intptr_t) &entry_sp[i+0], entry_sp[i+0], entry_sp[i+1], entry_sp[i+2], entry_sp[i+3]);
-    }
     print_method_handle(mh);
   }
 }
 void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {
   if (!TraceMethodHandles)  return;
   BLOCK_COMMENT("trace_method_handle {");
-  __ push(rax);
-  __ lea(rax, Address(rsp, wordSize*6)); // entry_sp
   __ pusha();
+#ifdef _LP64
+  // Pass arguments carefully since the registers overlap with the calling convention.
+  // rcx: method handle
+  // r13: saved sp
+  __ mov(c_rarg2, rcx); // mh
+  __ mov(c_rarg1, r13); // saved sp
+  __ mov(c_rarg3, rsp); // sp
+  __ movptr(c_rarg0, (intptr_t) adaptername);
+  __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub), c_rarg0, c_rarg1, c_rarg2, c_rarg3);
+#else
   // arguments:
-  __ push(rbp);               // interpreter frame pointer
-  __ push(rsi);               // saved_sp
-  __ push(rax);               // entry_sp
-  __ push(rcx);               // mh
-  __ push(rcx);
-  __ movptr(Address(rsp, 0), (intptr_t) adaptername);
-  __ call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub), 5);
+  // rcx: method handle
+  // rsi: saved sp
+  __ movptr(rbx, (intptr_t) adaptername);
+  __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub), rbx, rsi, rcx, rsp);
+#endif
   __ popa();
-  __ pop(rax);
   BLOCK_COMMENT("} trace_method_handle");
 }
 #endif //PRODUCT
--- a/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp	Thu May 05 22:28:31 2011 -0700
@@ -422,7 +422,7 @@
 
   Label L_done, L_throw_exception;
   const Register con_klass_temp = rcx;  // same as Rcache
-  __ movptr(con_klass_temp, Address(rax, oopDesc::klass_offset_in_bytes()));
+  __ load_klass(con_klass_temp, rax);
   __ cmpptr(con_klass_temp, ExternalAddress((address)Universe::systemObjArrayKlassObj_addr()));
   __ jcc(Assembler::notEqual, L_done);
   __ cmpl(Address(rax, arrayOopDesc::length_offset_in_bytes()), 0);
@@ -432,7 +432,7 @@
 
   // Load the exception from the system-array which wraps it:
   __ bind(L_throw_exception);
-  __ movptr(rax, Address(rax, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+  __ load_heap_oop(rax, Address(rax, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
   __ jump(ExternalAddress(Interpreter::throw_exception_entry()));
 
   __ bind(L_done);
@@ -946,9 +946,9 @@
   __ jcc(Assembler::zero, is_null);
 
   // Move subklass into EBX
-  __ movptr(rbx, Address(rax, oopDesc::klass_offset_in_bytes()));
+  __ load_klass(rbx, rax);
   // Move superklass into EAX
-  __ movptr(rax, Address(rdx, oopDesc::klass_offset_in_bytes()));
+  __ load_klass(rax, rdx);
   __ movptr(rax, Address(rax, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes()));
   // Compress array+index*wordSize+12 into a single register.  Frees ECX.
   __ lea(rdx, element_address);
@@ -2001,7 +2001,7 @@
   if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
     assert(state == vtos, "only valid state");
     __ movptr(rax, aaddress(0));
-    __ movptr(rdi, Address(rax, oopDesc::klass_offset_in_bytes()));
+    __ load_klass(rdi, rax);
     __ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
     __ testl(rdi, JVM_ACC_HAS_FINALIZER);
     Label skip_register_finalizer;
@@ -2948,7 +2948,7 @@
   // get receiver klass
   __ null_check(recv, oopDesc::klass_offset_in_bytes());
   // Keep recv in rcx for callee expects it there
-  __ movptr(rax, Address(recv, oopDesc::klass_offset_in_bytes()));
+  __ load_klass(rax, recv);
   __ verify_oop(rax);
 
   // profile this call
@@ -3028,7 +3028,7 @@
 
   // Get receiver klass into rdx - also a null check
   __ restore_locals();  // restore rdi
-  __ movptr(rdx, Address(rcx, oopDesc::klass_offset_in_bytes()));
+  __ load_klass(rdx, rcx);
   __ verify_oop(rdx);
 
   // profile this call
@@ -3083,6 +3083,7 @@
 
 void TemplateTable::invokedynamic(int byte_no) {
   transition(vtos, vtos);
+  assert(byte_no == f1_oop, "use this argument");
 
   if (!EnableInvokeDynamic) {
     // We should not encounter this bytecode if !EnableInvokeDynamic.
@@ -3095,7 +3096,6 @@
     return;
   }
 
-  assert(byte_no == f1_oop, "use this argument");
   prepare_invoke(rax, rbx, byte_no);
 
   // rax: CallSite object (f1)
@@ -3106,14 +3106,14 @@
   Register rax_callsite      = rax;
   Register rcx_method_handle = rcx;
 
-  if (ProfileInterpreter) {
-    // %%% should make a type profile for any invokedynamic that takes a ref argument
-    // profile this call
-    __ profile_call(rsi);
-  }
-
-  __ movptr(rcx_method_handle, Address(rax_callsite, __ delayed_value(java_lang_invoke_CallSite::target_offset_in_bytes, rcx)));
+  // %%% should make a type profile for any invokedynamic that takes a ref argument
+  // profile this call
+  __ profile_call(rsi);
+
+  __ verify_oop(rax_callsite);
+  __ load_heap_oop(rcx_method_handle, Address(rax_callsite, __ delayed_value(java_lang_invoke_CallSite::target_offset_in_bytes, rdx)));
   __ null_check(rcx_method_handle);
+  __ verify_oop(rcx_method_handle);
   __ prepare_to_jump_from_interpreted();
   __ jump_to_method_handle_entry(rcx_method_handle, rdx);
 }
@@ -3258,7 +3258,7 @@
                 (int32_t)markOopDesc::prototype()); // header
       __ pop(rcx);   // get saved klass back in the register.
     }
-    __ movptr(Address(rax, oopDesc::klass_offset_in_bytes()), rcx);  // klass
+    __ store_klass(rax, rcx);  // klass
 
     {
       SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0);
@@ -3333,7 +3333,7 @@
   __ movptr(rax, Address(rcx, rbx, Address::times_ptr, sizeof(constantPoolOopDesc)));
 
   __ bind(resolved);
-  __ movptr(rbx, Address(rdx, oopDesc::klass_offset_in_bytes()));
+  __ load_klass(rbx, rdx);
 
   // Generate subtype check.  Blows ECX.  Resets EDI.  Object in EDX.
   // Superklass in EAX.  Subklass in EBX.
@@ -3376,12 +3376,12 @@
   __ push(atos);
   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc) );
   __ pop_ptr(rdx);
-  __ movptr(rdx, Address(rdx, oopDesc::klass_offset_in_bytes()));
+  __ load_klass(rdx, rdx);
   __ jmp(resolved);
 
   // Get superklass in EAX and subklass in EDX
   __ bind(quicked);
-  __ movptr(rdx, Address(rax, oopDesc::klass_offset_in_bytes()));
+  __ load_klass(rdx, rax);
   __ movptr(rax, Address(rcx, rbx, Address::times_ptr, sizeof(constantPoolOopDesc)));
 
   __ bind(resolved);
--- a/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp	Thu May 05 22:28:31 2011 -0700
@@ -436,7 +436,7 @@
   Label L_done, L_throw_exception;
   const Register con_klass_temp = rcx;  // same as cache
   const Register array_klass_temp = rdx;  // same as index
-  __ movptr(con_klass_temp, Address(rax, oopDesc::klass_offset_in_bytes()));
+  __ load_klass(con_klass_temp, rax);
   __ lea(array_klass_temp, ExternalAddress((address)Universe::systemObjArrayKlassObj_addr()));
   __ cmpptr(con_klass_temp, Address(array_klass_temp, 0));
   __ jcc(Assembler::notEqual, L_done);
@@ -447,7 +447,7 @@
 
   // Load the exception from the system-array which wraps it:
   __ bind(L_throw_exception);
-  __ movptr(rax, Address(rax, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+  __ load_heap_oop(rax, Address(rax, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
   __ jump(ExternalAddress(Interpreter::throw_exception_entry()));
 
   __ bind(L_done);
@@ -3137,7 +3137,6 @@
     return;
   }
 
-  assert(byte_no == f1_oop, "use this argument");
   prepare_invoke(rax, rbx, byte_no);
 
   // rax: CallSite object (f1)
@@ -3148,14 +3147,14 @@
   Register rax_callsite      = rax;
   Register rcx_method_handle = rcx;
 
-  if (ProfileInterpreter) {
-    // %%% should make a type profile for any invokedynamic that takes a ref argument
-    // profile this call
-    __ profile_call(r13);
-  }
-
-  __ load_heap_oop(rcx_method_handle, Address(rax_callsite, __ delayed_value(java_lang_invoke_CallSite::target_offset_in_bytes, rcx)));
+  // %%% should make a type profile for any invokedynamic that takes a ref argument
+  // profile this call
+  __ profile_call(r13);
+
+  __ verify_oop(rax_callsite);
+  __ load_heap_oop(rcx_method_handle, Address(rax_callsite, __ delayed_value(java_lang_invoke_CallSite::target_offset_in_bytes, rdx)));
   __ null_check(rcx_method_handle);
+  __ verify_oop(rcx_method_handle);
   __ prepare_to_jump_from_interpreted();
   __ jump_to_method_handle_entry(rcx_method_handle, rdx);
 }
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Thu May 05 22:28:31 2011 -0700
@@ -441,12 +441,25 @@
       }
     }
 
-    // On family 21 processors default is no sw prefetch
-    if ( cpu_family() == 21 ) {
+    // some defaults for AMD family 15h
+    if ( cpu_family() == 0x15 ) {
+      // On family 15h processors default is no sw prefetch
       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
         AllocatePrefetchStyle = 0;
       }
+      // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
+      if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
+        AllocatePrefetchInstr = 3;
+      }
+      // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
+      if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
+        UseXMMForArrayCopy = true;
+      }
+      if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
+        UseUnalignedLoadStores = true;
+      }
     }
+
   }
 
   if( is_intel() ) { // Intel cpus specific settings
--- a/hotspot/src/cpu/x86/vm/x86_32.ad	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad	Thu May 05 22:28:31 2011 -0700
@@ -12989,6 +12989,53 @@
 %}
 
 // ============================================================================
+// Counted Loop limit node which represents exact final iterator value.
+// Note: the resulting value should fit into integer range since
+// counted loops have limit check on overflow.
+instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
+  match(Set limit (LoopLimit (Binary init limit) stride));
+  effect(TEMP limit_hi, TEMP tmp, KILL flags);
+  ins_cost(300);
+
+  format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
+  ins_encode %{
+    int strd = (int)$stride$$constant;
+    assert(strd != 1 && strd != -1, "sanity");
+    int m1 = (strd > 0) ? 1 : -1;
+    // Convert limit to long (EAX:EDX)
+    __ cdql();
+    // Convert init to long (init:tmp)
+    __ movl($tmp$$Register, $init$$Register);
+    __ sarl($tmp$$Register, 31);
+    // $limit - $init
+    __ subl($limit$$Register, $init$$Register);
+    __ sbbl($limit_hi$$Register, $tmp$$Register);
+    // + ($stride - 1)
+    if (strd > 0) {
+      __ addl($limit$$Register, (strd - 1));
+      __ adcl($limit_hi$$Register, 0);
+      __ movl($tmp$$Register, strd);
+    } else {
+      __ addl($limit$$Register, (strd + 1));
+      __ adcl($limit_hi$$Register, -1);
+      __ lneg($limit_hi$$Register, $limit$$Register);
+      __ movl($tmp$$Register, -strd);
+    }
+    // signed devision: (EAX:EDX) / pos_stride
+    __ idivl($tmp$$Register);
+    if (strd < 0) {
+      // restore sign
+      __ negl($tmp$$Register);
+    }
+    // (EAX) * stride
+    __ mull($tmp$$Register);
+    // + init (ignore upper bits)
+    __ addl($limit$$Register, $init$$Register);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// ============================================================================
 // Branch Instructions
 // Jump Table
 instruct jumpXtnd(eRegI switch_val) %{
--- a/hotspot/src/os/linux/vm/globals_linux.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/os/linux/vm/globals_linux.hpp	Thu May 05 22:28:31 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,19 +29,25 @@
 // Defines Linux specific flags. They are not available on other platforms.
 //
 #define RUNTIME_OS_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct) \
-  product(bool, UseOprofile, false,                                 \
-        "enable support for Oprofile profiler")                     \
-                                                                    \
-  product(bool, UseLinuxPosixThreadCPUClocks, true,                 \
-          "enable fast Linux Posix clocks where available")
-// NB: The default value of UseLinuxPosixThreadCPUClocks may be
-// overridden in Arguments::parse_each_vm_init_arg.
+  product(bool, UseOprofile, false,                                     \
+        "enable support for Oprofile profiler")                         \
+                                                                        \
+  product(bool, UseLinuxPosixThreadCPUClocks, true,                     \
+          "enable fast Linux Posix clocks where available")             \
+/*  NB: The default value of UseLinuxPosixThreadCPUClocks may be        \
+    overridden in Arguments::parse_each_vm_init_arg.  */                \
+                                                                        \
+  product(bool, UseHugeTLBFS, false,                                    \
+          "Use MAP_HUGETLB for large pages")                            \
+                                                                        \
+  product(bool, UseSHM, false,                                          \
+          "Use SYSV shared memory for large pages")
 
 //
 // Defines Linux-specific default values. The flags are available on all
 // platforms, but they may have different default values on other platforms.
 //
-define_pd_global(bool, UseLargePages, false);
+define_pd_global(bool, UseLargePages, true);
 define_pd_global(bool, UseLargePagesIndividualAllocation, false);
 define_pd_global(bool, UseOSErrorReporting, false);
 define_pd_global(bool, UseThreadPriorities, true) ;
--- a/hotspot/src/os/linux/vm/os_linux.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/os/linux/vm/os_linux.cpp	Thu May 05 22:28:31 2011 -0700
@@ -2465,16 +2465,40 @@
   return res != (uintptr_t) MAP_FAILED;
 }
 
+// Define MAP_HUGETLB here so we can build HotSpot on old systems.
+#ifndef MAP_HUGETLB
+#define MAP_HUGETLB 0x40000
+#endif
+
+// Define MADV_HUGEPAGE here so we can build HotSpot on old systems.
+#ifndef MADV_HUGEPAGE
+#define MADV_HUGEPAGE 14
+#endif
+
 bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
                        bool exec) {
+  if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
+    int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
+    uintptr_t res =
+      (uintptr_t) ::mmap(addr, size, prot,
+                         MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB,
+                         -1, 0);
+    return res != (uintptr_t) MAP_FAILED;
+  }
+
   return commit_memory(addr, size, exec);
 }
 
-void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
+void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
+  if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
+    // We don't check the return value: madvise(MADV_HUGEPAGE) may not
+    // be supported or the memory may already be backed by huge pages.
+    ::madvise(addr, bytes, MADV_HUGEPAGE);
+  }
+}
 
 void os::free_memory(char *addr, size_t bytes) {
-  ::mmap(addr, bytes, PROT_READ | PROT_WRITE,
-         MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
+  ::madvise(addr, bytes, MADV_DONTNEED);
 }
 
 void os::numa_make_global(char *addr, size_t bytes) {
@@ -2812,6 +2836,43 @@
   return linux_mprotect(addr, size, PROT_READ|PROT_WRITE);
 }
 
+bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
+  bool result = false;
+  void *p = mmap (NULL, page_size, PROT_READ|PROT_WRITE,
+                  MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
+                  -1, 0);
+
+  if (p != (void *) -1) {
+    // We don't know if this really is a huge page or not.
+    FILE *fp = fopen("/proc/self/maps", "r");
+    if (fp) {
+      while (!feof(fp)) {
+        char chars[257];
+        long x = 0;
+        if (fgets(chars, sizeof(chars), fp)) {
+          if (sscanf(chars, "%lx-%*lx", &x) == 1
+              && x == (long)p) {
+            if (strstr (chars, "hugepage")) {
+              result = true;
+              break;
+            }
+          }
+        }
+      }
+      fclose(fp);
+    }
+    munmap (p, page_size);
+    if (result)
+      return true;
+  }
+
+  if (warn) {
+    warning("HugeTLBFS is not supported by the operating system.");
+  }
+
+  return result;
+}
+
 /*
 * Set the coredump_filter bits to include largepages in core dump (bit 6)
 *
@@ -2853,8 +2914,22 @@
 
 static size_t _large_page_size = 0;
 
-bool os::large_page_init() {
-  if (!UseLargePages) return false;
+void os::large_page_init() {
+  if (!UseLargePages) {
+    UseHugeTLBFS = false;
+    UseSHM = false;
+    return;
+  }
+
+  if (FLAG_IS_DEFAULT(UseHugeTLBFS) && FLAG_IS_DEFAULT(UseSHM)) {
+    // If UseLargePages is specified on the command line try both methods,
+    // if it's default, then try only HugeTLBFS.
+    if (FLAG_IS_DEFAULT(UseLargePages)) {
+      UseHugeTLBFS = true;
+    } else {
+      UseHugeTLBFS = UseSHM = true;
+    }
+  }
 
   if (LargePageSizeInBytes) {
     _large_page_size = LargePageSizeInBytes;
@@ -2899,20 +2974,24 @@
     }
   }
 
+  // print a warning if any large page related flag is specified on command line
+  bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS);
+
   const size_t default_page_size = (size_t)Linux::page_size();
   if (_large_page_size > default_page_size) {
     _page_sizes[0] = _large_page_size;
     _page_sizes[1] = default_page_size;
     _page_sizes[2] = 0;
   }
+  UseHugeTLBFS = UseHugeTLBFS &&
+                 Linux::hugetlbfs_sanity_check(warn_on_failure, _large_page_size);
+
+  if (UseHugeTLBFS)
+    UseSHM = false;
+
+  UseLargePages = UseHugeTLBFS || UseSHM;
 
   set_coredump_filter();
-
-  // Large page support is available on 2.6 or newer kernel, some vendors
-  // (e.g. Redhat) have backported it to their 2.4 based distributions.
-  // We optimistically assume the support is available. If later it turns out
-  // not true, VM will automatically switch to use regular page size.
-  return true;
 }
 
 #ifndef SHM_HUGETLB
@@ -2922,7 +3001,7 @@
 char* os::reserve_memory_special(size_t bytes, char* req_addr, bool exec) {
   // "exec" is passed in but not used.  Creating the shared image for
   // the code cache doesn't have an SHM_X executable permission to check.
-  assert(UseLargePages, "only for large pages");
+  assert(UseLargePages && UseSHM, "only for SHM large pages");
 
   key_t key = IPC_PRIVATE;
   char *addr;
@@ -2989,16 +3068,15 @@
   return _large_page_size;
 }
 
-// Linux does not support anonymous mmap with large page memory. The only way
-// to reserve large page memory without file backing is through SysV shared
-// memory API. The entire memory region is committed and pinned upfront.
-// Hopefully this will change in the future...
+// HugeTLBFS allows application to commit large page memory on demand;
+// with SysV SHM the entire memory region must be allocated as shared
+// memory.
 bool os::can_commit_large_page_memory() {
-  return false;
+  return UseHugeTLBFS;
 }
 
 bool os::can_execute_large_page_memory() {
-  return false;
+  return UseHugeTLBFS;
 }
 
 // Reserve memory at an arbitrary address, only if that area is
@@ -4038,7 +4116,7 @@
 #endif
   }
 
-  FLAG_SET_DEFAULT(UseLargePages, os::large_page_init());
+  os::large_page_init();
 
   // initialize suspend/resume support - must do this before signal_sets_init()
   if (SR_initialize() != 0) {
@@ -4090,6 +4168,23 @@
         UseNUMA = false;
       }
     }
+    // With SHM large pages we cannot uncommit a page, so there's not way
+    // we can make the adaptive lgrp chunk resizing work. If the user specified
+    // both UseNUMA and UseLargePages (or UseSHM) on the command line - warn and
+    // disable adaptive resizing.
+    if (UseNUMA && UseLargePages && UseSHM) {
+      if (!FLAG_IS_DEFAULT(UseNUMA)) {
+        if (FLAG_IS_DEFAULT(UseLargePages) && FLAG_IS_DEFAULT(UseSHM)) {
+          UseLargePages = false;
+        } else {
+          warning("UseNUMA is not fully compatible with SHM large pages, disabling adaptive resizing");
+          UseAdaptiveSizePolicy = false;
+          UseAdaptiveNUMAChunkSizing = false;
+        }
+      } else {
+        UseNUMA = false;
+      }
+    }
     if (!UseNUMA && ForceNUMA) {
       UseNUMA = true;
     }
--- a/hotspot/src/os/linux/vm/os_linux.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/os/linux/vm/os_linux.hpp	Thu May 05 22:28:31 2011 -0700
@@ -86,6 +86,9 @@
 
   static void rebuild_cpu_to_node_map();
   static GrowableArray<int>* cpu_to_node()    { return _cpu_to_node; }
+
+  static bool hugetlbfs_sanity_check(bool warn, size_t page_size);
+
  public:
   static void init_thread_fpu_state();
   static int  get_fpu_control_word();
--- a/hotspot/src/os/solaris/vm/os_solaris.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/os/solaris/vm/os_solaris.cpp	Thu May 05 22:28:31 2011 -0700
@@ -2826,7 +2826,9 @@
 void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
   assert((intptr_t)addr % alignment_hint == 0, "Address should be aligned.");
   assert((intptr_t)(addr + bytes) % alignment_hint == 0, "End should be aligned.");
-  Solaris::set_mpss_range(addr, bytes, alignment_hint);
+  if (UseLargePages && UseMPSS) {
+    Solaris::set_mpss_range(addr, bytes, alignment_hint);
+  }
 }
 
 // Tell the OS to make the range local to the first-touching LWP
@@ -3334,11 +3336,11 @@
   return true;
 }
 
-bool os::large_page_init() {
+void os::large_page_init() {
   if (!UseLargePages) {
     UseISM = false;
     UseMPSS = false;
-    return false;
+    return;
   }
 
   // print a warning if any large page related flag is specified on command line
@@ -3359,7 +3361,6 @@
             Solaris::mpss_sanity_check(warn_on_failure, &_large_page_size);
 
   UseLargePages = UseISM || UseMPSS;
-  return UseLargePages;
 }
 
 bool os::Solaris::set_mpss_range(caddr_t start, size_t bytes, size_t align) {
@@ -4990,7 +4991,7 @@
 #endif
 }
 
-  FLAG_SET_DEFAULT(UseLargePages, os::large_page_init());
+  os::large_page_init();
 
   // Check minimum allowable stack size for thread creation and to initialize
   // the java system classes, including StackOverflowError - depends on page
@@ -5044,6 +5045,20 @@
         UseNUMA = false;
       }
     }
+    // ISM is not compatible with the NUMA allocator - it always allocates
+    // pages round-robin across the lgroups.
+    if (UseNUMA && UseLargePages && UseISM) {
+      if (!FLAG_IS_DEFAULT(UseNUMA)) {
+        if (FLAG_IS_DEFAULT(UseLargePages) && FLAG_IS_DEFAULT(UseISM)) {
+          UseLargePages = false;
+        } else {
+          warning("UseNUMA is not compatible with ISM large pages, disabling NUMA allocator");
+          UseNUMA = false;
+        }
+      } else {
+        UseNUMA = false;
+      }
+    }
     if (!UseNUMA && ForceNUMA) {
       UseNUMA = true;
     }
--- a/hotspot/src/os/windows/vm/os_windows.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/os/windows/vm/os_windows.cpp	Thu May 05 22:28:31 2011 -0700
@@ -2762,8 +2762,8 @@
   _hToken = NULL;
 }
 
-bool os::large_page_init() {
-  if (!UseLargePages) return false;
+void os::large_page_init() {
+  if (!UseLargePages) return;
 
   // print a warning if any large page related flag is specified on command line
   bool warn_on_failure = !FLAG_IS_DEFAULT(UseLargePages) ||
@@ -2808,7 +2808,7 @@
   }
 
   cleanup_after_large_page_init();
-  return success;
+  UseLargePages = success;
 }
 
 // On win32, one cannot release just a part of reserved memory, it's an
@@ -3561,7 +3561,7 @@
 #endif
 }
 
-  FLAG_SET_DEFAULT(UseLargePages, os::large_page_init());
+  os::large_page_init();
 
   // Setup Windows Exceptions
 
--- a/hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp	Thu May 05 22:28:31 2011 -0700
@@ -93,7 +93,7 @@
 
 inline void     OrderAccess::store_fence(jbyte*  p, jbyte  v) {
   __asm__ volatile (  "xchgb (%2),%0"
-                    : "=r" (v)
+                    : "=q" (v)
                     : "0" (v), "r" (p)
                     : "memory");
 }
@@ -155,7 +155,7 @@
 // Must duplicate definitions instead of calling store_fence because we don't want to cast away volatile.
 inline void     OrderAccess::release_store_fence(volatile jbyte*  p, jbyte  v) {
   __asm__ volatile (  "xchgb (%2),%0"
-                    : "=r" (v)
+                    : "=q" (v)
                     : "0" (v), "r" (p)
                     : "memory");
 }
--- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp	Thu May 05 22:28:31 2011 -0700
@@ -1026,9 +1026,21 @@
           // first replace the tail, then the call
 #ifdef ARM
           if(stub_id == Runtime1::load_klass_patching_id && !VM_Version::supports_movw()) {
+            nmethod* nm = CodeCache::find_nmethod(instr_pc);
+            oop* oop_addr = NULL;
+            assert(nm != NULL, "invalid nmethod_pc");
+            RelocIterator oops(nm, copy_buff, copy_buff + 1);
+            while (oops.next()) {
+              if (oops.type() == relocInfo::oop_type) {
+                oop_Relocation* r = oops.oop_reloc();
+                oop_addr = r->oop_addr();
+                break;
+              }
+            }
+            assert(oop_addr != NULL, "oop relocation must exist");
             copy_buff -= *byte_count;
             NativeMovConstReg* n_copy2 = nativeMovConstReg_at(copy_buff);
-            n_copy2->set_data((intx) (load_klass()), instr_pc);
+            n_copy2->set_pc_relative_offset((address)oop_addr, instr_pc);
           }
 #endif
 
--- a/hotspot/src/share/vm/ci/bcEscapeAnalyzer.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/ci/bcEscapeAnalyzer.cpp	Thu May 05 22:28:31 2011 -0700
@@ -232,14 +232,7 @@
   }
 
   // compute size of arguments
-  int arg_size = target->arg_size();
-  if (code == Bytecodes::_invokedynamic) {
-    assert(!target->is_static(), "receiver explicit in method");
-    arg_size--;  // implicit, not really on stack
-  }
-  if (!target->is_loaded() && code == Bytecodes::_invokestatic) {
-    arg_size--;
-  }
+  int arg_size = target->invoke_arg_size(code);
   int arg_base = MAX2(state._stack_height - arg_size, 0);
 
   // direct recursive calls are skipped if they can be bound statically without introducing
--- a/hotspot/src/share/vm/ci/ciEnv.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/ci/ciEnv.cpp	Thu May 05 22:28:31 2011 -0700
@@ -756,7 +756,7 @@
   assert(bc == Bytecodes::_invokedynamic, "must be invokedynamic");
 
   bool is_resolved = cpool->cache()->main_entry_at(index)->is_resolved(bc);
-  if (is_resolved && (oop) cpool->cache()->secondary_entry_at(index)->f1() == NULL)
+  if (is_resolved && cpool->cache()->secondary_entry_at(index)->is_f1_null())
     // FIXME: code generation could allow for null (unlinked) call site
     is_resolved = false;
 
@@ -770,7 +770,7 @@
 
   // Get the invoker methodOop from the constant pool.
   oop f1_value = cpool->cache()->main_entry_at(index)->f1();
-  methodOop signature_invoker = methodOop(f1_value);
+  methodOop signature_invoker = (methodOop) f1_value;
   assert(signature_invoker != NULL && signature_invoker->is_method() && signature_invoker->is_method_handle_invoke(),
          "correct result from LinkResolver::resolve_invokedynamic");
 
--- a/hotspot/src/share/vm/ci/ciMethod.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/ci/ciMethod.hpp	Thu May 05 22:28:31 2011 -0700
@@ -127,7 +127,24 @@
   ciSignature* signature() const                 { return _signature; }
   ciType*      return_type() const               { return _signature->return_type(); }
   int          arg_size_no_receiver() const      { return _signature->size(); }
-  int          arg_size() const                  { return _signature->size() + (_flags.is_static() ? 0 : 1); }
+  // Can only be used on loaded ciMethods
+  int          arg_size() const                  {
+    check_is_loaded();
+    return _signature->size() + (_flags.is_static() ? 0 : 1);
+  }
+  // Report the number of elements on stack when invoking this method.
+  // This is different than the regular arg_size because invokdynamic
+  // has an implicit receiver.
+  int invoke_arg_size(Bytecodes::Code code) const {
+    int arg_size = _signature->size();
+    // Add a receiver argument, maybe:
+    if (code != Bytecodes::_invokestatic &&
+        code != Bytecodes::_invokedynamic) {
+      arg_size++;
+    }
+    return arg_size;
+  }
+
 
   // Method code and related information.
   address code()                                 { if (_code == NULL) load_code(); return _code; }
@@ -276,9 +293,9 @@
   void print_short_name(outputStream* st = tty);
 
   methodOop get_method_handle_target() {
-    klassOop receiver_limit_oop = NULL;
-    int flags = 0;
-    return MethodHandles::decode_method(get_oop(), receiver_limit_oop, flags);
+    KlassHandle receiver_limit; int flags = 0;
+    methodHandle m = MethodHandles::decode_method(get_oop(), receiver_limit, flags);
+    return m();
   }
 };
 
--- a/hotspot/src/share/vm/ci/ciObject.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/ci/ciObject.cpp	Thu May 05 22:28:31 2011 -0700
@@ -194,6 +194,16 @@
 // ciObject::should_be_constant()
 bool ciObject::should_be_constant() {
   if (ScavengeRootsInCode >= 2)  return true;  // force everybody to be a constant
+  if (!JavaObjectsInPerm && !is_null_object()) {
+    // We want Strings and Classes to be embeddable by default since
+    // they used to be in the perm world.  Not all Strings used to be
+    // embeddable but there's no easy way to distinguish the interned
+    // from the regulars ones so just treat them all that way.
+    ciEnv* env = CURRENT_ENV;
+    if (klass() == env->String_klass() || klass() == env->Class_klass()) {
+      return true;
+    }
+  }
   return handle() == NULL || !is_scavengable();
 }
 
--- a/hotspot/src/share/vm/classfile/javaClasses.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/classfile/javaClasses.cpp	Thu May 05 22:28:31 2011 -0700
@@ -1357,7 +1357,7 @@
 };
 
 
-void java_lang_Throwable::fill_in_stack_trace(Handle throwable, TRAPS) {
+void java_lang_Throwable::fill_in_stack_trace(Handle throwable, methodHandle method, TRAPS) {
   if (!StackTraceInThrowable) return;
   ResourceMark rm(THREAD);
 
@@ -1374,6 +1374,16 @@
   JavaThread* thread = (JavaThread*)THREAD;
   BacktraceBuilder bt(CHECK);
 
+  // If there is no Java frame just return the method that was being called
+  // with bci 0
+  if (!thread->has_last_Java_frame()) {
+    if (max_depth >= 1 && method() != NULL) {
+      bt.push(method(), 0, CHECK);
+      set_backtrace(throwable(), bt.backtrace());
+    }
+    return;
+  }
+
   // Instead of using vframe directly, this version of fill_in_stack_trace
   // basically handles everything by hand. This significantly improved the
   // speed of this method call up to 28.5% on Solaris sparc. 27.1% on Windows.
@@ -1477,7 +1487,7 @@
   set_backtrace(throwable(), bt.backtrace());
 }
 
-void java_lang_Throwable::fill_in_stack_trace(Handle throwable) {
+void java_lang_Throwable::fill_in_stack_trace(Handle throwable, methodHandle method) {
   // No-op if stack trace is disabled
   if (!StackTraceInThrowable) {
     return;
@@ -1491,7 +1501,7 @@
   PRESERVE_EXCEPTION_MARK;
 
   JavaThread* thread = JavaThread::active();
-  fill_in_stack_trace(throwable, thread);
+  fill_in_stack_trace(throwable, method, thread);
   // ignore exceptions thrown during stack trace filling
   CLEAR_PENDING_EXCEPTION;
 }
--- a/hotspot/src/share/vm/classfile/javaClasses.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/classfile/javaClasses.hpp	Thu May 05 22:28:31 2011 -0700
@@ -440,8 +440,8 @@
   static void fill_in_stack_trace_of_preallocated_backtrace(Handle throwable);
 
   // Fill in current stack trace, can cause GC
-  static void fill_in_stack_trace(Handle throwable, TRAPS);
-  static void fill_in_stack_trace(Handle throwable);
+  static void fill_in_stack_trace(Handle throwable, methodHandle method, TRAPS);
+  static void fill_in_stack_trace(Handle throwable, methodHandle method = methodHandle());
   // Programmatic access to stack trace
   static oop  get_stack_trace_element(oop throwable, int index, TRAPS);
   static int  get_stack_trace_depth(oop throwable, TRAPS);
--- a/hotspot/src/share/vm/compiler/compileBroker.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/compiler/compileBroker.cpp	Thu May 05 22:28:31 2011 -0700
@@ -976,6 +976,15 @@
     return;
   }
 
+  // If the requesting thread is holding the pending list lock
+  // then we just return. We can't risk blocking while holding
+  // the pending list lock or a 3-way deadlock may occur
+  // between the reference handler thread, a GC (instigated
+  // by a compiler thread), and compiled method registration.
+  if (instanceRefKlass::owns_pending_list_lock(JavaThread::current())) {
+    return;
+  }
+
   // Outputs from the following MutexLocker block:
   CompileTask* task     = NULL;
   bool         blocking = false;
@@ -1304,17 +1313,8 @@
 // Should the current thread be blocked until this compilation request
 // has been fulfilled?
 bool CompileBroker::is_compile_blocking(methodHandle method, int osr_bci) {
-  if (!BackgroundCompilation) {
-    Symbol* class_name = method->method_holder()->klass_part()->name();
-    if (class_name->starts_with("java/lang/ref/Reference", 23)) {
-      // The reference handler thread can dead lock with the GC if compilation is blocking,
-      // so we avoid blocking compiles for anything in the java.lang.ref.Reference class,
-      // including inner classes such as ReferenceHandler.
-      return false;
-    }
-    return true;
-  }
-  return false;
+  assert(!instanceRefKlass::owns_pending_list_lock(JavaThread::current()), "possible deadlock");
+  return !BackgroundCompilation;
 }
 
 
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Thu May 05 22:28:31 2011 -0700
@@ -1963,10 +1963,21 @@
 // Iteration support, mostly delegated from a CMS generation
 
 void CompactibleFreeListSpace::save_marks() {
-  // mark the "end" of the used space at the time of this call;
+  assert(Thread::current()->is_VM_thread(),
+         "Global variable should only be set when single-threaded");
+  // Mark the "end" of the used space at the time of this call;
   // note, however, that promoted objects from this point
   // on are tracked in the _promoInfo below.
   set_saved_mark_word(unallocated_block());
+#ifdef ASSERT
+  // Check the sanity of save_marks() etc.
+  MemRegion ur    = used_region();
+  MemRegion urasm = used_region_at_save_marks();
+  assert(ur.contains(urasm),
+         err_msg(" Error at save_marks(): [" PTR_FORMAT "," PTR_FORMAT ")"
+                 " should contain [" PTR_FORMAT "," PTR_FORMAT ")",
+                 ur.start(), ur.end(), urasm.start(), urasm.end()));
+#endif
   // inform allocator that promotions should be tracked.
   assert(_promoInfo.noPromotions(), "_promoInfo inconsistency");
   _promoInfo.startTrackingPromotions();
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu May 05 22:28:31 2011 -0700
@@ -3189,10 +3189,9 @@
 }
 
 void CMSCollector::setup_cms_unloading_and_verification_state() {
-  const  bool should_verify =    VerifyBeforeGC || VerifyAfterGC || VerifyDuringGC
+  const  bool should_verify =   VerifyBeforeGC || VerifyAfterGC || VerifyDuringGC
                              || VerifyBeforeExit;
-  const  int  rso           =    SharedHeap::SO_Symbols | SharedHeap::SO_Strings
-                             |   SharedHeap::SO_CodeCache;
+  const  int  rso           =   SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
 
   if (should_unload_classes()) {   // Should unload classes this cycle
     remove_root_scanning_option(rso);  // Shrink the root set appropriately
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu May 05 22:28:31 2011 -0700
@@ -826,6 +826,14 @@
 void ConcurrentMark::checkpointRootsInitialPost() {
   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 
+  // If we force an overflow during remark, the remark operation will
+  // actually abort and we'll restart concurrent marking. If we always
+  // force an oveflow during remark we'll never actually complete the
+  // marking phase. So, we initilize this here, at the start of the
+  // cycle, so that at the remaining overflow number will decrease at
+  // every remark and we'll eventually not need to cause one.
+  force_overflow_stw()->init();
+
   // For each region note start of marking.
   NoteStartOfMarkHRClosure startcl;
   g1h->heap_region_iterate(&startcl);
@@ -893,27 +901,37 @@
 }
 
 /*
-   Notice that in the next two methods, we actually leave the STS
-   during the barrier sync and join it immediately afterwards. If we
-   do not do this, this then the following deadlock can occur: one
-   thread could be in the barrier sync code, waiting for the other
-   thread to also sync up, whereas another one could be trying to
-   yield, while also waiting for the other threads to sync up too.
-
-   Because the thread that does the sync barrier has left the STS, it
-   is possible to be suspended for a Full GC or an evacuation pause
-   could occur. This is actually safe, since the entering the sync
-   barrier is one of the last things do_marking_step() does, and it
-   doesn't manipulate any data structures afterwards.
-*/
+ * Notice that in the next two methods, we actually leave the STS
+ * during the barrier sync and join it immediately afterwards. If we
+ * do not do this, the following deadlock can occur: one thread could
+ * be in the barrier sync code, waiting for the other thread to also
+ * sync up, whereas another one could be trying to yield, while also
+ * waiting for the other threads to sync up too.
+ *
+ * Note, however, that this code is also used during remark and in
+ * this case we should not attempt to leave / enter the STS, otherwise
+ * we'll either hit an asseert (debug / fastdebug) or deadlock
+ * (product). So we should only leave / enter the STS if we are
+ * operating concurrently.
+ *
+ * Because the thread that does the sync barrier has left the STS, it
+ * is possible to be suspended for a Full GC or an evacuation pause
+ * could occur. This is actually safe, since the entering the sync
+ * barrier is one of the last things do_marking_step() does, and it
+ * doesn't manipulate any data structures afterwards.
+ */
 
 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
   if (verbose_low())
     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 
-  ConcurrentGCThread::stsLeave();
+  if (concurrent()) {
+    ConcurrentGCThread::stsLeave();
+  }
   _first_overflow_barrier_sync.enter();
-  ConcurrentGCThread::stsJoin();
+  if (concurrent()) {
+    ConcurrentGCThread::stsJoin();
+  }
   // at this point everyone should have synced up and not be doing any
   // more work
 
@@ -923,7 +941,12 @@
   // let task 0 do this
   if (task_num == 0) {
     // task 0 is responsible for clearing the global data structures
-    clear_marking_state();
+    // We should be here because of an overflow. During STW we should
+    // not clear the overflow flag since we rely on it being true when
+    // we exit this method to abort the pause and restart concurent
+    // marking.
+    clear_marking_state(concurrent() /* clear_overflow */);
+    force_overflow()->update();
 
     if (PrintGC) {
       gclog_or_tty->date_stamp(PrintGCDateStamps);
@@ -940,15 +963,45 @@
   if (verbose_low())
     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 
-  ConcurrentGCThread::stsLeave();
+  if (concurrent()) {
+    ConcurrentGCThread::stsLeave();
+  }
   _second_overflow_barrier_sync.enter();
-  ConcurrentGCThread::stsJoin();
+  if (concurrent()) {
+    ConcurrentGCThread::stsJoin();
+  }
   // at this point everything should be re-initialised and ready to go
 
   if (verbose_low())
     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 }
 
+#ifndef PRODUCT
+void ForceOverflowSettings::init() {
+  _num_remaining = G1ConcMarkForceOverflow;
+  _force = false;
+  update();
+}
+
+void ForceOverflowSettings::update() {
+  if (_num_remaining > 0) {
+    _num_remaining -= 1;
+    _force = true;
+  } else {
+    _force = false;
+  }
+}
+
+bool ForceOverflowSettings::should_force() {
+  if (_force) {
+    _force = false;
+    return true;
+  } else {
+    return false;
+  }
+}
+#endif // !PRODUCT
+
 void ConcurrentMark::grayRoot(oop p) {
   HeapWord* addr = (HeapWord*) p;
   // We can't really check against _heap_start and _heap_end, since it
@@ -1117,6 +1170,7 @@
   _restart_for_overflow = false;
 
   size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
+  force_overflow_conc()->init();
   set_phase(active_workers, true /* concurrent */);
 
   CMConcurrentMarkingTask markingTask(this, cmThread());
@@ -1845,7 +1899,7 @@
   while (!_cleanup_list.is_empty()) {
     HeapRegion* hr = _cleanup_list.remove_head();
     assert(hr != NULL, "the list was not empty");
-    hr->rem_set()->clear();
+    hr->par_clear();
     tmp_free_list.add_as_tail(hr);
 
     // Instead of adding one region at a time to the secondary_free_list,
@@ -2703,12 +2757,16 @@
 
 }
 
-void ConcurrentMark::clear_marking_state() {
+void ConcurrentMark::clear_marking_state(bool clear_overflow) {
   _markStack.setEmpty();
   _markStack.clear_overflow();
   _regionStack.setEmpty();
   _regionStack.clear_overflow();
-  clear_has_overflown();
+  if (clear_overflow) {
+    clear_has_overflown();
+  } else {
+    assert(has_overflown(), "pre-condition");
+  }
   _finger = _heap_start;
 
   for (int i = 0; i < (int)_max_task_num; ++i) {
@@ -4279,6 +4337,15 @@
     }
   }
 
+  // If we are about to wrap up and go into termination, check if we
+  // should raise the overflow flag.
+  if (do_termination && !has_aborted()) {
+    if (_cm->force_overflow()->should_force()) {
+      _cm->set_has_overflown();
+      regular_clock_call();
+    }
+  }
+
   // We still haven't aborted. Now, let's try to get into the
   // termination protocol.
   if (do_termination && !has_aborted()) {
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Thu May 05 22:28:31 2011 -0700
@@ -316,6 +316,19 @@
   void setEmpty()   { _index = 0; clear_overflow(); }
 };
 
+class ForceOverflowSettings VALUE_OBJ_CLASS_SPEC {
+private:
+#ifndef PRODUCT
+  uintx _num_remaining;
+  bool _force;
+#endif // !defined(PRODUCT)
+
+public:
+  void init() PRODUCT_RETURN;
+  void update() PRODUCT_RETURN;
+  bool should_force() PRODUCT_RETURN_( return false; );
+};
+
 // this will enable a variety of different statistics per GC task
 #define _MARKING_STATS_       0
 // this will enable the higher verbose levels
@@ -462,6 +475,9 @@
 
   WorkGang* _parallel_workers;
 
+  ForceOverflowSettings _force_overflow_conc;
+  ForceOverflowSettings _force_overflow_stw;
+
   void weakRefsWork(bool clear_all_soft_refs);
 
   void swapMarkBitMaps();
@@ -470,7 +486,7 @@
   // task local ones; should be called during initial mark.
   void reset();
   // It resets all the marking data structures.
-  void clear_marking_state();
+  void clear_marking_state(bool clear_overflow = true);
 
   // It should be called to indicate which phase we're in (concurrent
   // mark or remark) and how many threads are currently active.
@@ -547,6 +563,22 @@
   void enter_first_sync_barrier(int task_num);
   void enter_second_sync_barrier(int task_num);
 
+  ForceOverflowSettings* force_overflow_conc() {
+    return &_force_overflow_conc;
+  }
+
+  ForceOverflowSettings* force_overflow_stw() {
+    return &_force_overflow_stw;
+  }
+
+  ForceOverflowSettings* force_overflow() {
+    if (concurrent()) {
+      return force_overflow_conc();
+    } else {
+      return force_overflow_stw();
+    }
+  }
+
 public:
   // Manipulation of the global mark stack.
   // Notice that the first mark_stack_push is CAS-based, whereas the
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu May 05 22:28:31 2011 -0700
@@ -1161,6 +1161,7 @@
     TraceTime t(system_gc ? "Full GC (System.gc())" : "Full GC",
                 PrintGC, true, gclog_or_tty);
 
+    TraceCollectorStats tcs(g1mm()->full_collection_counters());
     TraceMemoryManagerStats tms(true /* fullGC */);
 
     double start = os::elapsedTime();
@@ -1339,6 +1340,7 @@
   if (PrintHeapAtGC) {
     Universe::print_heap_after_gc();
   }
+  g1mm()->update_counters();
 
   return true;
 }
@@ -1971,6 +1973,10 @@
 
   init_mutator_alloc_region();
 
+  // Do create of the monitoring and management support so that
+  // values in the heap have been properly initialized.
+  _g1mm = new G1MonitoringSupport(this, &_g1_storage);
+
   return JNI_OK;
 }
 
@@ -2113,6 +2119,28 @@
      (cause == GCCause::_java_lang_system_gc && ExplicitGCInvokesConcurrent));
 }
 
+#ifndef PRODUCT
+void G1CollectedHeap::allocate_dummy_regions() {
+  // Let's fill up most of the region
+  size_t word_size = HeapRegion::GrainWords - 1024;
+  // And as a result the region we'll allocate will be humongous.
+  guarantee(isHumongous(word_size), "sanity");
+
+  for (uintx i = 0; i < G1DummyRegionsPerGC; ++i) {
+    // Let's use the existing mechanism for the allocation
+    HeapWord* dummy_obj = humongous_obj_allocate(word_size);
+    if (dummy_obj != NULL) {
+      MemRegion mr(dummy_obj, word_size);
+      CollectedHeap::fill_with_object(mr);
+    } else {
+      // If we can't allocate once, we probably cannot allocate
+      // again. Let's get out of the loop.
+      break;
+    }
+  }
+}
+#endif // !PRODUCT
+
 void G1CollectedHeap::increment_full_collections_completed(bool concurrent) {
   MonitorLockerEx x(FullGCCount_lock, Mutex::_no_safepoint_check_flag);
 
@@ -2777,17 +2805,26 @@
                              bool silent,
                              bool use_prev_marking) {
   if (SafepointSynchronize::is_at_safepoint() || ! UseTLAB) {
-    if (!silent) { gclog_or_tty->print("roots "); }
+    if (!silent) { gclog_or_tty->print("Roots (excluding permgen) "); }
     VerifyRootsClosure rootsCl(use_prev_marking);
     CodeBlobToOopClosure blobsCl(&rootsCl, /*do_marking=*/ false);
-    process_strong_roots(true,  // activate StrongRootsScope
-                         false,
-                         SharedHeap::SO_AllClasses,
+    // We apply the relevant closures to all the oops in the
+    // system dictionary, the string table and the code cache.
+    const int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
+    process_strong_roots(true,      // activate StrongRootsScope
+                         true,      // we set "collecting perm gen" to true,
+                                    // so we don't reset the dirty cards in the perm gen.
+                         SharedHeap::ScanningOption(so),  // roots scanning options
                          &rootsCl,
                          &blobsCl,
                          &rootsCl);
+    // Since we used "collecting_perm_gen" == true above, we will not have
+    // checked the refs from perm into the G1-collected heap. We check those
+    // references explicitly below. Whether the relevant cards are dirty
+    // is checked further below in the rem set verification.
+    if (!silent) { gclog_or_tty->print("Permgen roots "); }
+    perm_gen()->oop_iterate(&rootsCl);
     bool failures = rootsCl.failures();
-    rem_set()->invalidate(perm_gen()->used_region(), false);
     if (!silent) { gclog_or_tty->print("HeapRegionSets "); }
     verify_region_sets();
     if (!silent) { gclog_or_tty->print("HeapRegions "); }
@@ -3164,6 +3201,7 @@
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
     TraceTime t(verbose_str, PrintGC && !PrintGCDetails, true, gclog_or_tty);
 
+    TraceCollectorStats tcs(g1mm()->incremental_collection_counters());
     TraceMemoryManagerStats tms(false /* fullGC */);
 
     // If the secondary_free_list is not empty, append it to the
@@ -3338,6 +3376,8 @@
         doConcurrentMark();
       }
 
+      allocate_dummy_regions();
+
 #if YOUNG_LIST_VERBOSE
       gclog_or_tty->print_cr("\nEnd of the pause.\nYoung_list:");
       _young_list->print();
@@ -3401,6 +3441,8 @@
   if (PrintHeapAtGC) {
     Universe::print_heap_after_gc();
   }
+  g1mm()->update_counters();
+
   if (G1SummarizeRSetStats &&
       (G1SummarizeRSetStatsPeriod > 0) &&
       (total_collections() % G1SummarizeRSetStatsPeriod == 0)) {
@@ -3933,6 +3975,9 @@
 oop
 G1CollectedHeap::handle_evacuation_failure_par(OopsInHeapRegionClosure* cl,
                                                oop old) {
+  assert(obj_in_cs(old),
+         err_msg("obj: "PTR_FORMAT" should still be in the CSet",
+                 (HeapWord*) old));
   markOop m = old->mark();
   oop forward_ptr = old->forward_to_atomic(old);
   if (forward_ptr == NULL) {
@@ -3955,7 +4000,13 @@
     }
     return old;
   } else {
-    // Someone else had a place to copy it.
+    // Forward-to-self failed. Either someone else managed to allocate
+    // space for this object (old != forward_ptr) or they beat us in
+    // self-forwarding it (old == forward_ptr).
+    assert(old == forward_ptr || !obj_in_cs(forward_ptr),
+           err_msg("obj: "PTR_FORMAT" forwarded to: "PTR_FORMAT" "
+                   "should not be in the CSet",
+                   (HeapWord*) old, (HeapWord*) forward_ptr));
     return forward_ptr;
   }
 }
@@ -4266,11 +4317,10 @@
   T heap_oop = oopDesc::load_heap_oop(p);
   if (!oopDesc::is_null(heap_oop)) {
     oop obj = oopDesc::decode_heap_oop(heap_oop);
-    assert((_g1->evacuation_failed()) || (!_g1->obj_in_cs(obj)),
-           "shouldn't still be in the CSet if evacuation didn't fail.");
     HeapWord* addr = (HeapWord*)obj;
-    if (_g1->is_in_g1_reserved(addr))
+    if (_g1->is_in_g1_reserved(addr)) {
       _cm->grayRoot(oop(addr));
+    }
   }
 }
 
@@ -4919,36 +4969,45 @@
 
 #ifndef PRODUCT
 class G1VerifyCardTableCleanup: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
   CardTableModRefBS* _ct_bs;
 public:
-  G1VerifyCardTableCleanup(CardTableModRefBS* ct_bs)
-    : _ct_bs(ct_bs) { }
+  G1VerifyCardTableCleanup(G1CollectedHeap* g1h, CardTableModRefBS* ct_bs)
+    : _g1h(g1h), _ct_bs(ct_bs) { }
   virtual bool doHeapRegion(HeapRegion* r) {
-    MemRegion mr(r->bottom(), r->end());
     if (r->is_survivor()) {
-      _ct_bs->verify_dirty_region(mr);
+      _g1h->verify_dirty_region(r);
     } else {
-      _ct_bs->verify_clean_region(mr);
+      _g1h->verify_not_dirty_region(r);
     }
     return false;
   }
 };
 
+void G1CollectedHeap::verify_not_dirty_region(HeapRegion* hr) {
+  // All of the region should be clean.
+  CardTableModRefBS* ct_bs = (CardTableModRefBS*)barrier_set();
+  MemRegion mr(hr->bottom(), hr->end());
+  ct_bs->verify_not_dirty_region(mr);
+}
+
+void G1CollectedHeap::verify_dirty_region(HeapRegion* hr) {
+  // We cannot guarantee that [bottom(),end()] is dirty.  Threads
+  // dirty allocated blocks as they allocate them. The thread that
+  // retires each region and replaces it with a new one will do a
+  // maximal allocation to fill in [pre_dummy_top(),end()] but will
+  // not dirty that area (one less thing to have to do while holding
+  // a lock). So we can only verify that [bottom(),pre_dummy_top()]
+  // is dirty.
+  CardTableModRefBS* ct_bs = (CardTableModRefBS*) barrier_set();
+  MemRegion mr(hr->bottom(), hr->pre_dummy_top());
+  ct_bs->verify_dirty_region(mr);
+}
+
 void G1CollectedHeap::verify_dirty_young_list(HeapRegion* head) {
-  CardTableModRefBS* ct_bs = (CardTableModRefBS*) (barrier_set());
+  CardTableModRefBS* ct_bs = (CardTableModRefBS*) barrier_set();
   for (HeapRegion* hr = head; hr != NULL; hr = hr->get_next_young_region()) {
-    // We cannot guarantee that [bottom(),end()] is dirty.  Threads
-    // dirty allocated blocks as they allocate them. The thread that
-    // retires each region and replaces it with a new one will do a
-    // maximal allocation to fill in [pre_dummy_top(),end()] but will
-    // not dirty that area (one less thing to have to do while holding
-    // a lock). So we can only verify that [bottom(),pre_dummy_top()]
-    // is dirty. Also note that verify_dirty_region() requires
-    // mr.start() and mr.end() to be card aligned and pre_dummy_top()
-    // is not guaranteed to be.
-    MemRegion mr(hr->bottom(),
-                 ct_bs->align_to_card_boundary(hr->pre_dummy_top()));
-    ct_bs->verify_dirty_region(mr);
+    verify_dirty_region(hr);
   }
 }
 
@@ -4991,7 +5050,7 @@
   g1_policy()->record_clear_ct_time( elapsed * 1000.0);
 #ifndef PRODUCT
   if (G1VerifyCTCleanup || VerifyAfterGC) {
-    G1VerifyCardTableCleanup cleanup_verifier(ct_bs);
+    G1VerifyCardTableCleanup cleanup_verifier(this, ct_bs);
     heap_region_iterate(&cleanup_verifier);
   }
 #endif
@@ -5314,6 +5373,7 @@
     if (new_alloc_region != NULL) {
       g1_policy()->update_region_num(true /* next_is_young */);
       set_region_short_lived_locked(new_alloc_region);
+      g1mm()->update_eden_counters();
       return new_alloc_region;
     }
   }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu May 05 22:28:31 2011 -0700
@@ -28,7 +28,9 @@
 #include "gc_implementation/g1/concurrentMark.hpp"
 #include "gc_implementation/g1/g1AllocRegion.hpp"
 #include "gc_implementation/g1/g1RemSet.hpp"
+#include "gc_implementation/g1/g1MonitoringSupport.hpp"
 #include "gc_implementation/g1/heapRegionSets.hpp"
+#include "gc_implementation/shared/hSpaceCounters.hpp"
 #include "gc_implementation/parNew/parGCAllocBuffer.hpp"
 #include "memory/barrierSet.hpp"
 #include "memory/memRegion.hpp"
@@ -57,6 +59,7 @@
 class ConcurrentMark;
 class ConcurrentMarkThread;
 class ConcurrentG1Refine;
+class GenerationCounters;
 
 typedef OverflowTaskQueue<StarTask>         RefToScanQueue;
 typedef GenericTaskQueueSet<RefToScanQueue> RefToScanQueueSet;
@@ -236,6 +239,9 @@
   // current collection.
   HeapRegion* _gc_alloc_region_list;
 
+  // Helper for monitoring and management support.
+  G1MonitoringSupport* _g1mm;
+
   // Determines PLAB size for a particular allocation purpose.
   static size_t desired_plab_sz(GCAllocPurpose purpose);
 
@@ -298,6 +304,14 @@
   // started is maintained in _total_full_collections in CollectedHeap.
   volatile unsigned int _full_collections_completed;
 
+  // This is a non-product method that is helpful for testing. It is
+  // called at the end of a GC and artificially expands the heap by
+  // allocating a number of dead regions. This way we can induce very
+  // frequent marking cycles and stress the cleanup / concurrent
+  // cleanup code more (as all the regions that will be allocated by
+  // this method will be found dead by the marking cycle).
+  void allocate_dummy_regions() PRODUCT_RETURN;
+
   // These are macros so that, if the assert fires, we get the correct
   // line number, file, etc.
 
@@ -542,6 +556,9 @@
   HeapWord* expand_and_allocate(size_t word_size);
 
 public:
+
+  G1MonitoringSupport* g1mm() { return _g1mm; }
+
   // Expand the garbage-first heap by at least the given size (in bytes!).
   // Returns true if the heap was expanded by the requested amount;
   // false otherwise.
@@ -953,6 +970,8 @@
   // The number of regions available for "regular" expansion.
   size_t expansion_regions() { return _expansion_regions; }
 
+  void verify_not_dirty_region(HeapRegion* hr) PRODUCT_RETURN;
+  void verify_dirty_region(HeapRegion* hr) PRODUCT_RETURN;
   void verify_dirty_young_list(HeapRegion* head) PRODUCT_RETURN;
   void verify_dirty_young_regions() PRODUCT_RETURN;
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1MonitoringSupport.cpp	Thu May 05 22:28:31 2011 -0700
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/g1/g1MonitoringSupport.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1CollectorPolicy.hpp"
+
+G1MonitoringSupport::G1MonitoringSupport(G1CollectedHeap* g1h,
+                                         VirtualSpace* g1_storage_addr) :
+  _g1h(g1h),
+  _incremental_collection_counters(NULL),
+  _full_collection_counters(NULL),
+  _non_young_collection_counters(NULL),
+  _old_space_counters(NULL),
+  _young_collection_counters(NULL),
+  _eden_counters(NULL),
+  _from_counters(NULL),
+  _to_counters(NULL),
+  _g1_storage_addr(g1_storage_addr)
+{
+  // Counters for GC collections
+  //
+  //  name "collector.0".  In a generational collector this would be the
+  // young generation collection.
+  _incremental_collection_counters =
+    new CollectorCounters("G1 incremental collections", 0);
+  //   name "collector.1".  In a generational collector this would be the
+  // old generation collection.
+  _full_collection_counters =
+    new CollectorCounters("G1 stop-the-world full collections", 1);
+
+  // timer sampling for all counters supporting sampling only update the
+  // used value.  See the take_sample() method.  G1 requires both used and
+  // capacity updated so sampling is not currently used.  It might
+  // be sufficient to update all counters in take_sample() even though
+  // take_sample() only returns "used".  When sampling was used, there
+  // were some anomolous values emitted which may have been the consequence
+  // of not updating all values simultaneously (i.e., see the calculation done
+  // in eden_space_used(), is it possbile that the values used to
+  // calculate either eden_used or survivor_used are being updated by
+  // the collector when the sample is being done?).
+  const bool sampled = false;
+
+  // "Generation" and "Space" counters.
+  //
+  //  name "generation.1" This is logically the old generation in
+  // generational GC terms.  The "1, 1" parameters are for
+  // the n-th generation (=1) with 1 space.
+  // Counters are created from minCapacity, maxCapacity, and capacity
+  _non_young_collection_counters =
+    new GenerationCounters("whole heap", 1, 1, _g1_storage_addr);
+
+  //  name  "generation.1.space.0"
+  // Counters are created from maxCapacity, capacity, initCapacity,
+  // and used.
+  _old_space_counters = new HSpaceCounters("space", 0,
+    _g1h->max_capacity(), _g1h->capacity(), _non_young_collection_counters);
+
+  //   Young collection set
+  //  name "generation.0".  This is logically the young generation.
+  //  The "0, 3" are paremeters for the n-th genertaion (=0) with 3 spaces.
+  // See  _non_young_collection_counters for additional counters
+  _young_collection_counters = new GenerationCounters("young", 0, 3, NULL);
+
+  // Replace "max_heap_byte_size() with maximum young gen size for
+  // g1Collectedheap
+  //  name "generation.0.space.0"
+  // See _old_space_counters for additional counters
+  _eden_counters = new HSpaceCounters("eden", 0,
+    _g1h->max_capacity(), eden_space_committed(),
+    _young_collection_counters);
+
+  //  name "generation.0.space.1"
+  // See _old_space_counters for additional counters
+  // Set the arguments to indicate that this survivor space is not used.
+  _from_counters = new HSpaceCounters("s0", 1, (long) 0, (long) 0,
+    _young_collection_counters);
+
+  //  name "generation.0.space.2"
+  // See _old_space_counters for additional counters
+  _to_counters = new HSpaceCounters("s1", 2,
+    _g1h->max_capacity(),
+    survivor_space_committed(),
+    _young_collection_counters);
+}
+
+size_t G1MonitoringSupport::overall_committed() {
+  return g1h()->capacity();
+}
+
+size_t G1MonitoringSupport::overall_used() {
+  return g1h()->used_unlocked();
+}
+
+size_t G1MonitoringSupport::eden_space_committed() {
+  return MAX2(eden_space_used(), (size_t) HeapRegion::GrainBytes);
+}
+
+size_t G1MonitoringSupport::eden_space_used() {
+  size_t young_list_length = g1h()->young_list()->length();
+  size_t eden_used = young_list_length * HeapRegion::GrainBytes;
+  size_t survivor_used = survivor_space_used();
+  eden_used = subtract_up_to_zero(eden_used, survivor_used);
+  return eden_used;
+}
+
+size_t G1MonitoringSupport::survivor_space_committed() {
+  return MAX2(survivor_space_used(),
+              (size_t) HeapRegion::GrainBytes);
+}
+
+size_t G1MonitoringSupport::survivor_space_used() {
+  size_t survivor_num = g1h()->g1_policy()->recorded_survivor_regions();
+  size_t survivor_used = survivor_num * HeapRegion::GrainBytes;
+  return survivor_used;
+}
+
+size_t G1MonitoringSupport::old_space_committed() {
+  size_t committed = overall_committed();
+  size_t eden_committed = eden_space_committed();
+  size_t survivor_committed = survivor_space_committed();
+  committed = subtract_up_to_zero(committed, eden_committed);
+  committed = subtract_up_to_zero(committed, survivor_committed);
+  committed = MAX2(committed, (size_t) HeapRegion::GrainBytes);
+  return committed;
+}
+
+// See the comment near the top of g1MonitoringSupport.hpp for
+// an explanation of these calculations for "used" and "capacity".
+size_t G1MonitoringSupport::old_space_used() {
+  size_t used = overall_used();
+  size_t eden_used = eden_space_used();
+  size_t survivor_used = survivor_space_used();
+  used = subtract_up_to_zero(used, eden_used);
+  used = subtract_up_to_zero(used, survivor_used);
+  return used;
+}
+
+void G1MonitoringSupport::update_counters() {
+  if (UsePerfData) {
+    eden_counters()->update_capacity(eden_space_committed());
+    eden_counters()->update_used(eden_space_used());
+    to_counters()->update_capacity(survivor_space_committed());
+    to_counters()->update_used(survivor_space_used());
+    old_space_counters()->update_capacity(old_space_committed());
+    old_space_counters()->update_used(old_space_used());
+    non_young_collection_counters()->update_all();
+  }
+}
+
+void G1MonitoringSupport::update_eden_counters() {
+  if (UsePerfData) {
+    eden_counters()->update_capacity(eden_space_committed());
+    eden_counters()->update_used(eden_space_used());
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1MonitoringSupport.hpp	Thu May 05 22:28:31 2011 -0700
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1MONITORINGSUPPORT_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1MONITORINGSUPPORT_HPP
+
+#include "gc_implementation/shared/hSpaceCounters.hpp"
+
+class G1CollectedHeap;
+class G1SpaceMonitoringSupport;
+
+// Class for monitoring logical spaces in G1.
+// G1 defines a set of regions as a young
+// collection (analogous to a young generation).
+// The young collection is a logical generation
+// with no fixed chunk (see space.hpp) reflecting
+// the address space for the generation.  In addition
+// to the young collection there is its complement
+// the non-young collection that is simply the regions
+// not in the young collection.  The non-young collection
+// is treated here as a logical old generation only
+// because the monitoring tools expect a generational
+// heap.  The monitoring tools expect that a Space
+// (see space.hpp) exists that describe the
+// address space of young collection and non-young
+// collection and such a view is provided here.
+//
+// This class provides interfaces to access
+// the value of variables for the young collection
+// that include the "capacity" and "used" of the
+// young collection along with constant values
+// for the minimum and maximum capacities for
+// the logical spaces.  Similarly for the non-young
+// collection.
+//
+// Also provided are counters for G1 concurrent collections
+// and stop-the-world full heap collecitons.
+//
+// Below is a description of how "used" and "capactiy"
+// (or committed) is calculated for the logical spaces.
+//
+// 1) The used space calculation for a pool is not necessarily
+// independent of the others. We can easily get from G1 the overall
+// used space in the entire heap, the number of regions in the young
+// generation (includes both eden and survivors), and the number of
+// survivor regions. So, from that we calculate:
+//
+//  survivor_used = survivor_num * region_size
+//  eden_used     = young_region_num * region_size - survivor_used
+//  old_gen_used  = overall_used - eden_used - survivor_used
+//
+// Note that survivor_used and eden_used are upper bounds. To get the
+// actual value we would have to iterate over the regions and add up
+// ->used(). But that'd be expensive. So, we'll accept some lack of
+// accuracy for those two. But, we have to be careful when calculating
+// old_gen_used, in case we subtract from overall_used more then the
+// actual number and our result goes negative.
+//
+// 2) Calculating the used space is straightforward, as described
+// above. However, how do we calculate the committed space, given that
+// we allocate space for the eden, survivor, and old gen out of the
+// same pool of regions? One way to do this is to use the used value
+// as also the committed value for the eden and survivor spaces and
+// then calculate the old gen committed space as follows:
+//
+//  old_gen_committed = overall_committed - eden_committed - survivor_committed
+//
+// Maybe a better way to do that would be to calculate used for eden
+// and survivor as a sum of ->used() over their regions and then
+// calculate committed as region_num * region_size (i.e., what we use
+// to calculate the used space now). This is something to consider
+// in the future.
+//
+// 3) Another decision that is again not straightforward is what is
+// the max size that each memory pool can grow to. One way to do this
+// would be to use the committed size for the max for the eden and
+// survivors and calculate the old gen max as follows (basically, it's
+// a similar pattern to what we use for the committed space, as
+// described above):
+//
+//  old_gen_max = overall_max - eden_max - survivor_max
+//
+// Unfortunately, the above makes the max of each pool fluctuate over
+// time and, even though this is allowed according to the spec, it
+// broke several assumptions in the M&M framework (there were cases
+// where used would reach a value greater than max). So, for max we
+// use -1, which means "undefined" according to the spec.
+//
+// 4) Now, there is a very subtle issue with all the above. The
+// framework will call get_memory_usage() on the three pools
+// asynchronously. As a result, each call might get a different value
+// for, say, survivor_num which will yield inconsistent values for
+// eden_used, survivor_used, and old_gen_used (as survivor_num is used
+// in the calculation of all three). This would normally be
+// ok. However, it's possible that this might cause the sum of
+// eden_used, survivor_used, and old_gen_used to go over the max heap
+// size and this seems to sometimes cause JConsole (and maybe other
+// clients) to get confused. There's not a really an easy / clean
+// solution to this problem, due to the asynchrounous nature of the
+// framework.
+
+class G1MonitoringSupport : public CHeapObj {
+  G1CollectedHeap* _g1h;
+  VirtualSpace* _g1_storage_addr;
+
+  // jstat performance counters
+  //  incremental collections both fully and partially young
+  CollectorCounters*   _incremental_collection_counters;
+  //  full stop-the-world collections
+  CollectorCounters*   _full_collection_counters;
+  //  young collection set counters.  The _eden_counters,
+  // _from_counters, and _to_counters are associated with
+  // this "generational" counter.
+  GenerationCounters*  _young_collection_counters;
+  //  non-young collection set counters. The _old_space_counters
+  // below are associated with this "generational" counter.
+  GenerationCounters*  _non_young_collection_counters;
+  // Counters for the capacity and used for
+  //   the whole heap
+  HSpaceCounters*      _old_space_counters;
+  //   the young collection
+  HSpaceCounters*      _eden_counters;
+  //   the survivor collection (only one, _to_counters, is actively used)
+  HSpaceCounters*      _from_counters;
+  HSpaceCounters*      _to_counters;
+
+  // It returns x - y if x > y, 0 otherwise.
+  // As described in the comment above, some of the inputs to the
+  // calculations we have to do are obtained concurrently and hence
+  // may be inconsistent with each other. So, this provides a
+  // defensive way of performing the subtraction and avoids the value
+  // going negative (which would mean a very large result, given that
+  // the parameter are size_t).
+  static size_t subtract_up_to_zero(size_t x, size_t y) {
+    if (x > y) {
+      return x - y;
+    } else {
+      return 0;
+    }
+  }
+
+ public:
+  G1MonitoringSupport(G1CollectedHeap* g1h, VirtualSpace* g1_storage_addr);
+
+  G1CollectedHeap* g1h() { return _g1h; }
+  VirtualSpace* g1_storage_addr() { return _g1_storage_addr; }
+
+  // Performance Counter accessors
+  void update_counters();
+  void update_eden_counters();
+
+  CollectorCounters* incremental_collection_counters() {
+    return _incremental_collection_counters;
+  }
+  CollectorCounters* full_collection_counters() {
+    return _full_collection_counters;
+  }
+  GenerationCounters* non_young_collection_counters() {
+    return _non_young_collection_counters;
+  }
+  HSpaceCounters*      old_space_counters() { return _old_space_counters; }
+  HSpaceCounters*      eden_counters() { return _eden_counters; }
+  HSpaceCounters*      from_counters() { return _from_counters; }
+  HSpaceCounters*      to_counters() { return _to_counters; }
+
+  // Monitoring support used by
+  //   MemoryService
+  //   jstat counters
+  size_t overall_committed();
+  size_t overall_used();
+
+  size_t eden_space_committed();
+  size_t eden_space_used();
+
+  size_t survivor_space_committed();
+  size_t survivor_space_used();
+
+  size_t old_space_committed();
+  size_t old_space_used();
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1MONITORINGSUPPORT_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Thu May 05 22:28:31 2011 -0700
@@ -157,7 +157,6 @@
   void set_try_claimed() { _try_claimed = true; }
 
   void scanCard(size_t index, HeapRegion *r) {
-    _cards_done++;
     DirtyCardToOopClosure* cl =
       r->new_dcto_closure(_oc,
                          CardTableModRefBS::Precise,
@@ -168,17 +167,14 @@
     HeapWord* card_start = _bot_shared->address_for_index(index);
     HeapWord* card_end = card_start + G1BlockOffsetSharedArray::N_words;
     Space *sp = SharedHeap::heap()->space_containing(card_start);
-    MemRegion sm_region;
-    if (ParallelGCThreads > 0) {
-      // first find the used area
-      sm_region = sp->used_region_at_save_marks();
-    } else {
-      // The closure is not idempotent.  We shouldn't look at objects
-      // allocated during the GC.
-      sm_region = sp->used_region_at_save_marks();
-    }
+    MemRegion sm_region = sp->used_region_at_save_marks();
     MemRegion mr = sm_region.intersection(MemRegion(card_start,card_end));
-    if (!mr.is_empty()) {
+    if (!mr.is_empty() && !_ct_bs->is_card_claimed(index)) {
+      // We make the card as "claimed" lazily (so races are possible
+      // but they're benign), which reduces the number of duplicate
+      // scans (the rsets of the regions in the cset can intersect).
+      _ct_bs->set_card_claimed(index);
+      _cards_done++;
       cl->do_MemRegion(mr);
     }
   }
@@ -199,6 +195,9 @@
     HeapRegionRemSet* hrrs = r->rem_set();
     if (hrrs->iter_is_complete()) return false; // All done.
     if (!_try_claimed && !hrrs->claim_iter()) return false;
+    // If we ever free the collection set concurrently, we should also
+    // clear the card table concurrently therefore we won't need to
+    // add regions of the collection set to the dirty cards region.
     _g1h->push_dirty_cards_region(r);
     // If we didn't return above, then
     //   _try_claimed || r->claim_iter()
@@ -230,15 +229,10 @@
         _g1h->push_dirty_cards_region(card_region);
       }
 
-       // If the card is dirty, then we will scan it during updateRS.
-      if (!card_region->in_collection_set() && !_ct_bs->is_card_dirty(card_index)) {
-        // We make the card as "claimed" lazily (so races are possible but they're benign),
-        // which reduces the number of duplicate scans (the rsets of the regions in the cset
-        // can intersect).
-        if (!_ct_bs->is_card_claimed(card_index)) {
-          _ct_bs->set_card_claimed(card_index);
-          scanCard(card_index, card_region);
-        }
+      // If the card is dirty, then we will scan it during updateRS.
+      if (!card_region->in_collection_set() &&
+          !_ct_bs->is_card_dirty(card_index)) {
+        scanCard(card_index, card_region);
       }
     }
     if (!_try_claimed) {
@@ -246,8 +240,6 @@
     }
     return false;
   }
-  // Set all cards back to clean.
-  void cleanup() {_g1h->cleanUpCardTable();}
   size_t cards_done() { return _cards_done;}
   size_t cards_looked_up() { return _cards;}
 };
@@ -566,8 +558,9 @@
     update_rs_cl.set_region(r);
     HeapWord* stop_point =
       r->oops_on_card_seq_iterate_careful(scanRegion,
-                                        &filter_then_update_rs_cset_oop_cl,
-                                        false /* filter_young */);
+                                          &filter_then_update_rs_cset_oop_cl,
+                                          false /* filter_young */,
+                                          NULL  /* card_ptr */);
 
     // Since this is performed in the event of an evacuation failure, we
     // we shouldn't see a non-null stop point
@@ -735,12 +728,6 @@
                                 (OopClosure*)&mux :
                                 (OopClosure*)&update_rs_oop_cl));
 
-  // Undirty the card.
-  *card_ptr = CardTableModRefBS::clean_card_val();
-  // We must complete this write before we do any of the reads below.
-  OrderAccess::storeload();
-  // And process it, being careful of unallocated portions of TLAB's.
-
   // The region for the current card may be a young region. The
   // current card may have been a card that was evicted from the
   // card cache. When the card was inserted into the cache, we had
@@ -749,7 +736,7 @@
   // and tagged as young.
   //
   // We wish to filter out cards for such a region but the current
-  // thread, if we're running conucrrently, may "see" the young type
+  // thread, if we're running concurrently, may "see" the young type
   // change at any time (so an earlier "is_young" check may pass or
   // fail arbitrarily). We tell the iteration code to perform this
   // filtering when it has been determined that there has been an actual
@@ -759,7 +746,8 @@
   HeapWord* stop_point =
     r->oops_on_card_seq_iterate_careful(dirtyRegion,
                                         &filter_then_update_rs_oop_cl,
-                                        filter_young);
+                                        filter_young,
+                                        card_ptr);
 
   // If stop_point is non-null, then we encountered an unallocated region
   // (perhaps the unfilled portion of a TLAB.)  For now, we'll dirty the
--- a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu May 05 22:28:31 2011 -0700
@@ -300,13 +300,22 @@
   develop(uintx, G1StressConcRegionFreeingDelayMillis, 0,                   \
           "Artificial delay during concurrent region freeing")              \
                                                                             \
+  develop(uintx, G1DummyRegionsPerGC, 0,                                    \
+          "The number of dummy regions G1 will allocate at the end of "     \
+          "each evacuation pause in order to artificially fill up the "     \
+          "heap and stress the marking implementation.")                    \
+                                                                            \
   develop(bool, ReduceInitialCardMarksForG1, false,                         \
           "When ReduceInitialCardMarks is true, this flag setting "         \
           " controls whether G1 allows the RICM optimization")              \
                                                                             \
   develop(bool, G1ExitOnExpansionFailure, false,                            \
           "Raise a fatal VM exit out of memory failure in the event "       \
-          " that heap expansion fails due to running out of swap.")
+          " that heap expansion fails due to running out of swap.")         \
+                                                                            \
+  develop(uintx, G1ConcMarkForceOverflow, 0,                                \
+          "The number of times we'll force an overflow during "             \
+          "concurrent marking")
 
 G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
 
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp	Thu May 05 22:28:31 2011 -0700
@@ -376,6 +376,17 @@
   if (clear_space) clear(SpaceDecorator::Mangle);
 }
 
+void HeapRegion::par_clear() {
+  assert(used() == 0, "the region should have been already cleared");
+  assert(capacity() == (size_t) HeapRegion::GrainBytes,
+         "should be back to normal");
+  HeapRegionRemSet* hrrs = rem_set();
+  hrrs->clear();
+  CardTableModRefBS* ct_bs =
+                   (CardTableModRefBS*)G1CollectedHeap::heap()->barrier_set();
+  ct_bs->clear(MemRegion(bottom(), end()));
+}
+
 // <PREDICTION>
 void HeapRegion::calc_gc_efficiency() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
@@ -600,7 +611,15 @@
 HeapRegion::
 oops_on_card_seq_iterate_careful(MemRegion mr,
                                  FilterOutOfRegionClosure* cl,
-                                 bool filter_young) {
+                                 bool filter_young,
+                                 jbyte* card_ptr) {
+  // Currently, we should only have to clean the card if filter_young
+  // is true and vice versa.
+  if (filter_young) {
+    assert(card_ptr != NULL, "pre-condition");
+  } else {
+    assert(card_ptr == NULL, "pre-condition");
+  }
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 
   // If we're within a stop-world GC, then we might look at a card in a
@@ -626,6 +645,15 @@
 
   assert(!is_young(), "check value of filter_young");
 
+  // We can only clean the card here, after we make the decision that
+  // the card is not young. And we only clean the card if we have been
+  // asked to (i.e., card_ptr != NULL).
+  if (card_ptr != NULL) {
+    *card_ptr = CardTableModRefBS::clean_card_val();
+    // We must complete this write before we do any of the reads below.
+    OrderAccess::storeload();
+  }
+
   // We used to use "block_start_careful" here.  But we're actually happy
   // to update the BOT while we do this...
   HeapWord* cur = block_start(mr.start());
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp	Thu May 05 22:28:31 2011 -0700
@@ -584,6 +584,7 @@
 
   // Reset HR stuff to default values.
   void hr_clear(bool par, bool clear_space);
+  void par_clear();
 
   void initialize(MemRegion mr, bool clear_space, bool mangle_space);
 
@@ -802,12 +803,16 @@
   HeapWord*
   object_iterate_mem_careful(MemRegion mr, ObjectClosure* cl);
 
-  // In this version - if filter_young is true and the region
-  // is a young region then we skip the iteration.
+  // filter_young: if true and the region is a young region then we
+  // skip the iteration.
+  // card_ptr: if not NULL, and we decide that the card is not young
+  // and we iterate over it, we'll clean the card before we start the
+  // iteration.
   HeapWord*
   oops_on_card_seq_iterate_careful(MemRegion mr,
                                    FilterOutOfRegionClosure* cl,
-                                   bool filter_young);
+                                   bool filter_young,
+                                   jbyte* card_ptr);
 
   // A version of block start that is guaranteed to find *some* block
   // boundary at or before "p", but does not object iteration, and may
--- a/hotspot/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Thu May 05 22:28:31 2011 -0700
@@ -33,44 +33,43 @@
 #include "runtime/mutexLocker.hpp"
 #include "runtime/virtualspace.hpp"
 
-void CardTableModRefBS::par_non_clean_card_iterate_work(Space* sp, MemRegion mr,
-                                                        DirtyCardToOopClosure* dcto_cl,
-                                                        MemRegionClosure* cl,
-                                                        int n_threads) {
-  if (n_threads > 0) {
-    assert((n_threads == 1 && ParallelGCThreads == 0) ||
-           n_threads <= (int)ParallelGCThreads,
-           "# worker threads != # requested!");
-    // Make sure the LNC array is valid for the space.
-    jbyte**   lowest_non_clean;
-    uintptr_t lowest_non_clean_base_chunk_index;
-    size_t    lowest_non_clean_chunk_size;
-    get_LNC_array_for_space(sp, lowest_non_clean,
-                            lowest_non_clean_base_chunk_index,
-                            lowest_non_clean_chunk_size);
+void CardTableModRefBS::non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
+                                                             DirtyCardToOopClosure* dcto_cl,
+                                                             ClearNoncleanCardWrapper* cl,
+                                                             int n_threads) {
+  assert(n_threads > 0, "Error: expected n_threads > 0");
+  assert((n_threads == 1 && ParallelGCThreads == 0) ||
+         n_threads <= (int)ParallelGCThreads,
+         "# worker threads != # requested!");
+  // Make sure the LNC array is valid for the space.
+  jbyte**   lowest_non_clean;
+  uintptr_t lowest_non_clean_base_chunk_index;
+  size_t    lowest_non_clean_chunk_size;
+  get_LNC_array_for_space(sp, lowest_non_clean,
+                          lowest_non_clean_base_chunk_index,
+                          lowest_non_clean_chunk_size);
 
-    int n_strides = n_threads * StridesPerThread;
-    SequentialSubTasksDone* pst = sp->par_seq_tasks();
-    pst->set_n_threads(n_threads);
-    pst->set_n_tasks(n_strides);
+  int n_strides = n_threads * StridesPerThread;
+  SequentialSubTasksDone* pst = sp->par_seq_tasks();
+  pst->set_n_threads(n_threads);
+  pst->set_n_tasks(n_strides);
 
-    int stride = 0;
-    while (!pst->is_task_claimed(/* reference */ stride)) {
-      process_stride(sp, mr, stride, n_strides, dcto_cl, cl,
-                     lowest_non_clean,
-                     lowest_non_clean_base_chunk_index,
-                     lowest_non_clean_chunk_size);
-    }
-    if (pst->all_tasks_completed()) {
-      // Clear lowest_non_clean array for next time.
-      intptr_t first_chunk_index = addr_to_chunk_index(mr.start());
-      uintptr_t last_chunk_index  = addr_to_chunk_index(mr.last());
-      for (uintptr_t ch = first_chunk_index; ch <= last_chunk_index; ch++) {
-        intptr_t ind = ch - lowest_non_clean_base_chunk_index;
-        assert(0 <= ind && ind < (intptr_t)lowest_non_clean_chunk_size,
-               "Bounds error");
-        lowest_non_clean[ind] = NULL;
-      }
+  int stride = 0;
+  while (!pst->is_task_claimed(/* reference */ stride)) {
+    process_stride(sp, mr, stride, n_strides, dcto_cl, cl,
+                   lowest_non_clean,
+                   lowest_non_clean_base_chunk_index,
+                   lowest_non_clean_chunk_size);
+  }
+  if (pst->all_tasks_completed()) {
+    // Clear lowest_non_clean array for next time.
+    intptr_t first_chunk_index = addr_to_chunk_index(mr.start());
+    uintptr_t last_chunk_index  = addr_to_chunk_index(mr.last());
+    for (uintptr_t ch = first_chunk_index; ch <= last_chunk_index; ch++) {
+      intptr_t ind = ch - lowest_non_clean_base_chunk_index;
+      assert(0 <= ind && ind < (intptr_t)lowest_non_clean_chunk_size,
+             "Bounds error");
+      lowest_non_clean[ind] = NULL;
     }
   }
 }
@@ -81,7 +80,7 @@
                MemRegion used,
                jint stride, int n_strides,
                DirtyCardToOopClosure* dcto_cl,
-               MemRegionClosure* cl,
+               ClearNoncleanCardWrapper* cl,
                jbyte** lowest_non_clean,
                uintptr_t lowest_non_clean_base_chunk_index,
                size_t    lowest_non_clean_chunk_size) {
@@ -127,7 +126,11 @@
                              lowest_non_clean_base_chunk_index,
                              lowest_non_clean_chunk_size);
 
-    non_clean_card_iterate_work(chunk_mr, cl);
+    // We do not call the non_clean_card_iterate_serial() version because
+    // we want to clear the cards, and the ClearNoncleanCardWrapper closure
+    // itself does the work of finding contiguous dirty ranges of cards to
+    // process (and clear).
+    cl->do_MemRegion(chunk_mr);
 
     // Find the next chunk of the stride.
     chunk_card_start += CardsPerStrideChunk * n_strides;
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp	Thu May 05 22:28:31 2011 -0700
@@ -224,6 +224,12 @@
   const size_t alignment = virtual_space()->alignment();
   size_t aligned_bytes  = align_size_up(bytes, alignment);
   size_t aligned_expand_bytes = align_size_up(MinHeapDeltaBytes, alignment);
+
+  if (UseNUMA) {
+    // With NUMA we use round-robin page allocation for the old gen. Expand by at least
+    // providing a page per lgroup. Alignment is larger or equal to the page size.
+    aligned_expand_bytes = MAX2(aligned_expand_bytes, alignment * os::numa_get_groups_num());
+  }
   if (aligned_bytes == 0){
     // The alignment caused the number of bytes to wrap.  An expand_by(0) will
     // return true with the implication that and expansion was done when it
--- a/hotspot/src/share/vm/gc_implementation/shared/generationCounters.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/gc_implementation/shared/generationCounters.cpp	Thu May 05 22:28:31 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -51,15 +51,18 @@
 
     cname = PerfDataManager::counter_name(_name_space, "minCapacity");
     PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes,
+                                     _virtual_space == NULL ? 0 :
                                      _virtual_space->committed_size(), CHECK);
 
     cname = PerfDataManager::counter_name(_name_space, "maxCapacity");
     PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes,
+                                     _virtual_space == NULL ? 0 :
                                      _virtual_space->reserved_size(), CHECK);
 
     cname = PerfDataManager::counter_name(_name_space, "capacity");
     _current_size = PerfDataManager::create_variable(SUN_GC, cname,
-                                      PerfData::U_Bytes,
+                                     PerfData::U_Bytes,
+                                     _virtual_space == NULL ? 0 :
                                      _virtual_space->committed_size(), CHECK);
   }
 }
--- a/hotspot/src/share/vm/gc_implementation/shared/generationCounters.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/gc_implementation/shared/generationCounters.hpp	Thu May 05 22:28:31 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -61,10 +61,11 @@
   }
 
   virtual void update_all() {
-    _current_size->set_value(_virtual_space->committed_size());
+    _current_size->set_value(_virtual_space == NULL ? 0 :
+                             _virtual_space->committed_size());
   }
 
   const char* name_space() const        { return _name_space; }
+
 };
-
 #endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_GENERATIONCOUNTERS_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/shared/hSpaceCounters.cpp	Thu May 05 22:28:31 2011 -0700
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/shared/hSpaceCounters.hpp"
+#include "memory/generation.hpp"
+#include "memory/resourceArea.hpp"
+
+HSpaceCounters::HSpaceCounters(const char* name,
+                               int ordinal,
+                               size_t max_size,
+                               size_t initial_capacity,
+                               GenerationCounters* gc) {
+
+  if (UsePerfData) {
+    EXCEPTION_MARK;
+    ResourceMark rm;
+
+    const char* cns =
+      PerfDataManager::name_space(gc->name_space(), "space", ordinal);
+
+    _name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1);
+    strcpy(_name_space, cns);
+
+    const char* cname = PerfDataManager::counter_name(_name_space, "name");
+    PerfDataManager::create_string_constant(SUN_GC, cname, name, CHECK);
+
+    cname = PerfDataManager::counter_name(_name_space, "maxCapacity");
+    PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes,
+                                     (jlong)max_size, CHECK);
+
+    cname = PerfDataManager::counter_name(_name_space, "capacity");
+    _capacity = PerfDataManager::create_variable(SUN_GC, cname,
+                                                 PerfData::U_Bytes,
+                                                 initial_capacity, CHECK);
+
+    cname = PerfDataManager::counter_name(_name_space, "used");
+    _used = PerfDataManager::create_variable(SUN_GC, cname, PerfData::U_Bytes,
+                                             (jlong) 0, CHECK);
+
+    cname = PerfDataManager::counter_name(_name_space, "initCapacity");
+    PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes,
+                                     initial_capacity, CHECK);
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/shared/hSpaceCounters.hpp	Thu May 05 22:28:31 2011 -0700
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_SHARED_HSPACECOUNTERS_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_SHARED_HSPACECOUNTERS_HPP
+
+#ifndef SERIALGC
+#include "gc_implementation/shared/generationCounters.hpp"
+#include "memory/generation.hpp"
+#include "runtime/perfData.hpp"
+#endif
+
+// A HSpaceCounter is a holder class for performance counters
+// that track a collections (logical spaces) in a heap;
+
+class HeapSpaceUsedHelper;
+class G1SpaceMonitoringSupport;
+
+class HSpaceCounters: public CHeapObj {
+  friend class VMStructs;
+
+ private:
+  PerfVariable*        _capacity;
+  PerfVariable*        _used;
+
+  // Constant PerfData types don't need to retain a reference.
+  // However, it's a good idea to document them here.
+
+  char*             _name_space;
+
+ public:
+
+  HSpaceCounters(const char* name, int ordinal, size_t max_size,
+                 size_t initial_capacity, GenerationCounters* gc);
+
+  ~HSpaceCounters() {
+    if (_name_space != NULL) FREE_C_HEAP_ARRAY(char, _name_space);
+  }
+
+  inline void update_capacity(size_t v) {
+    _capacity->set_value(v);
+  }
+
+  inline void update_used(size_t v) {
+    _used->set_value(v);
+  }
+
+  debug_only(
+    // for security reasons, we do not allow arbitrary reads from
+    // the counters as they may live in shared memory.
+    jlong used() {
+      return _used->get_value();
+    }
+    jlong capacity() {
+      return _used->get_value();
+    }
+  )
+
+  inline void update_all(size_t capacity, size_t used) {
+    update_capacity(capacity);
+    update_used(used);
+  }
+
+  const char* name_space() const        { return _name_space; }
+};
+#endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_HSPACECOUNTERS_HPP
--- a/hotspot/src/share/vm/interpreter/linkResolver.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/interpreter/linkResolver.cpp	Thu May 05 22:28:31 2011 -0700
@@ -327,6 +327,7 @@
 
   // 1. check if klass is not interface
   if (resolved_klass->is_interface()) {
+    ResourceMark rm(THREAD);
     char buf[200];
     jio_snprintf(buf, sizeof(buf), "Found interface %s, but class was expected", Klass::cast(resolved_klass())->external_name());
     THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), buf);
@@ -413,6 +414,7 @@
 
  // check if klass is interface
   if (!resolved_klass->is_interface()) {
+    ResourceMark rm(THREAD);
     char buf[200];
     jio_snprintf(buf, sizeof(buf), "Found class %s, but interface was expected", Klass::cast(resolved_klass())->external_name());
     THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), buf);
@@ -534,6 +536,7 @@
 
   // check for errors
   if (is_static != fd.is_static()) {
+    ResourceMark rm(THREAD);
     char msg[200];
     jio_snprintf(msg, sizeof(msg), "Expected %s field %s.%s", is_static ? "static" : "non-static", Klass::cast(resolved_klass())->external_name(), fd.name()->as_C_string());
     THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), msg);
@@ -631,6 +634,7 @@
 
   // check if static
   if (!resolved_method->is_static()) {
+    ResourceMark rm(THREAD);
     char buf[200];
     jio_snprintf(buf, sizeof(buf), "Expected static method %s", methodOopDesc::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
                                                       resolved_method->name(),
@@ -671,6 +675,7 @@
 
   // check if not static
   if (resolved_method->is_static()) {
+    ResourceMark rm(THREAD);
     char buf[200];
     jio_snprintf(buf, sizeof(buf),
                  "Expecting non-static method %s",
@@ -717,6 +722,7 @@
 
   // check if not static
   if (sel_method->is_static()) {
+    ResourceMark rm(THREAD);
     char buf[200];
     jio_snprintf(buf, sizeof(buf), "Expecting non-static method %s", methodOopDesc::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
                                                                                                              resolved_method->name(),
@@ -757,6 +763,7 @@
 
   // check if not static
   if (resolved_method->is_static()) {
+    ResourceMark rm(THREAD);
     char buf[200];
     jio_snprintf(buf, sizeof(buf), "Expecting non-static method %s", methodOopDesc::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
                                                                                                              resolved_method->name(),
@@ -873,6 +880,7 @@
 
   // check if receiver klass implements the resolved interface
   if (!recv_klass->is_subtype_of(resolved_klass())) {
+    ResourceMark rm(THREAD);
     char buf[200];
     jio_snprintf(buf, sizeof(buf), "Class %s does not implement the requested interface %s",
                  (Klass::cast(recv_klass()))->external_name(),
--- a/hotspot/src/share/vm/memory/allocation.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/memory/allocation.cpp	Thu May 05 22:28:31 2011 -0700
@@ -44,6 +44,14 @@
   return (void *) AllocateHeap(size, "CHeapObj-new");
 }
 
+void* CHeapObj::operator new (size_t size, const std::nothrow_t&  nothrow_constant) {
+  char* p = (char*) os::malloc(size);
+#ifdef ASSERT
+  if (PrintMallocFree) trace_heap_malloc(size, "CHeapObj-new", p);
+#endif
+  return p;
+}
+
 void CHeapObj::operator delete(void* p){
  FreeHeap(p);
 }
--- a/hotspot/src/share/vm/memory/allocation.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/memory/allocation.hpp	Thu May 05 22:28:31 2011 -0700
@@ -34,6 +34,8 @@
 #include "opto/c2_globals.hpp"
 #endif
 
+#include <new>
+
 #define ARENA_ALIGN_M1 (((size_t)(ARENA_AMALLOC_ALIGNMENT)) - 1)
 #define ARENA_ALIGN_MASK (~((size_t)ARENA_ALIGN_M1))
 #define ARENA_ALIGN(x) ((((size_t)(x)) + ARENA_ALIGN_M1) & ARENA_ALIGN_MASK)
@@ -99,6 +101,7 @@
 class CHeapObj ALLOCATION_SUPER_CLASS_SPEC {
  public:
   void* operator new(size_t size);
+  void* operator new (size_t size, const std::nothrow_t&  nothrow_constant);
   void  operator delete(void* p);
   void* new_array(size_t size);
 };
--- a/hotspot/src/share/vm/memory/cardTableModRefBS.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/memory/cardTableModRefBS.cpp	Thu May 05 22:28:31 2011 -0700
@@ -456,31 +456,35 @@
 }
 
 
-void CardTableModRefBS::non_clean_card_iterate(Space* sp,
-                                               MemRegion mr,
-                                               DirtyCardToOopClosure* dcto_cl,
-                                               MemRegionClosure* cl) {
+void CardTableModRefBS::non_clean_card_iterate_possibly_parallel(Space* sp,
+                                                                 MemRegion mr,
+                                                                 DirtyCardToOopClosure* dcto_cl,
+                                                                 ClearNoncleanCardWrapper* cl) {
   if (!mr.is_empty()) {
     int n_threads = SharedHeap::heap()->n_par_threads();
     if (n_threads > 0) {
 #ifndef SERIALGC
-      par_non_clean_card_iterate_work(sp, mr, dcto_cl, cl, n_threads);
+      non_clean_card_iterate_parallel_work(sp, mr, dcto_cl, cl, n_threads);
 #else  // SERIALGC
       fatal("Parallel gc not supported here.");
 #endif // SERIALGC
     } else {
-      non_clean_card_iterate_work(mr, cl);
+      // We do not call the non_clean_card_iterate_serial() version below because
+      // we want to clear the cards (which non_clean_card_iterate_serial() does not
+      // do for us), and the ClearNoncleanCardWrapper closure itself does the work
+      // of finding contiguous dirty ranges of cards to process (and clear).
+      cl->do_MemRegion(mr);
     }
   }
 }
 
-// NOTE: For this to work correctly, it is important that
-// we look for non-clean cards below (so as to catch those
-// marked precleaned), rather than look explicitly for dirty
-// cards (and miss those marked precleaned). In that sense,
-// the name precleaned is currently somewhat of a misnomer.
-void CardTableModRefBS::non_clean_card_iterate_work(MemRegion mr,
-                                                    MemRegionClosure* cl) {
+// The iterator itself is not MT-aware, but
+// MT-aware callers and closures can use this to
+// accomplish dirty card iteration in parallel. The
+// iterator itself does not clear the dirty cards, or
+// change their values in any manner.
+void CardTableModRefBS::non_clean_card_iterate_serial(MemRegion mr,
+                                                      MemRegionClosure* cl) {
   for (int i = 0; i < _cur_covered_regions; i++) {
     MemRegion mri = mr.intersection(_covered[i]);
     if (mri.word_size() > 0) {
@@ -648,43 +652,37 @@
 }
 
 #ifndef PRODUCT
-class GuaranteeNotModClosure: public MemRegionClosure {
-  CardTableModRefBS* _ct;
-public:
-  GuaranteeNotModClosure(CardTableModRefBS* ct) : _ct(ct) {}
-  void do_MemRegion(MemRegion mr) {
-    jbyte* entry = _ct->byte_for(mr.start());
-    guarantee(*entry != CardTableModRefBS::clean_card,
-              "Dirty card in region that should be clean");
+void CardTableModRefBS::verify_region(MemRegion mr,
+                                      jbyte val, bool val_equals) {
+  jbyte* start    = byte_for(mr.start());
+  jbyte* end      = byte_for(mr.last());
+  bool   failures = false;
+  for (jbyte* curr = start; curr <= end; ++curr) {
+    jbyte curr_val = *curr;
+    bool failed = (val_equals) ? (curr_val != val) : (curr_val == val);
+    if (failed) {
+      if (!failures) {
+        tty->cr();
+        tty->print_cr("== CT verification failed: ["PTR_FORMAT","PTR_FORMAT"]");
+        tty->print_cr("==   %sexpecting value: %d",
+                      (val_equals) ? "" : "not ", val);
+        failures = true;
+      }
+      tty->print_cr("==   card "PTR_FORMAT" ["PTR_FORMAT","PTR_FORMAT"], "
+                    "val: %d", curr, addr_for(curr),
+                    (HeapWord*) (((size_t) addr_for(curr)) + card_size),
+                    (int) curr_val);
+    }
   }
-};
-
-void CardTableModRefBS::verify_clean_region(MemRegion mr) {
-  GuaranteeNotModClosure blk(this);
-  non_clean_card_iterate_work(mr, &blk);
+  guarantee(!failures, "there should not have been any failures");
 }
 
-// To verify a MemRegion is entirely dirty this closure is passed to
-// dirty_card_iterate. If the region is dirty do_MemRegion will be
-// invoked only once with a MemRegion equal to the one being
-// verified.
-class GuaranteeDirtyClosure: public MemRegionClosure {
-  CardTableModRefBS* _ct;
-  MemRegion _mr;
-  bool _result;
-public:
-  GuaranteeDirtyClosure(CardTableModRefBS* ct, MemRegion mr)
-    : _ct(ct), _mr(mr), _result(false) {}
-  void do_MemRegion(MemRegion mr) {
-    _result = _mr.equals(mr);
-  }
-  bool result() const { return _result; }
-};
+void CardTableModRefBS::verify_not_dirty_region(MemRegion mr) {
+  verify_region(mr, dirty_card, false /* val_equals */);
+}
 
 void CardTableModRefBS::verify_dirty_region(MemRegion mr) {
-  GuaranteeDirtyClosure blk(this, mr);
-  dirty_card_iterate(mr, &blk);
-  guarantee(blk.result(), "Non-dirty cards in region that should be dirty");
+  verify_region(mr, dirty_card, true /* val_equals */);
 }
 #endif
 
--- a/hotspot/src/share/vm/memory/cardTableModRefBS.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/memory/cardTableModRefBS.hpp	Thu May 05 22:28:31 2011 -0700
@@ -44,6 +44,7 @@
 class Generation;
 class OopsInGenClosure;
 class DirtyCardToOopClosure;
+class ClearNoncleanCardWrapper;
 
 class CardTableModRefBS: public ModRefBarrierSet {
   // Some classes get to look at some private stuff.
@@ -165,22 +166,28 @@
 
   // Iterate over the portion of the card-table which covers the given
   // region mr in the given space and apply cl to any dirty sub-regions
-  // of mr. cl and dcto_cl must either be the same closure or cl must
-  // wrap dcto_cl. Both are required - neither may be NULL. Also, dcto_cl
-  // may be modified. Note that this function will operate in a parallel
-  // mode if worker threads are available.
-  void non_clean_card_iterate(Space* sp, MemRegion mr,
-                              DirtyCardToOopClosure* dcto_cl,
-                              MemRegionClosure* cl);
+  // of mr. Dirty cards are _not_ cleared by the iterator method itself,
+  // but closures may arrange to do so on their own should they so wish.
+  void non_clean_card_iterate_serial(MemRegion mr, MemRegionClosure* cl);
 
-  // Utility function used to implement the other versions below.
-  void non_clean_card_iterate_work(MemRegion mr, MemRegionClosure* cl);
+  // A variant of the above that will operate in a parallel mode if
+  // worker threads are available, and clear the dirty cards as it
+  // processes them.
+  // ClearNoncleanCardWrapper cl must wrap the DirtyCardToOopClosure dcto_cl,
+  // which may itself be modified by the method.
+  void non_clean_card_iterate_possibly_parallel(Space* sp, MemRegion mr,
+                                                DirtyCardToOopClosure* dcto_cl,
+                                                ClearNoncleanCardWrapper* cl);
 
-  void par_non_clean_card_iterate_work(Space* sp, MemRegion mr,
-                                       DirtyCardToOopClosure* dcto_cl,
-                                       MemRegionClosure* cl,
-                                       int n_threads);
+ private:
+  // Work method used to implement non_clean_card_iterate_possibly_parallel()
+  // above in the parallel case.
+  void non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
+                                            DirtyCardToOopClosure* dcto_cl,
+                                            ClearNoncleanCardWrapper* cl,
+                                            int n_threads);
 
+ protected:
   // Dirty the bytes corresponding to "mr" (not all of which must be
   // covered.)
   void dirty_MemRegion(MemRegion mr);
@@ -237,7 +244,7 @@
                       MemRegion used,
                       jint stride, int n_strides,
                       DirtyCardToOopClosure* dcto_cl,
-                      MemRegionClosure* cl,
+                      ClearNoncleanCardWrapper* cl,
                       jbyte** lowest_non_clean,
                       uintptr_t lowest_non_clean_base_chunk_index,
                       size_t lowest_non_clean_chunk_size);
@@ -409,14 +416,14 @@
   // marking, where a dirty card may cause scanning, and summarization
   // marking, of objects that extend onto subsequent cards.)
   void mod_card_iterate(MemRegionClosure* cl) {
-    non_clean_card_iterate_work(_whole_heap, cl);
+    non_clean_card_iterate_serial(_whole_heap, cl);
   }
 
   // Like the "mod_cards_iterate" above, except only invokes the closure
   // for cards within the MemRegion "mr" (which is required to be
   // card-aligned and sized.)
   void mod_card_iterate(MemRegion mr, MemRegionClosure* cl) {
-    non_clean_card_iterate_work(mr, cl);
+    non_clean_card_iterate_serial(mr, cl);
   }
 
   static uintx ct_max_alignment_constraint();
@@ -468,7 +475,10 @@
   void verify();
   void verify_guard();
 
-  void verify_clean_region(MemRegion mr) PRODUCT_RETURN;
+  // val_equals -> it will check that all cards covered by mr equal val
+  // !val_equals -> it will check that all cards covered by mr do not equal val
+  void verify_region(MemRegion mr, jbyte val, bool val_equals) PRODUCT_RETURN;
+  void verify_not_dirty_region(MemRegion mr) PRODUCT_RETURN;
   void verify_dirty_region(MemRegion mr) PRODUCT_RETURN;
 
   static size_t par_chunk_heapword_alignment() {
@@ -493,4 +503,5 @@
   void set_CTRS(CardTableRS* rs) { _rs = rs; }
 };
 
+
 #endif // SHARE_VM_MEMORY_CARDTABLEMODREFBS_HPP
--- a/hotspot/src/share/vm/memory/cardTableRS.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/memory/cardTableRS.cpp	Thu May 05 22:28:31 2011 -0700
@@ -105,107 +105,111 @@
   g->younger_refs_iterate(blk);
 }
 
-class ClearNoncleanCardWrapper: public MemRegionClosure {
-  MemRegionClosure* _dirty_card_closure;
-  CardTableRS* _ct;
-  bool _is_par;
-private:
-  // Clears the given card, return true if the corresponding card should be
-  // processed.
-  bool clear_card(jbyte* entry) {
-    if (_is_par) {
-      while (true) {
-        // In the parallel case, we may have to do this several times.
-        jbyte entry_val = *entry;
-        assert(entry_val != CardTableRS::clean_card_val(),
-               "We shouldn't be looking at clean cards, and this should "
-               "be the only place they get cleaned.");
-        if (CardTableRS::card_is_dirty_wrt_gen_iter(entry_val)
-            || _ct->is_prev_youngergen_card_val(entry_val)) {
-          jbyte res =
-            Atomic::cmpxchg(CardTableRS::clean_card_val(), entry, entry_val);
-          if (res == entry_val) {
-            break;
-          } else {
-            assert(res == CardTableRS::cur_youngergen_and_prev_nonclean_card,
-                   "The CAS above should only fail if another thread did "
-                   "a GC write barrier.");
-          }
-        } else if (entry_val ==
-                   CardTableRS::cur_youngergen_and_prev_nonclean_card) {
-          // Parallelism shouldn't matter in this case.  Only the thread
-          // assigned to scan the card should change this value.
-          *entry = _ct->cur_youngergen_card_val();
-          break;
-        } else {
-          assert(entry_val == _ct->cur_youngergen_card_val(),
-                 "Should be the only possibility.");
-          // In this case, the card was clean before, and become
-          // cur_youngergen only because of processing of a promoted object.
-          // We don't have to look at the card.
-          return false;
-        }
+inline bool ClearNoncleanCardWrapper::clear_card(jbyte* entry) {
+  if (_is_par) {
+    return clear_card_parallel(entry);
+  } else {
+    return clear_card_serial(entry);
+  }
+}
+
+inline bool ClearNoncleanCardWrapper::clear_card_parallel(jbyte* entry) {
+  while (true) {
+    // In the parallel case, we may have to do this several times.
+    jbyte entry_val = *entry;
+    assert(entry_val != CardTableRS::clean_card_val(),
+           "We shouldn't be looking at clean cards, and this should "
+           "be the only place they get cleaned.");
+    if (CardTableRS::card_is_dirty_wrt_gen_iter(entry_val)
+        || _ct->is_prev_youngergen_card_val(entry_val)) {
+      jbyte res =
+        Atomic::cmpxchg(CardTableRS::clean_card_val(), entry, entry_val);
+      if (res == entry_val) {
+        break;
+      } else {
+        assert(res == CardTableRS::cur_youngergen_and_prev_nonclean_card,
+               "The CAS above should only fail if another thread did "
+               "a GC write barrier.");
       }
-      return true;
+    } else if (entry_val ==
+               CardTableRS::cur_youngergen_and_prev_nonclean_card) {
+      // Parallelism shouldn't matter in this case.  Only the thread
+      // assigned to scan the card should change this value.
+      *entry = _ct->cur_youngergen_card_val();
+      break;
     } else {
-      jbyte entry_val = *entry;
-      assert(entry_val != CardTableRS::clean_card_val(),
-             "We shouldn't be looking at clean cards, and this should "
-             "be the only place they get cleaned.");
-      assert(entry_val != CardTableRS::cur_youngergen_and_prev_nonclean_card,
-             "This should be possible in the sequential case.");
-      *entry = CardTableRS::clean_card_val();
-      return true;
+      assert(entry_val == _ct->cur_youngergen_card_val(),
+             "Should be the only possibility.");
+      // In this case, the card was clean before, and become
+      // cur_youngergen only because of processing of a promoted object.
+      // We don't have to look at the card.
+      return false;
     }
   }
+  return true;
+}
 
-public:
-  ClearNoncleanCardWrapper(MemRegionClosure* dirty_card_closure,
-                           CardTableRS* ct) :
+
+inline bool ClearNoncleanCardWrapper::clear_card_serial(jbyte* entry) {
+  jbyte entry_val = *entry;
+  assert(entry_val != CardTableRS::clean_card_val(),
+         "We shouldn't be looking at clean cards, and this should "
+         "be the only place they get cleaned.");
+  assert(entry_val != CardTableRS::cur_youngergen_and_prev_nonclean_card,
+         "This should be possible in the sequential case.");
+  *entry = CardTableRS::clean_card_val();
+  return true;
+}
+
+ClearNoncleanCardWrapper::ClearNoncleanCardWrapper(
+  MemRegionClosure* dirty_card_closure, CardTableRS* ct) :
     _dirty_card_closure(dirty_card_closure), _ct(ct) {
     _is_par = (SharedHeap::heap()->n_par_threads() > 0);
+}
+
+void ClearNoncleanCardWrapper::do_MemRegion(MemRegion mr) {
+  assert(mr.word_size() > 0, "Error");
+  assert(_ct->is_aligned(mr.start()), "mr.start() should be card aligned");
+  // mr.end() may not necessarily be card aligned.
+  jbyte* cur_entry = _ct->byte_for(mr.last());
+  const jbyte* limit = _ct->byte_for(mr.start());
+  HeapWord* end_of_non_clean = mr.end();
+  HeapWord* start_of_non_clean = end_of_non_clean;
+  while (cur_entry >= limit) {
+    HeapWord* cur_hw = _ct->addr_for(cur_entry);
+    if ((*cur_entry != CardTableRS::clean_card_val()) && clear_card(cur_entry)) {
+      // Continue the dirty range by opening the
+      // dirty window one card to the left.
+      start_of_non_clean = cur_hw;
+    } else {
+      // We hit a "clean" card; process any non-empty
+      // "dirty" range accumulated so far.
+      if (start_of_non_clean < end_of_non_clean) {
+        const MemRegion mrd(start_of_non_clean, end_of_non_clean);
+        _dirty_card_closure->do_MemRegion(mrd);
+      }
+      // Reset the dirty window, while continuing to look
+      // for the next dirty card that will start a
+      // new dirty window.
+      end_of_non_clean = cur_hw;
+      start_of_non_clean = cur_hw;
+    }
+    // Note that "cur_entry" leads "start_of_non_clean" in
+    // its leftward excursion after this point
+    // in the loop and, when we hit the left end of "mr",
+    // will point off of the left end of the card-table
+    // for "mr".
+    cur_entry--;
   }
-  void do_MemRegion(MemRegion mr) {
-    // We start at the high end of "mr", walking backwards
-    // while accumulating a contiguous dirty range of cards in
-    // [start_of_non_clean, end_of_non_clean) which we then
-    // process en masse.
-    HeapWord* end_of_non_clean = mr.end();
-    HeapWord* start_of_non_clean = end_of_non_clean;
-    jbyte*       entry = _ct->byte_for(mr.last());
-    const jbyte* first_entry = _ct->byte_for(mr.start());
-    while (entry >= first_entry) {
-      HeapWord* cur = _ct->addr_for(entry);
-      if (!clear_card(entry)) {
-        // We hit a clean card; process any non-empty
-        // dirty range accumulated so far.
-        if (start_of_non_clean < end_of_non_clean) {
-          MemRegion mr2(start_of_non_clean, end_of_non_clean);
-          _dirty_card_closure->do_MemRegion(mr2);
-        }
-        // Reset the dirty window while continuing to
-        // look for the next dirty window to process.
-        end_of_non_clean = cur;
-        start_of_non_clean = end_of_non_clean;
-      }
-      // Open the left end of the window one card to the left.
-      start_of_non_clean = cur;
-      // Note that "entry" leads "start_of_non_clean" in
-      // its leftward excursion after this point
-      // in the loop and, when we hit the left end of "mr",
-      // will point off of the left end of the card-table
-      // for "mr".
-      entry--;
-    }
-    // If the first card of "mr" was dirty, we will have
-    // been left with a dirty window, co-initial with "mr",
-    // which we now process.
-    if (start_of_non_clean < end_of_non_clean) {
-      MemRegion mr2(start_of_non_clean, end_of_non_clean);
-      _dirty_card_closure->do_MemRegion(mr2);
-    }
+  // If the first card of "mr" was dirty, we will have
+  // been left with a dirty window, co-initial with "mr",
+  // which we now process.
+  if (start_of_non_clean < end_of_non_clean) {
+    const MemRegion mrd(start_of_non_clean, end_of_non_clean);
+    _dirty_card_closure->do_MemRegion(mrd);
   }
-};
+}
+
 // clean (by dirty->clean before) ==> cur_younger_gen
 // dirty                          ==> cur_youngergen_and_prev_nonclean_card
 // precleaned                     ==> cur_youngergen_and_prev_nonclean_card
@@ -246,8 +250,35 @@
                                                    cl->gen_boundary());
   ClearNoncleanCardWrapper clear_cl(dcto_cl, this);
 
-  _ct_bs->non_clean_card_iterate(sp, sp->used_region_at_save_marks(),
-                                dcto_cl, &clear_cl);
+  const MemRegion urasm = sp->used_region_at_save_marks();
+#ifdef ASSERT
+  // Convert the assertion check to a warning if we are running
+  // CMS+ParNew until related bug is fixed.
+  MemRegion ur    = sp->used_region();
+  assert(ur.contains(urasm) || (UseConcMarkSweepGC && UseParNewGC),
+         err_msg("Did you forget to call save_marks()? "
+                 "[" PTR_FORMAT ", " PTR_FORMAT ") is not contained in "
+                 "[" PTR_FORMAT ", " PTR_FORMAT ")",
+                 urasm.start(), urasm.end(), ur.start(), ur.end()));
+  // In the case of CMS+ParNew, issue a warning
+  if (!ur.contains(urasm)) {
+    assert(UseConcMarkSweepGC && UseParNewGC, "Tautology: see assert above");
+    warning("CMS+ParNew: Did you forget to call save_marks()? "
+            "[" PTR_FORMAT ", " PTR_FORMAT ") is not contained in "
+            "[" PTR_FORMAT ", " PTR_FORMAT ")",
+             urasm.start(), urasm.end(), ur.start(), ur.end());
+    MemRegion ur2 = sp->used_region();
+    MemRegion urasm2 = sp->used_region_at_save_marks();
+    if (!ur.equals(ur2)) {
+      warning("CMS+ParNew: Flickering used_region()!!");
+    }
+    if (!urasm.equals(urasm2)) {
+      warning("CMS+ParNew: Flickering used_region_at_save_marks()!!");
+    }
+  }
+#endif
+  _ct_bs->non_clean_card_iterate_possibly_parallel(sp, urasm,
+                                                   dcto_cl, &clear_cl);
 }
 
 void CardTableRS::clear_into_younger(Generation* gen, bool clear_perm) {
--- a/hotspot/src/share/vm/memory/cardTableRS.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/memory/cardTableRS.hpp	Thu May 05 22:28:31 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -166,4 +166,21 @@
 
 };
 
+class ClearNoncleanCardWrapper: public MemRegionClosure {
+  MemRegionClosure* _dirty_card_closure;
+  CardTableRS* _ct;
+  bool _is_par;
+private:
+  // Clears the given card, return true if the corresponding card should be
+  // processed.
+  inline bool clear_card(jbyte* entry);
+  // Work methods called by the clear_card()
+  inline bool clear_card_serial(jbyte* entry);
+  inline bool clear_card_parallel(jbyte* entry);
+
+public:
+  ClearNoncleanCardWrapper(MemRegionClosure* dirty_card_closure, CardTableRS* ct);
+  void do_MemRegion(MemRegion mr);
+};
+
 #endif // SHARE_VM_MEMORY_CARDTABLERS_HPP
--- a/hotspot/src/share/vm/memory/collectorPolicy.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/memory/collectorPolicy.cpp	Thu May 05 22:28:31 2011 -0700
@@ -265,8 +265,6 @@
   MaxHeapSize = align_size_up(MaxHeapSize, max_alignment());
 
   always_do_update_barrier = UseConcMarkSweepGC;
-  BlockOffsetArrayUseUnallocatedBlock =
-      BlockOffsetArrayUseUnallocatedBlock || ParallelGCThreads > 0;
 
   // Check validity of heap flags
   assert(OldSize     % min_alignment() == 0, "old space alignment");
--- a/hotspot/src/share/vm/memory/genCollectedHeap.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/memory/genCollectedHeap.hpp	Thu May 05 22:28:31 2011 -0700
@@ -427,13 +427,13 @@
   // explicitly mark reachable objects in younger generations, to avoid
   // excess storage retention.)  If "collecting_perm_gen" is false, then
   // roots that may only contain references to permGen objects are not
-  // scanned. The "so" argument determines which of the roots
+  // scanned; instead, the older_gens closure is applied to all outgoing
+  // references in the perm gen.  The "so" argument determines which of the roots
   // the closure is applied to:
   // "SO_None" does none;
   // "SO_AllClasses" applies the closure to all entries in the SystemDictionary;
   // "SO_SystemClasses" to all the "system" classes and loaders;
-  // "SO_Symbols_and_Strings" applies the closure to all entries in
-  // SymbolsTable and StringTable.
+  // "SO_Strings" applies the closure to all entries in the StringTable.
   void gen_process_strong_roots(int level,
                                 bool younger_gens_as_roots,
                                 // The remaining arguments are in an order
--- a/hotspot/src/share/vm/memory/genOopClosures.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/memory/genOopClosures.hpp	Thu May 05 22:28:31 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -175,7 +175,7 @@
  protected:
   template <class T> inline void do_oop_work(T* p) {
     oop obj = oopDesc::load_decode_heap_oop(p);
-    guarantee(obj->is_oop_or_null(), "invalid oop");
+    guarantee(obj->is_oop_or_null(), err_msg("invalid oop: " INTPTR_FORMAT, (oopDesc*) obj));
   }
  public:
   virtual void do_oop(oop* p);
--- a/hotspot/src/share/vm/memory/modRefBarrierSet.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/memory/modRefBarrierSet.hpp	Thu May 05 22:28:31 2011 -0700
@@ -100,12 +100,6 @@
   // Pass along the argument to the superclass.
   ModRefBarrierSet(int max_covered_regions) :
     BarrierSet(max_covered_regions) {}
-
-#ifndef PRODUCT
-  // Verifies that the given region contains no modified references.
-  virtual void verify_clean_region(MemRegion mr) = 0;
-#endif
-
 };
 
 #endif // SHARE_VM_MEMORY_MODREFBARRIERSET_HPP
--- a/hotspot/src/share/vm/memory/sharedHeap.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/memory/sharedHeap.cpp	Thu May 05 22:28:31 2011 -0700
@@ -46,7 +46,6 @@
   SH_PS_Management_oops_do,
   SH_PS_SystemDictionary_oops_do,
   SH_PS_jvmti_oops_do,
-  SH_PS_SymbolTable_oops_do,
   SH_PS_StringTable_oops_do,
   SH_PS_CodeCache_oops_do,
   // Leave this one last.
@@ -161,13 +160,9 @@
   if (!_process_strong_tasks->is_task_claimed(SH_PS_SystemDictionary_oops_do)) {
     if (so & SO_AllClasses) {
       SystemDictionary::oops_do(roots);
-    } else
-      if (so & SO_SystemClasses) {
-        SystemDictionary::always_strong_oops_do(roots);
-      }
-  }
-
-  if (!_process_strong_tasks->is_task_claimed(SH_PS_SymbolTable_oops_do)) {
+    } else if (so & SO_SystemClasses) {
+      SystemDictionary::always_strong_oops_do(roots);
+    }
   }
 
   if (!_process_strong_tasks->is_task_claimed(SH_PS_StringTable_oops_do)) {
--- a/hotspot/src/share/vm/memory/sharedHeap.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/memory/sharedHeap.hpp	Thu May 05 22:28:31 2011 -0700
@@ -192,9 +192,8 @@
     SO_None                = 0x0,
     SO_AllClasses          = 0x1,
     SO_SystemClasses       = 0x2,
-    SO_Symbols             = 0x4,
-    SO_Strings             = 0x8,
-    SO_CodeCache           = 0x10
+    SO_Strings             = 0x4,
+    SO_CodeCache           = 0x8
   };
 
   FlexibleWorkGang* workers() const { return _workers; }
@@ -208,14 +207,13 @@
 
   // Invoke the "do_oop" method the closure "roots" on all root locations.
   // If "collecting_perm_gen" is false, then roots that may only contain
-  // references to permGen objects are not scanned.  If true, the
-  // "perm_gen" closure is applied to all older-to-younger refs in the
+  // references to permGen objects are not scanned; instead, in that case,
+  // the "perm_blk" closure is applied to all outgoing refs in the
   // permanent generation.  The "so" argument determines which of roots
   // the closure is applied to:
   // "SO_None" does none;
   // "SO_AllClasses" applies the closure to all entries in the SystemDictionary;
   // "SO_SystemClasses" to all the "system" classes and loaders;
-  // "SO_Symbols" applies the closure to all entries in SymbolsTable;
   // "SO_Strings" applies the closure to all entries in StringTable;
   // "SO_CodeCache" applies the closure to all elements of the CodeCache.
   void process_strong_roots(bool activate_scope,
--- a/hotspot/src/share/vm/oops/cpCacheOop.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/oops/cpCacheOop.cpp	Thu May 05 22:28:31 2011 -0700
@@ -104,7 +104,7 @@
   void* result = Atomic::cmpxchg_ptr(f1, f1_addr, NULL);
   bool success = (result == NULL);
   if (success) {
-    update_barrier_set(f1_addr, f1);
+    update_barrier_set((void*) f1_addr, f1);
   }
 }
 
@@ -275,21 +275,23 @@
   return (int) bsm_cache_index;
 }
 
-void ConstantPoolCacheEntry::set_dynamic_call(Handle call_site,
-                                              methodHandle signature_invoker) {
+void ConstantPoolCacheEntry::set_dynamic_call(Handle call_site, methodHandle signature_invoker) {
   assert(is_secondary_entry(), "");
+  // NOTE: it's important that all other values are set before f1 is
+  // set since some users short circuit on f1 being set
+  // (i.e. non-null) and that may result in uninitialized values for
+  // other racing threads (e.g. flags).
   int param_size = signature_invoker->size_of_parameters();
   assert(param_size >= 1, "method argument size must include MH.this");
-  param_size -= 1;              // do not count MH.this; it is not stacked for invokedynamic
-  if (Atomic::cmpxchg_ptr(call_site(), &_f1, NULL) == NULL) {
-    // racing threads might be trying to install their own favorites
-    set_f1(call_site());
-  }
+  param_size -= 1;  // do not count MH.this; it is not stacked for invokedynamic
   bool is_final = true;
   assert(signature_invoker->is_final_method(), "is_final");
-  set_flags(as_flags(as_TosState(signature_invoker->result_type()), is_final, false, false, false, true) | param_size);
+  int flags = as_flags(as_TosState(signature_invoker->result_type()), is_final, false, false, false, true) | param_size;
+  assert(_flags == 0 || _flags == flags, "flags should be the same");
+  set_flags(flags);
   // do not do set_bytecode on a secondary CP cache entry
   //set_bytecode_1(Bytecodes::_invokedynamic);
+  set_f1_if_null_atomic(call_site());  // This must be the last one to set (see NOTE above)!
 }
 
 
--- a/hotspot/src/share/vm/oops/methodDataOop.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/oops/methodDataOop.hpp	Thu May 05 22:28:31 2011 -0700
@@ -1194,7 +1194,7 @@
   // Whole-method sticky bits and flags
 public:
   enum {
-    _trap_hist_limit    = 16,   // decoupled from Deoptimization::Reason_LIMIT
+    _trap_hist_limit    = 17,   // decoupled from Deoptimization::Reason_LIMIT
     _trap_hist_mask     = max_jubyte,
     _extra_data_count   = 4     // extra DataLayout headers, for trap history
   }; // Public flag values
--- a/hotspot/src/share/vm/opto/bytecodeInfo.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/opto/bytecodeInfo.cpp	Thu May 05 22:28:31 2011 -0700
@@ -310,13 +310,14 @@
     return "inlining too deep";
   }
 
-  // We need to detect recursive inlining of method handle targets: if
-  // the current method is a method handle adapter and one of the
-  // callers is the same method as the callee, we bail out if
-  // MaxRecursiveInlineLevel is hit.
-  if (method()->is_method_handle_adapter()) {
+  // detect direct and indirect recursive inlining
+  {
+    // count the current method and the callee
+    int inline_level = (method() == callee_method) ? 1 : 0;
+    if (inline_level > MaxRecursiveInlineLevel)
+      return "recursively inlining too deep";
+    // count callers of current method and callee
     JVMState* jvms = caller_jvms();
-    int inline_level = 0;
     while (jvms != NULL && jvms->has_method()) {
       if (jvms->method() == callee_method) {
         inline_level++;
@@ -327,10 +328,6 @@
     }
   }
 
-  if (method() == callee_method && inline_depth() > MaxRecursiveInlineLevel) {
-    return "recursively inlining too deep";
-  }
-
   int size = callee_method->code_size();
 
   if (UseOldInlining && ClipInlining
@@ -376,7 +373,6 @@
   return true;
 }
 
-#ifndef PRODUCT
 //------------------------------print_inlining---------------------------------
 // Really, the failure_msg can be a success message also.
 void InlineTree::print_inlining(ciMethod* callee_method, int caller_bci, const char* failure_msg) const {
@@ -388,7 +384,6 @@
     tty->print("  bcs: %d+%d  invoked: %d", top->count_inline_bcs(), callee_method->code_size(), callee_method->interpreter_invocation_count());
   }
 }
-#endif
 
 //------------------------------ok_to_inline-----------------------------------
 WarmCallInfo* InlineTree::ok_to_inline(ciMethod* callee_method, JVMState* jvms, ciCallProfile& profile, WarmCallInfo* initial_wci) {
--- a/hotspot/src/share/vm/opto/c2_globals.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/opto/c2_globals.hpp	Thu May 05 22:28:31 2011 -0700
@@ -183,6 +183,21 @@
   develop(bool, TraceLoopOpts, false,                                       \
           "Trace executed loop optimizations")                              \
                                                                             \
+  diagnostic(bool, LoopLimitCheck, true,                                    \
+          "Generate a loop limits check for overflow")                      \
+                                                                            \
+  develop(bool, TraceLoopLimitCheck, false,                                 \
+          "Trace generation of loop limits checks")                         \
+                                                                            \
+  diagnostic(bool, RangeLimitCheck, true,                                   \
+          "Additional overflow checks during range check elimination")      \
+                                                                            \
+  develop(bool, TraceRangeLimitCheck, false,                                \
+          "Trace additional overflow checks in RCE")                        \
+                                                                            \
+  diagnostic(bool, UnrollLimitCheck, true,                                  \
+          "Additional overflow checks during loop unroll")                  \
+                                                                            \
   product(bool, OptimizeFill, false,                                        \
           "convert fill/copy loops into intrinsic")                         \
                                                                             \
--- a/hotspot/src/share/vm/opto/cfgnode.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/opto/cfgnode.cpp	Thu May 05 22:28:31 2011 -0700
@@ -1373,7 +1373,7 @@
 
   // Clone loop predicates
   if (predicate_proj != NULL) {
-    newn = igvn->clone_loop_predicates(predicate_proj, newn);
+    newn = igvn->clone_loop_predicates(predicate_proj, newn, !n->is_CountedLoop());
   }
 
   // Now I can point to the new node.
--- a/hotspot/src/share/vm/opto/classes.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/opto/classes.hpp	Thu May 05 22:28:31 2011 -0700
@@ -156,6 +156,7 @@
 macro(LogD)
 macro(Log10D)
 macro(Loop)
+macro(LoopLimit)
 macro(Mach)
 macro(MachProj)
 macro(MaxI)
--- a/hotspot/src/share/vm/opto/escape.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/opto/escape.cpp	Thu May 05 22:28:31 2011 -0700
@@ -1437,7 +1437,10 @@
 
   // Update the memory inputs of MemNodes with the value we computed
   // in Phase 2 and move stores memory users to corresponding memory slices.
-#ifdef ASSERT
+
+  // Disable memory split verification code until the fix for 6984348.
+  // Currently it produces false negative results since it does not cover all cases.
+#if 0 // ifdef ASSERT
   visited.Reset();
   Node_Stack old_mems(arena, _compile->unique() >> 2);
 #endif
@@ -1447,7 +1450,7 @@
       Node *n = ptnode_adr(i)->_node;
       assert(n != NULL, "sanity");
       if (n->is_Mem()) {
-#ifdef ASSERT
+#if 0 // ifdef ASSERT
         Node* old_mem = n->in(MemNode::Memory);
         if (!visited.test_set(old_mem->_idx)) {
           old_mems.push(old_mem, old_mem->outcnt());
@@ -1469,13 +1472,13 @@
       }
     }
   }
-#ifdef ASSERT
+#if 0 // ifdef ASSERT
   // Verify that memory was split correctly
   while (old_mems.is_nonempty()) {
     Node* old_mem = old_mems.node();
     uint  old_cnt = old_mems.index();
     old_mems.pop();
-    assert(old_cnt = old_mem->outcnt(), "old mem could be lost");
+    assert(old_cnt == old_mem->outcnt(), "old mem could be lost");
   }
 #endif
 }
--- a/hotspot/src/share/vm/opto/graphKit.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/opto/graphKit.cpp	Thu May 05 22:28:31 2011 -0700
@@ -1033,14 +1033,10 @@
       iter.reset_to_bci(bci());
       iter.next();
       ciMethod* method = iter.get_method(ignore);
-      inputs = method->arg_size_no_receiver();
-      // Add a receiver argument, maybe:
-      if (code != Bytecodes::_invokestatic &&
-          code != Bytecodes::_invokedynamic)
-        inputs += 1;
       // (Do not use ciMethod::arg_size(), because
       // it might be an unloaded method, which doesn't
       // know whether it is static or not.)
+      inputs = method->invoke_arg_size(code);
       int size = method->return_type()->size();
       depth = size - inputs;
     }
@@ -2957,8 +2953,7 @@
 
 //---------------------------set_output_for_allocation-------------------------
 Node* GraphKit::set_output_for_allocation(AllocateNode* alloc,
-                                          const TypeOopPtr* oop_type,
-                                          bool raw_mem_only) {
+                                          const TypeOopPtr* oop_type) {
   int rawidx = Compile::AliasIdxRaw;
   alloc->set_req( TypeFunc::FramePtr, frameptr() );
   add_safepoint_edges(alloc);
@@ -2982,7 +2977,7 @@
                                                  rawoop)->as_Initialize();
   assert(alloc->initialization() == init,  "2-way macro link must work");
   assert(init ->allocation()     == alloc, "2-way macro link must work");
-  if (ReduceFieldZeroing && !raw_mem_only) {
+  {
     // Extract memory strands which may participate in the new object's
     // initialization, and source them from the new InitializeNode.
     // This will allow us to observe initializations when they occur,
@@ -3043,11 +3038,9 @@
 // the type to a constant.
 // The optional arguments are for specialized use by intrinsics:
 //  - If 'extra_slow_test' if not null is an extra condition for the slow-path.
-//  - If 'raw_mem_only', do not cast the result to an oop.
 //  - If 'return_size_val', report the the total object size to the caller.
 Node* GraphKit::new_instance(Node* klass_node,
                              Node* extra_slow_test,
-                             bool raw_mem_only, // affect only raw memory
                              Node* *return_size_val) {
   // Compute size in doublewords
   // The size is always an integral number of doublewords, represented
@@ -3118,7 +3111,7 @@
                      size, klass_node,
                      initial_slow_test);
 
-  return set_output_for_allocation(alloc, oop_type, raw_mem_only);
+  return set_output_for_allocation(alloc, oop_type);
 }
 
 //-------------------------------new_array-------------------------------------
@@ -3128,7 +3121,6 @@
 Node* GraphKit::new_array(Node* klass_node,     // array klass (maybe variable)
                           Node* length,         // number of array elements
                           int   nargs,          // number of arguments to push back for uncommon trap
-                          bool raw_mem_only,    // affect only raw memory
                           Node* *return_size_val) {
   jint  layout_con = Klass::_lh_neutral_value;
   Node* layout_val = get_layout_helper(klass_node, layout_con);
@@ -3273,7 +3265,7 @@
     ary_type = ary_type->is_aryptr()->cast_to_size(length_type);
   }
 
-  Node* javaoop = set_output_for_allocation(alloc, ary_type, raw_mem_only);
+  Node* javaoop = set_output_for_allocation(alloc, ary_type);
 
   // Cast length on remaining path to be as narrow as possible
   if (map()->find_edge(length) >= 0) {
@@ -3386,6 +3378,10 @@
   if (UseLoopPredicate) {
     add_predicate_impl(Deoptimization::Reason_predicate, nargs);
   }
+  // loop's limit check predicate should be near the loop.
+  if (LoopLimitCheck) {
+    add_predicate_impl(Deoptimization::Reason_loop_limit_check, nargs);
+  }
 }
 
 //----------------------------- store barriers ----------------------------
@@ -3462,9 +3458,22 @@
 
   // Get the alias_index for raw card-mark memory
   int adr_type = Compile::AliasIdxRaw;
+  Node*   zero = __ ConI(0); // Dirty card value
+  BasicType bt = T_BYTE;
+
+  if (UseCondCardMark) {
+    // The classic GC reference write barrier is typically implemented
+    // as a store into the global card mark table.  Unfortunately
+    // unconditional stores can result in false sharing and excessive
+    // coherence traffic as well as false transactional aborts.
+    // UseCondCardMark enables MP "polite" conditional card mark
+    // stores.  In theory we could relax the load from ctrl() to
+    // no_ctrl, but that doesn't buy much latitude.
+    Node* card_val = __ load( __ ctrl(), card_adr, TypeInt::BYTE, bt, adr_type);
+    __ if_then(card_val, BoolTest::ne, zero);
+  }
+
   // Smash zero into card
-  Node*   zero = __ ConI(0);
-  BasicType bt = T_BYTE;
   if( !UseConcMarkSweepGC ) {
     __ store(__ ctrl(), card_adr, zero, bt, adr_type);
   } else {
@@ -3472,6 +3481,10 @@
     __ storeCM(__ ctrl(), card_adr, zero, oop_store, adr_idx, bt, adr_type);
   }
 
+  if (UseCondCardMark) {
+    __ end_if();
+  }
+
   // Final sync IdealKit and GraphKit.
   final_sync(ideal);
 }
--- a/hotspot/src/share/vm/opto/graphKit.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/opto/graphKit.hpp	Thu May 05 22:28:31 2011 -0700
@@ -773,15 +773,13 @@
 
   // implementation of object creation
   Node* set_output_for_allocation(AllocateNode* alloc,
-                                  const TypeOopPtr* oop_type,
-                                  bool raw_mem_only);
+                                  const TypeOopPtr* oop_type);
   Node* get_layout_helper(Node* klass_node, jint& constant_value);
   Node* new_instance(Node* klass_node,
                      Node* slow_test = NULL,
-                     bool raw_mem_only = false,
                      Node* *return_size_val = NULL);
   Node* new_array(Node* klass_node, Node* count_val, int nargs,
-                  bool raw_mem_only = false, Node* *return_size_val = NULL);
+                  Node* *return_size_val = NULL);
 
   // Handy for making control flow
   IfNode* create_and_map_if(Node* ctrl, Node* tst, float prob, float cnt) {
--- a/hotspot/src/share/vm/opto/ifnode.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/opto/ifnode.cpp	Thu May 05 22:28:31 2011 -0700
@@ -236,6 +236,7 @@
   }
   Node* predicate_c = NULL;
   Node* predicate_x = NULL;
+  bool counted_loop = r->is_CountedLoop();
 
   Node *region_c = new (igvn->C, req_c + 1) RegionNode(req_c + 1);
   Node *phi_c    = con1;
@@ -294,16 +295,16 @@
   if (predicate_c != NULL) {
     assert(predicate_x == NULL, "only one predicate entry expected");
     // Clone loop predicates to each path
-    iff_c_t = igvn->clone_loop_predicates(predicate_c, iff_c_t);
-    iff_c_f = igvn->clone_loop_predicates(predicate_c, iff_c_f);
+    iff_c_t = igvn->clone_loop_predicates(predicate_c, iff_c_t, !counted_loop);
+    iff_c_f = igvn->clone_loop_predicates(predicate_c, iff_c_f, !counted_loop);
   }
   Node *iff_x_t = phase->transform(new (igvn->C, 1) IfTrueNode (iff_x));
   Node *iff_x_f = phase->transform(new (igvn->C, 1) IfFalseNode(iff_x));
   if (predicate_x != NULL) {
     assert(predicate_c == NULL, "only one predicate entry expected");
     // Clone loop predicates to each path
-    iff_x_t = igvn->clone_loop_predicates(predicate_x, iff_x_t);
-    iff_x_f = igvn->clone_loop_predicates(predicate_x, iff_x_f);
+    iff_x_t = igvn->clone_loop_predicates(predicate_x, iff_x_t, !counted_loop);
+    iff_x_f = igvn->clone_loop_predicates(predicate_x, iff_x_f, !counted_loop);
   }
 
   // Merge the TRUE paths
@@ -545,6 +546,7 @@
   Node *new_bol = gvn->transform( new (gvn->C, 2) BoolNode( new_cmp, bol->as_Bool()->_test._test ) );
   igvn->hash_delete( iff );
   iff->set_req_X( 1, new_bol, igvn );
+  igvn->_worklist.push( iff );
 }
 
 //------------------------------up_one_dom-------------------------------------
--- a/hotspot/src/share/vm/opto/library_call.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/opto/library_call.cpp	Thu May 05 22:28:31 2011 -0700
@@ -867,12 +867,10 @@
   Node* str1_offset  = make_load(no_ctrl, str1_offseta, TypeInt::INT, T_INT, string_type->add_offset(offset_offset));
   Node* str1_start   = array_element_address(str1_value, str1_offset, T_CHAR);
 
-  // Pin loads from String::equals() argument since it could be NULL.
-  Node* str2_ctrl = (opcode == Op_StrEquals) ? control() : no_ctrl;
   Node* str2_valuea  = basic_plus_adr(str2, str2, value_offset);
-  Node* str2_value   = make_load(str2_ctrl, str2_valuea, value_type, T_OBJECT, string_type->add_offset(value_offset));
+  Node* str2_value   = make_load(no_ctrl, str2_valuea, value_type, T_OBJECT, string_type->add_offset(value_offset));
   Node* str2_offseta = basic_plus_adr(str2, str2, offset_offset);
-  Node* str2_offset  = make_load(str2_ctrl, str2_offseta, TypeInt::INT, T_INT, string_type->add_offset(offset_offset));
+  Node* str2_offset  = make_load(no_ctrl, str2_offseta, TypeInt::INT, T_INT, string_type->add_offset(offset_offset));
   Node* str2_start   = array_element_address(str2_value, str2_offset, T_CHAR);
 
   Node* result = NULL;
@@ -1012,14 +1010,15 @@
   if (!stopped()) {
     // Properly cast the argument to String
     argument = _gvn.transform(new (C, 2) CheckCastPPNode(control(), argument, string_type));
+    // This path is taken only when argument's type is String:NotNull.
+    argument = cast_not_null(argument, false);
 
     // Get counts for string and argument
     Node* receiver_cnta = basic_plus_adr(receiver, receiver, count_offset);
     receiver_cnt  = make_load(no_ctrl, receiver_cnta, TypeInt::INT, T_INT, string_type->add_offset(count_offset));
 
-    // Pin load from argument string since it could be NULL.
     Node* argument_cnta = basic_plus_adr(argument, argument, count_offset);
-    argument_cnt  = make_load(control(), argument_cnta, TypeInt::INT, T_INT, string_type->add_offset(count_offset));
+    argument_cnt  = make_load(no_ctrl, argument_cnta, TypeInt::INT, T_INT, string_type->add_offset(count_offset));
 
     // Check for receiver count != argument count
     Node* cmp = _gvn.transform( new(C, 3) CmpINode(receiver_cnt, argument_cnt) );
@@ -3527,8 +3526,7 @@
       Node* orig_tail = _gvn.transform( new(C, 3) SubINode(orig_length, start) );
       Node* moved = generate_min_max(vmIntrinsics::_min, orig_tail, length);
 
-      const bool raw_mem_only = true;
-      newcopy = new_array(klass_node, length, 0, raw_mem_only);
+      newcopy = new_array(klass_node, length, 0);
 
       // Generate a direct call to the right arraycopy function(s).
       // We know the copy is disjoint but we might not know if the
@@ -4325,8 +4323,6 @@
 
     const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
     int raw_adr_idx = Compile::AliasIdxRaw;
-    const bool raw_mem_only = true;
-
 
     Node* array_ctl = generate_array_guard(obj_klass, (RegionNode*)NULL);
     if (array_ctl != NULL) {
@@ -4335,8 +4331,7 @@
       set_control(array_ctl);
       Node* obj_length = load_array_length(obj);
       Node* obj_size  = NULL;
-      Node* alloc_obj = new_array(obj_klass, obj_length, 0,
-                                  raw_mem_only, &obj_size);
+      Node* alloc_obj = new_array(obj_klass, obj_length, 0, &obj_size);
 
       if (!use_ReduceInitialCardMarks()) {
         // If it is an oop array, it requires very special treatment,
@@ -4408,7 +4403,7 @@
       // It's an instance, and it passed the slow-path tests.
       PreserveJVMState pjvms(this);
       Node* obj_size  = NULL;
-      Node* alloc_obj = new_instance(obj_klass, NULL, raw_mem_only, &obj_size);
+      Node* alloc_obj = new_instance(obj_klass, NULL, &obj_size);
 
       copy_to_clone(obj, alloc_obj, obj_size, false, !use_ReduceInitialCardMarks());
 
--- a/hotspot/src/share/vm/opto/loopPredicate.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/opto/loopPredicate.cpp	Thu May 05 22:28:31 2011 -0700
@@ -341,7 +341,7 @@
   // Cut predicate from old place.
   Node* old = predicate_proj;
   igvn->_worklist.push(old);
-  for (DUIterator_Last imin, i = old->last_outs(imin); i >= imin; ) {
+  for (DUIterator_Last imin, i = old->last_outs(imin); i >= imin;) {
     Node* use = old->last_out(i);  // for each use...
     igvn->hash_delete(use);
     igvn->_worklist.push(use);
@@ -384,24 +384,25 @@
 
 //--------------------------clone_loop_predicates-----------------------
 // Interface from IGVN
-Node* PhaseIterGVN::clone_loop_predicates(Node* old_entry, Node* new_entry) {
-  return PhaseIdealLoop::clone_loop_predicates(old_entry, new_entry, false, NULL, this);
+Node* PhaseIterGVN::clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check) {
+  return PhaseIdealLoop::clone_loop_predicates(old_entry, new_entry, false, clone_limit_check, NULL, this);
 }
-Node* PhaseIterGVN::move_loop_predicates(Node* old_entry, Node* new_entry) {
-  return PhaseIdealLoop::clone_loop_predicates(old_entry, new_entry, true, NULL, this);
+Node* PhaseIterGVN::move_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check) {
+  return PhaseIdealLoop::clone_loop_predicates(old_entry, new_entry, true, clone_limit_check, NULL, this);
 }
 
 // Interface from PhaseIdealLoop
-Node* PhaseIdealLoop::clone_loop_predicates(Node* old_entry, Node* new_entry) {
-  return clone_loop_predicates(old_entry, new_entry, false, this, &this->_igvn);
+Node* PhaseIdealLoop::clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check) {
+  return clone_loop_predicates(old_entry, new_entry, false, clone_limit_check, this, &this->_igvn);
 }
-Node* PhaseIdealLoop::move_loop_predicates(Node* old_entry, Node* new_entry) {
-  return clone_loop_predicates(old_entry, new_entry, true, this, &this->_igvn);
+Node* PhaseIdealLoop::move_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check) {
+  return clone_loop_predicates(old_entry, new_entry, true, clone_limit_check, this, &this->_igvn);
 }
 
 // Clone loop predicates to cloned loops (peeled, unswitched, split_if).
 Node* PhaseIdealLoop::clone_loop_predicates(Node* old_entry, Node* new_entry,
                                                 bool move_predicates,
+                                                bool clone_limit_check,
                                                 PhaseIdealLoop* loop_phase,
                                                 PhaseIterGVN* igvn) {
 #ifdef ASSERT
@@ -413,10 +414,16 @@
 #endif
   // Search original predicates
   Node* entry = old_entry;
+  ProjNode* limit_check_proj = NULL;
+  if (LoopLimitCheck) {
+    limit_check_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
+    if (limit_check_proj != NULL) {
+      entry = entry->in(0)->in(0);
+    }
+  }
   if (UseLoopPredicate) {
     ProjNode* predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
     if (predicate_proj != NULL) { // right pattern that can be used by loop predication
-      assert(entry->in(0)->in(1)->in(1)->Opcode()==Op_Opaque1, "must be");
       if (move_predicates) {
         new_entry =  move_predicate(predicate_proj, new_entry,
                                     Deoptimization::Reason_predicate,
@@ -435,11 +442,37 @@
       }
     }
   }
+  if (limit_check_proj != NULL && clone_limit_check) {
+    // Clone loop limit check last to insert it before loop.
+    // Don't clone a limit check which was already finalized
+    // for this counted loop (only one limit check is needed).
+    if (move_predicates) {
+      new_entry =  move_predicate(limit_check_proj, new_entry,
+                                  Deoptimization::Reason_loop_limit_check,
+                                  loop_phase, igvn);
+      assert(new_entry == limit_check_proj, "old limit check fall through projection");
+    } else {
+      new_entry = clone_predicate(limit_check_proj, new_entry,
+                                  Deoptimization::Reason_loop_limit_check,
+                                  loop_phase, igvn);
+      assert(new_entry != NULL && new_entry->is_Proj(), "IfTrue or IfFalse after clone limit check");
+    }
+    if (TraceLoopLimitCheck) {
+      tty->print_cr("Loop Limit Check %s: ", move_predicates ? "moved" : "cloned");
+      debug_only( new_entry->in(0)->dump(); )
+    }
+  }
   return new_entry;
 }
 
 //--------------------------eliminate_loop_predicates-----------------------
 void PhaseIdealLoop::eliminate_loop_predicates(Node* entry) {
+  if (LoopLimitCheck) {
+    Node* predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
+    if (predicate != NULL) {
+      entry = entry->in(0)->in(0);
+    }
+  }
   if (UseLoopPredicate) {
     ProjNode* predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
     if (predicate_proj != NULL) { // right pattern that can be used by loop predication
@@ -456,10 +489,15 @@
 // Skip related predicates.
 Node* PhaseIdealLoop::skip_loop_predicates(Node* entry) {
   Node* predicate = NULL;
+  if (LoopLimitCheck) {
+    predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
+    if (predicate != NULL) {
+      entry = entry->in(0)->in(0);
+    }
+  }
   if (UseLoopPredicate) {
     predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
     if (predicate != NULL) { // right pattern that can be used by loop predication
-      assert(entry->is_Proj() && entry->in(0)->in(1)->in(1)->Opcode()==Op_Opaque1, "must be");
       IfNode* iff = entry->in(0)->as_If();
       ProjNode* uncommon_proj = iff->proj_out(1 - entry->as_Proj()->_con);
       Node* rgn = uncommon_proj->unique_ctrl_out();
@@ -491,10 +529,15 @@
 // Find a predicate
 Node* PhaseIdealLoop::find_predicate(Node* entry) {
   Node* predicate = NULL;
+  if (LoopLimitCheck) {
+    predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
+    if (predicate != NULL) { // right pattern that can be used by loop predication
+      return entry;
+    }
+  }
   if (UseLoopPredicate) {
     predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
     if (predicate != NULL) { // right pattern that can be used by loop predication
-      assert(entry->in(0)->in(1)->in(1)->Opcode()==Op_Opaque1, "must be");
       return entry;
     }
   }
@@ -658,7 +701,7 @@
   Node* range = cmp->in(2);
   if (range->Opcode() != Op_LoadRange) {
     const TypeInt* tint = phase->_igvn.type(range)->isa_int();
-    if (!OptimizeFill || tint == NULL || tint->empty() || tint->_lo < 0) {
+    if (tint == NULL || tint->empty() || tint->_lo < 0) {
       // Allow predication on positive values that aren't LoadRanges.
       // This allows optimization of loops where the length of the
       // array is a known value and doesn't need to be loaded back
@@ -696,36 +739,49 @@
 //   max(scale*i + offset) = scale*(limit-stride) + offset
 // (2) stride*scale < 0
 //   max(scale*i + offset) = scale*init + offset
-BoolNode* PhaseIdealLoop::rc_predicate(Node* ctrl,
+BoolNode* PhaseIdealLoop::rc_predicate(IdealLoopTree *loop, Node* ctrl,
                                        int scale, Node* offset,
                                        Node* init, Node* limit, Node* stride,
                                        Node* range, bool upper) {
-  DEBUG_ONLY(ttyLocker ttyl);
-  if (TraceLoopPredicate) tty->print("rc_predicate ");
+  stringStream* predString = NULL;
+  if (TraceLoopPredicate) {
+    predString = new stringStream();
+    predString->print("rc_predicate ");
+  }
 
   Node* max_idx_expr  = init;
   int stride_con = stride->get_int();
   if ((stride_con > 0) == (scale > 0) == upper) {
-    max_idx_expr = new (C, 3) SubINode(limit, stride);
-    register_new_node(max_idx_expr, ctrl);
-    if (TraceLoopPredicate) tty->print("(limit - stride) ");
+    if (LoopLimitCheck) {
+      // With LoopLimitCheck limit is not exact.
+      // Calculate exact limit here.
+      // Note, counted loop's test is '<' or '>'.
+      limit = exact_limit(loop);
+      max_idx_expr = new (C, 3) SubINode(limit, stride);
+      register_new_node(max_idx_expr, ctrl);
+      if (TraceLoopPredicate) predString->print("(limit - stride) ");
+    } else {
+      max_idx_expr = new (C, 3) SubINode(limit, stride);
+      register_new_node(max_idx_expr, ctrl);
+      if (TraceLoopPredicate) predString->print("(limit - stride) ");
+    }
   } else {
-    if (TraceLoopPredicate) tty->print("init ");
+    if (TraceLoopPredicate) predString->print("init ");
   }
 
   if (scale != 1) {
     ConNode* con_scale = _igvn.intcon(scale);
     max_idx_expr = new (C, 3) MulINode(max_idx_expr, con_scale);
     register_new_node(max_idx_expr, ctrl);
-    if (TraceLoopPredicate) tty->print("* %d ", scale);
+    if (TraceLoopPredicate) predString->print("* %d ", scale);
   }
 
   if (offset && (!offset->is_Con() || offset->get_int() != 0)){
     max_idx_expr = new (C, 3) AddINode(max_idx_expr, offset);
     register_new_node(max_idx_expr, ctrl);
     if (TraceLoopPredicate)
-      if (offset->is_Con()) tty->print("+ %d ", offset->get_int());
-      else tty->print("+ offset ");
+      if (offset->is_Con()) predString->print("+ %d ", offset->get_int());
+      else predString->print("+ offset ");
   }
 
   CmpUNode* cmp = new (C, 3) CmpUNode(max_idx_expr, range);
@@ -733,7 +789,10 @@
   BoolNode* bol = new (C, 2) BoolNode(cmp, BoolTest::lt);
   register_new_node(bol, ctrl);
 
-  if (TraceLoopPredicate) tty->print_cr("<u range");
+  if (TraceLoopPredicate) {
+    predString->print_cr("<u range");
+    tty->print(predString->as_string());
+  }
   return bol;
 }
 
@@ -746,29 +805,36 @@
     // Could be a simple region when irreducible loops are present.
     return false;
   }
+  LoopNode* head = loop->_head->as_Loop();
 
-  if (loop->_head->unique_ctrl_out()->Opcode() == Op_NeverBranch) {
+  if (head->unique_ctrl_out()->Opcode() == Op_NeverBranch) {
     // do nothing for infinite loops
     return false;
   }
 
   CountedLoopNode *cl = NULL;
-  if (loop->_head->is_CountedLoop()) {
-    cl = loop->_head->as_CountedLoop();
+  if (head->is_CountedLoop()) {
+    cl = head->as_CountedLoop();
     // do nothing for iteration-splitted loops
     if (!cl->is_normal_loop()) return false;
   }
 
-  LoopNode *lpn  = loop->_head->as_Loop();
-  Node* entry = lpn->in(LoopNode::EntryControl);
+  Node* entry = head->in(LoopNode::EntryControl);
+  ProjNode *predicate_proj = NULL;
+  // Loop limit check predicate should be near the loop.
+  if (LoopLimitCheck) {
+    predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
+    if (predicate_proj != NULL)
+      entry = predicate_proj->in(0)->in(0);
+  }
 
-  ProjNode *predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
+  predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
   if (!predicate_proj) {
 #ifndef PRODUCT
     if (TraceLoopPredicate) {
       tty->print("missing predicate:");
       loop->dump_head();
-      lpn->dump(1);
+      head->dump(1);
     }
 #endif
     return false;
@@ -782,7 +848,6 @@
   // Create list of if-projs such that a newer proj dominates all older
   // projs in the list, and they all dominate loop->tail()
   Node_List if_proj_list(area);
-  LoopNode *head  = loop->_head->as_Loop();
   Node *current_proj = loop->tail(); //start from tail
   while (current_proj != head) {
     if (loop == get_loop(current_proj) && // still in the loop ?
@@ -856,8 +921,8 @@
       const Node*    cmp    = bol->in(1)->as_Cmp();
       Node*          idx    = cmp->in(1);
       assert(!invar.is_invariant(idx), "index is variant");
-      assert(cmp->in(2)->Opcode() == Op_LoadRange || OptimizeFill, "must be");
       Node* rng = cmp->in(2);
+      assert(rng->Opcode() == Op_LoadRange || _igvn.type(rng)->is_int() >= 0, "must be");
       assert(invar.is_invariant(rng), "range must be invariant");
       int scale    = 1;
       Node* offset = zero;
@@ -886,14 +951,14 @@
       }
 
       // Test the lower bound
-      Node*  lower_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, false);
+      Node*  lower_bound_bol = rc_predicate(loop, ctrl, scale, offset, init, limit, stride, rng, false);
       IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If();
       _igvn.hash_delete(lower_bound_iff);
       lower_bound_iff->set_req(1, lower_bound_bol);
       if (TraceLoopPredicate) tty->print_cr("lower bound check if: %d", lower_bound_iff->_idx);
 
       // Test the upper bound
-      Node* upper_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, true);
+      Node* upper_bound_bol = rc_predicate(loop, ctrl, scale, offset, init, limit, stride, rng, true);
       IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If();
       _igvn.hash_delete(upper_bound_iff);
       upper_bound_iff->set_req(1, upper_bound_bol);
@@ -957,4 +1022,3 @@
 
   return hoisted;
 }
-
--- a/hotspot/src/share/vm/opto/loopTransform.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/opto/loopTransform.cpp	Thu May 05 22:28:31 2011 -0700
@@ -83,7 +83,7 @@
 #ifdef ASSERT
   BoolTest::mask bt = cl->loopexit()->test_trip();
   assert(bt == BoolTest::lt || bt == BoolTest::gt ||
-         bt == BoolTest::ne, "canonical test is expected");
+         (bt == BoolTest::ne && !LoopLimitCheck), "canonical test is expected");
 #endif
 
   Node* init_n = cl->init_trip();
@@ -510,7 +510,7 @@
   //         the pre-loop with only 1 user (the new peeled iteration), but the
   //         peeled-loop backedge has 2 users.
   Node* new_exit_value = old_new[head->in(LoopNode::LoopBackControl)->_idx];
-  new_exit_value = move_loop_predicates(entry, new_exit_value);
+  new_exit_value = move_loop_predicates(entry, new_exit_value, !counted_loop);
   _igvn.hash_delete(head);
   head->set_req(LoopNode::EntryControl, new_exit_value);
   for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
@@ -593,6 +593,12 @@
     return false;
   }
 
+  // Fully unroll a loop with few iterations regardless next
+  // conditions since following loop optimizations will split
+  // such loop anyway (pre-main-post).
+  if (trip_count <= 3)
+    return true;
+
   // Take into account that after unroll conjoined heads and tails will fold,
   // otherwise policy_unroll() may allow more unrolling than max unrolling.
   uint new_body_size = EMPTY_LOOP_SIZE + (body_size - EMPTY_LOOP_SIZE) * trip_count;
@@ -605,15 +611,6 @@
     return false;
   }
 
-  // Currently we don't have policy to optimize one iteration loops.
-  // Maximally unrolling transformation is used for that:
-  // it is peeled and the original loop become non reachable (dead).
-  // Also fully unroll a loop with few iterations regardless next
-  // conditions since following loop optimizations will split
-  // such loop anyway (pre-main-post).
-  if (trip_count <= 3)
-    return true;
-
   // Do not unroll a loop with String intrinsics code.
   // String intrinsics are large and have loops.
   for (uint k = 0; k < _body.size(); k++) {
@@ -632,6 +629,8 @@
 }
 
 
+#define MAX_UNROLL 16 // maximum number of unrolls for main loop
+
 //------------------------------policy_unroll----------------------------------
 // Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if
 // the loop is a CountedLoop and the body is small enough.
@@ -643,13 +642,15 @@
   if (!cl->is_valid_counted_loop())
     return false; // Malformed counted loop
 
-  // protect against over-unrolling
-  if (cl->trip_count() <= 1) return false;
-
-  // Check for stride being a small enough constant
-  if (abs(cl->stride_con()) > (1<<3)) return false;
+  // Protect against over-unrolling.
+  // After split at least one iteration will be executed in pre-loop.
+  if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;
 
   int future_unroll_ct = cl->unrolled_count() * 2;
+  if (future_unroll_ct > MAX_UNROLL) return false;
+
+  // Check for initial stride being a small enough constant
+  if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false;
 
   // Don't unroll if the next round of unrolling would push us
   // over the expected trip count of the loop.  One is subtracted
@@ -675,6 +676,7 @@
 
   Node *init_n = cl->init_trip();
   Node *limit_n = cl->limit();
+  int stride_con = cl->stride_con();
   // Non-constant bounds.
   // Protect against over-unrolling when init or/and limit are not constant
   // (so that trip_count's init value is maxint) but iv range is known.
@@ -684,7 +686,7 @@
     if (phi != NULL) {
       assert(phi->is_Phi() && phi->in(0) == _head, "Counted loop should have iv phi.");
       const TypeInt* iv_type = phase->_igvn.type(phi)->is_int();
-      int next_stride = cl->stride_con() * 2; // stride after this unroll
+      int next_stride = stride_con * 2; // stride after this unroll
       if (next_stride > 0) {
         if (iv_type->_lo + next_stride <= iv_type->_lo || // overflow
             iv_type->_lo + next_stride >  iv_type->_hi) {
@@ -699,15 +701,19 @@
     }
   }
 
+  // After unroll limit will be adjusted: new_limit = limit-stride.
+  // Bailout if adjustment overflow.
+  const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int();
+  if (stride_con > 0 && ((limit_type->_hi - stride_con) >= limit_type->_hi) ||
+      stride_con < 0 && ((limit_type->_lo - stride_con) <= limit_type->_lo))
+    return false;  // overflow
+
   // Adjust body_size to determine if we unroll or not
   uint body_size = _body.size();
-  // Key test to unroll CaffeineMark's Logic test
-  int xors_in_loop = 0;
   // Also count ModL, DivL and MulL which expand mightly
   for (uint k = 0; k < _body.size(); k++) {
     Node* n = _body.at(k);
     switch (n->Opcode()) {
-      case Op_XorI: xors_in_loop++; break; // CaffeineMark's Logic test
       case Op_ModL: body_size += 30; break;
       case Op_DivL: body_size += 30; break;
       case Op_MulL: body_size += 10; break;
@@ -724,8 +730,7 @@
 
   // Check for being too big
   if (body_size > (uint)LoopUnrollLimit) {
-    if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
-    // Normal case: loop too big
+     // Normal case: loop too big
     return false;
   }
 
@@ -747,28 +752,31 @@
 // Return TRUE or FALSE if the loop should be range-check-eliminated.
 // Actually we do iteration-splitting, a more powerful form of RCE.
 bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const {
-  if( !RangeCheckElimination ) return false;
+  if (!RangeCheckElimination) return false;
 
   CountedLoopNode *cl = _head->as_CountedLoop();
   // If we unrolled with no intention of doing RCE and we later
   // changed our minds, we got no pre-loop.  Either we need to
   // make a new pre-loop, or we gotta disallow RCE.
-  if( cl->is_main_no_pre_loop() ) return false; // Disallowed for now.
+  if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
   Node *trip_counter = cl->phi();
 
   // Check loop body for tests of trip-counter plus loop-invariant vs
   // loop-invariant.
-  for( uint i = 0; i < _body.size(); i++ ) {
+  for (uint i = 0; i < _body.size(); i++) {
     Node *iff = _body[i];
-    if( iff->Opcode() == Op_If ) { // Test?
+    if (iff->Opcode() == Op_If) { // Test?
 
       // Comparing trip+off vs limit
       Node *bol = iff->in(1);
-      if( bol->req() != 2 ) continue; // dead constant test
+      if (bol->req() != 2) continue; // dead constant test
       if (!bol->is_Bool()) {
         assert(UseLoopPredicate && bol->Opcode() == Op_Conv2B, "predicate check only");
         continue;
       }
+      if (bol->as_Bool()->_test._test == BoolTest::ne)
+        continue; // not RC
+
       Node *cmp = bol->in(1);
 
       Node *rc_exp = cmp->in(1);
@@ -1064,6 +1072,7 @@
   // negative stride use >
 
   if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) {
+    assert(!LoopLimitCheck, "only canonical tests (lt or gt) are expected");
 
     BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt;
     // Modify pre loop end condition
@@ -1090,6 +1099,9 @@
   main_head->set_main_loop();
   if( peel_only ) main_head->set_main_no_pre_loop();
 
+  // Subtract a trip count for the pre-loop.
+  main_head->set_trip_count(main_head->trip_count() - 1);
+
   // It's difficult to be precise about the trip-counts
   // for the pre/post loops.  They are usually very short,
   // so guess that 4 trips is a reasonable value.
@@ -1123,9 +1135,9 @@
     loop->dump_head();
   } else if (TraceLoopOpts) {
     if (loop_head->trip_count() < (uint)LoopUnrollLimit) {
-      tty->print("Unroll  %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count());
+      tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count());
     } else {
-      tty->print("Unroll  %d     ", loop_head->unrolled_count()*2);
+      tty->print("Unroll %d     ", loop_head->unrolled_count()*2);
     }
     loop->dump_head();
   }
@@ -1141,7 +1153,8 @@
   Node *stride = loop_head->stride();
 
   Node *opaq = NULL;
-  if( adjust_min_trip ) {       // If not maximally unrolling, need adjustment
+  if (adjust_min_trip) {       // If not maximally unrolling, need adjustment
+    // Search for zero-trip guard.
     assert( loop_head->is_main_loop(), "" );
     assert( ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "" );
     Node *iff = ctrl->in(0);
@@ -1151,63 +1164,210 @@
     Node *cmp = bol->in(1);
     assert( cmp->Opcode() == Op_CmpI, "" );
     opaq = cmp->in(2);
-    // Occasionally it's possible for a pre-loop Opaque1 node to be
+    // Occasionally it's possible for a zero-trip guard Opaque1 node to be
     // optimized away and then another round of loop opts attempted.
     // We can not optimize this particular loop in that case.
-    if( opaq->Opcode() != Op_Opaque1 )
-      return;                   // Cannot find pre-loop!  Bail out!
+    if (opaq->Opcode() != Op_Opaque1)
+      return; // Cannot find zero-trip guard!  Bail out!
+    // Zero-trip test uses an 'opaque' node which is not shared.
+    assert(opaq->outcnt() == 1 && opaq->in(1) == limit, "");
   }
 
   C->set_major_progress();
 
-  // Adjust max trip count. The trip count is intentionally rounded
-  // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
-  // the main, unrolled, part of the loop will never execute as it is protected
-  // by the min-trip test.  See bug 4834191 for a case where we over-unrolled
-  // and later determined that part of the unrolled loop was dead.
-  loop_head->set_trip_count(loop_head->trip_count() / 2);
+  Node* new_limit = NULL;
+  if (UnrollLimitCheck) {
+    int stride_con = stride->get_int();
+    int stride_p = (stride_con > 0) ? stride_con : -stride_con;
+    uint old_trip_count = loop_head->trip_count();
+    // Verify that unroll policy result is still valid.
+    assert(old_trip_count > 1 &&
+           (!adjust_min_trip || stride_p <= (1<<3)*loop_head->unrolled_count()), "sanity");
 
-  // Double the count of original iterations in the unrolled loop body.
-  loop_head->double_unrolled_count();
+    // Adjust loop limit to keep valid iterations number after unroll.
+    // Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride
+    // which may overflow.
+    if (!adjust_min_trip) {
+      assert(old_trip_count > 1 && (old_trip_count & 1) == 0,
+             "odd trip count for maximally unroll");
+      // Don't need to adjust limit for maximally unroll since trip count is even.
+    } else if (loop_head->has_exact_trip_count() && init->is_Con()) {
+      // Loop's limit is constant. Loop's init could be constant when pre-loop
+      // become peeled iteration.
+      long init_con = init->get_int();
+      // We can keep old loop limit if iterations count stays the same:
+      //   old_trip_count == new_trip_count * 2
+      // Note: since old_trip_count >= 2 then new_trip_count >= 1
+      // so we also don't need to adjust zero trip test.
+      long limit_con  = limit->get_int();
+      // (stride_con*2) not overflow since stride_con <= 8.
+      int new_stride_con = stride_con * 2;
+      int stride_m    = new_stride_con - (stride_con > 0 ? 1 : -1);
+      long trip_count = (limit_con - init_con + stride_m)/new_stride_con;
+      // New trip count should satisfy next conditions.
+      assert(trip_count > 0 && (julong)trip_count < (julong)max_juint/2, "sanity");
+      uint new_trip_count = (uint)trip_count;
+      adjust_min_trip = (old_trip_count != new_trip_count*2);
+    }
 
-  // -----------
-  // Step 2: Cut back the trip counter for an unroll amount of 2.
-  // Loop will normally trip (limit - init)/stride_con.  Since it's a
-  // CountedLoop this is exact (stride divides limit-init exactly).
-  // We are going to double the loop body, so we want to knock off any
-  // odd iteration: (trip_cnt & ~1).  Then back compute a new limit.
-  Node *span = new (C, 3) SubINode( limit, init );
-  register_new_node( span, ctrl );
-  Node *trip = new (C, 3) DivINode( 0, span, stride );
-  register_new_node( trip, ctrl );
-  Node *mtwo = _igvn.intcon(-2);
-  set_ctrl(mtwo, C->root());
-  Node *rond = new (C, 3) AndINode( trip, mtwo );
-  register_new_node( rond, ctrl );
-  Node *spn2 = new (C, 3) MulINode( rond, stride );
-  register_new_node( spn2, ctrl );
-  Node *lim2 = new (C, 3) AddINode( spn2, init );
-  register_new_node( lim2, ctrl );
+    if (adjust_min_trip) {
+      // Step 2: Adjust the trip limit if it is called for.
+      // The adjustment amount is -stride. Need to make sure if the
+      // adjustment underflows or overflows, then the main loop is skipped.
+      Node* cmp = loop_end->cmp_node();
+      assert(cmp->in(2) == limit, "sanity");
+      assert(opaq != NULL && opaq->in(1) == limit, "sanity");
 
-  // Hammer in the new limit
-  Node *ctrl2 = loop_end->in(0);
-  Node *cmp2 = new (C, 3) CmpINode( loop_head->incr(), lim2 );
-  register_new_node( cmp2, ctrl2 );
-  Node *bol2 = new (C, 2) BoolNode( cmp2, loop_end->test_trip() );
-  register_new_node( bol2, ctrl2 );
-  _igvn.hash_delete(loop_end);
-  loop_end->set_req(CountedLoopEndNode::TestValue, bol2);
+      // Verify that policy_unroll result is still valid.
+      const TypeInt* limit_type = _igvn.type(limit)->is_int();
+      assert(stride_con > 0 && ((limit_type->_hi - stride_con) < limit_type->_hi) ||
+             stride_con < 0 && ((limit_type->_lo - stride_con) > limit_type->_lo), "sanity");
 
-  // Step 3: Find the min-trip test guaranteed before a 'main' loop.
-  // Make it a 1-trip test (means at least 2 trips).
-  if( adjust_min_trip ) {
-    // Guard test uses an 'opaque' node which is not shared.  Hence I
-    // can edit it's inputs directly.  Hammer in the new limit for the
-    // minimum-trip guard.
-    assert( opaq->outcnt() == 1, "" );
-    _igvn.hash_delete(opaq);
-    opaq->set_req(1, lim2);
-  }
+      if (limit->is_Con()) {
+        // The check in policy_unroll and the assert above guarantee
+        // no underflow if limit is constant.
+        new_limit = _igvn.intcon(limit->get_int() - stride_con);
+        set_ctrl(new_limit, C->root());
+      } else {
+        // Limit is not constant.
+        {
+          // Separate limit by Opaque node in case it is an incremented
+          // variable from previous loop to avoid using pre-incremented
+          // value which could increase register pressure.
+          // Otherwise reorg_offsets() optimization will create a separate
+          // Opaque node for each use of trip-counter and as result
+          // zero trip guard limit will be different from loop limit.
+          assert(has_ctrl(opaq), "should have it");
+          Node* opaq_ctrl = get_ctrl(opaq);
+          limit = new (C, 2) Opaque2Node( C, limit );
+          register_new_node( limit, opaq_ctrl );
+        }
+        if (stride_con > 0 && ((limit_type->_lo - stride_con) < limit_type->_lo) ||
+                   stride_con < 0 && ((limit_type->_hi - stride_con) > limit_type->_hi)) {
+          // No underflow.
+          new_limit = new (C, 3) SubINode(limit, stride);
+        } else {
+          // (limit - stride) may underflow.
+          // Clamp the adjustment value with MININT or MAXINT:
+          //
+          //   new_limit = limit-stride
+          //   if (stride > 0)
+          //     new_limit = (limit < new_limit) ? MININT : new_limit;
+          //   else
+          //     new_limit = (limit > new_limit) ? MAXINT : new_limit;
+          //
+          BoolTest::mask bt = loop_end->test_trip();
+          assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected");
+          Node* adj_max = _igvn.intcon((stride_con > 0) ? min_jint : max_jint);
+          set_ctrl(adj_max, C->root());
+          Node* old_limit = NULL;
+          Node* adj_limit = NULL;
+          Node* bol = limit->is_CMove() ? limit->in(CMoveNode::Condition) : NULL;
+          if (loop_head->unrolled_count() > 1 &&
+              limit->is_CMove() && limit->Opcode() == Op_CMoveI &&
+              limit->in(CMoveNode::IfTrue) == adj_max &&
+              bol->as_Bool()->_test._test == bt &&
+              bol->in(1)->Opcode() == Op_CmpI &&
+              bol->in(1)->in(2) == limit->in(CMoveNode::IfFalse)) {
+            // Loop was unrolled before.
+            // Optimize the limit to avoid nested CMove:
+            // use original limit as old limit.
+            old_limit = bol->in(1)->in(1);
+            // Adjust previous adjusted limit.
+            adj_limit = limit->in(CMoveNode::IfFalse);
+            adj_limit = new (C, 3) SubINode(adj_limit, stride);
+          } else {
+            old_limit = limit;
+            adj_limit = new (C, 3) SubINode(limit, stride);
+          }
+          assert(old_limit != NULL && adj_limit != NULL, "");
+          register_new_node( adj_limit, ctrl ); // adjust amount
+          Node* adj_cmp = new (C, 3) CmpINode(old_limit, adj_limit);
+          register_new_node( adj_cmp, ctrl );
+          Node* adj_bool = new (C, 2) BoolNode(adj_cmp, bt);
+          register_new_node( adj_bool, ctrl );
+          new_limit = new (C, 4) CMoveINode(adj_bool, adj_limit, adj_max, TypeInt::INT);
+        }
+        register_new_node(new_limit, ctrl);
+      }
+      assert(new_limit != NULL, "");
+      // Replace in loop test.
+      _igvn.hash_delete(cmp);
+      cmp->set_req(2, new_limit);
+
+      // Step 3: Find the min-trip test guaranteed before a 'main' loop.
+      // Make it a 1-trip test (means at least 2 trips).
+
+      // Guard test uses an 'opaque' node which is not shared.  Hence I
+      // can edit it's inputs directly.  Hammer in the new limit for the
+      // minimum-trip guard.
+      assert(opaq->outcnt() == 1, "");
+      _igvn.hash_delete(opaq);
+      opaq->set_req(1, new_limit);
+    }
+
+    // Adjust max trip count. The trip count is intentionally rounded
+    // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
+    // the main, unrolled, part of the loop will never execute as it is protected
+    // by the min-trip test.  See bug 4834191 for a case where we over-unrolled
+    // and later determined that part of the unrolled loop was dead.
+    loop_head->set_trip_count(old_trip_count / 2);
+
+    // Double the count of original iterations in the unrolled loop body.
+    loop_head->double_unrolled_count();
+
+  } else { // LoopLimitCheck
+
+    // Adjust max trip count. The trip count is intentionally rounded
+    // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
+    // the main, unrolled, part of the loop will never execute as it is protected
+    // by the min-trip test.  See bug 4834191 for a case where we over-unrolled
+    // and later determined that part of the unrolled loop was dead.
+    loop_head->set_trip_count(loop_head->trip_count() / 2);
+
+    // Double the count of original iterations in the unrolled loop body.
+    loop_head->double_unrolled_count();
+
+    // -----------
+    // Step 2: Cut back the trip counter for an unroll amount of 2.
+    // Loop will normally trip (limit - init)/stride_con.  Since it's a
+    // CountedLoop this is exact (stride divides limit-init exactly).
+    // We are going to double the loop body, so we want to knock off any
+    // odd iteration: (trip_cnt & ~1).  Then back compute a new limit.
+    Node *span = new (C, 3) SubINode( limit, init );
+    register_new_node( span, ctrl );
+    Node *trip = new (C, 3) DivINode( 0, span, stride );
+    register_new_node( trip, ctrl );
+    Node *mtwo = _igvn.intcon(-2);
+    set_ctrl(mtwo, C->root());
+    Node *rond = new (C, 3) AndINode( trip, mtwo );
+    register_new_node( rond, ctrl );
+    Node *spn2 = new (C, 3) MulINode( rond, stride );
+    register_new_node( spn2, ctrl );
+    new_limit = new (C, 3) AddINode( spn2, init );
+    register_new_node( new_limit, ctrl );
+
+    // Hammer in the new limit
+    Node *ctrl2 = loop_end->in(0);
+    Node *cmp2 = new (C, 3) CmpINode( loop_head->incr(), new_limit );
+    register_new_node( cmp2, ctrl2 );
+    Node *bol2 = new (C, 2) BoolNode( cmp2, loop_end->test_trip() );
+    register_new_node( bol2, ctrl2 );
+    _igvn.hash_delete(loop_end);
+    loop_end->set_req(CountedLoopEndNode::TestValue, bol2);
+
+    // Step 3: Find the min-trip test guaranteed before a 'main' loop.
+    // Make it a 1-trip test (means at least 2 trips).
+    if( adjust_min_trip ) {
+      assert( new_limit != NULL, "" );
+      // Guard test uses an 'opaque' node which is not shared.  Hence I
+      // can edit it's inputs directly.  Hammer in the new limit for the
+      // minimum-trip guard.
+      assert( opaq->outcnt() == 1, "" );
+      _igvn.hash_delete(opaq);
+      opaq->set_req(1, new_limit);
+    }
+  } // LoopLimitCheck
 
   // ---------
   // Step 4: Clone the loop body.  Move it inside the loop.  This loop body
@@ -1263,6 +1423,7 @@
 
 void PhaseIdealLoop::do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new ) {
   CountedLoopNode *cl = loop->_head->as_CountedLoop();
+  assert(cl->has_exact_trip_count(), "trip count is not exact");
   assert(cl->trip_count() > 0, "");
 #ifndef PRODUCT
   if (TraceLoopOpts) {
@@ -1279,6 +1440,7 @@
   // Now its tripping an even number of times remaining.  Double loop body.
   // Do not adjust pre-guards; they are not needed and do not exist.
   if (cl->trip_count() > 0) {
+    assert((cl->trip_count() & 1) == 0, "missed peeling");
     do_unroll(loop, old_new, false);
   }
 }
@@ -1292,22 +1454,13 @@
 }
 
 //------------------------------add_constraint---------------------------------
-// Constrain the main loop iterations so the condition:
-//    scale_con * I + offset  <  limit
+// Constrain the main loop iterations so the conditions:
+//    low_limit <= scale_con * I + offset  <  upper_limit
 // always holds true.  That is, either increase the number of iterations in
 // the pre-loop or the post-loop until the condition holds true in the main
 // loop.  Stride, scale, offset and limit are all loop invariant.  Further,
 // stride and scale are constants (offset and limit often are).
-void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) {
-
-  // Compute "I :: (limit-offset)/scale_con"
-  Node *con = new (C, 3) SubINode( limit, offset );
-  register_new_node( con, pre_ctrl );
-  Node *scale = _igvn.intcon(scale_con);
-  set_ctrl(scale, C->root());
-  Node *X = new (C, 3) DivINode( 0, con, scale );
-  register_new_node( X, pre_ctrl );
-
+void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) {
   // For positive stride, the pre-loop limit always uses a MAX function
   // and the main loop a MIN function.  For negative stride these are
   // reversed.
@@ -1316,48 +1469,143 @@
   // pre-loop must check for underflow and the post-loop for overflow.
   // Negative stride*scale reverses this; pre-loop checks for overflow and
   // post-loop for underflow.
-  if( stride_con*scale_con > 0 ) {
-    // Compute I < (limit-offset)/scale_con
-    // Adjust main-loop last iteration to be MIN/MAX(main_loop,X)
-    *main_limit = (stride_con > 0)
-      ? (Node*)(new (C, 3) MinINode( *main_limit, X ))
-      : (Node*)(new (C, 3) MaxINode( *main_limit, X ));
-    register_new_node( *main_limit, pre_ctrl );
+  if (stride_con*scale_con > 0) {
+    // The overflow limit: scale*I+offset < upper_limit
+    // For main-loop compute
+    //   ( if (scale > 0) /* and stride > 0 */
+    //       I < (upper_limit-offset)/scale
+    //     else /* scale < 0 and stride < 0 */
+    //       I > (upper_limit-offset)/scale
+    //   )
+    //
+    // (upper_limit-offset) may overflow when offset < 0.
+    // But it is fine since main loop will either have
+    // less iterations or will be skipped in such case.
+    Node *con = new (C, 3) SubINode(upper_limit, offset);
+    register_new_node(con, pre_ctrl);
+    Node *scale = _igvn.intcon(scale_con);
+    set_ctrl(scale, C->root());
+    Node *X = new (C, 3) DivINode(0, con, scale);
+    register_new_node(X, pre_ctrl);
 
-  } else {
-    // Compute (limit-offset)/scale_con + SGN(-scale_con) <= I
-    // Add the negation of the main-loop constraint to the pre-loop.
-    // See footnote [++] below for a derivation of the limit expression.
-    Node *incr = _igvn.intcon(scale_con > 0 ? -1 : 1);
-    set_ctrl(incr, C->root());
-    Node *adj = new (C, 3) AddINode( X, incr );
-    register_new_node( adj, pre_ctrl );
-    *pre_limit = (scale_con > 0)
-      ? (Node*)new (C, 3) MinINode( *pre_limit, adj )
-      : (Node*)new (C, 3) MaxINode( *pre_limit, adj );
-    register_new_node( *pre_limit, pre_ctrl );
+    // Adjust main-loop last iteration
+    Node *loop_limit = *main_limit;
+    loop_limit = (stride_con > 0) // scale > 0
+      ? (Node*)(new (C, 3) MinINode(loop_limit, X))
+      : (Node*)(new (C, 3) MaxINode(loop_limit, X));
+    register_new_node(loop_limit, pre_ctrl);
+    *main_limit = loop_limit;
 
-//   [++] Here's the algebra that justifies the pre-loop limit expression:
-//
-//   NOT( scale_con * I + offset  <  limit )
-//      ==
-//   scale_con * I + offset  >=  limit
-//      ==
-//   SGN(scale_con) * I  >=  (limit-offset)/|scale_con|
-//      ==
-//   (limit-offset)/|scale_con|   <=  I * SGN(scale_con)
-//      ==
-//   (limit-offset)/|scale_con|-1  <  I * SGN(scale_con)
-//      ==
-//   ( if (scale_con > 0) /*common case*/
-//       (limit-offset)/scale_con - 1  <  I
-//     else
-//       (limit-offset)/scale_con + 1  >  I
-//    )
-//   ( if (scale_con > 0) /*common case*/
-//       (limit-offset)/scale_con + SGN(-scale_con)  <  I
-//     else
-//       (limit-offset)/scale_con + SGN(-scale_con)  >  I
+    // The underflow limit: low_limit <= scale*I+offset.
+    // For pre-loop compute
+    //   NOT(scale*I+offset >= low_limit)
+    //   scale*I+offset < low_limit
+    //   ( if (scale > 0) /* and stride > 0 */
+    //       I < (low_limit-offset)/scale
+    //     else /* scale < 0 and stride < 0 */
+    //       I > (low_limit-offset)/scale
+    //   )
+
+    if (low_limit->get_int() == -max_jint) {
+      if (!RangeLimitCheck) return;
+      // We need this guard when scale*pre_limit+offset >= limit
+      // due to underflow so we need execute pre-loop until
+      // scale*I+offset >= min_int. But (low_limit-offset) will
+      // underflow when offset > 0 and X will be > original_limit.
+      // To avoid it we replace offset = offset > 0 ? 0 : offset
+      // and add min(pre_limit, original_limit).
+      Node* shift = _igvn.intcon(31);
+      set_ctrl(shift, C->root());
+      Node *neg_off = new (C, 3) RShiftINode(offset, shift);
+      register_new_node(neg_off, pre_ctrl);
+      offset = new (C, 3) AndINode(offset, neg_off);
+      register_new_node(offset, pre_ctrl);
+    } else {
+      assert(low_limit->get_int() == 0, "wrong low limit for range check");
+      // The only problem we have here when offset == min_int
+      // since (0-min_int) == min_int. It may be fine for scale > 0
+      // but for scale < 0 X will be < original_limit.
+    }
+    con = new (C, 3) SubINode(low_limit, offset);
+    register_new_node(con, pre_ctrl);
+    scale = _igvn.intcon(scale_con);
+    set_ctrl(scale, C->root());
+    X = new (C, 3) DivINode(0, con, scale);
+    register_new_node(X, pre_ctrl);
+
+    // Adjust pre-loop last iteration
+    loop_limit = *pre_limit;
+    loop_limit = (stride_con > 0) // scale > 0
+      ? (Node*)(new (C, 3) MaxINode(loop_limit, X))
+      : (Node*)(new (C, 3) MinINode(loop_limit, X));
+    register_new_node( loop_limit, pre_ctrl );
+    *pre_limit = loop_limit;
+
+  } else { // stride_con*scale_con < 0
+    // For negative stride*scale pre-loop checks for overflow and
+    // post-loop for underflow.
+    //
+    // The underflow limit: low_limit <= scale*I+offset.
+    // For main-loop compute
+    //   scale*I+offset+1 > low_limit
+    //   ( if (scale < 0) /* and stride > 0 */
+    //       I < (low_limit-(offset+1))/scale
+    //     else /* scale < 0 and stride < 0 */
+    //       I > (low_limit-(offset+1))/scale
+    //   )
+
+    if (low_limit->get_int() == -max_jint) {
+      if (!RangeLimitCheck) return;
+    } else {
+      assert(low_limit->get_int() == 0, "wrong low limit for range check");
+    }
+
+    Node *one  = _igvn.intcon(1);
+    set_ctrl(one, C->root());
+    Node *plus_one = new (C, 3) AddINode(offset, one);
+    register_new_node( plus_one, pre_ctrl );
+    Node *con = new (C, 3) SubINode(low_limit, plus_one);
+    register_new_node(con, pre_ctrl);
+    Node *scale = _igvn.intcon(scale_con);
+    set_ctrl(scale, C->root());
+    Node *X = new (C, 3) DivINode(0, con, scale);
+    register_new_node(X, pre_ctrl);
+
+    // Adjust main-loop last iteration
+    Node *loop_limit = *main_limit;
+    loop_limit = (stride_con > 0) // scale < 0
+      ? (Node*)(new (C, 3) MinINode(loop_limit, X))
+      : (Node*)(new (C, 3) MaxINode(loop_limit, X));
+    register_new_node(loop_limit, pre_ctrl);
+    *main_limit = loop_limit;
+
+    // The overflow limit: scale*I+offset < upper_limit
+    // For pre-loop compute
+    //   NOT(scale*I+offset < upper_limit)
+    //   scale*I+offset >= upper_limit
+    //   scale*I+offset+1 > upper_limit
+    //   ( if (scale < 0) /* and stride > 0 */
+    //       I < (upper_limit-(offset+1))/scale
+    //     else /* scale < 0 and stride < 0 */
+    //       I > (upper_limit-(offset+1))/scale
+    //   )
+    plus_one = new (C, 3) AddINode(offset, one);
+    register_new_node( plus_one, pre_ctrl );
+    con = new (C, 3) SubINode(upper_limit, plus_one);
+    register_new_node(con, pre_ctrl);
+    scale = _igvn.intcon(scale_con);
+    set_ctrl(scale, C->root());
+    X = new (C, 3) DivINode(0, con, scale);
+    register_new_node(X, pre_ctrl);
+
+    // Adjust pre-loop last iteration
+    loop_limit = *pre_limit;
+    loop_limit = (stride_con > 0) // scale < 0
+      ? (Node*)(new (C, 3) MaxINode(loop_limit, X))
+      : (Node*)(new (C, 3) MinINode(loop_limit, X));
+    register_new_node( loop_limit, pre_ctrl );
+    *pre_limit = loop_limit;
+
   }
 }
 
@@ -1488,7 +1736,7 @@
   Node *cmpzm = bolzm->in(1);
   assert(cmpzm->is_Cmp(), "");
   Node *opqzm = cmpzm->in(2);
-  // Can not optimize a loop if pre-loop Opaque1 node is optimized
+  // Can not optimize a loop if zero-trip Opaque1 node is optimized
   // away and then another round of loop opts attempted.
   if (opqzm->Opcode() != Op_Opaque1)
     return;
@@ -1523,8 +1771,11 @@
   int stride_con = cl->stride_con();
   Node *zero = _igvn.intcon(0);
   Node *one  = _igvn.intcon(1);
+  // Use symmetrical int range [-max_jint,max_jint]
+  Node *mini = _igvn.intcon(-max_jint);
   set_ctrl(zero, C->root());
   set_ctrl(one,  C->root());
+  set_ctrl(mini, C->root());
 
   // Range checks that do not dominate the loop backedge (ie.
   // conditionally executed) can lengthen the pre loop limit beyond
@@ -1599,7 +1850,12 @@
       if( offset_c == ctrl ) {
         continue; // Don't rce this check but continue looking for other candidates.
       }
-
+#ifdef ASSERT
+      if (TraceRangeLimitCheck) {
+        tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
+        bol->dump(2);
+      }
+#endif
       // At this point we have the expression as:
       //   scale_con * trip_counter + offset :: limit
       // where scale_con, offset and limit are loop invariant.  Trip_counter
@@ -1610,17 +1866,16 @@
       // Adjust pre and main loop limits to guard the correct iteration set
       if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests
         if( b_test._test == BoolTest::lt ) { // Range checks always use lt
-          // The overflow limit: scale*I+offset < limit
-          add_constraint( stride_con, scale_con, offset, limit, pre_ctrl, &pre_limit, &main_limit );
-          // The underflow limit: 0 <= scale*I+offset.
-          // Some math yields: -scale*I-(offset+1) < 0
-          Node *plus_one = new (C, 3) AddINode( offset, one );
-          register_new_node( plus_one, pre_ctrl );
-          Node *neg_offset = new (C, 3) SubINode( zero, plus_one );
-          register_new_node( neg_offset, pre_ctrl );
-          add_constraint( stride_con, -scale_con, neg_offset, zero, pre_ctrl, &pre_limit, &main_limit );
+          // The underflow and overflow limits: 0 <= scale*I+offset < limit
+          add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
           if (!conditional_rc) {
             conditional_rc = !loop->dominates_backedge(iff);
+            // It is also needed if offset->_lo == min_int since
+            // (0-min_int) == min_int. It may be fine for stride > 0
+            // but for stride < 0 pre_limit will be < original_limit.
+            const TypeInt* offset_t = _igvn.type(offset)->is_int();
+            conditional_rc |= RangeLimitCheck && (offset_t->_lo == min_jint) &&
+                              (scale_con<0) && (stride_con<0);
           }
         } else {
 #ifndef PRODUCT
@@ -1631,21 +1886,35 @@
         }
       } else {                  // Otherwise work on normal compares
         switch( b_test._test ) {
-        case BoolTest::ge:      // Convert X >= Y to -X <= -Y
+        case BoolTest::gt:
+          // Fall into GE case
+        case BoolTest::ge:
+          // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
           scale_con = -scale_con;
           offset = new (C, 3) SubINode( zero, offset );
           register_new_node( offset, pre_ctrl );
           limit  = new (C, 3) SubINode( zero, limit  );
           register_new_node( limit, pre_ctrl );
           // Fall into LE case
-        case BoolTest::le:      // Convert X <= Y to X < Y+1
-          limit = new (C, 3) AddINode( limit, one );
-          register_new_node( limit, pre_ctrl );
+        case BoolTest::le:
+          if (b_test._test != BoolTest::gt) {
+            // Convert X <= Y to X < Y+1
+            limit = new (C, 3) AddINode( limit, one );
+            register_new_node( limit, pre_ctrl );
+          }
           // Fall into LT case
         case BoolTest::lt:
-          add_constraint( stride_con, scale_con, offset, limit, pre_ctrl, &pre_limit, &main_limit );
+          // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
+          add_constraint( stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit );
           if (!conditional_rc) {
             conditional_rc = !loop->dominates_backedge(iff);
+            // It is also needed if scale*pre_limit+offset >= limit
+            // due to underflow so we need execute pre-loop until
+            // scale*I+offset >= min_int. But (low_limit-offset) will
+            // underflow when offset > 0 and X will be > original_limit.
+            const TypeInt* offset_t = _igvn.type(offset)->is_int();
+            conditional_rc |= RangeLimitCheck && (offset_t->_hi > 0) &&
+                              (scale_con>0) && (stride_con>0);
           }
           break;
         default:
@@ -1696,7 +1965,8 @@
 
   // Note:: we are making the main loop limit no longer precise;
   // need to round up based on stride.
-  if( stride_con != 1 && stride_con != -1 ) { // Cutout for common case
+  cl->set_nonexact_trip_count();
+  if (!LoopLimitCheck && stride_con != 1 && stride_con != -1) { // Cutout for common case
     // "Standard" round-up logic:  ([main_limit-init+(y-1)]/y)*y+init
     // Hopefully, compiler will optimize for powers of 2.
     Node *ctrl = get_ctrl(main_limit);
@@ -1876,7 +2146,19 @@
   // iteration.  Then the CountedLoopEnd will collapse (backedge never
   // taken) and all loop-invariant uses of the exit values will be correct.
   Node *phi = cl->phi();
-  Node *final = new (phase->C, 3) SubINode( cl->limit(), cl->stride() );
+  Node *exact_limit = phase->exact_limit(this);
+  if (exact_limit != cl->limit()) {
+    // We also need to replace the original limit to collapse loop exit.
+    Node* cmp = cl->loopexit()->cmp_node();
+    assert(cl->limit() == cmp->in(2), "sanity");
+    phase->_igvn._worklist.push(cmp->in(2)); // put limit on worklist
+    phase->_igvn.hash_delete(cmp);
+    cmp->set_req(2, exact_limit);
+    phase->_igvn._worklist.push(cmp);        // put cmp on worklist
+  }
+  // Note: the final value after increment should not overflow since
+  // counted loop has limit check predicate.
+  Node *final = new (phase->C, 3) SubINode( exact_limit, cl->stride() );
   phase->register_new_node(final,cl->in(LoopNode::EntryControl));
   phase->_igvn.replace_node(phi,final);
   phase->C->set_major_progress();
--- a/hotspot/src/share/vm/opto/loopUnswitch.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/opto/loopUnswitch.cpp	Thu May 05 22:28:31 2011 -0700
@@ -130,6 +130,11 @@
   Node* uniqc = proj_true->unique_ctrl_out();
   Node* entry = head->in(LoopNode::EntryControl);
   Node* predicate = find_predicate(entry);
+  if (predicate != NULL && LoopLimitCheck && UseLoopPredicate) {
+    // We may have two predicates, find first.
+    entry = find_predicate(entry->in(0)->in(0));
+    if (entry != NULL) predicate = entry;
+  }
   if (predicate != NULL) predicate = predicate->in(0);
   assert(proj_true->is_IfTrue() &&
          (predicate == NULL && uniqc == head ||
@@ -217,6 +222,7 @@
 ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop,
                                                       Node_List &old_new) {
   LoopNode* head  = loop->_head->as_Loop();
+  bool counted_loop = head->is_CountedLoop();
   Node*     entry = head->in(LoopNode::EntryControl);
   _igvn.hash_delete(entry);
   _igvn._worklist.push(entry);
@@ -242,14 +248,14 @@
   assert(old_new[head->_idx]->is_Loop(), "" );
 
   // Fast (true) control
-  Node* iffast_pred = clone_loop_predicates(entry, iffast);
+  Node* iffast_pred = clone_loop_predicates(entry, iffast, !counted_loop);
   _igvn.hash_delete(head);
   head->set_req(LoopNode::EntryControl, iffast_pred);
   set_idom(head, iffast_pred, dom_depth(head));
   _igvn._worklist.push(head);
 
   // Slow (false) control
-  Node* ifslow_pred = move_loop_predicates(entry, ifslow);
+  Node* ifslow_pred = move_loop_predicates(entry, ifslow, !counted_loop);
   LoopNode* slow_head = old_new[head->_idx]->as_Loop();
   _igvn.hash_delete(slow_head);
   slow_head->set_req(LoopNode::EntryControl, ifslow_pred);
--- a/hotspot/src/share/vm/opto/loopnode.cpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/opto/loopnode.cpp	Thu May 05 22:28:31 2011 -0700
@@ -206,7 +206,7 @@
   // Get backedge compare
   Node *cmp = test->in(1);
   int cmp_op = cmp->Opcode();
-  if( cmp_op != Op_CmpI )
+  if (cmp_op != Op_CmpI)
     return false;                // Avoid pointer & float compares
 
   // Find the trip-counter increment & limit.  Limit must be loop invariant.
@@ -259,7 +259,8 @@
   }
   // Stride must be constant
   int stride_con = stride->get_int();
-  assert(stride_con != 0, "missed some peephole opt");
+  if (stride_con == 0)
+    return false; // missed some peephole opt
 
   if (!xphi->is_Phi())
     return false; // Too much math on the trip counter
@@ -319,7 +320,7 @@
       // Count down loop rolls through MAXINT
       (bt == BoolTest::le || bt == BoolTest::lt) && stride_con < 0 ||
       // Count up loop rolls through MININT
-      (bt == BoolTest::ge || bt == BoolTest::gt) && stride_con > 0 ) {
+      (bt == BoolTest::ge || bt == BoolTest::gt) && stride_con > 0) {
     return false; // Bail out
   }
 
@@ -341,12 +342,137 @@
   //
   assert(x->Opcode() == Op_Loop, "regular loops only");
   C->print_method("Before CountedLoop", 3);
+
+  Node *hook = new (C, 6) Node(6);
+
+  if (LoopLimitCheck) {
+
+  // ===================================================
+  // Generate loop limit check to avoid integer overflow
+  // in cases like next (cyclic loops):
+  //
+  // for (i=0; i <= max_jint; i++) {}
+  // for (i=0; i <  max_jint; i+=2) {}
+  //
+  //
+  // Limit check predicate depends on the loop test:
+  //
+  // for(;i != limit; i++)       --> limit <= (max_jint)
+  // for(;i <  limit; i+=stride) --> limit <= (max_jint - stride + 1)
+  // for(;i <= limit; i+=stride) --> limit <= (max_jint - stride    )
+  //
+
+  // Check if limit is excluded to do more precise int overflow check.
+  bool incl_limit = (bt == BoolTest::le || bt == BoolTest::ge);
+  int stride_m  = stride_con - (incl_limit ? 0 : (stride_con > 0 ? 1 : -1));
+
+  // If compare points directly to the phi we need to adjust
+  // the compare so that it points to the incr. Limit have
+  // to be adjusted to keep trip count the same and the
+  // adjusted limit should be checked for int overflow.
+  if (phi_incr != NULL) {
+    stride_m  += stride_con;
+  }
+
+  if (limit->is_Con()) {
+    int limit_con = limit->get_int();
+    if ((stride_con > 0 && limit_con > (max_jint - stride_m)) ||
+        (stride_con < 0 && limit_con < (min_jint - stride_m))) {
+      // Bailout: it could be integer overflow.
+      return false;
+    }
+  } else if ((stride_con > 0 && limit_t->_hi <= (max_jint - stride_m)) ||
+             (stride_con < 0 && limit_t->_lo >= (min_jint - stride_m))) {
+      // Limit's type may satisfy the condition, for example,
+      // when it is an array length.
+  } else {
+    // Generate loop's limit check.
+    // Loop limit check predicate should be near the loop.
+    ProjNode *limit_check_proj = find_predicate_insertion_point(init_control, Deoptimization::Reason_loop_limit_check);
+    if (!limit_check_proj) {
+      // The limit check predicate is not generated if this method trapped here before.
+#ifdef ASSERT
+      if (TraceLoopLimitCheck) {
+        tty->print("missing loop limit check:");
+        loop->dump_head();
+        x->dump(1);
+      }
+#endif
+      return false;
+    }
+
+    IfNode* check_iff = limit_check_proj->in(0)->as_If();
+    Node* cmp_limit;
+    Node* bol;
+
+    if (stride_con > 0) {
+      cmp_limit = new (C, 3) CmpINode(limit, _igvn.intcon(max_jint - stride_m));
+      bol = new (C, 2) BoolNode(cmp_limit, BoolTest::le);
+    } else {
+      cmp_limit = new (C, 3) CmpINode(limit, _igvn.intcon(min_jint - stride_m));
+      bol = new (C, 2) BoolNode(cmp_limit, BoolTest::ge);
+    }
+    cmp_limit = _igvn.register_new_node_with_optimizer(cmp_limit);
+    bol = _igvn.register_new_node_with_optimizer(bol);
+    set_subtree_ctrl(bol);
+
+    // Replace condition in original predicate but preserve Opaque node
+    // so that previous predicates could be found.
+    assert(check_iff->in(1)->Opcode() == Op_Conv2B &&
+           check_iff->in(1)->in(1)->Opcode() == Op_Opaque1, "");
+    Node* opq = check_iff->in(1)->in(1);
+    _igvn.hash_delete(opq);
+    opq->set_req(1, bol);
+    // Update ctrl.
+    set_ctrl(opq, check_iff->in(0));
+    set_ctrl(check_iff->in(1), check_iff->in(0));
+
 #ifndef PRODUCT
-  if (TraceLoopOpts) {
-    tty->print("Counted      ");
-    loop->dump_head();
+    // report that the loop predication has been actually performed
+    // for this loop
+    if (TraceLoopLimitCheck) {
+      tty->print_cr("Counted Loop Limit Check generated:");
+      debug_only( bol->dump(2); )
+    }
+#endif
   }
-#endif
+
+  if (phi_incr != NULL) {
+    // If compare points directly to the phi we need to adjust
+    // the compare so that it points to the incr. Limit have
+    // to be adjusted to keep trip count the same and we
+    // should avoid int overflow.
+    //
+    //   i = init; do {} while(i++ < limit);
+    // is converted to
+    //   i = init; do {} while(++i < limit+1);
+    //
+    limit = gvn->transform(new (C, 3) AddINode(limit, stride));
+  }
+
+  // Now we need to canonicalize loop condition.
+  if (bt == BoolTest::ne) {
+    assert(stride_con == 1 || stride_con == -1, "simple increment only");
+    bt = (stride_con > 0) ? BoolTest::lt : BoolTest::gt;
+  }
+
+  if (incl_limit) {
+    // The limit check guaranties that 'limit <= (max_jint - stride)' so
+    // we can convert 'i <= limit' to 'i < limit+1' since stride != 0.
+    //
+    Node* one = (stride_con > 0) ? gvn->intcon( 1) : gvn->intcon(-1);
+    limit = gvn->transform(new (C, 3) AddINode(limit, one));
+    if (bt == BoolTest::le)
+      bt = BoolTest::lt;
+    else if (bt == BoolTest::ge)
+      bt = BoolTest::gt;
+    else
+      ShouldNotReachHere();
+  }
+  set_subtree_ctrl( limit );
+
+  } else { // LoopLimitCheck
+
   // If compare points to incr, we are ok.  Otherwise the compare
   // can directly point to the phi; in this case adjust the compare so that
   // it points to the incr by adjusting the limit.
@@ -359,7 +485,6 @@
   Node *one_m = gvn->intcon(-1);
 
   Node *trip_count = NULL;
-  Node *hook = new (C, 6) Node(6);
   switch( bt ) {
   case BoolTest::eq:
     ShouldNotReachHere();
@@ -441,6 +566,8 @@
   limit = gvn->transform(new (C, 3) AddINode(span,init_trip));
   set_subtree_ctrl( limit );
 
+  } // LoopLimitCheck
+
   // Check for SafePoint on backedge and remove
   Node *sfpt = x->in(LoopNode::LoopBackControl);
   if (sfpt->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt)) {
@@ -531,7 +658,7 @@
 
   // Check for immediately preceding SafePoint and remove
   Node *sfpt2 = le->in(0);
-  if( sfpt2->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt2))
+  if (sfpt2->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt2))
     lazy_replace( sfpt2, sfpt2->in(TypeFunc::Control));
 
   // Free up intermediate goo
@@ -541,12 +668,56 @@
   assert(l->is_valid_counted_loop(), "counted loop shape is messed up");
   assert(l == loop->_head && l->phi() == phi && l->loopexit() == lex, "" );
 #endif
+#ifndef PRODUCT
+  if (TraceLoopOpts) {
+    tty->print("Counted      ");
+    loop->dump_head();
+  }
+#endif
 
   C->print_method("After CountedLoop", 3);
 
   return true;
 }
 
+//----------------------exact_limit-------------------------------------------
+Node* PhaseIdealLoop::exact_limit( IdealLoopTree *loop ) {
+  assert(loop->_head->is_CountedLoop(), "");
+  CountedLoopNode *cl = loop->_head->as_CountedLoop();
+
+  if (!LoopLimitCheck || ABS(cl->stride_con()) == 1 ||
+      cl->limit()->Opcode() == Op_LoopLimit) {
+    // Old code has exact limit (it could be incorrect in case of int overflow).
+    // Loop limit is exact with stride == 1. And loop may already have exact limit.
+    return cl->limit();
+  }
+  Node *limit = NULL;
+#ifdef ASSERT
+  BoolTest::mask bt = cl->loopexit()->test_trip();
+  assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected");
+#endif
+  if (cl->has_exact_trip_count()) {
+    // Simple case: loop has constant boundaries.
+    // Use longs to avoid integer overflow.
+    int stride_con = cl->stride_con();
+    long  init_con = cl->init_trip()->get_int();
+    long limit_con = cl->limit()->get_int();
+    julong trip_cnt = cl->trip_count();
+    long final_con = init_con + trip_cnt*stride_con;
+    final_con -= stride_con;
+    int final_int = (int)final_con;
+    // The final value should be in integer range since the loop
+    // is counted and the limit was checked for overflow.
+    assert(final_con == (long)final_int, "final value should be integer");
+    limit = _igvn.intcon(final_int);
+  } else {
+    // Create new LoopLimit node to get exact limit (final iv value).
+    limit = new (C, 4) LoopLimitNode(C, cl->init_trip(), cl->limit(), cl->stride());
+    register_new_node(limit, cl->in(LoopNode::EntryControl));
+  }
+  assert(limit != NULL, "sanity");
+  return limit;
+}
 
 //------------------------------Ideal------------------------------------------
 // Return a node which is more "ideal" than the current node.
@@ -572,14 +743,12 @@
 #ifndef PRODUCT
 void CountedLoopNode::dump_spec(outputStream *st) const {
   LoopNode::dump_spec(st);
-  if( stride_is_con() ) {
+  if (stride_is_con()) {
     st->print("stride: %d ",stride_con());
-  } else {
-    st->print("stride: not constant ");
   }
-  if( is_pre_loop () ) st->print("pre of N%d" , _main_idx );
-  if( is_main_loop() ) st->print("main of N%d", _idx );
-  if( is_post_loop() ) st->print("post of N%d", _main_idx );
+  if (is_pre_loop ()) st->print("pre of N%d" , _main_idx);
+  if (is_main_loop()) st->print("main of N%d", _idx);
+  if (is_post_loop()) st->print("post of N%d", _main_idx);
 }
 #endif
 
@@ -588,7 +757,130 @@
   return stride()->bottom_type()->is_int()->get_con();
 }
 
-
+//=============================================================================
+//------------------------------Value-----------------------------------------
+const Type *LoopLimitNode::Value( PhaseTransform *phase ) const {
+  const Type* init_t   = phase->type(in(Init));
+  const Type* limit_t  = phase->type(in(Limit));
+  const Type* stride_t = phase->type(in(Stride));
+  // Either input is TOP ==> the result is TOP
+  if (init_t   == Type::TOP) return Type::TOP;
+  if (limit_t  == Type::TOP) return Type::TOP;
+  if (stride_t == Type::TOP) return Type::TOP;
+
+  int stride_con = stride_t->is_int()->get_con();
+  if (stride_con == 1)
+    return NULL;  // Identity
+
+  if (init_t->is_int()->is_con() && limit_t->is_int()->is_con()) {
+    // Use longs to avoid integer overflow.
+    long init_con   =  init_t->is_int()->get_con();
+    long limit_con  = limit_t->is_int()->get_con();
+    int  stride_m   = stride_con - (stride_con > 0 ? 1 : -1);
+    long trip_count = (limit_con - init_con + stride_m)/stride_con;
+    long final_con  = init_con + stride_con*trip_count;
+    int final_int = (int)final_con;
+    // The final value should be in integer range since the loop
+    // is counted and the limit was checked for overflow.
+    assert(final_con == (long)final_int, "final value should be integer");
+    return TypeInt::make(final_int);
+  }
+
+  return bottom_type(); // TypeInt::INT
+}
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node.
+Node *LoopLimitNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+  if (phase->type(in(Init))   == Type::TOP ||
+      phase->type(in(Limit))  == Type::TOP ||
+      phase->type(in(Stride)) == Type::TOP)
+    return NULL;  // Dead
+
+  int stride_con = phase->type(in(Stride))->is_int()->get_con();
+  if (stride_con == 1)
+    return NULL;  // Identity
+
+  if (in(Init)->is_Con() && in(Limit)->is_Con())
+    return NULL;  // Value
+
+  // Delay following optimizations until all loop optimizations
+  // done to keep Ideal graph simple.
+  if (!can_reshape || phase->C->major_progress())
+    return NULL;
+
+  const TypeInt* init_t  = phase->type(in(Init) )->is_int();
+  const TypeInt* limit_t = phase->type(in(Limit))->is_int();
+  int stride_p;
+  long lim, ini;
+  julong max;
+  if (stride_con > 0) {
+    stride_p = stride_con;
+    lim = limit_t->_hi;
+    ini = init_t->_lo;
+    max = (julong)max_jint;
+  } else {
+    stride_p = -stride_con;
+    lim = init_t->_hi;
+    ini = limit_t->_lo;
+    max = (julong)min_jint;
+  }
+  julong range = lim - ini + stride_p;
+  if (range <= max) {
+    // Convert to integer expression if it is not overflow.
+    Node* stride_m = phase->intcon(stride_con - (stride_con > 0 ? 1 : -1));
+    Node *range = phase->transform(new (phase->C, 3) SubINode(in(Limit), in(Init)));
+    Node *bias  = phase->transform(new (phase->C, 3) AddINode(range, stride_m));
+    Node *trip  = phase->transform(new (phase->C, 3) DivINode(0, bias, in(Stride)));
+    Node *span  = phase->transform(new (phase->C, 3) MulINode(trip, in(Stride)));
+    return new (phase->C, 3) AddINode(span, in(Init)); // exact limit
+  }
+
+  if (is_power_of_2(stride_p) ||                // divisor is 2^n
+      !Matcher::has_match_rule(Op_LoopLimit)) { // or no specialized Mach node?
+    // Convert to long expression to avoid integer overflow
+    // and let igvn optimizer convert this division.
+    //
+    Node*   init   = phase->transform( new (phase->C, 2) ConvI2LNode(in(Init)));
+    Node*  limit   = phase->transform( new (phase->C, 2) ConvI2LNode(in(Limit)));
+    Node* stride   = phase->longcon(stride_con);
+    Node* stride_m = phase->longcon(stride_con - (stride_con > 0 ? 1 : -1));
+
+    Node *range = phase->transform(new (phase->C, 3) SubLNode(limit, init));
+    Node *bias  = phase->transform(new (phase->C, 3) AddLNode(range, stride_m));
+    Node *span;
+    if (stride_con > 0 && is_power_of_2(stride_p)) {
+      // bias >= 0 if stride >0, so if stride is 2^n we can use &(-stride)
+      // and avoid generating rounding for division. Zero trip guard should
+      // guarantee that init < limit but sometimes the guard is missing and
+      // we can get situation when init > limit. Note, for the empty loop
+      // optimization zero trip guard is generated explicitly which leaves
+      // only RCE predicate where exact limit is used and the predicate
+      // will simply fail forcing recompilation.
+      Node* neg_stride   = phase->longcon(-stride_con);
+      span = phase->transform(new (phase->C, 3) AndLNode(bias, neg_stride));
+    } else {
+      Node *trip  = phase->transform(new (phase->C, 3) DivLNode(0, bias, stride));
+      span = phase->transform(new (phase->C, 3) MulLNode(trip, stride));
+    }
+    // Convert back to int
+    Node *span_int = phase->transform(new (phase->C, 2) ConvL2INode(span));
+    return new (phase->C, 3) AddINode(span_int, in(Init)); // exact limit
+  }
+
+  return NULL;    // No progress
+}
+
+//------------------------------Identity---------------------------------------
+// If stride == 1 return limit node.
+Node *LoopLimitNode::Identity( PhaseTransform *phase ) {
+  int stride_con = phase->type(in(Stride))->is_int()->get_con();
+  if (stride_con == 1 || stride_con == -1)
+    return in(Limit);
+  return this;
+}
+
+//=============================================================================
 //----------------------match_incr_with_optional_truncation--------------------
 // Match increment with optional truncation:
 // CHAR: (i+1)&0x7fff, BYTE: ((i+1)<<8)>>8, or SHORT: ((i+1)<<16)>>16
@@ -870,7 +1162,7 @@
   outer = igvn.register_new_node_with_optimizer(outer, _head);
   phase->set_created_loop_node();
 
-  Node* pred = phase->clone_loop_predicates(ctl, outer);
+  Node* pred = phase->clone_loop_predicates(ctl, outer, true);
   // Outermost loop falls into '_head' loop
   _head->set_req(LoopNode::EntryControl, pred);
   _head->del_req(outer_idx);
@@ -1440,9 +1732,16 @@
     tty->print("  ");
   tty->print("Loop: N%d/N%d ",_head->_idx,_tail->_idx);
   if (_irreducible) tty->print(" IRREDUCIBLE");
+  Node* entry = _head->in(LoopNode::EntryControl);
+  if (LoopLimitCheck) {
+    Node* predicate = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
+    if (predicate != NULL ) {
+      tty->print(" limit_check");
+      entry = entry->in(0)->in(0);
+    }
+  }
   if (UseLoopPredicate) {
-    Node* entry = PhaseIdealLoop::find_predicate_insertion_point(_head->in(LoopNode::EntryControl),
-                                                                 Deoptimization::Reason_predicate);
+    entry = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
     if (entry != NULL) {
       tty->print(" predicated");
     }
@@ -1528,10 +1827,15 @@
       !loop->tail()->is_top()) {
     LoopNode* lpn = loop->_head->as_Loop();
     Node* entry = lpn->in(LoopNode::EntryControl);
-    Node* predicate_proj = find_predicate(entry);
+    Node* predicate_proj = find_predicate(entry); // loop_limit_check first
     if (predicate_proj != NULL ) { // right pattern that can be used by loop predication
       assert(entry->in(0)->in(1)->in(1)->Opcode() == Op_Opaque1, "must be");
       useful_predicates.push(entry->in(0)->in(1)->in(1)); // good one
+      entry = entry->in(0)->in(0);
+    }
+    predicate_proj = find_predicate(entry); // Predicate
+    if (predicate_proj != NULL ) {
+      useful_predicates.push(entry->in(0)->in(1)->in(1)); // good one
     }
   }
 
@@ -1542,6 +1846,8 @@
 
 //------------------------eliminate_useless_predicates-----------------------------
 // Eliminate all inserted predicates if they could not be used by loop predication.
+// Note: it will also eliminates loop limits check predicate since it also uses
+// Opaque1 node (see Parse::add_predicate()).
 void PhaseIdealLoop::eliminate_useless_predicates() {
   if (C->predicate_count() == 0)
     return; // no predicate left
@@ -1731,7 +2037,7 @@
   // Some parser-inserted loop predicates could never be used by loop
   // predication or they were moved away from loop during some optimizations.
   // For example, peeling. Eliminate them before next loop optimizations.
-  if (UseLoopPredicate) {
+  if (UseLoopPredicate || LoopLimitCheck) {
     eliminate_useless_predicates();
   }
 
--- a/hotspot/src/share/vm/opto/loopnode.hpp	Wed May 04 12:00:14 2011 -0700
+++ b/hotspot/src/share/vm/opto/loopnode.hpp	Thu May 05 22:28:31 2011 -0700
@@ -289,6 +289,28 @@
 inline Node *CountedLoopNode::incr() const { return loopexit() ? loopexit()->incr() : NULL; }
 inline Node *CountedLoopNode::phi() const { return loopexit() ? loopexit()->phi() : NULL; }
 
+//------------------------------LoopLimitNode-----------------------------
+// Counted Loop limit node which represents exact final iterator value:
+// trip_count = (limit - init_trip + stride - 1)/stride
+// final_value= trip_count * stride + init_trip.
+// Use HW instructions to calculate it when it can overflow in integer.
+// Note, final_value should fit into integer since counted loop has
+// limit check: limit <= max_int-stride.
+class LoopLimitNode : public Node {
+  enum { Init=1, Limit=2, Stride=3 };
+ public:
+  LoopLimitNode( Compile* C, Node *init