OpenJDK / valhalla / valhalla
changeset 9669:1a8a68098051
Merge
author | lana |
---|---|
date | Sat, 14 May 2011 15:21:38 -0700 |
parents | 13c8e69958e6 04f88d98efe3 |
children | 51a9096f6799 |
files | hotspot/make/linux/makefiles/cscope.make hotspot/make/solaris/makefiles/cscope.make jdk/src/share/classes/sun/security/util/SignatureFileManifest.java jdk/test/java/beans/XMLEncoder/java_io_File.java langtools/test/tools/javac/generics/diamond/7030150/Neg01.java langtools/test/tools/javac/generics/diamond/7030150/Neg01.out langtools/test/tools/javac/generics/diamond/7030150/Neg02.java langtools/test/tools/javac/generics/diamond/7030150/Neg02.out langtools/test/tools/javac/generics/diamond/7030150/Neg03.java langtools/test/tools/javac/generics/diamond/7030150/Neg03.out langtools/test/tools/javac/generics/diamond/7030150/Pos01.java langtools/test/tools/javac/generics/diamond/7030150/Pos02.java |
diffstat | 847 files changed, 37165 insertions(+), 16542 deletions(-) [+] |
line wrap: on
line diff
--- a/.hgignore Wed May 11 08:02:44 2011 +0900 +++ b/.hgignore Sat May 14 15:21:38 2011 -0700 @@ -2,3 +2,4 @@ ^dist/ /nbproject/private/ ^webrev +^.hgtip
--- a/.hgtags Wed May 11 08:02:44 2011 +0900 +++ b/.hgtags Sat May 14 15:21:38 2011 -0700 @@ -114,3 +114,5 @@ d1cf7d4ee16c341f5b8c7e7f1d68a8c412b6c693 jdk7-b137 62b8e328f8c8c66c14b0713222116f2add473f3f jdk7-b138 955488f34ca418f6cdab843d61c20d2c615637d9 jdk7-b139 +f4298bc3f4b6baa315643be06966f09684290068 jdk7-b140 +5d86d0c7692e8f4a58d430d68c03594e2d3403b3 jdk7-b141
--- a/.hgtags-top-repo Wed May 11 08:02:44 2011 +0900 +++ b/.hgtags-top-repo Sat May 14 15:21:38 2011 -0700 @@ -114,3 +114,5 @@ 7654afc6a29e43cb0a1343ce7f1287bf690d5e5f jdk7-b137 fc47c97bbbd91b1f774d855c48a7e285eb1a351a jdk7-b138 7ed6d0b9aaa12320832a7ddadb88d6d8d0dda4c1 jdk7-b139 +dcfe74f1c6553c556e7d361c30b0b614eb5e40f6 jdk7-b140 +c6569c5585851dfd39b8de8e021c3c312f51af12 jdk7-b141
--- a/Makefile Wed May 11 08:02:44 2011 +0900 +++ b/Makefile Sat May 14 15:21:38 2011 -0700 @@ -97,7 +97,7 @@ endef # Generic build of basic repo series -generic_build_repo_series:: +generic_build_repo_series:: $(SOURCE_TIPS) $(MKDIR) -p $(OUTPUTDIR) $(MKDIR) -p $(OUTPUTDIR)/j2sdk-image @$(call StartTimer) @@ -243,6 +243,14 @@ debug_build:: build_debug_image fastdebug_build:: build_fastdebug_image +# The source tips are stored with the relative path to the repo. +# This file will be used when constructing the jdk image. +source_tips: $(SOURCE_TIPS) + $(CAT) $< +$(SOURCE_TIPS): FRC + @$(prep-target) + @$(call GetSourceTips) + clobber:: REPORT_BUILD_TIMES= clobber:: $(RM) -r $(OUTPUTDIR)/*
--- a/corba/.hgignore Wed May 11 08:02:44 2011 +0900 +++ b/corba/.hgignore Sat May 14 15:21:38 2011 -0700 @@ -1,3 +1,4 @@ ^build/ ^dist/ /nbproject/private/ +^.hgtip
--- a/corba/.hgtags Wed May 11 08:02:44 2011 +0900 +++ b/corba/.hgtags Sat May 14 15:21:38 2011 -0700 @@ -114,3 +114,5 @@ a66c01d8bf895261715955df0b95545c000ed6a8 jdk7-b137 78d8cf04697e9df54f7f11e195b7da29b8e345a2 jdk7-b138 60b074ec6fcf5cdf9efce22fdfb02326ed8fa2d3 jdk7-b139 +cdf5d19ec142424489549025e9c42e51f32cf688 jdk7-b140 +a58635cdd921bafef353f4864184a0481353197b jdk7-b141
--- a/corba/src/share/classes/com/sun/corba/se/impl/orbutil/resources/sunorb_pt_BR.properties Wed May 11 08:02:44 2011 +0900 +++ b/corba/src/share/classes/com/sun/corba/se/impl/orbutil/resources/sunorb_pt_BR.properties Sat May 14 15:21:38 2011 -0700 @@ -23,9 +23,9 @@ # questions. # -orbd.usage=Uso: {0} <options> \n\nem que <options> inclui:\n -port porta de ativa\u00E7\u00E3o na qual o ORBD deve ser iniciado, default 1049 (opcional)\n -defaultdb diret\u00F3rio dos arquivos ORBD, default "./orb.db" (opcional)\n -serverid id do servidor para ORBD, default 1 (opcional)\n -ORBInitialPort porta inicial (necess\u00E1rio)\n -ORBInitialHost nome de host inicial (necess\u00E1rio)\n +orbd.usage=Uso: {0} <op\u00E7\u00F5es> \n\nem que <op\u00E7\u00F5es> inclui:\n -port porta de ativa\u00E7\u00E3o na qual o ORBD deve ser iniciado, default 1049 (opcional)\n -defaultdb diret\u00F3rio dos arquivos ORBD, default "./orb.db" (opcional)\n -serverid id do servidor para ORBD, default 1 (opcional)\n -ORBInitialPort porta inicial (obrigat\u00F3rio)\n -ORBInitialHost nome de host inicial (obrigat\u00F3rio)\n -servertool.usage=Uso: {0} <options> \n\nem que <options> inclui:\n -ORBInitialPort porta inicial (necess\u00E1rio)\n -ORBInitialHost nome de host inicial (necess\u00E1rio)\n +servertool.usage=Uso: {0} <op\u00E7\u00F5es> \n\nem que <op\u00E7\u00F5es> inclui:\n -ORBInitialPort porta inicial (obrigat\u00F3rio)\n -ORBInitialHost nome de host inicial (obrigat\u00F3rio)\n servertool.banner=\n\nBem-vindo \u00E0 Ferramenta de Servidor IDL Java \ninsira os comandos no prompt \n servertool.shorthelp=\n\n\tComandos Dispon\u00EDveis: \n\t------------------- \n servertool.baddef=Defini\u00E7\u00E3o do servidor inv\u00E1lida: {0} @@ -40,23 +40,23 @@ servertool.vmargs=\tvmargs - {0} servertool.serverid=\tserver id - {0} servertool.servernotrunning=\to servidor n\u00E3o est\u00E1 em execu\u00E7\u00E3o. 
-servertool.register=\n\n\tregister -server <server class name> \n\t -applicationName <alternate server name> \n\t -classpath <classpath to server> \n\t -args <args to server> \n\t -vmargs <args to server Java VM>\n +servertool.register=\n\n\tregister -server <nome da classe do servidor> \n\t -applicationName <nome do servidor alternativo> \n\t -classpath <classpath para o servidor> \n\t -args <args para o servidor> \n\t -vmargs <args para a VM Java do servidor>\n servertool.register1=registra um servidor ativ\u00E1vel servertool.register2=\tservidor registrado (serverid = {0}). servertool.register3=\tservidor registrado, mas em espera (serverid = {0}). servertool.register4=\tservidor j\u00E1 registrado (serverid = {0}). -servertool.unregister=\n\tunregister [ -serverid <server id> | -applicationName <name> ] \n +servertool.unregister=\n\tunregister [ -serverid <id do servidor> | -applicationName <nome> ] \n servertool.unregister1=cancela o registro de um servidor registrado servertool.unregister2=\tservidor n\u00E3o registrado. 
-servertool.locate=\n\tlocate [ -serverid <server id> | -applicationName <name> ] [ <-endpointType <endpointType> ] \n +servertool.locate=\n\tlocate [ -serverid <id do servidor> | -applicationName <nome> ] [ <-endpointType <endpointType> ] \n servertool.locate1=localiza portas de tipo espec\u00EDfico para um servidor registrado servertool.locate2=\n\n\tNome do Host {0} \n\n\t\tPorta\t\tTipo de Porta\t\tId do ORB\n\t\t----\t\t---------\t\t------\n -servertool.locateorb=\n\tlocateperorb [ -serverid <server id> | -applicationName <name> ] [ -orbid <ORB name> ]\n +servertool.locateorb=\n\tlocateperorb [ -serverid <id do servidor> | -applicationName <nome> ] [ -orbid <nome ORB> ]\n servertool.locateorb1=localiza portas para um orb espec\u00EDfico de servidor registrado servertool.locateorb2=\n\n\tNome do Host {0} \n\n\t\tPorta\t\tTipo de Porta\t\tId do ORB\n\t\t----\t\t--------\t\t------\n -servertool.getserverid=\n\tgetserverid [ -applicationName <name> ] \n +servertool.getserverid=\n\tgetserverid [ -applicationName <nome> ] \n servertool.getserverid1=retorna o id do servidor de um applicationName servertool.getserverid2=\tID do Servidor de applicationName {0} \u00E9 {1} @@ -69,33 +69,33 @@ servertool.listappnames1=lista os applicationNames atualmente definidos servertool.listappnames2=applicationNames do servidor definidos atualmente: -servertool.shutdown=\n\tshutdown [ -serverid <server id> | -applicationName <name> ]\n +servertool.shutdown=\n\tshutdown [ -serverid <id do servidor> | -applicationName <nome> ]\n servertool.shutdown1=faz shutdown de um servidor registrado servertool.shutdown2=\tshutdown do servidor bem-sucedido. -servertool.startserver=\n\tstartup [ -serverid <server id> | -applicationName <name> ]\n +servertool.startserver=\n\tstartup [ -serverid <id do servidor> | -applicationName <nome> ]\n servertool.startserver1=inicia um servidor registrado servertool.startserver2=\tservidor iniciado com \u00EAxito. 
servertool.quit=\n\tquit\n servertool.quit1=sai desta ferramenta -servertool.help=\thelp\n\tOR\n\thelp <command name>\n +servertool.help=\thelp\n\tOR\n\thelp <nome do comando>\n servertool.help1=obt\u00E9m ajuda -servertool.orbidmap=\tUso: orblist [ -serverid <server id> | -applicationName <name> ]\n +servertool.orbidmap=\tUso: orblist [ -serverid <id do servidor> | -applicationName <nome> ]\n servertool.orbidmap1=lista de nomes de orb e seus mapeamentos servertool.orbidmap2=\n\tId de ORB\t\tNome de ORB\n\t------\t\t--------\n pnameserv.success=NameServer Persistente Iniciado com \u00CAxito -bootstrap.usage=Uso: {0} <options> \n\nem que <options> inclui:\n -ORBInitialPort porta inicial (necess\u00E1rio)\n -InitialServicesFile arquivo que cont\u00E9m a lista de servi\u00E7os iniciais (necess\u00E1rio)\n +bootstrap.usage=Uso: {0} <op\u00E7\u00F5es> \n\nem que <op\u00E7\u00F5es> inclui:\n -ORBInitialPort porta inicial (obrigat\u00F3rio)\n -InitialServicesFile arquivo que cont\u00E9m a lista de servi\u00E7os iniciais (obrigat\u00F3rio)\n bootstrap.success=definindo porta para {0} e lendo servi\u00E7os de {1} bootstrap.filenotreadable=o arquivo {0} n\u00E3o \u00E9 leg\u00EDvel bootstrap.filenotfound=arquivo {0} n\u00E3o encontrado bootstrap.exception=exce\u00E7\u00E3o capturada ao salvar as propriedades no Arquivo {0}: exce\u00E7\u00E3o {1} tnameserv.exception=uma exce\u00E7\u00E3o capturada ao iniciar o servi\u00E7o de inicializa\u00E7\u00E3o na porta {0} -tnameserv.usage=tente usar outra porta com os argumentos de linha de comandos -ORBInitialPort <portno> +tnameserv.usage=tente usar outra porta com os argumentos de linha de comandos -ORBInitialPort <n\u00BA da porta> tnameserv.invalidhostoption=ORBInitialHost n\u00E3o \u00E9 uma op\u00E7\u00E3o v\u00E1lida para NameService tnameserv.orbinitialport0=ORBInitialPort 0 n\u00E3o \u00E9 uma op\u00E7\u00E3o v\u00E1lida para NameService tnameserv.hs1=Contexto de Nomea\u00E7\u00E3o Inicial:\n{0}
--- a/hotspot/.hgignore Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/.hgignore Sat May 14 15:21:38 2011 -0700 @@ -5,3 +5,4 @@ ^src/share/tools/IdealGraphVisualizer/[a-zA-Z0-9]*/build/ ^src/share/tools/IdealGraphVisualizer/build/ ^src/share/tools/IdealGraphVisualizer/dist/ +^.hgtip
--- a/hotspot/.hgtags Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/.hgtags Sat May 14 15:21:38 2011 -0700 @@ -166,3 +166,7 @@ 0930dc920c185afbf40fed9a655290b8e5b16783 hs21-b08 611e19a16519d6fb5deea9ab565336e6e6ee475d jdk7-b139 611e19a16519d6fb5deea9ab565336e6e6ee475d hs21-b09 +d283b82966712b353fa307845a1316da42a355f4 jdk7-b140 +d283b82966712b353fa307845a1316da42a355f4 hs21-b10 +5d07913abd59261c77f24cc04a759cb75d804099 jdk7-b141 +3aea9e9feb073f5500e031be6186666bcae89aa2 hs21-b11
--- a/hotspot/agent/make/Makefile Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/agent/make/Makefile Sat May 14 15:21:38 2011 -0700 @@ -257,7 +257,7 @@ all: filelist @mkdir -p $(OUTPUT_DIR) @echo "$(SA_BUILD_VERSION_PROP)" > $(SA_PROPERTIES) - $(JAVAC) -source 1.4 -classpath $(CLASSPATH) -deprecation -sourcepath $(SRC_DIR) -g -d $(OUTPUT_DIR) @filelist + $(JAVAC) -classpath $(CLASSPATH) -deprecation -sourcepath $(SRC_DIR) -g -d $(OUTPUT_DIR) @filelist $(RMIC) -classpath $(OUTPUT_DIR) -d $(OUTPUT_DIR) sun.jvm.hotspot.debugger.remote.RemoteDebuggerServer rm -f $(OUTPUT_DIR)/sun/jvm/hotspot/utilities/soql/sa.js cp $(SRC_DIR)/sun/jvm/hotspot/utilities/soql/sa.js $(OUTPUT_DIR)/sun/jvm/hotspot/utilities/soql @@ -269,7 +269,7 @@ allprof: filelist @mkdir -p $(OUTPUT_DIR) @echo "$(SA_BUILD_VERSION_PROP)" > $(SA_PROPERTIES) - $(JAVAC) -source 1.4 -J-Xprof -classpath $(CLASSPATH) -deprecation -sourcepath $(SRC_DIR) -g -d $(OUTPUT_DIR) @filelist + $(JAVAC) -J-Xprof -classpath $(CLASSPATH) -deprecation -sourcepath $(SRC_DIR) -g -d $(OUTPUT_DIR) @filelist $(RMIC) -classpath $(OUTPUT_DIR) -d $(OUTPUT_DIR) sun.jvm.hotspot.debugger.remote.RemoteDebuggerServer rm -f $(OUTPUT_DIR)/sun/jvm/hotspot/utilities/soql/sa.js cp $(SRC_DIR)/sun/jvm/hotspot/utilities/soql/sa.js $(OUTPUT_DIR)/sun/jvm/hotspot/utilities/soql
--- a/hotspot/agent/src/os/solaris/proc/libproc.h Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/agent/src/os/solaris/proc/libproc.h Sat May 14 15:21:38 2011 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -420,7 +420,22 @@ /* * Stack frame iteration interface. */ +#ifdef SOLARIS_11_B159_OR_LATER +/* building on Nevada-B159 or later so define the new callback */ +typedef int proc_stack_f( + void *, /* the cookie given to Pstack_iter() */ + const prgregset_t, /* the frame's registers */ + uint_t, /* argc for the frame's function */ + const long *, /* argv for the frame's function */ + int, /* bitwise flags describing the frame (see below) */ + int); /* a signal number */ + +#define PR_SIGNAL_FRAME 1 /* called by a signal handler */ +#define PR_FOUND_SIGNAL 2 /* we found the corresponding signal number */ +#else +/* building on Nevada-B158 or earlier so define the old callback */ typedef int proc_stack_f(void *, const prgregset_t, uint_t, const long *); +#endif extern int Pstack_iter(struct ps_prochandle *, const prgregset_t, proc_stack_f *, void *);
--- a/hotspot/agent/src/os/solaris/proc/salibproc.h Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/agent/src/os/solaris/proc/salibproc.h Sat May 14 15:21:38 2011 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2005, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -101,7 +101,23 @@ /* * Stack frame iteration interface. */ +#ifdef SOLARIS_11_B159_OR_LATER +/* building on Nevada-B159 or later so define the new callback */ +typedef int proc_stack_f( + void *, /* the cookie given to Pstack_iter() */ + const prgregset_t, /* the frame's registers */ + uint_t, /* argc for the frame's function */ + const long *, /* argv for the frame's function */ + int, /* bitwise flags describing the frame (see below) */ + int); /* a signal number */ + +#define PR_SIGNAL_FRAME 1 /* called by a signal handler */ +#define PR_FOUND_SIGNAL 2 /* we found the corresponding signal number */ +#else +/* building on Nevada-B158 or earlier so define the old callback */ typedef int proc_stack_f(void *, const prgregset_t, uint_t, const long *); +#endif + extern int Pstack_iter(struct ps_prochandle *, const prgregset_t, proc_stack_f *, void *);
--- a/hotspot/agent/src/os/solaris/proc/saproc.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/agent/src/os/solaris/proc/saproc.cpp Sat May 14 15:21:38 2011 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,6 +24,9 @@ #include "salibproc.h" #include "sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal.h" +#ifndef SOLARIS_11_B159_OR_LATER +#include <sys/utsname.h> +#endif #include <thread_db.h> #include <strings.h> #include <limits.h> @@ -40,8 +43,22 @@ #define SYMBOL_BUF_SIZE 256 #define ERR_MSG_SIZE (PATH_MAX + 256) -// debug mode +// debug modes static int _libsaproc_debug = 0; +#ifndef SOLARIS_11_B159_OR_LATER +static bool _Pstack_iter_debug = false; + +static void dprintf_2(const char* format,...) { + if (_Pstack_iter_debug) { + va_list alist; + + va_start(alist, format); + fputs("Pstack_iter DEBUG: ", stderr); + vfprintf(stderr, format, alist); + va_end(alist); + } +} +#endif // !SOLARIS_11_B159_OR_LATER static void print_debug(const char* format,...) { if (_libsaproc_debug) { @@ -450,6 +467,7 @@ return 0; } +// Pstack_iter() proc_stack_f callback prior to Nevada-B159 static int fill_cframe_list(void *cd, const prgregset_t regs, uint_t argc, const long *argv) { DebuggerWith2Objects* dbgo2 = (DebuggerWith2Objects*) cd; @@ -472,6 +490,14 @@ return 0; } +// Pstack_iter() proc_stack_f callback in Nevada-B159 or later +/*ARGSUSED*/ +static int +wrapper_fill_cframe_list(void *cd, const prgregset_t regs, uint_t argc, + const long *argv, int frame_flags, int sig) { + return(fill_cframe_list(cd, regs, argc, argv)); +} + // part of the class sharing workaround // FIXME: !!HACK ALERT!! 
@@ -970,6 +996,11 @@ TD_THR_ANY_STATE, TD_THR_LOWEST_PRIORITY, TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS); } +#ifndef SOLARIS_11_B159_OR_LATER +// building on Nevada-B158 or earlier so more hoops to jump through +static bool has_newer_Pstack_iter = false; // older version by default +#endif + /* * Class: sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal * Method: fillCFrameList0 @@ -997,7 +1028,24 @@ env->ReleaseLongArrayElements(regsArray, ptr, JNI_ABORT); CHECK_EXCEPTION_(0); - Pstack_iter((struct ps_prochandle*) p_ps_prochandle, gregs, fill_cframe_list, &dbgo2); + +#ifdef SOLARIS_11_B159_OR_LATER + // building on Nevada-B159 or later so use the new callback + Pstack_iter((struct ps_prochandle*) p_ps_prochandle, gregs, + wrapper_fill_cframe_list, &dbgo2); +#else + // building on Nevada-B158 or earlier so figure out which callback to use + + if (has_newer_Pstack_iter) { + // Since we're building on Nevada-B158 or earlier, we have to + // cast wrapper_fill_cframe_list to make the compiler happy. + Pstack_iter((struct ps_prochandle*) p_ps_prochandle, gregs, + (proc_stack_f *)wrapper_fill_cframe_list, &dbgo2); + } else { + Pstack_iter((struct ps_prochandle*) p_ps_prochandle, gregs, + fill_cframe_list, &dbgo2); + } +#endif // SOLARIS_11_B159_OR_LATER return dbgo2.obj; } @@ -1218,6 +1266,102 @@ return res; } +#ifndef SOLARIS_11_B159_OR_LATER +// Determine if the OS we're running on has the newer version +// of libproc's Pstack_iter. +// +// Set env var PSTACK_ITER_DEBUG=true to debug this logic. +// Set env var PSTACK_ITER_DEBUG_RELEASE to simulate a 'release' value. +// Set env var PSTACK_ITER_DEBUG_VERSION to simulate a 'version' value. 
+// +// frankenputer 'uname -r -v': 5.10 Generic_141445-09 +// jurassic 'uname -r -v': 5.11 snv_164 +// lonepeak 'uname -r -v': 5.11 snv_127 +// +static void set_has_newer_Pstack_iter(JNIEnv *env) { + static bool done_set = false; + + if (done_set) { + // already set has_newer_Pstack_iter + return; + } + + struct utsname name; + if (uname(&name) == -1) { + THROW_NEW_DEBUGGER_EXCEPTION("uname() failed!"); + } + dprintf_2("release='%s' version='%s'\n", name.release, name.version); + + if (_Pstack_iter_debug) { + char *override = getenv("PSTACK_ITER_DEBUG_RELEASE"); + if (override != NULL) { + strncpy(name.release, override, SYS_NMLN - 1); + name.release[SYS_NMLN - 2] = '\0'; + dprintf_2("overriding with release='%s'\n", name.release); + } + override = getenv("PSTACK_ITER_DEBUG_VERSION"); + if (override != NULL) { + strncpy(name.version, override, SYS_NMLN - 1); + name.version[SYS_NMLN - 2] = '\0'; + dprintf_2("overriding with version='%s'\n", name.version); + } + } + + // the major number corresponds to the old SunOS major number + int major = atoi(name.release); + if (major >= 6) { + dprintf_2("release is SunOS 6 or later\n"); + has_newer_Pstack_iter = true; + done_set = true; + return; + } + if (major < 5) { + dprintf_2("release is SunOS 4 or earlier\n"); + done_set = true; + return; + } + + // some SunOS 5.* build so now check for Solaris versions + char *dot = strchr(name.release, '.'); + int minor = 0; + if (dot != NULL) { + // release is major.minor format + *dot = NULL; + minor = atoi(dot + 1); + } + + if (minor <= 10) { + dprintf_2("release is Solaris 10 or earlier\n"); + done_set = true; + return; + } else if (minor >= 12) { + dprintf_2("release is Solaris 12 or later\n"); + has_newer_Pstack_iter = true; + done_set = true; + return; + } + + // some Solaris 11 build so now check for internal build numbers + if (strncmp(name.version, "snv_", 4) != 0) { + dprintf_2("release is Solaris 11 post-GA or later\n"); + has_newer_Pstack_iter = true; + done_set = true; + 
return; + } + + // version begins with "snv_" so a pre-GA build of Solaris 11 + int build = atoi(&name.version[4]); + if (build >= 159) { + dprintf_2("release is Nevada-B159 or later\n"); + has_newer_Pstack_iter = true; + } else { + dprintf_2("release is Nevada-B158 or earlier\n"); + } + + done_set = true; +} +#endif // !SOLARIS_11_B159_OR_LATER + /* * Class: sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal * Method: initIDs @@ -1237,6 +1381,14 @@ if (libproc_handle == 0) THROW_NEW_DEBUGGER_EXCEPTION("can't load libproc.so, if you are using Solaris 5.7 or below, copy libproc.so from 5.8!"); +#ifndef SOLARIS_11_B159_OR_LATER + _Pstack_iter_debug = getenv("PSTACK_ITER_DEBUG") != NULL; + + set_has_newer_Pstack_iter(env); + CHECK_EXCEPTION; + dprintf_2("has_newer_Pstack_iter=%d\n", has_newer_Pstack_iter); +#endif + p_ps_prochandle_ID = env->GetFieldID(clazz, "p_ps_prochandle", "J"); CHECK_EXCEPTION;
--- a/hotspot/make/altsrc.make Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/make/altsrc.make Sat May 14 15:21:38 2011 -0700 @@ -24,7 +24,8 @@ # This file defines variables and macros which are used in the makefiles to # allow distributions to augment or replace common hotspot code with -# distribution-specific source files. +# distribution-specific source files. This capability is disabled when +# an OPENJDK build is requested, unless HS_ALT_SRC_REL has been set externally. # Requires: GAMMADIR # Provides: @@ -33,14 +34,17 @@ HS_COMMON_SRC_REL=src -# This needs to be changed to a more generic location, but we keep it as this -# for now for compatibility -HS_ALT_SRC_REL=src/closed +ifneq ($(OPENJDK),true) + # This needs to be changed to a more generic location, but we keep it + # as this for now for compatibility + HS_ALT_SRC_REL=src/closed +else + HS_ALT_SRC_REL=NO_SUCH_PATH +endif HS_COMMON_SRC=$(GAMMADIR)/$(HS_COMMON_SRC_REL) HS_ALT_SRC=$(GAMMADIR)/$(HS_ALT_SRC_REL) - ## altsrc-equiv # # Convert a common source path to an alternative source path
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/make/cscope.make Sat May 14 15:21:38 2011 -0700 @@ -0,0 +1,141 @@ +# +# Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# +# + +# The cscope.out file is generated in the current directory. The old cscope.out +# file is *not* removed because cscope is smart enough to only build what has +# changed. cscope can be confused if files are renamed or removed, so it may be +# necessary to remove cscope.out (gmake cscope.clean) if a lot of reorganization +# has occurred. + +include $(GAMMADIR)/make/scm.make + +RM = rm -f +HG = hg +CS_TOP = $(GAMMADIR) + +CSDIRS = $(CS_TOP)/src $(CS_TOP)/make +CSINCS = $(CSDIRS:%=-I%) + +CSCOPE = cscope +CSCOPE_OUT = cscope.out +CSCOPE_FLAGS = -b + +# Allow .java files to be added from the environment (CSCLASSES=yes). 
+ifdef CSCLASSES +ADDCLASSES= -o -name '*.java' +endif + +# Adding CClassHeaders also pushes the file count of a full workspace up about +# 200 files (these files also don't exist in a new workspace, and thus will +# cause the recreation of the database as they get created, which might seem +# a little confusing). Thus allow these files to be added from the environment +# (CSHEADERS=yes). +ifndef CSHEADERS +RMCCHEADERS= -o -name CClassHeaders +endif + +# Ignore build products. +CS_PRUNE_GENERATED = -o -name '${OSNAME}_*_core' -o \ + -name '${OSNAME}_*_compiler?' + +# O/S-specific files for all systems are included by default. Set CS_OS to a +# space-separated list of identifiers to include only those systems. +ifdef CS_OS +CS_PRUNE_OS = $(patsubst %,-o -name '*%*',\ + $(filter-out ${CS_OS},linux macos solaris windows)) +endif + +# CPU-specific files for all processors are included by default. Set CS_CPU +# space-separated list identifiers to include only those CPUs. +ifdef CS_CPU +CS_PRUNE_CPU = $(patsubst %,-o -name '*%*',\ + $(filter-out ${CS_CPU},arm ppc sparc x86 zero)) +endif + +# What files should we include? A simple rule might be just those files under +# SCCS control, however this would miss files we create like the opcodes and +# CClassHeaders. The following attempts to find everything that is *useful*. +# (.del files are created by sccsrm, demo directories contain many .java files +# that probably aren't useful for development, and the pkgarchive may contain +# duplicates of files within the source hierarchy). + +# Directories to exclude. +CS_PRUNE_STD = $(SCM_DIRS) \ + -o -name '.del-*' \ + -o -name '*demo' \ + -o -name pkgarchive + +# Placeholder for user-defined excludes. +CS_PRUNE_EX = + +CS_PRUNE = $(CS_PRUNE_STD) \ + $(CS_PRUNE_OS) \ + $(CS_PRUNE_CPU) \ + $(CS_PRUNE_GENERATED) \ + $(CS_PRUNE_EX) \ + $(RMCCHEADERS) + +# File names to include. 
+CSFILENAMES = -name '*.[ch]pp' \ + -o -name '*.[Ccshlxy]' \ + $(CS_ADD_GENERATED) \ + -o -name '*.d' \ + -o -name '*.il' \ + -o -name '*.cc' \ + -o -name '*[Mm]akefile*' \ + -o -name '*.gmk' \ + -o -name '*.make' \ + -o -name '*.ad' \ + $(ADDCLASSES) + +.PHONY: cscope cscope.clean cscope.scratch TAGS.clean FORCE +.PRECIOUS: cscope.out + +cscope $(CSCOPE_OUT): cscope.files FORCE + $(CSCOPE) -f $(CSCOPE_OUT) $(CSCOPE_FLAGS) + +cscope.clean: + $(QUIETLY) $(RM) $(CSCOPE_OUT) cscope.files + +cscope.scratch: cscope.clean cscope + +# The raw list is reordered so cscope displays the most relevant files first. +cscope.files: + $(QUIETLY) \ + raw=cscope.$$$$; \ + find $(CSDIRS) -type d \( $(CS_PRUNE) \) -prune -o \ + -type f \( $(CSFILENAMES) \) -print > $$raw; \ + { \ + echo "$(CSINCS)"; \ + egrep -v "\.java|/make/" $$raw; \ + fgrep ".java" $$raw; \ + fgrep "/make/" $$raw; \ + } > $@; \ + rm -f $$raw + +TAGS: cscope.files FORCE + egrep -v '^-|^$$' $< | etags --members - + +TAGS.clean: + $(RM) TAGS
--- a/hotspot/make/hotspot_version Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/make/hotspot_version Sat May 14 15:21:38 2011 -0700 @@ -35,7 +35,7 @@ HS_MAJOR_VER=21 HS_MINOR_VER=0 -HS_BUILD_NUMBER=10 +HS_BUILD_NUMBER=12 JDK_MAJOR_VER=1 JDK_MINOR_VER=7
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/make/jdk6_hotspot_distro Sat May 14 15:21:38 2011 -0700 @@ -0,0 +1,32 @@ +# +# Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# + +# +# This file format must remain compatible with both +# GNU Makefile and Microsoft nmake formats. +# + +# Don't put quotes (fail windows build). +HOTSPOT_VM_DISTRO=Java HotSpot(TM) +COMPANY_NAME=Sun Microsystems, Inc. +PRODUCT_NAME=Java(TM) Platform SE
--- a/hotspot/make/linux/Makefile Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/make/linux/Makefile Sat May 14 15:21:38 2011 -0700 @@ -359,7 +359,7 @@ clean: clean_compiler2 clean_compiler1 clean_core clean_zero clean_shark clean_docs -include $(GAMMADIR)/make/$(OSNAME)/makefiles/cscope.make +include $(GAMMADIR)/make/cscope.make #-------------------------------------------------------------------------------
--- a/hotspot/make/linux/makefiles/cscope.make Wed May 11 08:02:44 2011 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,160 +0,0 @@ -# -# Copyright (c) 2005, 2008, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# This code is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License version 2 only, as -# published by the Free Software Foundation. -# -# This code is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# version 2 for more details (a copy is included in the LICENSE file that -# accompanied this code). -# -# You should have received a copy of the GNU General Public License version -# 2 along with this work; if not, write to the Free Software Foundation, -# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -# -# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -# or visit www.oracle.com if you need additional information or have any -# questions. -# -# - -# -# The cscope.out file is made in the current directory and spans the entire -# source tree. -# -# Things to note: -# 1. We use relative names for cscope. -# 2. We *don't* remove the old cscope.out file, because cscope is smart -# enough to only build what has changed. It can be confused, however, -# if files are renamed or removed, so it may be necessary to manually -# remove cscope.out if a lot of reorganization has occurred. -# - -include $(GAMMADIR)/make/scm.make - -NAWK = awk -RM = rm -f -HG = hg -CS_TOP = ../.. - -CSDIRS = $(CS_TOP)/src $(CS_TOP)/build -CSINCS = $(CSDIRS:%=-I%) - -CSCOPE = cscope -CSCOPE_FLAGS = -b - -# Allow .java files to be added from the environment (CSCLASSES=yes). 
-ifdef CSCLASSES -ADDCLASSES= -o -name '*.java' -endif - -# Adding CClassHeaders also pushes the file count of a full workspace up about -# 200 files (these files also don't exist in a new workspace, and thus will -# cause the recreation of the database as they get created, which might seem -# a little confusing). Thus allow these files to be added from the environment -# (CSHEADERS=yes). -ifndef CSHEADERS -RMCCHEADERS= -o -name CClassHeaders -endif - -# Use CS_GENERATED=x to include auto-generated files in the build directories. -ifdef CS_GENERATED -CS_ADD_GENERATED = -o -name '*.incl' -else -CS_PRUNE_GENERATED = -o -name '${OS}_*_core' -o -name '${OS}_*_compiler?' -endif - -# OS-specific files for other systems are excluded by default. Use CS_OS=yes -# to include platform-specific files for other platforms. -ifndef CS_OS -CS_OS = linux macos solaris win32 -CS_PRUNE_OS = $(patsubst %,-o -name '*%*',$(filter-out ${OS},${CS_OS})) -endif - -# Processor-specific files for other processors are excluded by default. Use -# CS_CPU=x to include platform-specific files for other platforms. -ifndef CS_CPU -CS_CPU = i486 sparc amd64 ia64 -CS_PRUNE_CPU = $(patsubst %,-o -name '*%*',$(filter-out ${SRCARCH},${CS_CPU})) -endif - -# What files should we include? A simple rule might be just those files under -# SCCS control, however this would miss files we create like the opcodes and -# CClassHeaders. The following attempts to find everything that is *useful*. -# (.del files are created by sccsrm, demo directories contain many .java files -# that probably aren't useful for development, and the pkgarchive may contain -# duplicates of files within the source hierarchy). - -# Directories to exclude. -CS_PRUNE_STD = $(SCM_DIRS) \ - -o -name '.del-*' \ - -o -name '*demo' \ - -o -name pkgarchive - -CS_PRUNE = $(CS_PRUNE_STD) \ - $(CS_PRUNE_OS) \ - $(CS_PRUNE_CPU) \ - $(CS_PRUNE_GENERATED) \ - $(RMCCHEADERS) - -# File names to include. 
-CSFILENAMES = -name '*.[ch]pp' \ - -o -name '*.[Ccshlxy]' \ - $(CS_ADD_GENERATED) \ - -o -name '*.il' \ - -o -name '*.cc' \ - -o -name '*[Mm]akefile*' \ - -o -name '*.gmk' \ - -o -name '*.make' \ - -o -name '*.ad' \ - $(ADDCLASSES) - -.PRECIOUS: cscope.out - -cscope cscope.out: cscope.files FORCE - $(CSCOPE) $(CSCOPE_FLAGS) - -# The .raw file is reordered here in an attempt to make cscope display the most -# relevant files first. -cscope.files: .cscope.files.raw - echo "$(CSINCS)" > $@ - -egrep -v "\.java|\/make\/" $< >> $@ - -fgrep ".java" $< >> $@ - -fgrep "/make/" $< >> $@ - -.cscope.files.raw: .nametable.files - -find $(CSDIRS) -type d \( $(CS_PRUNE) \) -prune -o \ - -type f \( $(CSFILENAMES) \) -print > $@ - -cscope.clean: nametable.clean - -$(RM) cscope.out cscope.files .cscope.files.raw - -TAGS: cscope.files FORCE - egrep -v '^-|^$$' $< | etags --members - - -TAGS.clean: nametable.clean - -$(RM) TAGS - -# .nametable.files and .nametable.files.tmp are used to determine if any files -# were added to/deleted from/renamed in the workspace. If not, then there's -# normally no need to rebuild the cscope database. To force a rebuild of -# the cscope database: gmake nametable.clean. -.nametable.files: .nametable.files.tmp - ( cmp -s $@ $< ) || ( cp $< $@ ) - -$(RM) $< - -# `hg status' is slightly faster than `hg fstatus'. Both are -# quite a bit slower on an NFS mounted file system, so this is -# really geared towards repos on local file systems. -.nametable.files.tmp: - -$(HG) fstatus -acmn > $@ -nametable.clean: - -$(RM) .nametable.files .nametable.files.tmp - -FORCE: - -.PHONY: cscope cscope.clean TAGS.clean nametable.clean FORCE
--- a/hotspot/make/linux/makefiles/gcc.make Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/make/linux/makefiles/gcc.make Sat May 14 15:21:38 2011 -0700 @@ -205,7 +205,7 @@ SHARED_FLAG = -shared # Keep symbols even they are not used -AOUT_FLAGS += -export-dynamic +AOUT_FLAGS += -Xlinker -export-dynamic #------------------------------------------------------------------------ # Debug flags
--- a/hotspot/make/linux/makefiles/vm.make Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/make/linux/makefiles/vm.make Sat May 14 15:21:38 2011 -0700 @@ -102,6 +102,10 @@ CFLAGS += $(EXTRA_CFLAGS) LFLAGS += $(EXTRA_CFLAGS) +# Don't set executable bit on stack segment +# the same could be done by separate execstack command +LFLAGS += -Xlinker -z -Xlinker noexecstack + LIBS += -lm -ldl -lpthread # By default, link the *.o into the library, not the executable.
--- a/hotspot/make/solaris/Makefile Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/make/solaris/Makefile Sat May 14 15:21:38 2011 -0700 @@ -296,7 +296,7 @@ clean: clean_compiler2 clean_compiler1 clean_core clean_docs clean_kernel -include $(GAMMADIR)/make/$(OSNAME)/makefiles/cscope.make +include $(GAMMADIR)/make/cscope.make #-------------------------------------------------------------------------------
--- a/hotspot/make/solaris/makefiles/cscope.make Wed May 11 08:02:44 2011 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,162 +0,0 @@ -# -# Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# This code is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License version 2 only, as -# published by the Free Software Foundation. -# -# This code is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# version 2 for more details (a copy is included in the LICENSE file that -# accompanied this code). -# -# You should have received a copy of the GNU General Public License version -# 2 along with this work; if not, write to the Free Software Foundation, -# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -# -# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -# or visit www.oracle.com if you need additional information or have any -# questions. -# -# - -# -# The cscope.out file is made in the current directory and spans the entire -# source tree. -# -# Things to note: -# 1. We use relative names for cscope. -# 2. We *don't* remove the old cscope.out file, because cscope is smart -# enough to only build what has changed. It can be confused, however, -# if files are renamed or removed, so it may be necessary to manually -# remove cscope.out if a lot of reorganization has occurred. -# - -include $(GAMMADIR)/make/scm.make - -NAWK = /usr/xpg4/bin/awk -RM = rm -f -HG = hg -CS_TOP = ../.. - -CSDIRS = $(CS_TOP)/src $(CS_TOP)/make -CSINCS = $(CSDIRS:%=-I%) - -CSCOPE = cscope -CSCOPE_FLAGS = -b - -# Allow .java files to be added from the environment (CSCLASSES=yes). 
-ifdef CSCLASSES -ADDCLASSES= -o -name '*.java' -endif - -# Adding CClassHeaders also pushes the file count of a full workspace up about -# 200 files (these files also don't exist in a new workspace, and thus will -# cause the recreation of the database as they get created, which might seem -# a little confusing). Thus allow these files to be added from the environment -# (CSHEADERS=yes). -ifndef CSHEADERS -RMCCHEADERS= -o -name CClassHeaders -endif - -# Use CS_GENERATED=x to include auto-generated files in the make directories. -ifdef CS_GENERATED -CS_ADD_GENERATED = -o -name '*.incl' -else -CS_PRUNE_GENERATED = -o -name '${OS}_*_core' -o -name '${OS}_*_compiler?' -endif - -# OS-specific files for other systems are excluded by default. Use CS_OS=yes -# to include platform-specific files for other platforms. -ifndef CS_OS -CS_OS = linux macos solaris win32 -CS_PRUNE_OS = $(patsubst %,-o -name '*%*',$(filter-out ${OS},${CS_OS})) -endif - -# Processor-specific files for other processors are excluded by default. Use -# CS_CPU=x to include platform-specific files for other platforms. -ifndef CS_CPU -CS_CPU = i486 sparc amd64 ia64 -CS_PRUNE_CPU = $(patsubst %,-o -name '*%*',$(filter-out ${SRCARCH},${CS_CPU})) -endif - -# What files should we include? A simple rule might be just those files under -# SCCS control, however this would miss files we create like the opcodes and -# CClassHeaders. The following attempts to find everything that is *useful*. -# (.del files are created by sccsrm, demo directories contain many .java files -# that probably aren't useful for development, and the pkgarchive may contain -# duplicates of files within the source hierarchy). - -# Directories to exclude. -CS_PRUNE_STD = $(SCM_DIRS) \ - -o -name '.del-*' \ - -o -name '*demo' \ - -o -name pkgarchive - -CS_PRUNE = $(CS_PRUNE_STD) \ - $(CS_PRUNE_OS) \ - $(CS_PRUNE_CPU) \ - $(CS_PRUNE_GENERATED) \ - $(RMCCHEADERS) - -# File names to include. 
-CSFILENAMES = -name '*.[ch]pp' \ - -o -name '*.[Ccshlxy]' \ - $(CS_ADD_GENERATED) \ - -o -name '*.d' \ - -o -name '*.il' \ - -o -name '*.cc' \ - -o -name '*[Mm]akefile*' \ - -o -name '*.gmk' \ - -o -name '*.make' \ - -o -name '*.ad' \ - $(ADDCLASSES) - -.PRECIOUS: cscope.out - -cscope cscope.out: cscope.files FORCE - $(CSCOPE) $(CSCOPE_FLAGS) - -# The .raw file is reordered here in an attempt to make cscope display the most -# relevant files first. -cscope.files: .cscope.files.raw - echo "$(CSINCS)" > $@ - -egrep -v "\.java|\/make\/" $< >> $@ - -fgrep ".java" $< >> $@ - -fgrep "/make/" $< >> $@ - -.cscope.files.raw: .nametable.files - -find $(CSDIRS) -type d \( $(CS_PRUNE) \) -prune -o \ - -type f \( $(CSFILENAMES) \) -print > $@ - -cscope.clean: nametable.clean - -$(RM) cscope.out cscope.files .cscope.files.raw - -TAGS: cscope.files FORCE - egrep -v '^-|^$$' $< | etags --members - - -TAGS.clean: nametable.clean - -$(RM) TAGS - -# .nametable.files and .nametable.files.tmp are used to determine if any files -# were added to/deleted from/renamed in the workspace. If not, then there's -# normally no need to rebuild the cscope database. To force a rebuild of -# the cscope database: gmake nametable.clean. -.nametable.files: .nametable.files.tmp - ( cmp -s $@ $< ) || ( cp $< $@ ) - -$(RM) $< - -# `hg status' is slightly faster than `hg fstatus'. Both are -# quite a bit slower on an NFS mounted file system, so this is -# really geared towards repos on local file systems. -.nametable.files.tmp: - -$(HG) fstatus -acmn > $@ - -nametable.clean: - -$(RM) .nametable.files .nametable.files.tmp - -FORCE: - -.PHONY: cscope cscope.clean TAGS.clean nametable.clean FORCE
--- a/hotspot/make/solaris/makefiles/saproc.make Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/make/solaris/makefiles/saproc.make Sat May 14 15:21:38 2011 -0700 @@ -1,5 +1,5 @@ # -# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -56,6 +56,30 @@ SA_LFLAGS += -mt -xnolib -norunpath endif +# The libproc Pstack_iter() interface changed in Nevada-B159. +# This logic needs to match +# agent/src/os/solaris/proc/saproc.cpp: set_has_newer_Pstack_iter(): +# - skip SunOS 4 or older +# - skip Solaris 10 or older +# - skip two digit Nevada builds +# - skip three digit Nevada builds thru 149 +# - skip Nevada builds 150-158 +SOLARIS_11_B159_OR_LATER := \ +$(shell uname -r -v \ + | sed -n ' \ + /^[0-3]\. /b \ + /^5\.[0-9] /b \ + /^5\.10 /b \ + / snv_[0-9][0-9]$/b \ + / snv_[01][0-4][0-9]$/b \ + / snv_15[0-8]$/b \ + s/.*/-DSOLARIS_11_B159_OR_LATER/p \ + ') + +# Uncomment the following to simulate building on Nevada-B159 or later +# when actually building on Nevada-B158 or earlier: +#SOLARIS_11_B159_OR_LATER=-DSOLARIS_11_B159_OR_LATER + $(LIBSAPROC): $(SASRCFILES) $(SAMAPFILE) $(QUIETLY) if [ "$(BOOT_JAVA_HOME)" = "" ]; then \ echo "ALT_BOOTDIR, BOOTDIR or JAVA_HOME needs to be defined to build SA"; \ @@ -68,6 +92,7 @@ -I$(GENERATED) \ -I$(BOOT_JAVA_HOME)/include \ -I$(BOOT_JAVA_HOME)/include/$(Platform_os_family) \ + $(SOLARIS_11_B159_OR_LATER) \ $(SASRCFILES) \ $(SA_LFLAGS) \ -o $@ \
--- a/hotspot/make/solaris/makefiles/sparcWorks.make Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/make/solaris/makefiles/sparcWorks.make Sat May 14 15:21:38 2011 -0700 @@ -100,11 +100,6 @@ LINK_LIB.CC/PRE_HOOK += $(JVM_CHECK_SYMBOLS) || exit 1; -# Some interfaces (_lwp_create) changed with LP64 and Solaris 7 -SOLARIS_7_OR_LATER := \ -$(shell uname -r | awk -F. '{ if ($$2 >= 7) print "-DSOLARIS_7_OR_LATER"; }') -CFLAGS += ${SOLARIS_7_OR_LATER} - # New architecture options started in SS12 (5.9), we need both styles to build. # The older arch options for SS11 (5.8) or older and also for /usr/ccs/bin/as. # Note: default for 32bit sparc is now the same as v8plus, so the
--- a/hotspot/make/windows/build.make Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/make/windows/build.make Sat May 14 15:21:38 2011 -0700 @@ -1,5 +1,5 @@ # -# Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -125,7 +125,25 @@ # or make/hotspot_distro. !ifndef HOTSPOT_VM_DISTRO !if exists($(WorkSpace)\src\closed) + +# if the build is for JDK6 or earlier version, it should include jdk6_hotspot_distro, +# instead of hotspot_distro. +JDK6_OR_EARLIER=0 +!if "$(JDK_MAJOR_VERSION)" != "" && "$(JDK_MINOR_VERSION)" != "" && "$(JDK_MICRO_VERSION)" != "" +!if $(JDK_MAJOR_VERSION) == 1 && $(JDK_MINOR_VERSION) < 7 +JDK6_OR_EARLIER=1 +!endif +!else +!if $(JDK_MAJOR_VER) == 1 && $(JDK_MINOR_VER) < 7 +JDK6_OR_EARLIER=1 +!endif +!endif + +!if $(JDK6_OR_EARLIER) == 1 +!include $(WorkSpace)\make\jdk6_hotspot_distro +!else !include $(WorkSpace)\make\hotspot_distro +!endif !else !include $(WorkSpace)\make\openjdk_distro !endif @@ -260,7 +278,7 @@ @ echo Variant=$(realVariant) >> $@ @ echo WorkSpace=$(WorkSpace) >> $@ @ echo BootStrapDir=$(BootStrapDir) >> $@ - @ if "$(USERNAME)" NEQ "" echo BuildUser=$(USERNAME) >> $@ + @ if "$(USERNAME)" NEQ "" echo BuildUser=$(USERNAME) >> $@ @ echo HS_VER=$(HS_VER) >> $@ @ echo HS_DOTVER=$(HS_DOTVER) >> $@ @ echo HS_COMPANY=$(COMPANY_NAME) >> $@
--- a/hotspot/src/cpu/sparc/vm/frame_sparc.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/cpu/sparc/vm/frame_sparc.cpp Sat May 14 15:21:38 2011 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -806,3 +806,34 @@ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize) - 1; return &interpreter_frame_tos_address()[index]; } + + +#ifdef ASSERT + +#define DESCRIBE_FP_OFFSET(name) \ + values.describe(-1, fp() + frame::name##_offset, #name) + +void frame::describe_pd(FrameValues& values, int frame_no) { + for (int w = 0; w < frame::register_save_words; w++) { + values.describe(frame_no, sp() + w, err_msg("register save area word %d", w), 1); + } + + if (is_interpreted_frame()) { + DESCRIBE_FP_OFFSET(interpreter_frame_d_scratch_fp); + DESCRIBE_FP_OFFSET(interpreter_frame_l_scratch_fp); + DESCRIBE_FP_OFFSET(interpreter_frame_padding); + DESCRIBE_FP_OFFSET(interpreter_frame_oop_temp); + } + + if (!is_compiled_frame()) { + if (frame::callee_aggregate_return_pointer_words != 0) { + values.describe(frame_no, sp() + frame::callee_aggregate_return_pointer_sp_offset, "callee_aggregate_return_pointer_word"); + } + for (int w = 0; w < frame::callee_register_argument_save_area_words; w++) { + values.describe(frame_no, sp() + frame::callee_register_argument_save_area_sp_offset + w, + err_msg("callee_register_argument_save_area_words %d", w)); + } + } +} + +#endif
--- a/hotspot/src/cpu/sparc/vm/methodHandles_sparc.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/cpu/sparc/vm/methodHandles_sparc.cpp Sat May 14 15:21:38 2011 -0700 @@ -350,8 +350,9 @@ #ifndef PRODUCT extern "C" void print_method_handle(oop mh); void trace_method_handle_stub(const char* adaptername, - oopDesc* mh) { - printf("MH %s mh="INTPTR_FORMAT"\n", adaptername, (intptr_t) mh); + oopDesc* mh, + intptr_t* saved_sp) { + tty->print_cr("MH %s mh="INTPTR_FORMAT " saved_sp=" INTPTR_FORMAT, adaptername, (intptr_t) mh, saved_sp); print_method_handle(mh); } void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { @@ -361,6 +362,7 @@ __ save_frame(16); __ set((intptr_t) adaptername, O0); __ mov(G3_method_handle, O1); + __ mov(I5_savedSP, O2); __ mov(G3_method_handle, L3); __ mov(Gargs, L4); __ mov(G5_method_type, L5); @@ -486,7 +488,7 @@ if (ek == _invokespecial_mh) { // Must load & check the first argument before entering the target method. __ load_method_handle_vmslots(O0_argslot, G3_method_handle, O1_scratch); - __ ld_ptr(__ argument_address(O0_argslot), G3_method_handle); + __ ld_ptr(__ argument_address(O0_argslot, -1), G3_method_handle); __ null_check(G3_method_handle); __ verify_oop(G3_method_handle); } @@ -643,9 +645,10 @@ // Live at this point: // - G5_klass : klass required by the target method + // - O0_argslot : argslot index in vmarg; may be required in the failing path // - O1_scratch : argument klass to test // - G3_method_handle: adapter method handle - __ check_klass_subtype(O1_scratch, G5_klass, O0_argslot, O2_scratch, done); + __ check_klass_subtype(O1_scratch, G5_klass, O2_scratch, O3_scratch, done); // If we get here, the type check failed! __ load_heap_oop(G3_amh_argument, O2_required); // required class
--- a/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Sat May 14 15:21:38 2011 -0700 @@ -1698,35 +1698,21 @@ popframe_extra_args; int local_words = method->max_locals() * Interpreter::stackElementWords; - int parm_words = method->size_of_parameters() * Interpreter::stackElementWords; - NEEDS_CLEANUP; intptr_t* locals; - if (caller->is_interpreted_frame()) { - // Can force the locals area to end up properly overlapping the top of the expression stack. - intptr_t* Lesp_ptr = caller->interpreter_frame_tos_address() - 1; - // Note that this computation means we replace size_of_parameters() values from the caller - // interpreter frame's expression stack with our argument locals - locals = Lesp_ptr + parm_words; - int delta = local_words - parm_words; - int computed_sp_adjustment = (delta > 0) ? round_to(delta, WordsPerLong) : 0; - *interpreter_frame->register_addr(I5_savedSP) = (intptr_t) (fp + computed_sp_adjustment) - STACK_BIAS; + if (caller->is_compiled_frame()) { + // Compiled frames do not allocate a varargs area so place them + // next to the register save area. + locals = fp + frame::register_save_words + local_words - 1; + // Caller wants his own SP back + int caller_frame_size = caller->cb()->frame_size(); + *interpreter_frame->register_addr(I5_savedSP) = (intptr_t)(caller->fp() - caller_frame_size) - STACK_BIAS; } else { - assert(caller->is_compiled_frame() || caller->is_entry_frame(), "only possible cases"); - // Don't have Lesp available; lay out locals block in the caller - // adjacent to the register window save area. - // - // Compiled frames do not allocate a varargs area which is why this if - // statement is needed. 
- // - if (caller->is_compiled_frame()) { - locals = fp + frame::register_save_words + local_words - 1; - } else { - locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1; - } - if (!caller->is_entry_frame()) { - // Caller wants his own SP back - int caller_frame_size = caller->cb()->frame_size(); - *interpreter_frame->register_addr(I5_savedSP) = (intptr_t)(caller->fp() - caller_frame_size) - STACK_BIAS; + assert(caller->is_interpreted_frame() || caller->is_entry_frame(), "only possible cases"); + // The entry and interpreter frames are laid out like normal C + // frames so place the locals adjacent to the varargs area. + locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1; + if (caller->is_interpreted_frame()) { + *interpreter_frame->register_addr(I5_savedSP) = (intptr_t) (fp + rounded_cls) - STACK_BIAS; } } if (TraceDeoptimization) {
--- a/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp Sat May 14 15:21:38 2011 -0700 @@ -3293,8 +3293,6 @@ /*virtual*/ false, /*vfinal*/ false, /*indy*/ true); __ mov(SP, O5_savedSP); // record SP that we wanted the callee to restore - __ verify_oop(G5_callsite); - // profile this call __ profile_call(O4); @@ -3307,8 +3305,10 @@ __ sll(Rret, LogBytesPerWord, Rret); __ ld_ptr(Rtemp, Rret, Rret); // get return address + __ verify_oop(G5_callsite); __ load_heap_oop(G5_callsite, __ delayed_value(java_lang_invoke_CallSite::target_offset_in_bytes, Rscratch), G3_method_handle); __ null_check(G3_method_handle); + __ verify_oop(G3_method_handle); // Adjust Rret first so Llast_SP can be same as Rret __ add(Rret, -frame::pc_return_offset, O7);
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Sat May 14 15:21:38 2011 -0700 @@ -6039,6 +6039,43 @@ call_VM_leaf(entry_point, 3); } +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + + LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 2); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { + LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); + LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); + pass_arg2(this, arg_2); + LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 3); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { + LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg")); + LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); + LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); + pass_arg3(this, arg_3); + LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); + LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); + pass_arg2(this, arg_2); + LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 4); +} + void MacroAssembler::check_and_handle_earlyret(Register java_thread) { }
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp Sat May 14 15:21:38 2011 -0700 @@ -1655,6 +1655,14 @@ void call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); + // These always tightly bind to MacroAssembler::call_VM_leaf_base + // bypassing the virtual implementation + void super_call_VM_leaf(address entry_point); + void super_call_VM_leaf(address entry_point, Register arg_1); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4); + // last Java Frame (fills frame anchor) void set_last_Java_frame(Register thread, Register last_java_sp,
--- a/hotspot/src/cpu/x86/vm/frame_x86.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/cpu/x86/vm/frame_x86.cpp Sat May 14 15:21:38 2011 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -669,3 +669,23 @@ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); return &interpreter_frame_tos_address()[index]; } + +#ifdef ASSERT + +#define DESCRIBE_FP_OFFSET(name) \ + values.describe(-1, fp() + frame::name##_offset, #name) + +void frame::describe_pd(FrameValues& values, int frame_no) { + if (is_interpreted_frame()) { + DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_method); + DESCRIBE_FP_OFFSET(interpreter_frame_mdx); + DESCRIBE_FP_OFFSET(interpreter_frame_cache); + DESCRIBE_FP_OFFSET(interpreter_frame_locals); + DESCRIBE_FP_OFFSET(interpreter_frame_bcx); + DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); + } + +} +#endif
--- a/hotspot/src/cpu/x86/vm/interp_masm_x86_32.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/cpu/x86/vm/interp_masm_x86_32.cpp Sat May 14 15:21:38 2011 -0700 @@ -383,32 +383,6 @@ movptr(Address(rsp, Interpreter::expr_offset_in_bytes(n)), val); } -void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point) { - MacroAssembler::call_VM_leaf_base(entry_point, 0); -} - - -void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point, Register arg_1) { - push(arg_1); - MacroAssembler::call_VM_leaf_base(entry_point, 1); -} - - -void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2) { - push(arg_2); - push(arg_1); - MacroAssembler::call_VM_leaf_base(entry_point, 2); -} - - -void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) { - push(arg_3); - push(arg_2); - push(arg_1); - MacroAssembler::call_VM_leaf_base(entry_point, 3); -} - - void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { // set sender sp lea(rsi, Address(rsp, wordSize));
--- a/hotspot/src/cpu/x86/vm/interp_masm_x86_32.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/cpu/x86/vm/interp_masm_x86_32.hpp Sat May 14 15:21:38 2011 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -124,12 +124,6 @@ void load_ptr(int n, Register val); void store_ptr(int n, Register val); - // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls - void super_call_VM_leaf(address entry_point); - void super_call_VM_leaf(address entry_point, Register arg_1); - void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); - void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); - // Generate a subtype check: branch to ok_is_subtype if sub_klass is // a subtype of super_klass. EAX holds the super_klass. Blows ECX // and EDI. Register sub_klass cannot be any of the above.
--- a/hotspot/src/cpu/x86/vm/interp_masm_x86_64.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/cpu/x86/vm/interp_masm_x86_64.cpp Sat May 14 15:21:38 2011 -0700 @@ -381,56 +381,6 @@ } -void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point) { - MacroAssembler::call_VM_leaf_base(entry_point, 0); -} - - -void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point, - Register arg_1) { - if (c_rarg0 != arg_1) { - mov(c_rarg0, arg_1); - } - MacroAssembler::call_VM_leaf_base(entry_point, 1); -} - - -void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point, - Register arg_1, - Register arg_2) { - assert(c_rarg0 != arg_2, "smashed argument"); - assert(c_rarg1 != arg_1, "smashed argument"); - if (c_rarg0 != arg_1) { - mov(c_rarg0, arg_1); - } - if (c_rarg1 != arg_2) { - mov(c_rarg1, arg_2); - } - MacroAssembler::call_VM_leaf_base(entry_point, 2); -} - -void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point, - Register arg_1, - Register arg_2, - Register arg_3) { - assert(c_rarg0 != arg_2, "smashed argument"); - assert(c_rarg0 != arg_3, "smashed argument"); - assert(c_rarg1 != arg_1, "smashed argument"); - assert(c_rarg1 != arg_3, "smashed argument"); - assert(c_rarg2 != arg_1, "smashed argument"); - assert(c_rarg2 != arg_2, "smashed argument"); - if (c_rarg0 != arg_1) { - mov(c_rarg0, arg_1); - } - if (c_rarg1 != arg_2) { - mov(c_rarg1, arg_2); - } - if (c_rarg2 != arg_3) { - mov(c_rarg2, arg_3); - } - MacroAssembler::call_VM_leaf_base(entry_point, 3); -} - void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { // set sender sp lea(r13, Address(rsp, wordSize));
--- a/hotspot/src/cpu/x86/vm/interp_masm_x86_64.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/cpu/x86/vm/interp_masm_x86_64.hpp Sat May 14 15:21:38 2011 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -136,13 +136,6 @@ void load_ptr(int n, Register val); void store_ptr(int n, Register val); - // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls - void super_call_VM_leaf(address entry_point); - void super_call_VM_leaf(address entry_point, Register arg_1); - void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); - void super_call_VM_leaf(address entry_point, - Register arg_1, Register arg_2, Register arg_3); - // Generate a subtype check: branch to ok_is_subtype if sub_klass is // a subtype of super_klass. void gen_subtype_check( Register sub_klass, Label &ok_is_subtype );
--- a/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp Sat May 14 15:21:38 2011 -0700 @@ -315,56 +315,38 @@ #ifndef PRODUCT extern "C" void print_method_handle(oop mh); void trace_method_handle_stub(const char* adaptername, + intptr_t* saved_sp, oop mh, - intptr_t* saved_regs, - intptr_t* entry_sp, - intptr_t* saved_sp, - intptr_t* saved_bp) { + intptr_t* sp) { // called as a leaf from native code: do not block the JVM! - intptr_t* last_sp = (intptr_t*) saved_bp[frame::interpreter_frame_last_sp_offset]; - intptr_t* base_sp = (intptr_t*) saved_bp[frame::interpreter_frame_monitor_block_top_offset]; - printf("MH %s mh="INTPTR_FORMAT" sp=("INTPTR_FORMAT"+"INTX_FORMAT") stack_size="INTX_FORMAT" bp="INTPTR_FORMAT"\n", - adaptername, (intptr_t)mh, (intptr_t)entry_sp, (intptr_t)(saved_sp - entry_sp), (intptr_t)(base_sp - last_sp), (intptr_t)saved_bp); - if (last_sp != saved_sp && last_sp != NULL) - printf("*** last_sp="INTPTR_FORMAT"\n", (intptr_t)last_sp); + intptr_t* entry_sp = sp + LP64_ONLY(16) NOT_LP64(8); + tty->print_cr("MH %s mh="INTPTR_FORMAT" sp="INTPTR_FORMAT" saved_sp="INTPTR_FORMAT")", + adaptername, (intptr_t)mh, (intptr_t)entry_sp, saved_sp); if (Verbose) { - printf(" reg dump: "); - int saved_regs_count = (entry_sp-1) - saved_regs; - // 32 bit: rdi rsi rbp rsp; rbx rdx rcx (*) rax - int i; - for (i = 0; i <= saved_regs_count; i++) { - if (i > 0 && i % 4 == 0 && i != saved_regs_count) - printf("\n + dump: "); - printf(" %d: "INTPTR_FORMAT, i, saved_regs[i]); - } - printf("\n"); - int stack_dump_count = 16; - if (stack_dump_count < (int)(saved_bp + 2 - saved_sp)) - stack_dump_count = (int)(saved_bp + 2 - saved_sp); - if (stack_dump_count > 64) stack_dump_count = 48; - for (i = 0; i < stack_dump_count; i += 4) { - printf(" dump at SP[%d] "INTPTR_FORMAT": "INTPTR_FORMAT" "INTPTR_FORMAT" "INTPTR_FORMAT" "INTPTR_FORMAT"\n", - i, (intptr_t) &entry_sp[i+0], entry_sp[i+0], entry_sp[i+1], 
entry_sp[i+2], entry_sp[i+3]); - } print_method_handle(mh); } } void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { if (!TraceMethodHandles) return; BLOCK_COMMENT("trace_method_handle {"); - __ push(rax); - __ lea(rax, Address(rsp, wordSize*6)); // entry_sp __ pusha(); +#ifdef _LP64 + // Pass arguments carefully since the registers overlap with the calling convention. + // rcx: method handle + // r13: saved sp + __ mov(c_rarg2, rcx); // mh + __ mov(c_rarg1, r13); // saved sp + __ mov(c_rarg3, rsp); // sp + __ movptr(c_rarg0, (intptr_t) adaptername); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub), c_rarg0, c_rarg1, c_rarg2, c_rarg3); +#else // arguments: - __ push(rbp); // interpreter frame pointer - __ push(rsi); // saved_sp - __ push(rax); // entry_sp - __ push(rcx); // mh - __ push(rcx); - __ movptr(Address(rsp, 0), (intptr_t) adaptername); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub), 5); + // rcx: method handle + // rsi: saved sp + __ movptr(rbx, (intptr_t) adaptername); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub), rbx, rsi, rcx, rsp); +#endif __ popa(); - __ pop(rax); BLOCK_COMMENT("} trace_method_handle"); } #endif //PRODUCT
--- a/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp Sat May 14 15:21:38 2011 -0700 @@ -422,7 +422,7 @@ Label L_done, L_throw_exception; const Register con_klass_temp = rcx; // same as Rcache - __ movptr(con_klass_temp, Address(rax, oopDesc::klass_offset_in_bytes())); + __ load_klass(con_klass_temp, rax); __ cmpptr(con_klass_temp, ExternalAddress((address)Universe::systemObjArrayKlassObj_addr())); __ jcc(Assembler::notEqual, L_done); __ cmpl(Address(rax, arrayOopDesc::length_offset_in_bytes()), 0); @@ -432,7 +432,7 @@ // Load the exception from the system-array which wraps it: __ bind(L_throw_exception); - __ movptr(rax, Address(rax, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); + __ load_heap_oop(rax, Address(rax, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); __ jump(ExternalAddress(Interpreter::throw_exception_entry())); __ bind(L_done); @@ -946,9 +946,9 @@ __ jcc(Assembler::zero, is_null); // Move subklass into EBX - __ movptr(rbx, Address(rax, oopDesc::klass_offset_in_bytes())); + __ load_klass(rbx, rax); // Move superklass into EAX - __ movptr(rax, Address(rdx, oopDesc::klass_offset_in_bytes())); + __ load_klass(rax, rdx); __ movptr(rax, Address(rax, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes())); // Compress array+index*wordSize+12 into a single register. Frees ECX. 
__ lea(rdx, element_address); @@ -2001,7 +2001,7 @@ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { assert(state == vtos, "only valid state"); __ movptr(rax, aaddress(0)); - __ movptr(rdi, Address(rax, oopDesc::klass_offset_in_bytes())); + __ load_klass(rdi, rax); __ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc))); __ testl(rdi, JVM_ACC_HAS_FINALIZER); Label skip_register_finalizer; @@ -2948,7 +2948,7 @@ // get receiver klass __ null_check(recv, oopDesc::klass_offset_in_bytes()); // Keep recv in rcx for callee expects it there - __ movptr(rax, Address(recv, oopDesc::klass_offset_in_bytes())); + __ load_klass(rax, recv); __ verify_oop(rax); // profile this call @@ -3028,7 +3028,7 @@ // Get receiver klass into rdx - also a null check __ restore_locals(); // restore rdi - __ movptr(rdx, Address(rcx, oopDesc::klass_offset_in_bytes())); + __ load_klass(rdx, rcx); __ verify_oop(rdx); // profile this call @@ -3083,6 +3083,7 @@ void TemplateTable::invokedynamic(int byte_no) { transition(vtos, vtos); + assert(byte_no == f1_oop, "use this argument"); if (!EnableInvokeDynamic) { // We should not encounter this bytecode if !EnableInvokeDynamic. 
@@ -3095,7 +3096,6 @@ return; } - assert(byte_no == f1_oop, "use this argument"); prepare_invoke(rax, rbx, byte_no); // rax: CallSite object (f1) @@ -3106,14 +3106,14 @@ Register rax_callsite = rax; Register rcx_method_handle = rcx; - if (ProfileInterpreter) { - // %%% should make a type profile for any invokedynamic that takes a ref argument - // profile this call - __ profile_call(rsi); - } - - __ movptr(rcx_method_handle, Address(rax_callsite, __ delayed_value(java_lang_invoke_CallSite::target_offset_in_bytes, rcx))); + // %%% should make a type profile for any invokedynamic that takes a ref argument + // profile this call + __ profile_call(rsi); + + __ verify_oop(rax_callsite); + __ load_heap_oop(rcx_method_handle, Address(rax_callsite, __ delayed_value(java_lang_invoke_CallSite::target_offset_in_bytes, rdx))); __ null_check(rcx_method_handle); + __ verify_oop(rcx_method_handle); __ prepare_to_jump_from_interpreted(); __ jump_to_method_handle_entry(rcx_method_handle, rdx); } @@ -3258,7 +3258,7 @@ (int32_t)markOopDesc::prototype()); // header __ pop(rcx); // get saved klass back in the register. } - __ movptr(Address(rax, oopDesc::klass_offset_in_bytes()), rcx); // klass + __ store_klass(rax, rcx); // klass { SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); @@ -3333,7 +3333,7 @@ __ movptr(rax, Address(rcx, rbx, Address::times_ptr, sizeof(constantPoolOopDesc))); __ bind(resolved); - __ movptr(rbx, Address(rdx, oopDesc::klass_offset_in_bytes())); + __ load_klass(rbx, rdx); // Generate subtype check. Blows ECX. Resets EDI. Object in EDX. // Superklass in EAX. Subklass in EBX. 
@@ -3376,12 +3376,12 @@ __ push(atos); call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc) ); __ pop_ptr(rdx); - __ movptr(rdx, Address(rdx, oopDesc::klass_offset_in_bytes())); + __ load_klass(rdx, rdx); __ jmp(resolved); // Get superklass in EAX and subklass in EDX __ bind(quicked); - __ movptr(rdx, Address(rax, oopDesc::klass_offset_in_bytes())); + __ load_klass(rdx, rax); __ movptr(rax, Address(rcx, rbx, Address::times_ptr, sizeof(constantPoolOopDesc))); __ bind(resolved);
--- a/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp Sat May 14 15:21:38 2011 -0700 @@ -436,7 +436,7 @@ Label L_done, L_throw_exception; const Register con_klass_temp = rcx; // same as cache const Register array_klass_temp = rdx; // same as index - __ movptr(con_klass_temp, Address(rax, oopDesc::klass_offset_in_bytes())); + __ load_klass(con_klass_temp, rax); __ lea(array_klass_temp, ExternalAddress((address)Universe::systemObjArrayKlassObj_addr())); __ cmpptr(con_klass_temp, Address(array_klass_temp, 0)); __ jcc(Assembler::notEqual, L_done); @@ -447,7 +447,7 @@ // Load the exception from the system-array which wraps it: __ bind(L_throw_exception); - __ movptr(rax, Address(rax, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); + __ load_heap_oop(rax, Address(rax, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); __ jump(ExternalAddress(Interpreter::throw_exception_entry())); __ bind(L_done); @@ -3137,7 +3137,6 @@ return; } - assert(byte_no == f1_oop, "use this argument"); prepare_invoke(rax, rbx, byte_no); // rax: CallSite object (f1) @@ -3148,14 +3147,14 @@ Register rax_callsite = rax; Register rcx_method_handle = rcx; - if (ProfileInterpreter) { - // %%% should make a type profile for any invokedynamic that takes a ref argument - // profile this call - __ profile_call(r13); - } - - __ load_heap_oop(rcx_method_handle, Address(rax_callsite, __ delayed_value(java_lang_invoke_CallSite::target_offset_in_bytes, rcx))); + // %%% should make a type profile for any invokedynamic that takes a ref argument + // profile this call + __ profile_call(r13); + + __ verify_oop(rax_callsite); + __ load_heap_oop(rcx_method_handle, Address(rax_callsite, __ delayed_value(java_lang_invoke_CallSite::target_offset_in_bytes, rdx))); __ null_check(rcx_method_handle); + __ verify_oop(rcx_method_handle); __ prepare_to_jump_from_interpreted(); __ jump_to_method_handle_entry(rcx_method_handle, rdx); }
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Sat May 14 15:21:38 2011 -0700 @@ -441,12 +441,25 @@ } } - // On family 21 processors default is no sw prefetch - if ( cpu_family() == 21 ) { + // some defaults for AMD family 15h + if ( cpu_family() == 0x15 ) { + // On family 15h processors default is no sw prefetch if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { AllocatePrefetchStyle = 0; } + // Also, if some other prefetch style is specified, default instruction type is PREFETCHW + if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { + AllocatePrefetchInstr = 3; + } + // On family 15h processors use XMM and UnalignedLoadStores for Array Copy + if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { + UseXMMForArrayCopy = true; + } + if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { + UseUnalignedLoadStores = true; + } } + } if( is_intel() ) { // Intel cpus specific settings
--- a/hotspot/src/cpu/x86/vm/x86_32.ad Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/cpu/x86/vm/x86_32.ad Sat May 14 15:21:38 2011 -0700 @@ -12989,6 +12989,53 @@ %} // ============================================================================ +// Counted Loop limit node which represents exact final iterator value. +// Note: the resulting value should fit into integer range since +// counted loops have limit check on overflow. +instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{ + match(Set limit (LoopLimit (Binary init limit) stride)); + effect(TEMP limit_hi, TEMP tmp, KILL flags); + ins_cost(300); + + format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %} + ins_encode %{ + int strd = (int)$stride$$constant; + assert(strd != 1 && strd != -1, "sanity"); + int m1 = (strd > 0) ? 1 : -1; + // Convert limit to long (EAX:EDX) + __ cdql(); + // Convert init to long (init:tmp) + __ movl($tmp$$Register, $init$$Register); + __ sarl($tmp$$Register, 31); + // $limit - $init + __ subl($limit$$Register, $init$$Register); + __ sbbl($limit_hi$$Register, $tmp$$Register); + // + ($stride - 1) + if (strd > 0) { + __ addl($limit$$Register, (strd - 1)); + __ adcl($limit_hi$$Register, 0); + __ movl($tmp$$Register, strd); + } else { + __ addl($limit$$Register, (strd + 1)); + __ adcl($limit_hi$$Register, -1); + __ lneg($limit_hi$$Register, $limit$$Register); + __ movl($tmp$$Register, -strd); + } + // signed division: (EAX:EDX) / pos_stride + __ idivl($tmp$$Register); + if (strd < 0) { + // restore sign + __ negl($tmp$$Register); + } + // (EAX) * stride + __ mull($tmp$$Register); + // + init (ignore upper bits) + __ addl($limit$$Register, $init$$Register); + %} + ins_pipe( pipe_slow ); +%} + +// ============================================================================ // Branch Instructions // Jump Table instruct jumpXtnd(eRegI switch_val) %{
--- a/hotspot/src/os/linux/vm/globals_linux.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/os/linux/vm/globals_linux.hpp Sat May 14 15:21:38 2011 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,19 +29,25 @@ // Defines Linux specific flags. They are not available on other platforms. // #define RUNTIME_OS_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct) \ - product(bool, UseOprofile, false, \ - "enable support for Oprofile profiler") \ - \ - product(bool, UseLinuxPosixThreadCPUClocks, true, \ - "enable fast Linux Posix clocks where available") -// NB: The default value of UseLinuxPosixThreadCPUClocks may be -// overridden in Arguments::parse_each_vm_init_arg. + product(bool, UseOprofile, false, \ + "enable support for Oprofile profiler") \ + \ + product(bool, UseLinuxPosixThreadCPUClocks, true, \ + "enable fast Linux Posix clocks where available") \ +/* NB: The default value of UseLinuxPosixThreadCPUClocks may be \ + overridden in Arguments::parse_each_vm_init_arg. */ \ + \ + product(bool, UseHugeTLBFS, false, \ + "Use MAP_HUGETLB for large pages") \ + \ + product(bool, UseSHM, false, \ + "Use SYSV shared memory for large pages") // // Defines Linux-specific default values. The flags are available on all // platforms, but they may have different default values on other platforms. // -define_pd_global(bool, UseLargePages, false); +define_pd_global(bool, UseLargePages, true); define_pd_global(bool, UseLargePagesIndividualAllocation, false); define_pd_global(bool, UseOSErrorReporting, false); define_pd_global(bool, UseThreadPriorities, true) ;
--- a/hotspot/src/os/linux/vm/os_linux.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/os/linux/vm/os_linux.cpp Sat May 14 15:21:38 2011 -0700 @@ -2465,16 +2465,40 @@ return res != (uintptr_t) MAP_FAILED; } +// Define MAP_HUGETLB here so we can build HotSpot on old systems. +#ifndef MAP_HUGETLB +#define MAP_HUGETLB 0x40000 +#endif + +// Define MADV_HUGEPAGE here so we can build HotSpot on old systems. +#ifndef MADV_HUGEPAGE +#define MADV_HUGEPAGE 14 +#endif + bool os::commit_memory(char* addr, size_t size, size_t alignment_hint, bool exec) { + if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) { + int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE; + uintptr_t res = + (uintptr_t) ::mmap(addr, size, prot, + MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB, + -1, 0); + return res != (uintptr_t) MAP_FAILED; + } + return commit_memory(addr, size, exec); } -void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { } +void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { + if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) { + // We don't check the return value: madvise(MADV_HUGEPAGE) may not + // be supported or the memory may already be backed by huge pages. + ::madvise(addr, bytes, MADV_HUGEPAGE); + } +} void os::free_memory(char *addr, size_t bytes) { - ::mmap(addr, bytes, PROT_READ | PROT_WRITE, - MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0); + ::madvise(addr, bytes, MADV_DONTNEED); } void os::numa_make_global(char *addr, size_t bytes) { @@ -2812,6 +2836,43 @@ return linux_mprotect(addr, size, PROT_READ|PROT_WRITE); } +bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) { + bool result = false; + void *p = mmap (NULL, page_size, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB, + -1, 0); + + if (p != (void *) -1) { + // We don't know if this really is a huge page or not. 
+ FILE *fp = fopen("/proc/self/maps", "r"); + if (fp) { + while (!feof(fp)) { + char chars[257]; + long x = 0; + if (fgets(chars, sizeof(chars), fp)) { + if (sscanf(chars, "%lx-%*lx", &x) == 1 + && x == (long)p) { + if (strstr (chars, "hugepage")) { + result = true; + break; + } + } + } + } + fclose(fp); + } + munmap (p, page_size); + if (result) + return true; + } + + if (warn) { + warning("HugeTLBFS is not supported by the operating system."); + } + + return result; +} + /* * Set the coredump_filter bits to include largepages in core dump (bit 6) * @@ -2853,8 +2914,22 @@ static size_t _large_page_size = 0; -bool os::large_page_init() { - if (!UseLargePages) return false; +void os::large_page_init() { + if (!UseLargePages) { + UseHugeTLBFS = false; + UseSHM = false; + return; + } + + if (FLAG_IS_DEFAULT(UseHugeTLBFS) && FLAG_IS_DEFAULT(UseSHM)) { + // If UseLargePages is specified on the command line try both methods, + // if it's default, then try only HugeTLBFS. + if (FLAG_IS_DEFAULT(UseLargePages)) { + UseHugeTLBFS = true; + } else { + UseHugeTLBFS = UseSHM = true; + } + } if (LargePageSizeInBytes) { _large_page_size = LargePageSizeInBytes; @@ -2899,20 +2974,24 @@ } } + // print a warning if any large page related flag is specified on command line + bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS); + const size_t default_page_size = (size_t)Linux::page_size(); if (_large_page_size > default_page_size) { _page_sizes[0] = _large_page_size; _page_sizes[1] = default_page_size; _page_sizes[2] = 0; } + UseHugeTLBFS = UseHugeTLBFS && + Linux::hugetlbfs_sanity_check(warn_on_failure, _large_page_size); + + if (UseHugeTLBFS) + UseSHM = false; + + UseLargePages = UseHugeTLBFS || UseSHM; set_coredump_filter(); - - // Large page support is available on 2.6 or newer kernel, some vendors - // (e.g. Redhat) have backported it to their 2.4 based distributions. - // We optimistically assume the support is available. 
If later it turns out - // not true, VM will automatically switch to use regular page size. - return true; } #ifndef SHM_HUGETLB @@ -2922,7 +3001,7 @@ char* os::reserve_memory_special(size_t bytes, char* req_addr, bool exec) { // "exec" is passed in but not used. Creating the shared image for // the code cache doesn't have an SHM_X executable permission to check. - assert(UseLargePages, "only for large pages"); + assert(UseLargePages && UseSHM, "only for SHM large pages"); key_t key = IPC_PRIVATE; char *addr; @@ -2989,16 +3068,15 @@ return _large_page_size; } -// Linux does not support anonymous mmap with large page memory. The only way -// to reserve large page memory without file backing is through SysV shared -// memory API. The entire memory region is committed and pinned upfront. -// Hopefully this will change in the future... +// HugeTLBFS allows application to commit large page memory on demand; +// with SysV SHM the entire memory region must be allocated as shared +// memory. bool os::can_commit_large_page_memory() { - return false; + return UseHugeTLBFS; } bool os::can_execute_large_page_memory() { - return false; + return UseHugeTLBFS; } // Reserve memory at an arbitrary address, only if that area is @@ -4038,7 +4116,7 @@ #endif } - FLAG_SET_DEFAULT(UseLargePages, os::large_page_init()); + os::large_page_init(); // initialize suspend/resume support - must do this before signal_sets_init() if (SR_initialize() != 0) { @@ -4090,6 +4168,23 @@ UseNUMA = false; } } + // With SHM large pages we cannot uncommit a page, so there's no way + // we can make the adaptive lgrp chunk resizing work. If the user specified + // both UseNUMA and UseLargePages (or UseSHM) on the command line - warn and + // disable adaptive resizing. 
+ if (UseNUMA && UseLargePages && UseSHM) { + if (!FLAG_IS_DEFAULT(UseNUMA)) { + if (FLAG_IS_DEFAULT(UseLargePages) && FLAG_IS_DEFAULT(UseSHM)) { + UseLargePages = false; + } else { + warning("UseNUMA is not fully compatible with SHM large pages, disabling adaptive resizing"); + UseAdaptiveSizePolicy = false; + UseAdaptiveNUMAChunkSizing = false; + } + } else { + UseNUMA = false; + } + } if (!UseNUMA && ForceNUMA) { UseNUMA = true; }
--- a/hotspot/src/os/linux/vm/os_linux.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/os/linux/vm/os_linux.hpp Sat May 14 15:21:38 2011 -0700 @@ -86,6 +86,9 @@ static void rebuild_cpu_to_node_map(); static GrowableArray<int>* cpu_to_node() { return _cpu_to_node; } + + static bool hugetlbfs_sanity_check(bool warn, size_t page_size); + public: static void init_thread_fpu_state(); static int get_fpu_control_word();
--- a/hotspot/src/os/solaris/vm/os_solaris.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/os/solaris/vm/os_solaris.cpp Sat May 14 15:21:38 2011 -0700 @@ -2826,7 +2826,9 @@ void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { assert((intptr_t)addr % alignment_hint == 0, "Address should be aligned."); assert((intptr_t)(addr + bytes) % alignment_hint == 0, "End should be aligned."); - Solaris::set_mpss_range(addr, bytes, alignment_hint); + if (UseLargePages && UseMPSS) { + Solaris::set_mpss_range(addr, bytes, alignment_hint); + } } // Tell the OS to make the range local to the first-touching LWP @@ -3334,11 +3336,11 @@ return true; } -bool os::large_page_init() { +void os::large_page_init() { if (!UseLargePages) { UseISM = false; UseMPSS = false; - return false; + return; } // print a warning if any large page related flag is specified on command line @@ -3359,7 +3361,6 @@ Solaris::mpss_sanity_check(warn_on_failure, &_large_page_size); UseLargePages = UseISM || UseMPSS; - return UseLargePages; } bool os::Solaris::set_mpss_range(caddr_t start, size_t bytes, size_t align) { @@ -4990,7 +4991,7 @@ #endif } - FLAG_SET_DEFAULT(UseLargePages, os::large_page_init()); + os::large_page_init(); // Check minimum allowable stack size for thread creation and to initialize // the java system classes, including StackOverflowError - depends on page @@ -5044,6 +5045,20 @@ UseNUMA = false; } } + // ISM is not compatible with the NUMA allocator - it always allocates + // pages round-robin across the lgroups. + if (UseNUMA && UseLargePages && UseISM) { + if (!FLAG_IS_DEFAULT(UseNUMA)) { + if (FLAG_IS_DEFAULT(UseLargePages) && FLAG_IS_DEFAULT(UseISM)) { + UseLargePages = false; + } else { + warning("UseNUMA is not compatible with ISM large pages, disabling NUMA allocator"); + UseNUMA = false; + } + } else { + UseNUMA = false; + } + } if (!UseNUMA && ForceNUMA) { UseNUMA = true; }
--- a/hotspot/src/os/windows/vm/os_windows.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/os/windows/vm/os_windows.cpp Sat May 14 15:21:38 2011 -0700 @@ -2762,8 +2762,8 @@ _hToken = NULL; } -bool os::large_page_init() { - if (!UseLargePages) return false; +void os::large_page_init() { + if (!UseLargePages) return; // print a warning if any large page related flag is specified on command line bool warn_on_failure = !FLAG_IS_DEFAULT(UseLargePages) || @@ -2808,7 +2808,7 @@ } cleanup_after_large_page_init(); - return success; + UseLargePages = success; } // On win32, one cannot release just a part of reserved memory, it's an @@ -3561,7 +3561,7 @@ #endif } - FLAG_SET_DEFAULT(UseLargePages, os::large_page_init()); + os::large_page_init(); // Setup Windows Exceptions
--- a/hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp Sat May 14 15:21:38 2011 -0700 @@ -93,7 +93,7 @@ inline void OrderAccess::store_fence(jbyte* p, jbyte v) { __asm__ volatile ( "xchgb (%2),%0" - : "=r" (v) + : "=q" (v) : "0" (v), "r" (p) : "memory"); } @@ -155,7 +155,7 @@ // Must duplicate definitions instead of calling store_fence because we don't want to cast away volatile. inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { __asm__ volatile ( "xchgb (%2),%0" - : "=r" (v) + : "=q" (v) : "0" (v), "r" (p) : "memory"); }
--- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp Sat May 14 15:21:38 2011 -0700 @@ -1026,9 +1026,21 @@ // first replace the tail, then the call #ifdef ARM if(stub_id == Runtime1::load_klass_patching_id && !VM_Version::supports_movw()) { + nmethod* nm = CodeCache::find_nmethod(instr_pc); + oop* oop_addr = NULL; + assert(nm != NULL, "invalid nmethod_pc"); + RelocIterator oops(nm, copy_buff, copy_buff + 1); + while (oops.next()) { + if (oops.type() == relocInfo::oop_type) { + oop_Relocation* r = oops.oop_reloc(); + oop_addr = r->oop_addr(); + break; + } + } + assert(oop_addr != NULL, "oop relocation must exist"); copy_buff -= *byte_count; NativeMovConstReg* n_copy2 = nativeMovConstReg_at(copy_buff); - n_copy2->set_data((intx) (load_klass()), instr_pc); + n_copy2->set_pc_relative_offset((address)oop_addr, instr_pc); } #endif
--- a/hotspot/src/share/vm/ci/bcEscapeAnalyzer.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/ci/bcEscapeAnalyzer.cpp Sat May 14 15:21:38 2011 -0700 @@ -232,14 +232,7 @@ } // compute size of arguments - int arg_size = target->arg_size(); - if (code == Bytecodes::_invokedynamic) { - assert(!target->is_static(), "receiver explicit in method"); - arg_size--; // implicit, not really on stack - } - if (!target->is_loaded() && code == Bytecodes::_invokestatic) { - arg_size--; - } + int arg_size = target->invoke_arg_size(code); int arg_base = MAX2(state._stack_height - arg_size, 0); // direct recursive calls are skipped if they can be bound statically without introducing
--- a/hotspot/src/share/vm/ci/ciEnv.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/ci/ciEnv.cpp Sat May 14 15:21:38 2011 -0700 @@ -756,7 +756,7 @@ assert(bc == Bytecodes::_invokedynamic, "must be invokedynamic"); bool is_resolved = cpool->cache()->main_entry_at(index)->is_resolved(bc); - if (is_resolved && (oop) cpool->cache()->secondary_entry_at(index)->f1() == NULL) + if (is_resolved && cpool->cache()->secondary_entry_at(index)->is_f1_null()) // FIXME: code generation could allow for null (unlinked) call site is_resolved = false; @@ -770,7 +770,7 @@ // Get the invoker methodOop from the constant pool. oop f1_value = cpool->cache()->main_entry_at(index)->f1(); - methodOop signature_invoker = methodOop(f1_value); + methodOop signature_invoker = (methodOop) f1_value; assert(signature_invoker != NULL && signature_invoker->is_method() && signature_invoker->is_method_handle_invoke(), "correct result from LinkResolver::resolve_invokedynamic");
--- a/hotspot/src/share/vm/ci/ciMethod.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/ci/ciMethod.hpp Sat May 14 15:21:38 2011 -0700 @@ -127,7 +127,24 @@ ciSignature* signature() const { return _signature; } ciType* return_type() const { return _signature->return_type(); } int arg_size_no_receiver() const { return _signature->size(); } - int arg_size() const { return _signature->size() + (_flags.is_static() ? 0 : 1); } + // Can only be used on loaded ciMethods + int arg_size() const { + check_is_loaded(); + return _signature->size() + (_flags.is_static() ? 0 : 1); + } + // Report the number of elements on stack when invoking this method. + // This is different than the regular arg_size because invokedynamic + // has an implicit receiver. + int invoke_arg_size(Bytecodes::Code code) const { + int arg_size = _signature->size(); + // Add a receiver argument, maybe: + if (code != Bytecodes::_invokestatic && + code != Bytecodes::_invokedynamic) { + arg_size++; + } + return arg_size; + } + // Method code and related information. address code() { if (_code == NULL) load_code(); return _code; } @@ -276,9 +293,9 @@ void print_short_name(outputStream* st = tty); methodOop get_method_handle_target() { - klassOop receiver_limit_oop = NULL; - int flags = 0; - return MethodHandles::decode_method(get_oop(), receiver_limit_oop, flags); + KlassHandle receiver_limit; int flags = 0; + methodHandle m = MethodHandles::decode_method(get_oop(), receiver_limit, flags); + return m(); } };
--- a/hotspot/src/share/vm/ci/ciObject.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/ci/ciObject.cpp Sat May 14 15:21:38 2011 -0700 @@ -194,6 +194,16 @@ // ciObject::should_be_constant() bool ciObject::should_be_constant() { if (ScavengeRootsInCode >= 2) return true; // force everybody to be a constant + if (!JavaObjectsInPerm && !is_null_object()) { + // We want Strings and Classes to be embeddable by default since + // they used to be in the perm world. Not all Strings used to be + // embeddable but there's no easy way to distinguish the interned + // from the regular ones so just treat them all that way. + ciEnv* env = CURRENT_ENV; + if (klass() == env->String_klass() || klass() == env->Class_klass()) { + return true; + } + } return handle() == NULL || !is_scavengable(); }
--- a/hotspot/src/share/vm/classfile/javaClasses.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/classfile/javaClasses.cpp Sat May 14 15:21:38 2011 -0700 @@ -1357,7 +1357,7 @@ }; -void java_lang_Throwable::fill_in_stack_trace(Handle throwable, TRAPS) { +void java_lang_Throwable::fill_in_stack_trace(Handle throwable, methodHandle method, TRAPS) { if (!StackTraceInThrowable) return; ResourceMark rm(THREAD); @@ -1374,6 +1374,16 @@ JavaThread* thread = (JavaThread*)THREAD; BacktraceBuilder bt(CHECK); + // If there is no Java frame just return the method that was being called + // with bci 0 + if (!thread->has_last_Java_frame()) { + if (max_depth >= 1 && method() != NULL) { + bt.push(method(), 0, CHECK); + set_backtrace(throwable(), bt.backtrace()); + } + return; + } + // Instead of using vframe directly, this version of fill_in_stack_trace // basically handles everything by hand. This significantly improved the // speed of this method call up to 28.5% on Solaris sparc. 27.1% on Windows. @@ -1477,7 +1487,7 @@ set_backtrace(throwable(), bt.backtrace()); } -void java_lang_Throwable::fill_in_stack_trace(Handle throwable) { +void java_lang_Throwable::fill_in_stack_trace(Handle throwable, methodHandle method) { // No-op if stack trace is disabled if (!StackTraceInThrowable) { return; @@ -1491,7 +1501,7 @@ PRESERVE_EXCEPTION_MARK; JavaThread* thread = JavaThread::active(); - fill_in_stack_trace(throwable, thread); + fill_in_stack_trace(throwable, method, thread); // ignore exceptions thrown during stack trace filling CLEAR_PENDING_EXCEPTION; }
--- a/hotspot/src/share/vm/classfile/javaClasses.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/classfile/javaClasses.hpp Sat May 14 15:21:38 2011 -0700 @@ -440,8 +440,8 @@ static void fill_in_stack_trace_of_preallocated_backtrace(Handle throwable); // Fill in current stack trace, can cause GC - static void fill_in_stack_trace(Handle throwable, TRAPS); - static void fill_in_stack_trace(Handle throwable); + static void fill_in_stack_trace(Handle throwable, methodHandle method, TRAPS); + static void fill_in_stack_trace(Handle throwable, methodHandle method = methodHandle()); // Programmatic access to stack trace static oop get_stack_trace_element(oop throwable, int index, TRAPS); static int get_stack_trace_depth(oop throwable, TRAPS);
--- a/hotspot/src/share/vm/compiler/compileBroker.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/compiler/compileBroker.cpp Sat May 14 15:21:38 2011 -0700 @@ -976,6 +976,15 @@ return; } + // If the requesting thread is holding the pending list lock + // then we just return. We can't risk blocking while holding + // the pending list lock or a 3-way deadlock may occur + // between the reference handler thread, a GC (instigated + // by a compiler thread), and compiled method registration. + if (instanceRefKlass::owns_pending_list_lock(JavaThread::current())) { + return; + } + // Outputs from the following MutexLocker block: CompileTask* task = NULL; bool blocking = false; @@ -1304,17 +1313,8 @@ // Should the current thread be blocked until this compilation request // has been fulfilled? bool CompileBroker::is_compile_blocking(methodHandle method, int osr_bci) { - if (!BackgroundCompilation) { - Symbol* class_name = method->method_holder()->klass_part()->name(); - if (class_name->starts_with("java/lang/ref/Reference", 23)) { - // The reference handler thread can dead lock with the GC if compilation is blocking, - // so we avoid blocking compiles for anything in the java.lang.ref.Reference class, - // including inner classes such as ReferenceHandler. - return false; - } - return true; - } - return false; + assert(!instanceRefKlass::owns_pending_list_lock(JavaThread::current()), "possible deadlock"); + return !BackgroundCompilation; }
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Sat May 14 15:21:38 2011 -0700 @@ -1963,10 +1963,21 @@ // Iteration support, mostly delegated from a CMS generation void CompactibleFreeListSpace::save_marks() { - // mark the "end" of the used space at the time of this call; + assert(Thread::current()->is_VM_thread(), + "Global variable should only be set when single-threaded"); + // Mark the "end" of the used space at the time of this call; // note, however, that promoted objects from this point // on are tracked in the _promoInfo below. set_saved_mark_word(unallocated_block()); +#ifdef ASSERT + // Check the sanity of save_marks() etc. + MemRegion ur = used_region(); + MemRegion urasm = used_region_at_save_marks(); + assert(ur.contains(urasm), + err_msg(" Error at save_marks(): [" PTR_FORMAT "," PTR_FORMAT ")" + " should contain [" PTR_FORMAT "," PTR_FORMAT ")", + ur.start(), ur.end(), urasm.start(), urasm.end())); +#endif // inform allocator that promotions should be tracked. assert(_promoInfo.noPromotions(), "_promoInfo inconsistency"); _promoInfo.startTrackingPromotions();
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Sat May 14 15:21:38 2011 -0700 @@ -3189,10 +3189,9 @@ } void CMSCollector::setup_cms_unloading_and_verification_state() { - const bool should_verify = VerifyBeforeGC || VerifyAfterGC || VerifyDuringGC + const bool should_verify = VerifyBeforeGC || VerifyAfterGC || VerifyDuringGC || VerifyBeforeExit; - const int rso = SharedHeap::SO_Symbols | SharedHeap::SO_Strings - | SharedHeap::SO_CodeCache; + const int rso = SharedHeap::SO_Strings | SharedHeap::SO_CodeCache; if (should_unload_classes()) { // Should unload classes this cycle remove_root_scanning_option(rso); // Shrink the root set appropriately
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp Sat May 14 15:21:38 2011 -0700 @@ -826,6 +826,14 @@ void ConcurrentMark::checkpointRootsInitialPost() { G1CollectedHeap* g1h = G1CollectedHeap::heap(); + // If we force an overflow during remark, the remark operation will + // actually abort and we'll restart concurrent marking. If we always + // force an overflow during remark we'll never actually complete the + // marking phase. So, we initialize this here, at the start of the + // cycle, so that the remaining overflow number will decrease at + // every remark and we'll eventually not need to cause one. + force_overflow_stw()->init(); + // For each region note start of marking. NoteStartOfMarkHRClosure startcl; g1h->heap_region_iterate(&startcl); @@ -893,27 +901,37 @@ } /* - Notice that in the next two methods, we actually leave the STS - during the barrier sync and join it immediately afterwards. If we - do not do this, this then the following deadlock can occur: one - thread could be in the barrier sync code, waiting for the other - thread to also sync up, whereas another one could be trying to - yield, while also waiting for the other threads to sync up too. - - Because the thread that does the sync barrier has left the STS, it - is possible to be suspended for a Full GC or an evacuation pause - could occur. This is actually safe, since the entering the sync - barrier is one of the last things do_marking_step() does, and it - doesn't manipulate any data structures afterwards. -*/ + * Notice that in the next two methods, we actually leave the STS + * during the barrier sync and join it immediately afterwards. 
If we + * do not do this, the following deadlock can occur: one thread could + * be in the barrier sync code, waiting for the other thread to also + * sync up, whereas another one could be trying to yield, while also + * waiting for the other threads to sync up too. + * + * Note, however, that this code is also used during remark and in + * this case we should not attempt to leave / enter the STS, otherwise + * we'll either hit an assert (debug / fastdebug) or deadlock + * (product). So we should only leave / enter the STS if we are + * operating concurrently. + * + * Because the thread that does the sync barrier has left the STS, it + * is possible to be suspended for a Full GC or an evacuation pause + * could occur. This is actually safe, since the entering the sync + * barrier is one of the last things do_marking_step() does, and it + * doesn't manipulate any data structures afterwards. + */ void ConcurrentMark::enter_first_sync_barrier(int task_num) { if (verbose_low()) gclog_or_tty->print_cr("[%d] entering first barrier", task_num); - ConcurrentGCThread::stsLeave(); + if (concurrent()) { + ConcurrentGCThread::stsLeave(); + } _first_overflow_barrier_sync.enter(); - ConcurrentGCThread::stsJoin(); + if (concurrent()) { + ConcurrentGCThread::stsJoin(); + } // at this point everyone should have synced up and not be doing any // more work @@ -923,7 +941,12 @@ // let task 0 do this if (task_num == 0) { // task 0 is responsible for clearing the global data structures - clear_marking_state(); + // We should be here because of an overflow. During STW we should + // not clear the overflow flag since we rely on it being true when + // we exit this method to abort the pause and restart concurrent + // marking. 
+ clear_marking_state(concurrent() /* clear_overflow */); + force_overflow()->update(); if (PrintGC) { gclog_or_tty->date_stamp(PrintGCDateStamps); @@ -940,15 +963,45 @@ if (verbose_low()) gclog_or_tty->print_cr("[%d] entering second barrier", task_num); - ConcurrentGCThread::stsLeave(); + if (concurrent()) { + ConcurrentGCThread::stsLeave(); + } _second_overflow_barrier_sync.enter(); - ConcurrentGCThread::stsJoin(); + if (concurrent()) { + ConcurrentGCThread::stsJoin(); + } // at this point everything should be re-initialised and ready to go if (verbose_low()) gclog_or_tty->print_cr("[%d] leaving second barrier", task_num); } +#ifndef PRODUCT +void ForceOverflowSettings::init() { + _num_remaining = G1ConcMarkForceOverflow; + _force = false; + update(); +} + +void ForceOverflowSettings::update() { + if (_num_remaining > 0) { + _num_remaining -= 1; + _force = true; + } else { + _force = false; + } +} + +bool ForceOverflowSettings::should_force() { + if (_force) { + _force = false; + return true; + } else { + return false; + } +} +#endif // !PRODUCT + void ConcurrentMark::grayRoot(oop p) { HeapWord* addr = (HeapWord*) p; // We can't really check against _heap_start and _heap_end, since it @@ -1117,6 +1170,7 @@ _restart_for_overflow = false; size_t active_workers = MAX2((size_t) 1, parallel_marking_threads()); + force_overflow_conc()->init(); set_phase(active_workers, true /* concurrent */); CMConcurrentMarkingTask markingTask(this, cmThread()); @@ -1845,7 +1899,7 @@ while (!_cleanup_list.is_empty()) { HeapRegion* hr = _cleanup_list.remove_head(); assert(hr != NULL, "the list was not empty"); - hr->rem_set()->clear(); + hr->par_clear(); tmp_free_list.add_as_tail(hr); // Instead of adding one region at a time to the secondary_free_list, @@ -2703,12 +2757,16 @@ } -void ConcurrentMark::clear_marking_state() { +void ConcurrentMark::clear_marking_state(bool clear_overflow) { _markStack.setEmpty(); _markStack.clear_overflow(); _regionStack.setEmpty(); 
_regionStack.clear_overflow(); - clear_has_overflown(); + if (clear_overflow) { + clear_has_overflown(); + } else { + assert(has_overflown(), "pre-condition"); + } _finger = _heap_start; for (int i = 0; i < (int)_max_task_num; ++i) { @@ -4279,6 +4337,15 @@ } } + // If we are about to wrap up and go into termination, check if we + // should raise the overflow flag. + if (do_termination && !has_aborted()) { + if (_cm->force_overflow()->should_force()) { + _cm->set_has_overflown(); + regular_clock_call(); + } + } + // We still haven't aborted. Now, let's try to get into the // termination protocol. if (do_termination && !has_aborted()) {
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp Sat May 14 15:21:38 2011 -0700 @@ -316,6 +316,19 @@ void setEmpty() { _index = 0; clear_overflow(); } }; +class ForceOverflowSettings VALUE_OBJ_CLASS_SPEC { +private: +#ifndef PRODUCT + uintx _num_remaining; + bool _force; +#endif // !defined(PRODUCT) + +public: + void init() PRODUCT_RETURN; + void update() PRODUCT_RETURN; + bool should_force() PRODUCT_RETURN_( return false; ); +}; + // this will enable a variety of different statistics per GC task #define _MARKING_STATS_ 0 // this will enable the higher verbose levels @@ -462,6 +475,9 @@ WorkGang* _parallel_workers; + ForceOverflowSettings _force_overflow_conc; + ForceOverflowSettings _force_overflow_stw; + void weakRefsWork(bool clear_all_soft_refs); void swapMarkBitMaps(); @@ -470,7 +486,7 @@ // task local ones; should be called during initial mark. void reset(); // It resets all the marking data structures. - void clear_marking_state(); + void clear_marking_state(bool clear_overflow = true); // It should be called to indicate which phase we're in (concurrent // mark or remark) and how many threads are currently active. @@ -547,6 +563,22 @@ void enter_first_sync_barrier(int task_num); void enter_second_sync_barrier(int task_num); + ForceOverflowSettings* force_overflow_conc() { + return &_force_overflow_conc; + } + + ForceOverflowSettings* force_overflow_stw() { + return &_force_overflow_stw; + } + + ForceOverflowSettings* force_overflow() { + if (concurrent()) { + return force_overflow_conc(); + } else { + return force_overflow_stw(); + } + } + public: // Manipulation of the global mark stack. // Notice that the first mark_stack_push is CAS-based, whereas the
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Sat May 14 15:21:38 2011 -0700 @@ -1161,6 +1161,7 @@ TraceTime t(system_gc ? "Full GC (System.gc())" : "Full GC", PrintGC, true, gclog_or_tty); + TraceCollectorStats tcs(g1mm()->full_collection_counters()); TraceMemoryManagerStats tms(true /* fullGC */); double start = os::elapsedTime(); @@ -1339,6 +1340,7 @@ if (PrintHeapAtGC) { Universe::print_heap_after_gc(); } + g1mm()->update_counters(); return true; } @@ -1971,6 +1973,10 @@ init_mutator_alloc_region(); + // Do create of the monitoring and management support so that + // values in the heap have been properly initialized. + _g1mm = new G1MonitoringSupport(this, &_g1_storage); + return JNI_OK; } @@ -2113,6 +2119,28 @@ (cause == GCCause::_java_lang_system_gc && ExplicitGCInvokesConcurrent)); } +#ifndef PRODUCT +void G1CollectedHeap::allocate_dummy_regions() { + // Let's fill up most of the region + size_t word_size = HeapRegion::GrainWords - 1024; + // And as a result the region we'll allocate will be humongous. + guarantee(isHumongous(word_size), "sanity"); + + for (uintx i = 0; i < G1DummyRegionsPerGC; ++i) { + // Let's use the existing mechanism for the allocation + HeapWord* dummy_obj = humongous_obj_allocate(word_size); + if (dummy_obj != NULL) { + MemRegion mr(dummy_obj, word_size); + CollectedHeap::fill_with_object(mr); + } else { + // If we can't allocate once, we probably cannot allocate + // again. Let's get out of the loop. + break; + } + } +} +#endif // !PRODUCT + void G1CollectedHeap::increment_full_collections_completed(bool concurrent) { MonitorLockerEx x(FullGCCount_lock, Mutex::_no_safepoint_check_flag); @@ -2777,17 +2805,26 @@ bool silent, bool use_prev_marking) { if (SafepointSynchronize::is_at_safepoint() || ! 
UseTLAB) { - if (!silent) { gclog_or_tty->print("roots "); } + if (!silent) { gclog_or_tty->print("Roots (excluding permgen) "); } VerifyRootsClosure rootsCl(use_prev_marking); CodeBlobToOopClosure blobsCl(&rootsCl, /*do_marking=*/ false); - process_strong_roots(true, // activate StrongRootsScope - false, - SharedHeap::SO_AllClasses, + // We apply the relevant closures to all the oops in the + // system dictionary, the string table and the code cache. + const int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings | SharedHeap::SO_CodeCache; + process_strong_roots(true, // activate StrongRootsScope + true, // we set "collecting perm gen" to true, + // so we don't reset the dirty cards in the perm gen. + SharedHeap::ScanningOption(so), // roots scanning options &rootsCl, &blobsCl, &rootsCl); + // Since we used "collecting_perm_gen" == true above, we will not have + // checked the refs from perm into the G1-collected heap. We check those + // references explicitly below. Whether the relevant cards are dirty + // is checked further below in the rem set verification. 
+ if (!silent) { gclog_or_tty->print("Permgen roots "); } + perm_gen()->oop_iterate(&rootsCl); bool failures = rootsCl.failures(); - rem_set()->invalidate(perm_gen()->used_region(), false); if (!silent) { gclog_or_tty->print("HeapRegionSets "); } verify_region_sets(); if (!silent) { gclog_or_tty->print("HeapRegions "); } @@ -3164,6 +3201,7 @@ TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); TraceTime t(verbose_str, PrintGC && !PrintGCDetails, true, gclog_or_tty); + TraceCollectorStats tcs(g1mm()->incremental_collection_counters()); TraceMemoryManagerStats tms(false /* fullGC */); // If the secondary_free_list is not empty, append it to the @@ -3338,6 +3376,8 @@ doConcurrentMark(); } + allocate_dummy_regions(); + #if YOUNG_LIST_VERBOSE gclog_or_tty->print_cr("\nEnd of the pause.\nYoung_list:"); _young_list->print(); @@ -3401,6 +3441,8 @@ if (PrintHeapAtGC) { Universe::print_heap_after_gc(); } + g1mm()->update_counters(); + if (G1SummarizeRSetStats && (G1SummarizeRSetStatsPeriod > 0) && (total_collections() % G1SummarizeRSetStatsPeriod == 0)) { @@ -3933,6 +3975,9 @@ oop G1CollectedHeap::handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, oop old) { + assert(obj_in_cs(old), + err_msg("obj: "PTR_FORMAT" should still be in the CSet", + (HeapWord*) old)); markOop m = old->mark(); oop forward_ptr = old->forward_to_atomic(old); if (forward_ptr == NULL) { @@ -3955,7 +4000,13 @@ } return old; } else { - // Someone else had a place to copy it. + // Forward-to-self failed. Either someone else managed to allocate + // space for this object (old != forward_ptr) or they beat us in + // self-forwarding it (old == forward_ptr). 
+ assert(old == forward_ptr || !obj_in_cs(forward_ptr), + err_msg("obj: "PTR_FORMAT" forwarded to: "PTR_FORMAT" " + "should not be in the CSet", + (HeapWord*) old, (HeapWord*) forward_ptr)); return forward_ptr; } } @@ -4266,11 +4317,10 @@ T heap_oop = oopDesc::load_heap_oop(p); if (!oopDesc::is_null(heap_oop)) { oop obj = oopDesc::decode_heap_oop(heap_oop); - assert((_g1->evacuation_failed()) || (!_g1->obj_in_cs(obj)), - "shouldn't still be in the CSet if evacuation didn't fail."); HeapWord* addr = (HeapWord*)obj; - if (_g1->is_in_g1_reserved(addr)) + if (_g1->is_in_g1_reserved(addr)) { _cm->grayRoot(oop(addr)); + } } } @@ -4919,36 +4969,45 @@ #ifndef PRODUCT class G1VerifyCardTableCleanup: public HeapRegionClosure { + G1CollectedHeap* _g1h; CardTableModRefBS* _ct_bs; public: - G1VerifyCardTableCleanup(CardTableModRefBS* ct_bs) - : _ct_bs(ct_bs) { } + G1VerifyCardTableCleanup(G1CollectedHeap* g1h, CardTableModRefBS* ct_bs) + : _g1h(g1h), _ct_bs(ct_bs) { } virtual bool doHeapRegion(HeapRegion* r) { - MemRegion mr(r->bottom(), r->end()); if (r->is_survivor()) { - _ct_bs->verify_dirty_region(mr); + _g1h->verify_dirty_region(r); } else { - _ct_bs->verify_clean_region(mr); + _g1h->verify_not_dirty_region(r); } return false; } }; +void G1CollectedHeap::verify_not_dirty_region(HeapRegion* hr) { + // All of the region should be clean. + CardTableModRefBS* ct_bs = (CardTableModRefBS*)barrier_set(); + MemRegion mr(hr->bottom(), hr->end()); + ct_bs->verify_not_dirty_region(mr); +} + +void G1CollectedHeap::verify_dirty_region(HeapRegion* hr) { + // We cannot guarantee that [bottom(),end()] is dirty. Threads + // dirty allocated blocks as they allocate them. The thread that + // retires each region and replaces it with a new one will do a + // maximal allocation to fill in [pre_dummy_top(),end()] but will + // not dirty that area (one less thing to have to do while holding + // a lock). So we can only verify that [bottom(),pre_dummy_top()] + // is dirty. 
+ CardTableModRefBS* ct_bs = (CardTableModRefBS*) barrier_set(); + MemRegion mr(hr->bottom(), hr->pre_dummy_top()); + ct_bs->verify_dirty_region(mr); +} + void G1CollectedHeap::verify_dirty_young_list(HeapRegion* head) { - CardTableModRefBS* ct_bs = (CardTableModRefBS*) (barrier_set()); + CardTableModRefBS* ct_bs = (CardTableModRefBS*) barrier_set(); for (HeapRegion* hr = head; hr != NULL; hr = hr->get_next_young_region()) { - // We cannot guarantee that [bottom(),end()] is dirty. Threads - // dirty allocated blocks as they allocate them. The thread that - // retires each region and replaces it with a new one will do a - // maximal allocation to fill in [pre_dummy_top(),end()] but will - // not dirty that area (one less thing to have to do while holding - // a lock). So we can only verify that [bottom(),pre_dummy_top()] - // is dirty. Also note that verify_dirty_region() requires - // mr.start() and mr.end() to be card aligned and pre_dummy_top() - // is not guaranteed to be. - MemRegion mr(hr->bottom(), - ct_bs->align_to_card_boundary(hr->pre_dummy_top())); - ct_bs->verify_dirty_region(mr); + verify_dirty_region(hr); } } @@ -4991,7 +5050,7 @@ g1_policy()->record_clear_ct_time( elapsed * 1000.0); #ifndef PRODUCT if (G1VerifyCTCleanup || VerifyAfterGC) { - G1VerifyCardTableCleanup cleanup_verifier(ct_bs); + G1VerifyCardTableCleanup cleanup_verifier(this, ct_bs); heap_region_iterate(&cleanup_verifier); } #endif @@ -5314,6 +5373,7 @@ if (new_alloc_region != NULL) { g1_policy()->update_region_num(true /* next_is_young */); set_region_short_lived_locked(new_alloc_region); + g1mm()->update_eden_counters(); return new_alloc_region; } }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Sat May 14 15:21:38 2011 -0700 @@ -28,7 +28,9 @@ #include "gc_implementation/g1/concurrentMark.hpp" #include "gc_implementation/g1/g1AllocRegion.hpp" #include "gc_implementation/g1/g1RemSet.hpp" +#include "gc_implementation/g1/g1MonitoringSupport.hpp" #include "gc_implementation/g1/heapRegionSets.hpp" +#include "gc_implementation/shared/hSpaceCounters.hpp" #include "gc_implementation/parNew/parGCAllocBuffer.hpp" #include "memory/barrierSet.hpp" #include "memory/memRegion.hpp" @@ -57,6 +59,7 @@ class ConcurrentMark; class ConcurrentMarkThread; class ConcurrentG1Refine; +class GenerationCounters; typedef OverflowTaskQueue<StarTask> RefToScanQueue; typedef GenericTaskQueueSet<RefToScanQueue> RefToScanQueueSet; @@ -236,6 +239,9 @@ // current collection. HeapRegion* _gc_alloc_region_list; + // Helper for monitoring and management support. + G1MonitoringSupport* _g1mm; + // Determines PLAB size for a particular allocation purpose. static size_t desired_plab_sz(GCAllocPurpose purpose); @@ -298,6 +304,14 @@ // started is maintained in _total_full_collections in CollectedHeap. volatile unsigned int _full_collections_completed; + // This is a non-product method that is helpful for testing. It is + // called at the end of a GC and artificially expands the heap by + // allocating a number of dead regions. This way we can induce very + // frequent marking cycles and stress the cleanup / concurrent + // cleanup code more (as all the regions that will be allocated by + // this method will be found dead by the marking cycle). + void allocate_dummy_regions() PRODUCT_RETURN; + // These are macros so that, if the assert fires, we get the correct // line number, file, etc. 
@@ -542,6 +556,9 @@ HeapWord* expand_and_allocate(size_t word_size); public: + + G1MonitoringSupport* g1mm() { return _g1mm; } + // Expand the garbage-first heap by at least the given size (in bytes!). // Returns true if the heap was expanded by the requested amount; // false otherwise. @@ -953,6 +970,8 @@ // The number of regions available for "regular" expansion. size_t expansion_regions() { return _expansion_regions; } + void verify_not_dirty_region(HeapRegion* hr) PRODUCT_RETURN; + void verify_dirty_region(HeapRegion* hr) PRODUCT_RETURN; void verify_dirty_young_list(HeapRegion* head) PRODUCT_RETURN; void verify_dirty_young_regions() PRODUCT_RETURN;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1MonitoringSupport.cpp Sat May 14 15:21:38 2011 -0700 @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc_implementation/g1/g1MonitoringSupport.hpp" +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1CollectorPolicy.hpp" + +G1MonitoringSupport::G1MonitoringSupport(G1CollectedHeap* g1h, + VirtualSpace* g1_storage_addr) : + _g1h(g1h), + _incremental_collection_counters(NULL), + _full_collection_counters(NULL), + _non_young_collection_counters(NULL), + _old_space_counters(NULL), + _young_collection_counters(NULL), + _eden_counters(NULL), + _from_counters(NULL), + _to_counters(NULL), + _g1_storage_addr(g1_storage_addr) +{ + // Counters for GC collections + // + // name "collector.0". In a generational collector this would be the + // young generation collection. 
+ _incremental_collection_counters = + new CollectorCounters("G1 incremental collections", 0); + // name "collector.1". In a generational collector this would be the + // old generation collection. + _full_collection_counters = + new CollectorCounters("G1 stop-the-world full collections", 1); + + // timer sampling for all counters supporting sampling only update the + // used value. See the take_sample() method. G1 requires both used and + // capacity updated so sampling is not currently used. It might + // be sufficient to update all counters in take_sample() even though + // take_sample() only returns "used". When sampling was used, there + // were some anomalous values emitted which may have been the consequence + // of not updating all values simultaneously (i.e., see the calculation done + // in eden_space_used(), is it possible that the values used to + // calculate either eden_used or survivor_used are being updated by + // the collector when the sample is being done?). + const bool sampled = false; + + // "Generation" and "Space" counters. + // + // name "generation.1" This is logically the old generation in + // generational GC terms. The "1, 1" parameters are for + // the n-th generation (=1) with 1 space. + // Counters are created from minCapacity, maxCapacity, and capacity + _non_young_collection_counters = + new GenerationCounters("whole heap", 1, 1, _g1_storage_addr); + + // name "generation.1.space.0" + // Counters are created from maxCapacity, capacity, initCapacity, + // and used. + _old_space_counters = new HSpaceCounters("space", 0, + _g1h->max_capacity(), _g1h->capacity(), _non_young_collection_counters); + + // Young collection set + // name "generation.0". This is logically the young generation. + // The "0, 3" are parameters for the n-th generation (=0) with 3 spaces. 
+ // See _non_young_collection_counters for additional counters + _young_collection_counters = new GenerationCounters("young", 0, 3, NULL); + + // Replace "max_heap_byte_size() with maximum young gen size for + // g1Collectedheap + // name "generation.0.space.0" + // See _old_space_counters for additional counters + _eden_counters = new HSpaceCounters("eden", 0, + _g1h->max_capacity(), eden_space_committed(), + _young_collection_counters); + + // name "generation.0.space.1" + // See _old_space_counters for additional counters + // Set the arguments to indicate that this survivor space is not used. + _from_counters = new HSpaceCounters("s0", 1, (long) 0, (long) 0, + _young_collection_counters); + + // name "generation.0.space.2" + // See _old_space_counters for additional counters + _to_counters = new HSpaceCounters("s1", 2, + _g1h->max_capacity(), + survivor_space_committed(), + _young_collection_counters); +} + +size_t G1MonitoringSupport::overall_committed() { + return g1h()->capacity(); +} + +size_t G1MonitoringSupport::overall_used() { + return g1h()->used_unlocked(); +} + +size_t G1MonitoringSupport::eden_space_committed() { + return MAX2(eden_space_used(), (size_t) HeapRegion::GrainBytes); +} + +size_t G1MonitoringSupport::eden_space_used() { + size_t young_list_length = g1h()->young_list()->length(); + size_t eden_used = young_list_length * HeapRegion::GrainBytes; + size_t survivor_used = survivor_space_used(); + eden_used = subtract_up_to_zero(eden_used, survivor_used); + return eden_used; +} + +size_t G1MonitoringSupport::survivor_space_committed() { + return MAX2(survivor_space_used(), + (size_t) HeapRegion::GrainBytes); +} + +size_t G1MonitoringSupport::survivor_space_used() { + size_t survivor_num = g1h()->g1_policy()->recorded_survivor_regions(); + size_t survivor_used = survivor_num * HeapRegion::GrainBytes; + return survivor_used; +} + +size_t G1MonitoringSupport::old_space_committed() { + size_t committed = overall_committed(); + size_t 
eden_committed = eden_space_committed(); + size_t survivor_committed = survivor_space_committed(); + committed = subtract_up_to_zero(committed, eden_committed); + committed = subtract_up_to_zero(committed, survivor_committed); + committed = MAX2(committed, (size_t) HeapRegion::GrainBytes); + return committed; +} + +// See the comment near the top of g1MonitoringSupport.hpp for +// an explanation of these calculations for "used" and "capacity". +size_t G1MonitoringSupport::old_space_used() { + size_t used = overall_used(); + size_t eden_used = eden_space_used(); + size_t survivor_used = survivor_space_used(); + used = subtract_up_to_zero(used, eden_used); + used = subtract_up_to_zero(used, survivor_used); + return used; +} + +void G1MonitoringSupport::update_counters() { + if (UsePerfData) { + eden_counters()->update_capacity(eden_space_committed()); + eden_counters()->update_used(eden_space_used()); + to_counters()->update_capacity(survivor_space_committed()); + to_counters()->update_used(survivor_space_used()); + old_space_counters()->update_capacity(old_space_committed()); + old_space_counters()->update_used(old_space_used()); + non_young_collection_counters()->update_all(); + } +} + +void G1MonitoringSupport::update_eden_counters() { + if (UsePerfData) { + eden_counters()->update_capacity(eden_space_committed()); + eden_counters()->update_used(eden_space_used()); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1MonitoringSupport.hpp Sat May 14 15:21:38 2011 -0700 @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1MONITORINGSUPPORT_HPP +#define SHARE_VM_GC_IMPLEMENTATION_G1_G1MONITORINGSUPPORT_HPP + +#include "gc_implementation/shared/hSpaceCounters.hpp" + +class G1CollectedHeap; +class G1SpaceMonitoringSupport; + +// Class for monitoring logical spaces in G1. +// G1 defines a set of regions as a young +// collection (analogous to a young generation). +// The young collection is a logical generation +// with no fixed chunk (see space.hpp) reflecting +// the address space for the generation. In addition +// to the young collection there is its complement +// the non-young collection that is simply the regions +// not in the young collection. 
The non-young collection +// is treated here as a logical old generation only +// because the monitoring tools expect a generational +// heap. The monitoring tools expect that a Space +// (see space.hpp) exists that describe the +// address space of young collection and non-young +// collection and such a view is provided here. +// +// This class provides interfaces to access +// the value of variables for the young collection +// that include the "capacity" and "used" of the +// young collection along with constant values +// for the minimum and maximum capacities for +// the logical spaces. Similarly for the non-young +// collection. +// +// Also provided are counters for G1 concurrent collections +// and stop-the-world full heap collections. +// +// Below is a description of how "used" and "capacity" +// (or committed) is calculated for the logical spaces. +// +// 1) The used space calculation for a pool is not necessarily +// independent of the others. We can easily get from G1 the overall +// used space in the entire heap, the number of regions in the young +// generation (includes both eden and survivors), and the number of +// survivor regions. So, from that we calculate: +// +// survivor_used = survivor_num * region_size +// eden_used = young_region_num * region_size - survivor_used +// old_gen_used = overall_used - eden_used - survivor_used +// +// Note that survivor_used and eden_used are upper bounds. To get the +// actual value we would have to iterate over the regions and add up +// ->used(). But that'd be expensive. So, we'll accept some lack of +// accuracy for those two. But, we have to be careful when calculating +// old_gen_used, in case we subtract from overall_used more than the +// actual number and our result goes negative. +// +// 2) Calculating the used space is straightforward, as described +// above. 
However, how do we calculate the committed space, given that +// we allocate space for the eden, survivor, and old gen out of the +// same pool of regions? One way to do this is to use the used value +// as also the committed value for the eden and survivor spaces and +// then calculate the old gen committed space as follows: +// +// old_gen_committed = overall_committed - eden_committed - survivor_committed +// +// Maybe a better way to do that would be to calculate used for eden +// and survivor as a sum of ->used() over their regions and then +// calculate committed as region_num * region_size (i.e., what we use +// to calculate the used space now). This is something to consider +// in the future. +// +// 3) Another decision that is again not straightforward is what is +// the max size that each memory pool can grow to. One way to do this +// would be to use the committed size for the max for the eden and +// survivors and calculate the old gen max as follows (basically, it's +// a similar pattern to what we use for the committed space, as +// described above): +// +// old_gen_max = overall_max - eden_max - survivor_max +// +// Unfortunately, the above makes the max of each pool fluctuate over +// time and, even though this is allowed according to the spec, it +// broke several assumptions in the M&M framework (there were cases +// where used would reach a value greater than max). So, for max we +// use -1, which means "undefined" according to the spec. +// +// 4) Now, there is a very subtle issue with all the above. The +// framework will call get_memory_usage() on the three pools +// asynchronously. As a result, each call might get a different value +// for, say, survivor_num which will yield inconsistent values for +// eden_used, survivor_used, and old_gen_used (as survivor_num is used +// in the calculation of all three). This would normally be +// ok. 
However, it's possible that this might cause the sum of +// eden_used, survivor_used, and old_gen_used to go over the max heap +// size and this seems to sometimes cause JConsole (and maybe other +// clients) to get confused. There's not really an easy / clean +// solution to this problem, due to the asynchronous nature of the +// framework. + +class G1MonitoringSupport : public CHeapObj { + G1CollectedHeap* _g1h; + VirtualSpace* _g1_storage_addr; + + // jstat performance counters + // incremental collections both fully and partially young + CollectorCounters* _incremental_collection_counters; + // full stop-the-world collections + CollectorCounters* _full_collection_counters; + // young collection set counters. The _eden_counters, + // _from_counters, and _to_counters are associated with + // this "generational" counter. + GenerationCounters* _young_collection_counters; + // non-young collection set counters. The _old_space_counters + // below are associated with this "generational" counter. + GenerationCounters* _non_young_collection_counters; + // Counters for the capacity and used for + // the whole heap + HSpaceCounters* _old_space_counters; + // the young collection + HSpaceCounters* _eden_counters; + // the survivor collection (only one, _to_counters, is actively used) + HSpaceCounters* _from_counters; + HSpaceCounters* _to_counters; + + // It returns x - y if x > y, 0 otherwise. + // As described in the comment above, some of the inputs to the + // calculations we have to do are obtained concurrently and hence + // may be inconsistent with each other. So, this provides a + // defensive way of performing the subtraction and avoids the value + // going negative (which would mean a very large result, given that + // the parameters are size_t). 
+ static size_t subtract_up_to_zero(size_t x, size_t y) { + if (x > y) { + return x - y; + } else { + return 0; + } + } + + public: + G1MonitoringSupport(G1CollectedHeap* g1h, VirtualSpace* g1_storage_addr); + + G1CollectedHeap* g1h() { return _g1h; } + VirtualSpace* g1_storage_addr() { return _g1_storage_addr; } + + // Performance Counter accessors + void update_counters(); + void update_eden_counters(); + + CollectorCounters* incremental_collection_counters() { + return _incremental_collection_counters; + } + CollectorCounters* full_collection_counters() { + return _full_collection_counters; + } + GenerationCounters* non_young_collection_counters() { + return _non_young_collection_counters; + } + HSpaceCounters* old_space_counters() { return _old_space_counters; } + HSpaceCounters* eden_counters() { return _eden_counters; } + HSpaceCounters* from_counters() { return _from_counters; } + HSpaceCounters* to_counters() { return _to_counters; } + + // Monitoring support used by + // MemoryService + // jstat counters + size_t overall_committed(); + size_t overall_used(); + + size_t eden_space_committed(); + size_t eden_space_used(); + + size_t survivor_space_committed(); + size_t survivor_space_used(); + + size_t old_space_committed(); + size_t old_space_used(); +}; + +#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1MONITORINGSUPPORT_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp Sat May 14 15:21:38 2011 -0700 @@ -157,7 +157,6 @@ void set_try_claimed() { _try_claimed = true; } void scanCard(size_t index, HeapRegion *r) { - _cards_done++; DirtyCardToOopClosure* cl = r->new_dcto_closure(_oc, CardTableModRefBS::Precise, @@ -168,17 +167,14 @@ HeapWord* card_start = _bot_shared->address_for_index(index); HeapWord* card_end = card_start + G1BlockOffsetSharedArray::N_words; Space *sp = SharedHeap::heap()->space_containing(card_start); - MemRegion sm_region; - if (ParallelGCThreads > 0) { - // first find the used area - sm_region = sp->used_region_at_save_marks(); - } else { - // The closure is not idempotent. We shouldn't look at objects - // allocated during the GC. - sm_region = sp->used_region_at_save_marks(); - } + MemRegion sm_region = sp->used_region_at_save_marks(); MemRegion mr = sm_region.intersection(MemRegion(card_start,card_end)); - if (!mr.is_empty()) { + if (!mr.is_empty() && !_ct_bs->is_card_claimed(index)) { + // We make the card as "claimed" lazily (so races are possible + // but they're benign), which reduces the number of duplicate + // scans (the rsets of the regions in the cset can intersect). + _ct_bs->set_card_claimed(index); + _cards_done++; cl->do_MemRegion(mr); } } @@ -199,6 +195,9 @@ HeapRegionRemSet* hrrs = r->rem_set(); if (hrrs->iter_is_complete()) return false; // All done. if (!_try_claimed && !hrrs->claim_iter()) return false; + // If we ever free the collection set concurrently, we should also + // clear the card table concurrently therefore we won't need to + // add regions of the collection set to the dirty cards region. _g1h->push_dirty_cards_region(r); // If we didn't return above, then // _try_claimed || r->claim_iter() @@ -230,15 +229,10 @@ _g1h->push_dirty_cards_region(card_region); } - // If the card is dirty, then we will scan it during updateRS. 
- if (!card_region->in_collection_set() && !_ct_bs->is_card_dirty(card_index)) { - // We make the card as "claimed" lazily (so races are possible but they're benign), - // which reduces the number of duplicate scans (the rsets of the regions in the cset - // can intersect). - if (!_ct_bs->is_card_claimed(card_index)) { - _ct_bs->set_card_claimed(card_index); - scanCard(card_index, card_region); - } + // If the card is dirty, then we will scan it during updateRS. + if (!card_region->in_collection_set() && + !_ct_bs->is_card_dirty(card_index)) { + scanCard(card_index, card_region); } } if (!_try_claimed) { @@ -246,8 +240,6 @@ } return false; } - // Set all cards back to clean. - void cleanup() {_g1h->cleanUpCardTable();} size_t cards_done() { return _cards_done;} size_t cards_looked_up() { return _cards;} }; @@ -566,8 +558,9 @@ update_rs_cl.set_region(r); HeapWord* stop_point = r->oops_on_card_seq_iterate_careful(scanRegion, - &filter_then_update_rs_cset_oop_cl, - false /* filter_young */); + &filter_then_update_rs_cset_oop_cl, + false /* filter_young */, + NULL /* card_ptr */); // Since this is performed in the event of an evacuation failure, we // we shouldn't see a non-null stop point @@ -735,12 +728,6 @@ (OopClosure*)&mux : (OopClosure*)&update_rs_oop_cl)); - // Undirty the card. - *card_ptr = CardTableModRefBS::clean_card_val(); - // We must complete this write before we do any of the reads below. - OrderAccess::storeload(); - // And process it, being careful of unallocated portions of TLAB's. - // The region for the current card may be a young region. The // current card may have been a card that was evicted from the // card cache. When the card was inserted into the cache, we had @@ -749,7 +736,7 @@ // and tagged as young. 
// // We wish to filter out cards for such a region but the current - // thread, if we're running conucrrently, may "see" the young type + // thread, if we're running concurrently, may "see" the young type // change at any time (so an earlier "is_young" check may pass or // fail arbitrarily). We tell the iteration code to perform this // filtering when it has been determined that there has been an actual @@ -759,7 +746,8 @@ HeapWord* stop_point = r->oops_on_card_seq_iterate_careful(dirtyRegion, &filter_then_update_rs_oop_cl, - filter_young); + filter_young, + card_ptr); // If stop_point is non-null, then we encountered an unallocated region // (perhaps the unfilled portion of a TLAB.) For now, we'll dirty the
--- a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp Sat May 14 15:21:38 2011 -0700 @@ -300,13 +300,22 @@ develop(uintx, G1StressConcRegionFreeingDelayMillis, 0, \ "Artificial delay during concurrent region freeing") \ \ + develop(uintx, G1DummyRegionsPerGC, 0, \ + "The number of dummy regions G1 will allocate at the end of " \ + "each evacuation pause in order to artificially fill up the " \ + "heap and stress the marking implementation.") \ + \ develop(bool, ReduceInitialCardMarksForG1, false, \ "When ReduceInitialCardMarks is true, this flag setting " \ " controls whether G1 allows the RICM optimization") \ \ develop(bool, G1ExitOnExpansionFailure, false, \ "Raise a fatal VM exit out of memory failure in the event " \ - " that heap expansion fails due to running out of swap.") + " that heap expansion fails due to running out of swap.") \ + \ + develop(uintx, G1ConcMarkForceOverflow, 0, \ + "The number of times we'll force an overflow during " \ + "concurrent marking") G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp Sat May 14 15:21:38 2011 -0700 @@ -376,6 +376,17 @@ if (clear_space) clear(SpaceDecorator::Mangle); } +void HeapRegion::par_clear() { + assert(used() == 0, "the region should have been already cleared"); + assert(capacity() == (size_t) HeapRegion::GrainBytes, + "should be back to normal"); + HeapRegionRemSet* hrrs = rem_set(); + hrrs->clear(); + CardTableModRefBS* ct_bs = + (CardTableModRefBS*)G1CollectedHeap::heap()->barrier_set(); + ct_bs->clear(MemRegion(bottom(), end())); +} + // <PREDICTION> void HeapRegion::calc_gc_efficiency() { G1CollectedHeap* g1h = G1CollectedHeap::heap(); @@ -600,7 +611,15 @@ HeapRegion:: oops_on_card_seq_iterate_careful(MemRegion mr, FilterOutOfRegionClosure* cl, - bool filter_young) { + bool filter_young, + jbyte* card_ptr) { + // Currently, we should only have to clean the card if filter_young + // is true and vice versa. + if (filter_young) { + assert(card_ptr != NULL, "pre-condition"); + } else { + assert(card_ptr == NULL, "pre-condition"); + } G1CollectedHeap* g1h = G1CollectedHeap::heap(); // If we're within a stop-world GC, then we might look at a card in a @@ -626,6 +645,15 @@ assert(!is_young(), "check value of filter_young"); + // We can only clean the card here, after we make the decision that + // the card is not young. And we only clean the card if we have been + // asked to (i.e., card_ptr != NULL). + if (card_ptr != NULL) { + *card_ptr = CardTableModRefBS::clean_card_val(); + // We must complete this write before we do any of the reads below. + OrderAccess::storeload(); + } + // We used to use "block_start_careful" here. But we're actually happy // to update the BOT while we do this... HeapWord* cur = block_start(mr.start());
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp Sat May 14 15:21:38 2011 -0700 @@ -584,6 +584,7 @@ // Reset HR stuff to default values. void hr_clear(bool par, bool clear_space); + void par_clear(); void initialize(MemRegion mr, bool clear_space, bool mangle_space); @@ -802,12 +803,16 @@ HeapWord* object_iterate_mem_careful(MemRegion mr, ObjectClosure* cl); - // In this version - if filter_young is true and the region - // is a young region then we skip the iteration. + // filter_young: if true and the region is a young region then we + // skip the iteration. + // card_ptr: if not NULL, and we decide that the card is not young + // and we iterate over it, we'll clean the card before we start the + // iteration. HeapWord* oops_on_card_seq_iterate_careful(MemRegion mr, FilterOutOfRegionClosure* cl, - bool filter_young); + bool filter_young, + jbyte* card_ptr); // A version of block start that is guaranteed to find *some* block // boundary at or before "p", but does not object iteration, and may
--- a/hotspot/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp Sat May 14 15:21:38 2011 -0700 @@ -33,44 +33,43 @@ #include "runtime/mutexLocker.hpp" #include "runtime/virtualspace.hpp" -void CardTableModRefBS::par_non_clean_card_iterate_work(Space* sp, MemRegion mr, - DirtyCardToOopClosure* dcto_cl, - MemRegionClosure* cl, - int n_threads) { - if (n_threads > 0) { - assert((n_threads == 1 && ParallelGCThreads == 0) || - n_threads <= (int)ParallelGCThreads, - "# worker threads != # requested!"); - // Make sure the LNC array is valid for the space. - jbyte** lowest_non_clean; - uintptr_t lowest_non_clean_base_chunk_index; - size_t lowest_non_clean_chunk_size; - get_LNC_array_for_space(sp, lowest_non_clean, - lowest_non_clean_base_chunk_index, - lowest_non_clean_chunk_size); +void CardTableModRefBS::non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr, + DirtyCardToOopClosure* dcto_cl, + ClearNoncleanCardWrapper* cl, + int n_threads) { + assert(n_threads > 0, "Error: expected n_threads > 0"); + assert((n_threads == 1 && ParallelGCThreads == 0) || + n_threads <= (int)ParallelGCThreads, + "# worker threads != # requested!"); + // Make sure the LNC array is valid for the space. 
+ jbyte** lowest_non_clean; + uintptr_t lowest_non_clean_base_chunk_index; + size_t lowest_non_clean_chunk_size; + get_LNC_array_for_space(sp, lowest_non_clean, + lowest_non_clean_base_chunk_index, + lowest_non_clean_chunk_size); - int n_strides = n_threads * StridesPerThread; - SequentialSubTasksDone* pst = sp->par_seq_tasks(); - pst->set_n_threads(n_threads); - pst->set_n_tasks(n_strides); + int n_strides = n_threads * StridesPerThread; + SequentialSubTasksDone* pst = sp->par_seq_tasks(); + pst->set_n_threads(n_threads); + pst->set_n_tasks(n_strides); - int stride = 0; - while (!pst->is_task_claimed(/* reference */ stride)) { - process_stride(sp, mr, stride, n_strides, dcto_cl, cl, - lowest_non_clean, - lowest_non_clean_base_chunk_index, - lowest_non_clean_chunk_size); - } - if (pst->all_tasks_completed()) { - // Clear lowest_non_clean array for next time. - intptr_t first_chunk_index = addr_to_chunk_index(mr.start()); - uintptr_t last_chunk_index = addr_to_chunk_index(mr.last()); - for (uintptr_t ch = first_chunk_index; ch <= last_chunk_index; ch++) { - intptr_t ind = ch - lowest_non_clean_base_chunk_index; - assert(0 <= ind && ind < (intptr_t)lowest_non_clean_chunk_size, - "Bounds error"); - lowest_non_clean[ind] = NULL; - } + int stride = 0; + while (!pst->is_task_claimed(/* reference */ stride)) { + process_stride(sp, mr, stride, n_strides, dcto_cl, cl, + lowest_non_clean, + lowest_non_clean_base_chunk_index, + lowest_non_clean_chunk_size); + } + if (pst->all_tasks_completed()) { + // Clear lowest_non_clean array for next time. 
+ intptr_t first_chunk_index = addr_to_chunk_index(mr.start()); + uintptr_t last_chunk_index = addr_to_chunk_index(mr.last()); + for (uintptr_t ch = first_chunk_index; ch <= last_chunk_index; ch++) { + intptr_t ind = ch - lowest_non_clean_base_chunk_index; + assert(0 <= ind && ind < (intptr_t)lowest_non_clean_chunk_size, + "Bounds error"); + lowest_non_clean[ind] = NULL; } } } @@ -81,7 +80,7 @@ MemRegion used, jint stride, int n_strides, DirtyCardToOopClosure* dcto_cl, - MemRegionClosure* cl, + ClearNoncleanCardWrapper* cl, jbyte** lowest_non_clean, uintptr_t lowest_non_clean_base_chunk_index, size_t lowest_non_clean_chunk_size) { @@ -127,7 +126,11 @@ lowest_non_clean_base_chunk_index, lowest_non_clean_chunk_size); - non_clean_card_iterate_work(chunk_mr, cl); + // We do not call the non_clean_card_iterate_serial() version because + // we want to clear the cards, and the ClearNoncleanCardWrapper closure + // itself does the work of finding contiguous dirty ranges of cards to + // process (and clear). + cl->do_MemRegion(chunk_mr); // Find the next chunk of the stride. chunk_card_start += CardsPerStrideChunk * n_strides;
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp Sat May 14 15:21:38 2011 -0700 @@ -224,6 +224,12 @@ const size_t alignment = virtual_space()->alignment(); size_t aligned_bytes = align_size_up(bytes, alignment); size_t aligned_expand_bytes = align_size_up(MinHeapDeltaBytes, alignment); + + if (UseNUMA) { + // With NUMA we use round-robin page allocation for the old gen. Expand by at least + // providing a page per lgroup. Alignment is larger or equal to the page size. + aligned_expand_bytes = MAX2(aligned_expand_bytes, alignment * os::numa_get_groups_num()); + } if (aligned_bytes == 0){ // The alignment caused the number of bytes to wrap. An expand_by(0) will // return true with the implication that and expansion was done when it
--- a/hotspot/src/share/vm/gc_implementation/shared/generationCounters.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/gc_implementation/shared/generationCounters.cpp Sat May 14 15:21:38 2011 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -51,15 +51,18 @@ cname = PerfDataManager::counter_name(_name_space, "minCapacity"); PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes, + _virtual_space == NULL ? 0 : _virtual_space->committed_size(), CHECK); cname = PerfDataManager::counter_name(_name_space, "maxCapacity"); PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes, + _virtual_space == NULL ? 0 : _virtual_space->reserved_size(), CHECK); cname = PerfDataManager::counter_name(_name_space, "capacity"); _current_size = PerfDataManager::create_variable(SUN_GC, cname, - PerfData::U_Bytes, + PerfData::U_Bytes, + _virtual_space == NULL ? 0 : _virtual_space->committed_size(), CHECK); } }
--- a/hotspot/src/share/vm/gc_implementation/shared/generationCounters.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/gc_implementation/shared/generationCounters.hpp Sat May 14 15:21:38 2011 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -61,10 +61,11 @@ } virtual void update_all() { - _current_size->set_value(_virtual_space->committed_size()); + _current_size->set_value(_virtual_space == NULL ? 0 : + _virtual_space->committed_size()); } const char* name_space() const { return _name_space; } + }; - #endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_GENERATIONCOUNTERS_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/gc_implementation/shared/hSpaceCounters.cpp Sat May 14 15:21:38 2011 -0700 @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "gc_implementation/shared/hSpaceCounters.hpp" +#include "memory/generation.hpp" +#include "memory/resourceArea.hpp" + +HSpaceCounters::HSpaceCounters(const char* name, + int ordinal, + size_t max_size, + size_t initial_capacity, + GenerationCounters* gc) { + + if (UsePerfData) { + EXCEPTION_MARK; + ResourceMark rm; + + const char* cns = + PerfDataManager::name_space(gc->name_space(), "space", ordinal); + + _name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1); + strcpy(_name_space, cns); + + const char* cname = PerfDataManager::counter_name(_name_space, "name"); + PerfDataManager::create_string_constant(SUN_GC, cname, name, CHECK); + + cname = PerfDataManager::counter_name(_name_space, "maxCapacity"); + PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes, + (jlong)max_size, CHECK); + + cname = PerfDataManager::counter_name(_name_space, "capacity"); + _capacity = PerfDataManager::create_variable(SUN_GC, cname, + PerfData::U_Bytes, + initial_capacity, CHECK); + + cname = PerfDataManager::counter_name(_name_space, "used"); + _used = PerfDataManager::create_variable(SUN_GC, cname, PerfData::U_Bytes, + (jlong) 0, CHECK); + + cname = PerfDataManager::counter_name(_name_space, "initCapacity"); + PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes, + initial_capacity, CHECK); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/gc_implementation/shared/hSpaceCounters.hpp Sat May 14 15:21:38 2011 -0700 @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_IMPLEMENTATION_SHARED_HSPACECOUNTERS_HPP +#define SHARE_VM_GC_IMPLEMENTATION_SHARED_HSPACECOUNTERS_HPP + +#ifndef SERIALGC +#include "gc_implementation/shared/generationCounters.hpp" +#include "memory/generation.hpp" +#include "runtime/perfData.hpp" +#endif + +// HSpaceCounters is a holder class for the performance counters +// that track a collection (a logical space) in a heap. + +class HeapSpaceUsedHelper; +class G1SpaceMonitoringSupport; + +class HSpaceCounters: public CHeapObj { + friend class VMStructs; + + private: + PerfVariable* _capacity; + PerfVariable* _used; + + // Constant PerfData types don't need to retain a reference. + // However, it's a good idea to document them here.
+ + char* _name_space; + + public: + + HSpaceCounters(const char* name, int ordinal, size_t max_size, + size_t initial_capacity, GenerationCounters* gc); + + ~HSpaceCounters() { + if (_name_space != NULL) FREE_C_HEAP_ARRAY(char, _name_space); + } + + inline void update_capacity(size_t v) { + _capacity->set_value(v); + } + + inline void update_used(size_t v) { + _used->set_value(v); + } + + debug_only( + // for security reasons, we do not allow arbitrary reads from + // the counters as they may live in shared memory. + jlong used() { + return _used->get_value(); + } + jlong capacity() { + return _capacity->get_value(); + } + ) + + inline void update_all(size_t capacity, size_t used) { + update_capacity(capacity); + update_used(used); + } + + const char* name_space() const { return _name_space; } +}; +#endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_HSPACECOUNTERS_HPP
--- a/hotspot/src/share/vm/interpreter/linkResolver.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/interpreter/linkResolver.cpp Sat May 14 15:21:38 2011 -0700 @@ -327,6 +327,7 @@ // 1. check if klass is not interface if (resolved_klass->is_interface()) { + ResourceMark rm(THREAD); char buf[200]; jio_snprintf(buf, sizeof(buf), "Found interface %s, but class was expected", Klass::cast(resolved_klass())->external_name()); THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), buf); @@ -413,6 +414,7 @@ // check if klass is interface if (!resolved_klass->is_interface()) { + ResourceMark rm(THREAD); char buf[200]; jio_snprintf(buf, sizeof(buf), "Found class %s, but interface was expected", Klass::cast(resolved_klass())->external_name()); THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), buf); @@ -534,6 +536,7 @@ // check for errors if (is_static != fd.is_static()) { + ResourceMark rm(THREAD); char msg[200]; jio_snprintf(msg, sizeof(msg), "Expected %s field %s.%s", is_static ? 
"static" : "non-static", Klass::cast(resolved_klass())->external_name(), fd.name()->as_C_string()); THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), msg); @@ -631,6 +634,7 @@ // check if static if (!resolved_method->is_static()) { + ResourceMark rm(THREAD); char buf[200]; jio_snprintf(buf, sizeof(buf), "Expected static method %s", methodOopDesc::name_and_sig_as_C_string(Klass::cast(resolved_klass()), resolved_method->name(), @@ -671,6 +675,7 @@ // check if not static if (resolved_method->is_static()) { + ResourceMark rm(THREAD); char buf[200]; jio_snprintf(buf, sizeof(buf), "Expecting non-static method %s", @@ -717,6 +722,7 @@ // check if not static if (sel_method->is_static()) { + ResourceMark rm(THREAD); char buf[200]; jio_snprintf(buf, sizeof(buf), "Expecting non-static method %s", methodOopDesc::name_and_sig_as_C_string(Klass::cast(resolved_klass()), resolved_method->name(), @@ -757,6 +763,7 @@ // check if not static if (resolved_method->is_static()) { + ResourceMark rm(THREAD); char buf[200]; jio_snprintf(buf, sizeof(buf), "Expecting non-static method %s", methodOopDesc::name_and_sig_as_C_string(Klass::cast(resolved_klass()), resolved_method->name(), @@ -873,6 +880,7 @@ // check if receiver klass implements the resolved interface if (!recv_klass->is_subtype_of(resolved_klass())) { + ResourceMark rm(THREAD); char buf[200]; jio_snprintf(buf, sizeof(buf), "Class %s does not implement the requested interface %s", (Klass::cast(recv_klass()))->external_name(),
--- a/hotspot/src/share/vm/memory/allocation.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/memory/allocation.cpp Sat May 14 15:21:38 2011 -0700 @@ -44,6 +44,14 @@ return (void *) AllocateHeap(size, "CHeapObj-new"); } +void* CHeapObj::operator new (size_t size, const std::nothrow_t& nothrow_constant) { + char* p = (char*) os::malloc(size); +#ifdef ASSERT + if (PrintMallocFree) trace_heap_malloc(size, "CHeapObj-new", p); +#endif + return p; +} + void CHeapObj::operator delete(void* p){ FreeHeap(p); }
--- a/hotspot/src/share/vm/memory/allocation.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/memory/allocation.hpp Sat May 14 15:21:38 2011 -0700 @@ -34,6 +34,8 @@ #include "opto/c2_globals.hpp" #endif +#include <new> + #define ARENA_ALIGN_M1 (((size_t)(ARENA_AMALLOC_ALIGNMENT)) - 1) #define ARENA_ALIGN_MASK (~((size_t)ARENA_ALIGN_M1)) #define ARENA_ALIGN(x) ((((size_t)(x)) + ARENA_ALIGN_M1) & ARENA_ALIGN_MASK) @@ -99,6 +101,7 @@ class CHeapObj ALLOCATION_SUPER_CLASS_SPEC { public: void* operator new(size_t size); + void* operator new (size_t size, const std::nothrow_t& nothrow_constant); void operator delete(void* p); void* new_array(size_t size); };
--- a/hotspot/src/share/vm/memory/cardTableModRefBS.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/memory/cardTableModRefBS.cpp Sat May 14 15:21:38 2011 -0700 @@ -456,31 +456,35 @@ } -void CardTableModRefBS::non_clean_card_iterate(Space* sp, - MemRegion mr, - DirtyCardToOopClosure* dcto_cl, - MemRegionClosure* cl) { +void CardTableModRefBS::non_clean_card_iterate_possibly_parallel(Space* sp, + MemRegion mr, + DirtyCardToOopClosure* dcto_cl, + ClearNoncleanCardWrapper* cl) { if (!mr.is_empty()) { int n_threads = SharedHeap::heap()->n_par_threads(); if (n_threads > 0) { #ifndef SERIALGC - par_non_clean_card_iterate_work(sp, mr, dcto_cl, cl, n_threads); + non_clean_card_iterate_parallel_work(sp, mr, dcto_cl, cl, n_threads); #else // SERIALGC fatal("Parallel gc not supported here."); #endif // SERIALGC } else { - non_clean_card_iterate_work(mr, cl); + // We do not call the non_clean_card_iterate_serial() version below because + // we want to clear the cards (which non_clean_card_iterate_serial() does not + // do for us), and the ClearNoncleanCardWrapper closure itself does the work + // of finding contiguous dirty ranges of cards to process (and clear). + cl->do_MemRegion(mr); } } } -// NOTE: For this to work correctly, it is important that -// we look for non-clean cards below (so as to catch those -// marked precleaned), rather than look explicitly for dirty -// cards (and miss those marked precleaned). In that sense, -// the name precleaned is currently somewhat of a misnomer. -void CardTableModRefBS::non_clean_card_iterate_work(MemRegion mr, - MemRegionClosure* cl) { +// The iterator itself is not MT-aware, but +// MT-aware callers and closures can use this to +// accomplish dirty card iteration in parallel. The +// iterator itself does not clear the dirty cards, or +// change their values in any manner. 
+void CardTableModRefBS::non_clean_card_iterate_serial(MemRegion mr, + MemRegionClosure* cl) { for (int i = 0; i < _cur_covered_regions; i++) { MemRegion mri = mr.intersection(_covered[i]); if (mri.word_size() > 0) { @@ -648,43 +652,37 @@ } #ifndef PRODUCT -class GuaranteeNotModClosure: public MemRegionClosure { - CardTableModRefBS* _ct; -public: - GuaranteeNotModClosure(CardTableModRefBS* ct) : _ct(ct) {} - void do_MemRegion(MemRegion mr) { - jbyte* entry = _ct->byte_for(mr.start()); - guarantee(*entry != CardTableModRefBS::clean_card, - "Dirty card in region that should be clean"); +void CardTableModRefBS::verify_region(MemRegion mr, + jbyte val, bool val_equals) { + jbyte* start = byte_for(mr.start()); + jbyte* end = byte_for(mr.last()); + bool failures = false; + for (jbyte* curr = start; curr <= end; ++curr) { + jbyte curr_val = *curr; + bool failed = (val_equals) ? (curr_val != val) : (curr_val == val); + if (failed) { + if (!failures) { + tty->cr(); + tty->print_cr("== CT verification failed: ["PTR_FORMAT","PTR_FORMAT"]", start, end); + tty->print_cr("== %sexpecting value: %d", + (val_equals) ? "" : "not ", val); + failures = true; + } + tty->print_cr("== card "PTR_FORMAT" ["PTR_FORMAT","PTR_FORMAT"], " + "val: %d", curr, addr_for(curr), + (HeapWord*) (((size_t) addr_for(curr)) + card_size), + (int) curr_val); + } } -}; - -void CardTableModRefBS::verify_clean_region(MemRegion mr) { - GuaranteeNotModClosure blk(this); - non_clean_card_iterate_work(mr, &blk); + guarantee(!failures, "there should not have been any failures"); } -// To verify a MemRegion is entirely dirty this closure is passed to -// dirty_card_iterate. If the region is dirty do_MemRegion will be -// invoked only once with a MemRegion equal to the one being -// verified. 
-class GuaranteeDirtyClosure: public MemRegionClosure { - CardTableModRefBS* _ct; - MemRegion _mr; - bool _result; -public: - GuaranteeDirtyClosure(CardTableModRefBS* ct, MemRegion mr) - : _ct(ct), _mr(mr), _result(false) {} - void do_MemRegion(MemRegion mr) { - _result = _mr.equals(mr); - } - bool result() const { return _result; } -}; +void CardTableModRefBS::verify_not_dirty_region(MemRegion mr) { + verify_region(mr, dirty_card, false /* val_equals */); +} void CardTableModRefBS::verify_dirty_region(MemRegion mr) { - GuaranteeDirtyClosure blk(this, mr); - dirty_card_iterate(mr, &blk); - guarantee(blk.result(), "Non-dirty cards in region that should be dirty"); + verify_region(mr, dirty_card, true /* val_equals */); } #endif
--- a/hotspot/src/share/vm/memory/cardTableModRefBS.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/memory/cardTableModRefBS.hpp Sat May 14 15:21:38 2011 -0700 @@ -44,6 +44,7 @@ class Generation; class OopsInGenClosure; class DirtyCardToOopClosure; +class ClearNoncleanCardWrapper; class CardTableModRefBS: public ModRefBarrierSet { // Some classes get to look at some private stuff. @@ -165,22 +166,28 @@ // Iterate over the portion of the card-table which covers the given // region mr in the given space and apply cl to any dirty sub-regions - // of mr. cl and dcto_cl must either be the same closure or cl must - // wrap dcto_cl. Both are required - neither may be NULL. Also, dcto_cl - // may be modified. Note that this function will operate in a parallel - // mode if worker threads are available. - void non_clean_card_iterate(Space* sp, MemRegion mr, - DirtyCardToOopClosure* dcto_cl, - MemRegionClosure* cl); + // of mr. Dirty cards are _not_ cleared by the iterator method itself, + // but closures may arrange to do so on their own should they so wish. + void non_clean_card_iterate_serial(MemRegion mr, MemRegionClosure* cl); - // Utility function used to implement the other versions below. - void non_clean_card_iterate_work(MemRegion mr, MemRegionClosure* cl); + // A variant of the above that will operate in a parallel mode if + // worker threads are available, and clear the dirty cards as it + // processes them. + // ClearNoncleanCardWrapper cl must wrap the DirtyCardToOopClosure dcto_cl, + // which may itself be modified by the method. + void non_clean_card_iterate_possibly_parallel(Space* sp, MemRegion mr, + DirtyCardToOopClosure* dcto_cl, + ClearNoncleanCardWrapper* cl); - void par_non_clean_card_iterate_work(Space* sp, MemRegion mr, - DirtyCardToOopClosure* dcto_cl, - MemRegionClosure* cl, - int n_threads); + private: + // Work method used to implement non_clean_card_iterate_possibly_parallel() + // above in the parallel case. 
+ void non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr, + DirtyCardToOopClosure* dcto_cl, + ClearNoncleanCardWrapper* cl, + int n_threads); + protected: // Dirty the bytes corresponding to "mr" (not all of which must be // covered.) void dirty_MemRegion(MemRegion mr); @@ -237,7 +244,7 @@ MemRegion used, jint stride, int n_strides, DirtyCardToOopClosure* dcto_cl, - MemRegionClosure* cl, + ClearNoncleanCardWrapper* cl, jbyte** lowest_non_clean, uintptr_t lowest_non_clean_base_chunk_index, size_t lowest_non_clean_chunk_size); @@ -409,14 +416,14 @@ // marking, where a dirty card may cause scanning, and summarization // marking, of objects that extend onto subsequent cards.) void mod_card_iterate(MemRegionClosure* cl) { - non_clean_card_iterate_work(_whole_heap, cl); + non_clean_card_iterate_serial(_whole_heap, cl); } // Like the "mod_cards_iterate" above, except only invokes the closure // for cards within the MemRegion "mr" (which is required to be // card-aligned and sized.) void mod_card_iterate(MemRegion mr, MemRegionClosure* cl) { - non_clean_card_iterate_work(mr, cl); + non_clean_card_iterate_serial(mr, cl); } static uintx ct_max_alignment_constraint(); @@ -468,7 +475,10 @@ void verify(); void verify_guard(); - void verify_clean_region(MemRegion mr) PRODUCT_RETURN; + // val_equals -> it will check that all cards covered by mr equal val + // !val_equals -> it will check that all cards covered by mr do not equal val + void verify_region(MemRegion mr, jbyte val, bool val_equals) PRODUCT_RETURN; + void verify_not_dirty_region(MemRegion mr) PRODUCT_RETURN; void verify_dirty_region(MemRegion mr) PRODUCT_RETURN; static size_t par_chunk_heapword_alignment() { @@ -493,4 +503,5 @@ void set_CTRS(CardTableRS* rs) { _rs = rs; } }; + #endif // SHARE_VM_MEMORY_CARDTABLEMODREFBS_HPP
--- a/hotspot/src/share/vm/memory/cardTableRS.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/memory/cardTableRS.cpp Sat May 14 15:21:38 2011 -0700 @@ -105,107 +105,111 @@ g->younger_refs_iterate(blk); } -class ClearNoncleanCardWrapper: public MemRegionClosure { - MemRegionClosure* _dirty_card_closure; - CardTableRS* _ct; - bool _is_par; -private: - // Clears the given card, return true if the corresponding card should be - // processed. - bool clear_card(jbyte* entry) { - if (_is_par) { - while (true) { - // In the parallel case, we may have to do this several times. - jbyte entry_val = *entry; - assert(entry_val != CardTableRS::clean_card_val(), - "We shouldn't be looking at clean cards, and this should " - "be the only place they get cleaned."); - if (CardTableRS::card_is_dirty_wrt_gen_iter(entry_val) - || _ct->is_prev_youngergen_card_val(entry_val)) { - jbyte res = - Atomic::cmpxchg(CardTableRS::clean_card_val(), entry, entry_val); - if (res == entry_val) { - break; - } else { - assert(res == CardTableRS::cur_youngergen_and_prev_nonclean_card, - "The CAS above should only fail if another thread did " - "a GC write barrier."); - } - } else if (entry_val == - CardTableRS::cur_youngergen_and_prev_nonclean_card) { - // Parallelism shouldn't matter in this case. Only the thread - // assigned to scan the card should change this value. - *entry = _ct->cur_youngergen_card_val(); - break; - } else { - assert(entry_val == _ct->cur_youngergen_card_val(), - "Should be the only possibility."); - // In this case, the card was clean before, and become - // cur_youngergen only because of processing of a promoted object. - // We don't have to look at the card. 
- return false; - } +inline bool ClearNoncleanCardWrapper::clear_card(jbyte* entry) { + if (_is_par) { + return clear_card_parallel(entry); + } else { + return clear_card_serial(entry); + } +} + +inline bool ClearNoncleanCardWrapper::clear_card_parallel(jbyte* entry) { + while (true) { + // In the parallel case, we may have to do this several times. + jbyte entry_val = *entry; + assert(entry_val != CardTableRS::clean_card_val(), + "We shouldn't be looking at clean cards, and this should " + "be the only place they get cleaned."); + if (CardTableRS::card_is_dirty_wrt_gen_iter(entry_val) + || _ct->is_prev_youngergen_card_val(entry_val)) { + jbyte res = + Atomic::cmpxchg(CardTableRS::clean_card_val(), entry, entry_val); + if (res == entry_val) { + break; + } else { + assert(res == CardTableRS::cur_youngergen_and_prev_nonclean_card, + "The CAS above should only fail if another thread did " + "a GC write barrier."); } - return true; + } else if (entry_val == + CardTableRS::cur_youngergen_and_prev_nonclean_card) { + // Parallelism shouldn't matter in this case. Only the thread + // assigned to scan the card should change this value. + *entry = _ct->cur_youngergen_card_val(); + break; } else { - jbyte entry_val = *entry; - assert(entry_val != CardTableRS::clean_card_val(), - "We shouldn't be looking at clean cards, and this should " - "be the only place they get cleaned."); - assert(entry_val != CardTableRS::cur_youngergen_and_prev_nonclean_card, - "This should be possible in the sequential case."); - *entry = CardTableRS::clean_card_val(); - return true; + assert(entry_val == _ct->cur_youngergen_card_val(), + "Should be the only possibility."); + // In this case, the card was clean before, and become + // cur_youngergen only because of processing of a promoted object. + // We don't have to look at the card. 
+ return false; } } + return true; +} -public: - ClearNoncleanCardWrapper(MemRegionClosure* dirty_card_closure, - CardTableRS* ct) : + +inline bool ClearNoncleanCardWrapper::clear_card_serial(jbyte* entry) { + jbyte entry_val = *entry; + assert(entry_val != CardTableRS::clean_card_val(), + "We shouldn't be looking at clean cards, and this should " + "be the only place they get cleaned."); + assert(entry_val != CardTableRS::cur_youngergen_and_prev_nonclean_card, + "This should be possible in the sequential case."); + *entry = CardTableRS::clean_card_val(); + return true; +} + +ClearNoncleanCardWrapper::ClearNoncleanCardWrapper( + MemRegionClosure* dirty_card_closure, CardTableRS* ct) : _dirty_card_closure(dirty_card_closure), _ct(ct) { _is_par = (SharedHeap::heap()->n_par_threads() > 0); +} + +void ClearNoncleanCardWrapper::do_MemRegion(MemRegion mr) { + assert(mr.word_size() > 0, "Error"); + assert(_ct->is_aligned(mr.start()), "mr.start() should be card aligned"); + // mr.end() may not necessarily be card aligned. + jbyte* cur_entry = _ct->byte_for(mr.last()); + const jbyte* limit = _ct->byte_for(mr.start()); + HeapWord* end_of_non_clean = mr.end(); + HeapWord* start_of_non_clean = end_of_non_clean; + while (cur_entry >= limit) { + HeapWord* cur_hw = _ct->addr_for(cur_entry); + if ((*cur_entry != CardTableRS::clean_card_val()) && clear_card(cur_entry)) { + // Continue the dirty range by opening the + // dirty window one card to the left. + start_of_non_clean = cur_hw; + } else { + // We hit a "clean" card; process any non-empty + // "dirty" range accumulated so far. + if (start_of_non_clean < end_of_non_clean) { + const MemRegion mrd(start_of_non_clean, end_of_non_clean); + _dirty_card_closure->do_MemRegion(mrd); + } + // Reset the dirty window, while continuing to look + // for the next dirty card that will start a + // new dirty window. 
+ end_of_non_clean = cur_hw; + start_of_non_clean = cur_hw; + } + // Note that "cur_entry" leads "start_of_non_clean" in + // its leftward excursion after this point + // in the loop and, when we hit the left end of "mr", + // will point off of the left end of the card-table + // for "mr". + cur_entry--; } - void do_MemRegion(MemRegion mr) { - // We start at the high end of "mr", walking backwards - // while accumulating a contiguous dirty range of cards in - // [start_of_non_clean, end_of_non_clean) which we then - // process en masse. - HeapWord* end_of_non_clean = mr.end(); - HeapWord* start_of_non_clean = end_of_non_clean; - jbyte* entry = _ct->byte_for(mr.last()); - const jbyte* first_entry = _ct->byte_for(mr.start()); - while (entry >= first_entry) { - HeapWord* cur = _ct->addr_for(entry); - if (!clear_card(entry)) { - // We hit a clean card; process any non-empty - // dirty range accumulated so far. - if (start_of_non_clean < end_of_non_clean) { - MemRegion mr2(start_of_non_clean, end_of_non_clean); - _dirty_card_closure->do_MemRegion(mr2); - } - // Reset the dirty window while continuing to - // look for the next dirty window to process. - end_of_non_clean = cur; - start_of_non_clean = end_of_non_clean; - } - // Open the left end of the window one card to the left. - start_of_non_clean = cur; - // Note that "entry" leads "start_of_non_clean" in - // its leftward excursion after this point - // in the loop and, when we hit the left end of "mr", - // will point off of the left end of the card-table - // for "mr". - entry--; - } - // If the first card of "mr" was dirty, we will have - // been left with a dirty window, co-initial with "mr", - // which we now process. - if (start_of_non_clean < end_of_non_clean) { - MemRegion mr2(start_of_non_clean, end_of_non_clean); - _dirty_card_closure->do_MemRegion(mr2); - } + // If the first card of "mr" was dirty, we will have + // been left with a dirty window, co-initial with "mr", + // which we now process. 
+ if (start_of_non_clean < end_of_non_clean) { + const MemRegion mrd(start_of_non_clean, end_of_non_clean); + _dirty_card_closure->do_MemRegion(mrd); } -}; +} + // clean (by dirty->clean before) ==> cur_younger_gen // dirty ==> cur_youngergen_and_prev_nonclean_card // precleaned ==> cur_youngergen_and_prev_nonclean_card @@ -246,8 +250,35 @@ cl->gen_boundary()); ClearNoncleanCardWrapper clear_cl(dcto_cl, this); - _ct_bs->non_clean_card_iterate(sp, sp->used_region_at_save_marks(), - dcto_cl, &clear_cl); + const MemRegion urasm = sp->used_region_at_save_marks(); +#ifdef ASSERT + // Convert the assertion check to a warning if we are running + // CMS+ParNew until related bug is fixed. + MemRegion ur = sp->used_region(); + assert(ur.contains(urasm) || (UseConcMarkSweepGC && UseParNewGC), + err_msg("Did you forget to call save_marks()? " + "[" PTR_FORMAT ", " PTR_FORMAT ") is not contained in " + "[" PTR_FORMAT ", " PTR_FORMAT ")", + urasm.start(), urasm.end(), ur.start(), ur.end())); + // In the case of CMS+ParNew, issue a warning + if (!ur.contains(urasm)) { + assert(UseConcMarkSweepGC && UseParNewGC, "Tautology: see assert above"); + warning("CMS+ParNew: Did you forget to call save_marks()? " + "[" PTR_FORMAT ", " PTR_FORMAT ") is not contained in " + "[" PTR_FORMAT ", " PTR_FORMAT ")", + urasm.start(), urasm.end(), ur.start(), ur.end()); + MemRegion ur2 = sp->used_region(); + MemRegion urasm2 = sp->used_region_at_save_marks(); + if (!ur.equals(ur2)) { + warning("CMS+ParNew: Flickering used_region()!!"); + } + if (!urasm.equals(urasm2)) { + warning("CMS+ParNew: Flickering used_region_at_save_marks()!!"); + } + } +#endif + _ct_bs->non_clean_card_iterate_possibly_parallel(sp, urasm, + dcto_cl, &clear_cl); } void CardTableRS::clear_into_younger(Generation* gen, bool clear_perm) {
--- a/hotspot/src/share/vm/memory/cardTableRS.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/memory/cardTableRS.hpp Sat May 14 15:21:38 2011 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -166,4 +166,21 @@ }; +class ClearNoncleanCardWrapper: public MemRegionClosure { + MemRegionClosure* _dirty_card_closure; + CardTableRS* _ct; + bool _is_par; +private: + // Clears the given card, return true if the corresponding card should be + // processed. + inline bool clear_card(jbyte* entry); + // Work methods called by the clear_card() + inline bool clear_card_serial(jbyte* entry); + inline bool clear_card_parallel(jbyte* entry); + +public: + ClearNoncleanCardWrapper(MemRegionClosure* dirty_card_closure, CardTableRS* ct); + void do_MemRegion(MemRegion mr); +}; + #endif // SHARE_VM_MEMORY_CARDTABLERS_HPP
--- a/hotspot/src/share/vm/memory/collectorPolicy.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/memory/collectorPolicy.cpp Sat May 14 15:21:38 2011 -0700 @@ -265,8 +265,6 @@ MaxHeapSize = align_size_up(MaxHeapSize, max_alignment()); always_do_update_barrier = UseConcMarkSweepGC; - BlockOffsetArrayUseUnallocatedBlock = - BlockOffsetArrayUseUnallocatedBlock || ParallelGCThreads > 0; // Check validity of heap flags assert(OldSize % min_alignment() == 0, "old space alignment");
--- a/hotspot/src/share/vm/memory/genCollectedHeap.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/memory/genCollectedHeap.hpp Sat May 14 15:21:38 2011 -0700 @@ -427,13 +427,13 @@ // explicitly mark reachable objects in younger generations, to avoid // excess storage retention.) If "collecting_perm_gen" is false, then // roots that may only contain references to permGen objects are not - // scanned. The "so" argument determines which of the roots + // scanned; instead, the older_gens closure is applied to all outgoing + // references in the perm gen. The "so" argument determines which of the roots // the closure is applied to: // "SO_None" does none; // "SO_AllClasses" applies the closure to all entries in the SystemDictionary; // "SO_SystemClasses" to all the "system" classes and loaders; - // "SO_Symbols_and_Strings" applies the closure to all entries in - // SymbolsTable and StringTable. + // "SO_Strings" applies the closure to all entries in the StringTable. void gen_process_strong_roots(int level, bool younger_gens_as_roots, // The remaining arguments are in an order
--- a/hotspot/src/share/vm/memory/genOopClosures.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/memory/genOopClosures.hpp Sat May 14 15:21:38 2011 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -175,7 +175,7 @@ protected: template <class T> inline void do_oop_work(T* p) { oop obj = oopDesc::load_decode_heap_oop(p); - guarantee(obj->is_oop_or_null(), "invalid oop"); + guarantee(obj->is_oop_or_null(), err_msg("invalid oop: " INTPTR_FORMAT, (oopDesc*) obj)); } public: virtual void do_oop(oop* p);
--- a/hotspot/src/share/vm/memory/modRefBarrierSet.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/memory/modRefBarrierSet.hpp Sat May 14 15:21:38 2011 -0700 @@ -100,12 +100,6 @@ // Pass along the argument to the superclass. ModRefBarrierSet(int max_covered_regions) : BarrierSet(max_covered_regions) {} - -#ifndef PRODUCT - // Verifies that the given region contains no modified references. - virtual void verify_clean_region(MemRegion mr) = 0; -#endif - }; #endif // SHARE_VM_MEMORY_MODREFBARRIERSET_HPP
--- a/hotspot/src/share/vm/memory/sharedHeap.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/memory/sharedHeap.cpp Sat May 14 15:21:38 2011 -0700 @@ -46,7 +46,6 @@ SH_PS_Management_oops_do, SH_PS_SystemDictionary_oops_do, SH_PS_jvmti_oops_do, - SH_PS_SymbolTable_oops_do, SH_PS_StringTable_oops_do, SH_PS_CodeCache_oops_do, // Leave this one last. @@ -161,13 +160,9 @@ if (!_process_strong_tasks->is_task_claimed(SH_PS_SystemDictionary_oops_do)) { if (so & SO_AllClasses) { SystemDictionary::oops_do(roots); - } else - if (so & SO_SystemClasses) { - SystemDictionary::always_strong_oops_do(roots); - } - } - - if (!_process_strong_tasks->is_task_claimed(SH_PS_SymbolTable_oops_do)) { + } else if (so & SO_SystemClasses) { + SystemDictionary::always_strong_oops_do(roots); + } } if (!_process_strong_tasks->is_task_claimed(SH_PS_StringTable_oops_do)) {
--- a/hotspot/src/share/vm/memory/sharedHeap.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/memory/sharedHeap.hpp Sat May 14 15:21:38 2011 -0700 @@ -192,9 +192,8 @@ SO_None = 0x0, SO_AllClasses = 0x1, SO_SystemClasses = 0x2, - SO_Symbols = 0x4, - SO_Strings = 0x8, - SO_CodeCache = 0x10 + SO_Strings = 0x4, + SO_CodeCache = 0x8 }; FlexibleWorkGang* workers() const { return _workers; } @@ -208,14 +207,13 @@ // Invoke the "do_oop" method the closure "roots" on all root locations. // If "collecting_perm_gen" is false, then roots that may only contain - // references to permGen objects are not scanned. If true, the - // "perm_gen" closure is applied to all older-to-younger refs in the + // references to permGen objects are not scanned; instead, in that case, + // the "perm_blk" closure is applied to all outgoing refs in the // permanent generation. The "so" argument determines which of roots // the closure is applied to: // "SO_None" does none; // "SO_AllClasses" applies the closure to all entries in the SystemDictionary; // "SO_SystemClasses" to all the "system" classes and loaders; - // "SO_Symbols" applies the closure to all entries in SymbolsTable; // "SO_Strings" applies the closure to all entries in StringTable; // "SO_CodeCache" applies the closure to all elements of the CodeCache. void process_strong_roots(bool activate_scope,
--- a/hotspot/src/share/vm/oops/cpCacheOop.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/oops/cpCacheOop.cpp Sat May 14 15:21:38 2011 -0700 @@ -104,7 +104,7 @@ void* result = Atomic::cmpxchg_ptr(f1, f1_addr, NULL); bool success = (result == NULL); if (success) { - update_barrier_set(f1_addr, f1); + update_barrier_set((void*) f1_addr, f1); } } @@ -275,21 +275,23 @@ return (int) bsm_cache_index; } -void ConstantPoolCacheEntry::set_dynamic_call(Handle call_site, - methodHandle signature_invoker) { +void ConstantPoolCacheEntry::set_dynamic_call(Handle call_site, methodHandle signature_invoker) { assert(is_secondary_entry(), ""); + // NOTE: it's important that all other values are set before f1 is + // set since some users short circuit on f1 being set + // (i.e. non-null) and that may result in uninitialized values for + // other racing threads (e.g. flags). int param_size = signature_invoker->size_of_parameters(); assert(param_size >= 1, "method argument size must include MH.this"); - param_size -= 1; // do not count MH.this; it is not stacked for invokedynamic - if (Atomic::cmpxchg_ptr(call_site(), &_f1, NULL) == NULL) { - // racing threads might be trying to install their own favorites - set_f1(call_site()); - } + param_size -= 1; // do not count MH.this; it is not stacked for invokedynamic bool is_final = true; assert(signature_invoker->is_final_method(), "is_final"); - set_flags(as_flags(as_TosState(signature_invoker->result_type()), is_final, false, false, false, true) | param_size); + int flags = as_flags(as_TosState(signature_invoker->result_type()), is_final, false, false, false, true) | param_size; + assert(_flags == 0 || _flags == flags, "flags should be the same"); + set_flags(flags); // do not do set_bytecode on a secondary CP cache entry //set_bytecode_1(Bytecodes::_invokedynamic); + set_f1_if_null_atomic(call_site()); // This must be the last one to set (see NOTE above)! }
--- a/hotspot/src/share/vm/oops/methodDataOop.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/oops/methodDataOop.hpp Sat May 14 15:21:38 2011 -0700 @@ -1194,7 +1194,7 @@ // Whole-method sticky bits and flags public: enum { - _trap_hist_limit = 16, // decoupled from Deoptimization::Reason_LIMIT + _trap_hist_limit = 17, // decoupled from Deoptimization::Reason_LIMIT _trap_hist_mask = max_jubyte, _extra_data_count = 4 // extra DataLayout headers, for trap history }; // Public flag values
--- a/hotspot/src/share/vm/opto/bytecodeInfo.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/opto/bytecodeInfo.cpp Sat May 14 15:21:38 2011 -0700 @@ -310,13 +310,14 @@ return "inlining too deep"; } - // We need to detect recursive inlining of method handle targets: if - // the current method is a method handle adapter and one of the - // callers is the same method as the callee, we bail out if - // MaxRecursiveInlineLevel is hit. - if (method()->is_method_handle_adapter()) { + // detect direct and indirect recursive inlining + { + // count the current method and the callee + int inline_level = (method() == callee_method) ? 1 : 0; + if (inline_level > MaxRecursiveInlineLevel) + return "recursively inlining too deep"; + // count callers of current method and callee JVMState* jvms = caller_jvms(); - int inline_level = 0; while (jvms != NULL && jvms->has_method()) { if (jvms->method() == callee_method) { inline_level++; @@ -327,10 +328,6 @@ } } - if (method() == callee_method && inline_depth() > MaxRecursiveInlineLevel) { - return "recursively inlining too deep"; - } - int size = callee_method->code_size(); if (UseOldInlining && ClipInlining @@ -376,7 +373,6 @@ return true; } -#ifndef PRODUCT //------------------------------print_inlining--------------------------------- // Really, the failure_msg can be a success message also. void InlineTree::print_inlining(ciMethod* callee_method, int caller_bci, const char* failure_msg) const { @@ -388,7 +384,6 @@ tty->print(" bcs: %d+%d invoked: %d", top->count_inline_bcs(), callee_method->code_size(), callee_method->interpreter_invocation_count()); } } -#endif //------------------------------ok_to_inline----------------------------------- WarmCallInfo* InlineTree::ok_to_inline(ciMethod* callee_method, JVMState* jvms, ciCallProfile& profile, WarmCallInfo* initial_wci) {
--- a/hotspot/src/share/vm/opto/c2_globals.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/opto/c2_globals.hpp Sat May 14 15:21:38 2011 -0700 @@ -183,6 +183,21 @@ develop(bool, TraceLoopOpts, false, \ "Trace executed loop optimizations") \ \ + diagnostic(bool, LoopLimitCheck, true, \ + "Generate a loop limits check for overflow") \ + \ + develop(bool, TraceLoopLimitCheck, false, \ + "Trace generation of loop limits checks") \ + \ + diagnostic(bool, RangeLimitCheck, true, \ + "Additional overflow checks during range check elimination") \ + \ + develop(bool, TraceRangeLimitCheck, false, \ + "Trace additional overflow checks in RCE") \ + \ + diagnostic(bool, UnrollLimitCheck, true, \ + "Additional overflow checks during loop unroll") \ + \ product(bool, OptimizeFill, false, \ "convert fill/copy loops into intrinsic") \ \
--- a/hotspot/src/share/vm/opto/cfgnode.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/opto/cfgnode.cpp Sat May 14 15:21:38 2011 -0700 @@ -1373,7 +1373,7 @@ // Clone loop predicates if (predicate_proj != NULL) { - newn = igvn->clone_loop_predicates(predicate_proj, newn); + newn = igvn->clone_loop_predicates(predicate_proj, newn, !n->is_CountedLoop()); } // Now I can point to the new node.
--- a/hotspot/src/share/vm/opto/classes.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/opto/classes.hpp Sat May 14 15:21:38 2011 -0700 @@ -156,6 +156,7 @@ macro(LogD) macro(Log10D) macro(Loop) +macro(LoopLimit) macro(Mach) macro(MachProj) macro(MaxI)
--- a/hotspot/src/share/vm/opto/escape.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/opto/escape.cpp Sat May 14 15:21:38 2011 -0700 @@ -1437,7 +1437,10 @@ // Update the memory inputs of MemNodes with the value we computed // in Phase 2 and move stores memory users to corresponding memory slices. -#ifdef ASSERT + + // Disable memory split verification code until the fix for 6984348. + // Currently it produces false negative results since it does not cover all cases. +#if 0 // ifdef ASSERT visited.Reset(); Node_Stack old_mems(arena, _compile->unique() >> 2); #endif @@ -1447,7 +1450,7 @@ Node *n = ptnode_adr(i)->_node; assert(n != NULL, "sanity"); if (n->is_Mem()) { -#ifdef ASSERT +#if 0 // ifdef ASSERT Node* old_mem = n->in(MemNode::Memory); if (!visited.test_set(old_mem->_idx)) { old_mems.push(old_mem, old_mem->outcnt()); @@ -1469,13 +1472,13 @@ } } } -#ifdef ASSERT +#if 0 // ifdef ASSERT // Verify that memory was split correctly while (old_mems.is_nonempty()) { Node* old_mem = old_mems.node(); uint old_cnt = old_mems.index(); old_mems.pop(); - assert(old_cnt = old_mem->outcnt(), "old mem could be lost"); + assert(old_cnt == old_mem->outcnt(), "old mem could be lost"); } #endif }
--- a/hotspot/src/share/vm/opto/graphKit.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/opto/graphKit.cpp Sat May 14 15:21:38 2011 -0700 @@ -1033,14 +1033,10 @@ iter.reset_to_bci(bci()); iter.next(); ciMethod* method = iter.get_method(ignore); - inputs = method->arg_size_no_receiver(); - // Add a receiver argument, maybe: - if (code != Bytecodes::_invokestatic && - code != Bytecodes::_invokedynamic) - inputs += 1; // (Do not use ciMethod::arg_size(), because // it might be an unloaded method, which doesn't // know whether it is static or not.) + inputs = method->invoke_arg_size(code); int size = method->return_type()->size(); depth = size - inputs; } @@ -2957,8 +2953,7 @@ //---------------------------set_output_for_allocation------------------------- Node* GraphKit::set_output_for_allocation(AllocateNode* alloc, - const TypeOopPtr* oop_type, - bool raw_mem_only) { + const TypeOopPtr* oop_type) { int rawidx = Compile::AliasIdxRaw; alloc->set_req( TypeFunc::FramePtr, frameptr() ); add_safepoint_edges(alloc); @@ -2982,7 +2977,7 @@ rawoop)->as_Initialize(); assert(alloc->initialization() == init, "2-way macro link must work"); assert(init ->allocation() == alloc, "2-way macro link must work"); - if (ReduceFieldZeroing && !raw_mem_only) { + { // Extract memory strands which may participate in the new object's // initialization, and source them from the new InitializeNode. // This will allow us to observe initializations when they occur, @@ -3043,11 +3038,9 @@ // the type to a constant. // The optional arguments are for specialized use by intrinsics: // - If 'extra_slow_test' if not null is an extra condition for the slow-path. -// - If 'raw_mem_only', do not cast the result to an oop. // - If 'return_size_val', report the the total object size to the caller. 
Node* GraphKit::new_instance(Node* klass_node, Node* extra_slow_test, - bool raw_mem_only, // affect only raw memory Node* *return_size_val) { // Compute size in doublewords // The size is always an integral number of doublewords, represented @@ -3118,7 +3111,7 @@ size, klass_node, initial_slow_test); - return set_output_for_allocation(alloc, oop_type, raw_mem_only); + return set_output_for_allocation(alloc, oop_type); } //-------------------------------new_array------------------------------------- @@ -3128,7 +3121,6 @@ Node* GraphKit::new_array(Node* klass_node, // array klass (maybe variable) Node* length, // number of array elements int nargs, // number of arguments to push back for uncommon trap - bool raw_mem_only, // affect only raw memory Node* *return_size_val) { jint layout_con = Klass::_lh_neutral_value; Node* layout_val = get_layout_helper(klass_node, layout_con); @@ -3273,7 +3265,7 @@ ary_type = ary_type->is_aryptr()->cast_to_size(length_type); } - Node* javaoop = set_output_for_allocation(alloc, ary_type, raw_mem_only); + Node* javaoop = set_output_for_allocation(alloc, ary_type); // Cast length on remaining path to be as narrow as possible if (map()->find_edge(length) >= 0) { @@ -3386,6 +3378,10 @@ if (UseLoopPredicate) { add_predicate_impl(Deoptimization::Reason_predicate, nargs); } + // loop's limit check predicate should be near the loop. + if (LoopLimitCheck) { + add_predicate_impl(Deoptimization::Reason_loop_limit_check, nargs); + } } //----------------------------- store barriers ---------------------------- @@ -3462,9 +3458,22 @@ // Get the alias_index for raw card-mark memory int adr_type = Compile::AliasIdxRaw; + Node* zero = __ ConI(0); // Dirty card value + BasicType bt = T_BYTE; + + if (UseCondCardMark) { + // The classic GC reference write barrier is typically implemented + // as a store into the global card mark table. 
Unfortunately + // unconditional stores can result in false sharing and excessive + // coherence traffic as well as false transactional aborts. + // UseCondCardMark enables MP "polite" conditional card mark + // stores. In theory we could relax the load from ctrl() to + // no_ctrl, but that doesn't buy much latitude. + Node* card_val = __ load( __ ctrl(), card_adr, TypeInt::BYTE, bt, adr_type); + __ if_then(card_val, BoolTest::ne, zero); + } + // Smash zero into card - Node* zero = __ ConI(0); - BasicType bt = T_BYTE; if( !UseConcMarkSweepGC ) { __ store(__ ctrl(), card_adr, zero, bt, adr_type); } else { @@ -3472,6 +3481,10 @@ __ storeCM(__ ctrl(), card_adr, zero, oop_store, adr_idx, bt, adr_type); } + if (UseCondCardMark) { + __ end_if(); + } + // Final sync IdealKit and GraphKit. final_sync(ideal); }
--- a/hotspot/src/share/vm/opto/graphKit.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/opto/graphKit.hpp Sat May 14 15:21:38 2011 -0700 @@ -773,15 +773,13 @@ // implementation of object creation Node* set_output_for_allocation(AllocateNode* alloc, - const TypeOopPtr* oop_type, - bool raw_mem_only); + const TypeOopPtr* oop_type); Node* get_layout_helper(Node* klass_node, jint& constant_value); Node* new_instance(Node* klass_node, Node* slow_test = NULL, - bool raw_mem_only = false, Node* *return_size_val = NULL); Node* new_array(Node* klass_node, Node* count_val, int nargs, - bool raw_mem_only = false, Node* *return_size_val = NULL); + Node* *return_size_val = NULL); // Handy for making control flow IfNode* create_and_map_if(Node* ctrl, Node* tst, float prob, float cnt) {
--- a/hotspot/src/share/vm/opto/ifnode.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/opto/ifnode.cpp Sat May 14 15:21:38 2011 -0700 @@ -236,6 +236,7 @@ } Node* predicate_c = NULL; Node* predicate_x = NULL; + bool counted_loop = r->is_CountedLoop(); Node *region_c = new (igvn->C, req_c + 1) RegionNode(req_c + 1); Node *phi_c = con1; @@ -294,16 +295,16 @@ if (predicate_c != NULL) { assert(predicate_x == NULL, "only one predicate entry expected"); // Clone loop predicates to each path - iff_c_t = igvn->clone_loop_predicates(predicate_c, iff_c_t); - iff_c_f = igvn->clone_loop_predicates(predicate_c, iff_c_f); + iff_c_t = igvn->clone_loop_predicates(predicate_c, iff_c_t, !counted_loop); + iff_c_f = igvn->clone_loop_predicates(predicate_c, iff_c_f, !counted_loop); } Node *iff_x_t = phase->transform(new (igvn->C, 1) IfTrueNode (iff_x)); Node *iff_x_f = phase->transform(new (igvn->C, 1) IfFalseNode(iff_x)); if (predicate_x != NULL) { assert(predicate_c == NULL, "only one predicate entry expected"); // Clone loop predicates to each path - iff_x_t = igvn->clone_loop_predicates(predicate_x, iff_x_t); - iff_x_f = igvn->clone_loop_predicates(predicate_x, iff_x_f); + iff_x_t = igvn->clone_loop_predicates(predicate_x, iff_x_t, !counted_loop); + iff_x_f = igvn->clone_loop_predicates(predicate_x, iff_x_f, !counted_loop); } // Merge the TRUE paths @@ -545,6 +546,7 @@ Node *new_bol = gvn->transform( new (gvn->C, 2) BoolNode( new_cmp, bol->as_Bool()->_test._test ) ); igvn->hash_delete( iff ); iff->set_req_X( 1, new_bol, igvn ); + igvn->_worklist.push( iff ); } //------------------------------up_one_dom-------------------------------------
--- a/hotspot/src/share/vm/opto/library_call.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/opto/library_call.cpp Sat May 14 15:21:38 2011 -0700 @@ -867,12 +867,10 @@ Node* str1_offset = make_load(no_ctrl, str1_offseta, TypeInt::INT, T_INT, string_type->add_offset(offset_offset)); Node* str1_start = array_element_address(str1_value, str1_offset, T_CHAR); - // Pin loads from String::equals() argument since it could be NULL. - Node* str2_ctrl = (opcode == Op_StrEquals) ? control() : no_ctrl; Node* str2_valuea = basic_plus_adr(str2, str2, value_offset); - Node* str2_value = make_load(str2_ctrl, str2_valuea, value_type, T_OBJECT, string_type->add_offset(value_offset)); + Node* str2_value = make_load(no_ctrl, str2_valuea, value_type, T_OBJECT, string_type->add_offset(value_offset)); Node* str2_offseta = basic_plus_adr(str2, str2, offset_offset); - Node* str2_offset = make_load(str2_ctrl, str2_offseta, TypeInt::INT, T_INT, string_type->add_offset(offset_offset)); + Node* str2_offset = make_load(no_ctrl, str2_offseta, TypeInt::INT, T_INT, string_type->add_offset(offset_offset)); Node* str2_start = array_element_address(str2_value, str2_offset, T_CHAR); Node* result = NULL; @@ -1012,14 +1010,15 @@ if (!stopped()) { // Properly cast the argument to String argument = _gvn.transform(new (C, 2) CheckCastPPNode(control(), argument, string_type)); + // This path is taken only when argument's type is String:NotNull. + argument = cast_not_null(argument, false); // Get counts for string and argument Node* receiver_cnta = basic_plus_adr(receiver, receiver, count_offset); receiver_cnt = make_load(no_ctrl, receiver_cnta, TypeInt::INT, T_INT, string_type->add_offset(count_offset)); - // Pin load from argument string since it could be NULL. 
Node* argument_cnta = basic_plus_adr(argument, argument, count_offset); - argument_cnt = make_load(control(), argument_cnta, TypeInt::INT, T_INT, string_type->add_offset(count_offset)); + argument_cnt = make_load(no_ctrl, argument_cnta, TypeInt::INT, T_INT, string_type->add_offset(count_offset)); // Check for receiver count != argument count Node* cmp = _gvn.transform( new(C, 3) CmpINode(receiver_cnt, argument_cnt) ); @@ -3527,8 +3526,7 @@ Node* orig_tail = _gvn.transform( new(C, 3) SubINode(orig_length, start) ); Node* moved = generate_min_max(vmIntrinsics::_min, orig_tail, length); - const bool raw_mem_only = true; - newcopy = new_array(klass_node, length, 0, raw_mem_only); + newcopy = new_array(klass_node, length, 0); // Generate a direct call to the right arraycopy function(s). // We know the copy is disjoint but we might not know if the @@ -4325,8 +4323,6 @@ const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM; int raw_adr_idx = Compile::AliasIdxRaw; - const bool raw_mem_only = true; - Node* array_ctl = generate_array_guard(obj_klass, (RegionNode*)NULL); if (array_ctl != NULL) { @@ -4335,8 +4331,7 @@ set_control(array_ctl); Node* obj_length = load_array_length(obj); Node* obj_size = NULL; - Node* alloc_obj = new_array(obj_klass, obj_length, 0, - raw_mem_only, &obj_size); + Node* alloc_obj = new_array(obj_klass, obj_length, 0, &obj_size); if (!use_ReduceInitialCardMarks()) { // If it is an oop array, it requires very special treatment, @@ -4408,7 +4403,7 @@ // It's an instance, and it passed the slow-path tests. PreserveJVMState pjvms(this); Node* obj_size = NULL; - Node* alloc_obj = new_instance(obj_klass, NULL, raw_mem_only, &obj_size); + Node* alloc_obj = new_instance(obj_klass, NULL, &obj_size); copy_to_clone(obj, alloc_obj, obj_size, false, !use_ReduceInitialCardMarks());
--- a/hotspot/src/share/vm/opto/loopPredicate.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/opto/loopPredicate.cpp Sat May 14 15:21:38 2011 -0700 @@ -341,7 +341,7 @@ // Cut predicate from old place. Node* old = predicate_proj; igvn->_worklist.push(old); - for (DUIterator_Last imin, i = old->last_outs(imin); i >= imin; ) { + for (DUIterator_Last imin, i = old->last_outs(imin); i >= imin;) { Node* use = old->last_out(i); // for each use... igvn->hash_delete(use); igvn->_worklist.push(use); @@ -384,24 +384,25 @@ //--------------------------clone_loop_predicates----------------------- // Interface from IGVN -Node* PhaseIterGVN::clone_loop_predicates(Node* old_entry, Node* new_entry) { - return PhaseIdealLoop::clone_loop_predicates(old_entry, new_entry, false, NULL, this); +Node* PhaseIterGVN::clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check) { + return PhaseIdealLoop::clone_loop_predicates(old_entry, new_entry, false, clone_limit_check, NULL, this); } -Node* PhaseIterGVN::move_loop_predicates(Node* old_entry, Node* new_entry) { - return PhaseIdealLoop::clone_loop_predicates(old_entry, new_entry, true, NULL, this); +Node* PhaseIterGVN::move_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check) { + return PhaseIdealLoop::clone_loop_predicates(old_entry, new_entry, true, clone_limit_check, NULL, this); } // Interface from PhaseIdealLoop -Node* PhaseIdealLoop::clone_loop_predicates(Node* old_entry, Node* new_entry) { - return clone_loop_predicates(old_entry, new_entry, false, this, &this->_igvn); +Node* PhaseIdealLoop::clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check) { + return clone_loop_predicates(old_entry, new_entry, false, clone_limit_check, this, &this->_igvn); } -Node* PhaseIdealLoop::move_loop_predicates(Node* old_entry, Node* new_entry) { - return clone_loop_predicates(old_entry, new_entry, true, this, &this->_igvn); +Node* PhaseIdealLoop::move_loop_predicates(Node* 
old_entry, Node* new_entry, bool clone_limit_check) { + return clone_loop_predicates(old_entry, new_entry, true, clone_limit_check, this, &this->_igvn); } // Clone loop predicates to cloned loops (peeled, unswitched, split_if). Node* PhaseIdealLoop::clone_loop_predicates(Node* old_entry, Node* new_entry, bool move_predicates, + bool clone_limit_check, PhaseIdealLoop* loop_phase, PhaseIterGVN* igvn) { #ifdef ASSERT @@ -413,10 +414,16 @@ #endif // Search original predicates Node* entry = old_entry; + ProjNode* limit_check_proj = NULL; + if (LoopLimitCheck) { + limit_check_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check); + if (limit_check_proj != NULL) { + entry = entry->in(0)->in(0); + } + } if (UseLoopPredicate) { ProjNode* predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); if (predicate_proj != NULL) { // right pattern that can be used by loop predication - assert(entry->in(0)->in(1)->in(1)->Opcode()==Op_Opaque1, "must be"); if (move_predicates) { new_entry = move_predicate(predicate_proj, new_entry, Deoptimization::Reason_predicate, @@ -435,11 +442,37 @@ } } } + if (limit_check_proj != NULL && clone_limit_check) { + // Clone loop limit check last to insert it before loop. + // Don't clone a limit check which was already finalized + // for this counted loop (only one limit check is needed). + if (move_predicates) { + new_entry = move_predicate(limit_check_proj, new_entry, + Deoptimization::Reason_loop_limit_check, + loop_phase, igvn); + assert(new_entry == limit_check_proj, "old limit check fall through projection"); + } else { + new_entry = clone_predicate(limit_check_proj, new_entry, + Deoptimization::Reason_loop_limit_check, + loop_phase, igvn); + assert(new_entry != NULL && new_entry->is_Proj(), "IfTrue or IfFalse after clone limit check"); + } + if (TraceLoopLimitCheck) { + tty->print_cr("Loop Limit Check %s: ", move_predicates ? 
"moved" : "cloned"); + debug_only( new_entry->in(0)->dump(); ) + } + } return new_entry; } //--------------------------eliminate_loop_predicates----------------------- void PhaseIdealLoop::eliminate_loop_predicates(Node* entry) { + if (LoopLimitCheck) { + Node* predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check); + if (predicate != NULL) { + entry = entry->in(0)->in(0); + } + } if (UseLoopPredicate) { ProjNode* predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); if (predicate_proj != NULL) { // right pattern that can be used by loop predication @@ -456,10 +489,15 @@ // Skip related predicates. Node* PhaseIdealLoop::skip_loop_predicates(Node* entry) { Node* predicate = NULL; + if (LoopLimitCheck) { + predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check); + if (predicate != NULL) { + entry = entry->in(0)->in(0); + } + } if (UseLoopPredicate) { predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); if (predicate != NULL) { // right pattern that can be used by loop predication - assert(entry->is_Proj() && entry->in(0)->in(1)->in(1)->Opcode()==Op_Opaque1, "must be"); IfNode* iff = entry->in(0)->as_If(); ProjNode* uncommon_proj = iff->proj_out(1 - entry->as_Proj()->_con); Node* rgn = uncommon_proj->unique_ctrl_out(); @@ -491,10 +529,15 @@ // Find a predicate Node* PhaseIdealLoop::find_predicate(Node* entry) { Node* predicate = NULL; + if (LoopLimitCheck) { + predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check); + if (predicate != NULL) { // right pattern that can be used by loop predication + return entry; + } + } if (UseLoopPredicate) { predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); if (predicate != NULL) { // right pattern that can be used by loop predication - assert(entry->in(0)->in(1)->in(1)->Opcode()==Op_Opaque1, "must be"); return entry; } } @@ 
-658,7 +701,7 @@ Node* range = cmp->in(2); if (range->Opcode() != Op_LoadRange) { const TypeInt* tint = phase->_igvn.type(range)->isa_int(); - if (!OptimizeFill || tint == NULL || tint->empty() || tint->_lo < 0) { + if (tint == NULL || tint->empty() || tint->_lo < 0) { // Allow predication on positive values that aren't LoadRanges. // This allows optimization of loops where the length of the // array is a known value and doesn't need to be loaded back @@ -696,36 +739,49 @@ // max(scale*i + offset) = scale*(limit-stride) + offset // (2) stride*scale < 0 // max(scale*i + offset) = scale*init + offset -BoolNode* PhaseIdealLoop::rc_predicate(Node* ctrl, +BoolNode* PhaseIdealLoop::rc_predicate(IdealLoopTree *loop, Node* ctrl, int scale, Node* offset, Node* init, Node* limit, Node* stride, Node* range, bool upper) { - DEBUG_ONLY(ttyLocker ttyl); - if (TraceLoopPredicate) tty->print("rc_predicate "); + stringStream* predString = NULL; + if (TraceLoopPredicate) { + predString = new stringStream(); + predString->print("rc_predicate "); + } Node* max_idx_expr = init; int stride_con = stride->get_int(); if ((stride_con > 0) == (scale > 0) == upper) { - max_idx_expr = new (C, 3) SubINode(limit, stride); - register_new_node(max_idx_expr, ctrl); - if (TraceLoopPredicate) tty->print("(limit - stride) "); + if (LoopLimitCheck) { + // With LoopLimitCheck limit is not exact. + // Calculate exact limit here. + // Note, counted loop's test is '<' or '>'. 
+ limit = exact_limit(loop); + max_idx_expr = new (C, 3) SubINode(limit, stride); + register_new_node(max_idx_expr, ctrl); + if (TraceLoopPredicate) predString->print("(limit - stride) "); + } else { + max_idx_expr = new (C, 3) SubINode(limit, stride); + register_new_node(max_idx_expr, ctrl); + if (TraceLoopPredicate) predString->print("(limit - stride) "); + } } else { - if (TraceLoopPredicate) tty->print("init "); + if (TraceLoopPredicate) predString->print("init "); } if (scale != 1) { ConNode* con_scale = _igvn.intcon(scale); max_idx_expr = new (C, 3) MulINode(max_idx_expr, con_scale); register_new_node(max_idx_expr, ctrl); - if (TraceLoopPredicate) tty->print("* %d ", scale); + if (TraceLoopPredicate) predString->print("* %d ", scale); } if (offset && (!offset->is_Con() || offset->get_int() != 0)){ max_idx_expr = new (C, 3) AddINode(max_idx_expr, offset); register_new_node(max_idx_expr, ctrl); if (TraceLoopPredicate) - if (offset->is_Con()) tty->print("+ %d ", offset->get_int()); - else tty->print("+ offset "); + if (offset->is_Con()) predString->print("+ %d ", offset->get_int()); + else predString->print("+ offset "); } CmpUNode* cmp = new (C, 3) CmpUNode(max_idx_expr, range); @@ -733,7 +789,10 @@ BoolNode* bol = new (C, 2) BoolNode(cmp, BoolTest::lt); register_new_node(bol, ctrl); - if (TraceLoopPredicate) tty->print_cr("<u range"); + if (TraceLoopPredicate) { + predString->print_cr("<u range"); + tty->print(predString->as_string()); + } return bol; } @@ -746,29 +805,36 @@ // Could be a simple region when irreducible loops are present. 
return false; } + LoopNode* head = loop->_head->as_Loop(); - if (loop->_head->unique_ctrl_out()->Opcode() == Op_NeverBranch) { + if (head->unique_ctrl_out()->Opcode() == Op_NeverBranch) { // do nothing for infinite loops return false; } CountedLoopNode *cl = NULL; - if (loop->_head->is_CountedLoop()) { - cl = loop->_head->as_CountedLoop(); + if (head->is_CountedLoop()) { + cl = head->as_CountedLoop(); // do nothing for iteration-splitted loops if (!cl->is_normal_loop()) return false; } - LoopNode *lpn = loop->_head->as_Loop(); - Node* entry = lpn->in(LoopNode::EntryControl); + Node* entry = head->in(LoopNode::EntryControl); + ProjNode *predicate_proj = NULL; + // Loop limit check predicate should be near the loop. + if (LoopLimitCheck) { + predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check); + if (predicate_proj != NULL) + entry = predicate_proj->in(0)->in(0); + } - ProjNode *predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); + predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); if (!predicate_proj) { #ifndef PRODUCT if (TraceLoopPredicate) { tty->print("missing predicate:"); loop->dump_head(); - lpn->dump(1); + head->dump(1); } #endif return false; @@ -782,7 +848,6 @@ // Create list of if-projs such that a newer proj dominates all older // projs in the list, and they all dominate loop->tail() Node_List if_proj_list(area); - LoopNode *head = loop->_head->as_Loop(); Node *current_proj = loop->tail(); //start from tail while (current_proj != head) { if (loop == get_loop(current_proj) && // still in the loop ? 
@@ -856,8 +921,8 @@ const Node* cmp = bol->in(1)->as_Cmp(); Node* idx = cmp->in(1); assert(!invar.is_invariant(idx), "index is variant"); - assert(cmp->in(2)->Opcode() == Op_LoadRange || OptimizeFill, "must be"); Node* rng = cmp->in(2); + assert(rng->Opcode() == Op_LoadRange || _igvn.type(rng)->is_int() >= 0, "must be"); assert(invar.is_invariant(rng), "range must be invariant"); int scale = 1; Node* offset = zero; @@ -886,14 +951,14 @@ } // Test the lower bound - Node* lower_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, false); + Node* lower_bound_bol = rc_predicate(loop, ctrl, scale, offset, init, limit, stride, rng, false); IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If(); _igvn.hash_delete(lower_bound_iff); lower_bound_iff->set_req(1, lower_bound_bol); if (TraceLoopPredicate) tty->print_cr("lower bound check if: %d", lower_bound_iff->_idx); // Test the upper bound - Node* upper_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, true); + Node* upper_bound_bol = rc_predicate(loop, ctrl, scale, offset, init, limit, stride, rng, true); IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If(); _igvn.hash_delete(upper_bound_iff); upper_bound_iff->set_req(1, upper_bound_bol); @@ -957,4 +1022,3 @@ return hoisted; } -
--- a/hotspot/src/share/vm/opto/loopTransform.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/opto/loopTransform.cpp Sat May 14 15:21:38 2011 -0700 @@ -83,7 +83,7 @@ #ifdef ASSERT BoolTest::mask bt = cl->loopexit()->test_trip(); assert(bt == BoolTest::lt || bt == BoolTest::gt || - bt == BoolTest::ne, "canonical test is expected"); + (bt == BoolTest::ne && !LoopLimitCheck), "canonical test is expected"); #endif Node* init_n = cl->init_trip(); @@ -510,7 +510,7 @@ // the pre-loop with only 1 user (the new peeled iteration), but the // peeled-loop backedge has 2 users. Node* new_exit_value = old_new[head->in(LoopNode::LoopBackControl)->_idx]; - new_exit_value = move_loop_predicates(entry, new_exit_value); + new_exit_value = move_loop_predicates(entry, new_exit_value, !counted_loop); _igvn.hash_delete(head); head->set_req(LoopNode::EntryControl, new_exit_value); for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) { @@ -593,6 +593,12 @@ return false; } + // Fully unroll a loop with few iterations regardless next + // conditions since following loop optimizations will split + // such loop anyway (pre-main-post). + if (trip_count <= 3) + return true; + // Take into account that after unroll conjoined heads and tails will fold, // otherwise policy_unroll() may allow more unrolling than max unrolling. uint new_body_size = EMPTY_LOOP_SIZE + (body_size - EMPTY_LOOP_SIZE) * trip_count; @@ -605,15 +611,6 @@ return false; } - // Currently we don't have policy to optimize one iteration loops. - // Maximally unrolling transformation is used for that: - // it is peeled and the original loop become non reachable (dead). - // Also fully unroll a loop with few iterations regardless next - // conditions since following loop optimizations will split - // such loop anyway (pre-main-post). - if (trip_count <= 3) - return true; - // Do not unroll a loop with String intrinsics code. // String intrinsics are large and have loops. 
for (uint k = 0; k < _body.size(); k++) { @@ -632,6 +629,8 @@ } +#define MAX_UNROLL 16 // maximum number of unrolls for main loop + //------------------------------policy_unroll---------------------------------- // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if // the loop is a CountedLoop and the body is small enough. @@ -643,13 +642,15 @@ if (!cl->is_valid_counted_loop()) return false; // Malformed counted loop - // protect against over-unrolling - if (cl->trip_count() <= 1) return false; - - // Check for stride being a small enough constant - if (abs(cl->stride_con()) > (1<<3)) return false; + // Protect against over-unrolling. + // After split at least one iteration will be executed in pre-loop. + if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false; int future_unroll_ct = cl->unrolled_count() * 2; + if (future_unroll_ct > MAX_UNROLL) return false; + + // Check for initial stride being a small enough constant + if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false; // Don't unroll if the next round of unrolling would push us // over the expected trip count of the loop. One is subtracted @@ -675,6 +676,7 @@ Node *init_n = cl->init_trip(); Node *limit_n = cl->limit(); + int stride_con = cl->stride_con(); // Non-constant bounds. // Protect against over-unrolling when init or/and limit are not constant // (so that trip_count's init value is maxint) but iv range is known. @@ -684,7 +686,7 @@ if (phi != NULL) { assert(phi->is_Phi() && phi->in(0) == _head, "Counted loop should have iv phi."); const TypeInt* iv_type = phase->_igvn.type(phi)->is_int(); - int next_stride = cl->stride_con() * 2; // stride after this unroll + int next_stride = stride_con * 2; // stride after this unroll if (next_stride > 0) { if (iv_type->_lo + next_stride <= iv_type->_lo || // overflow iv_type->_lo + next_stride > iv_type->_hi) { @@ -699,15 +701,19 @@ } } + // After unroll limit will be adjusted: new_limit = limit-stride. 
+ // Bailout if adjustment overflow. + const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int(); + if (stride_con > 0 && ((limit_type->_hi - stride_con) >= limit_type->_hi) || + stride_con < 0 && ((limit_type->_lo - stride_con) <= limit_type->_lo)) + return false; // overflow + // Adjust body_size to determine if we unroll or not uint body_size = _body.size(); - // Key test to unroll CaffeineMark's Logic test - int xors_in_loop = 0; // Also count ModL, DivL and MulL which expand mightly for (uint k = 0; k < _body.size(); k++) { Node* n = _body.at(k); switch (n->Opcode()) { - case Op_XorI: xors_in_loop++; break; // CaffeineMark's Logic test case Op_ModL: body_size += 30; break; case Op_DivL: body_size += 30; break; case Op_MulL: body_size += 10; break; @@ -724,8 +730,7 @@ // Check for being too big if (body_size > (uint)LoopUnrollLimit) { - if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true; - // Normal case: loop too big + // Normal case: loop too big return false; } @@ -747,28 +752,31 @@ // Return TRUE or FALSE if the loop should be range-check-eliminated. // Actually we do iteration-splitting, a more powerful form of RCE. bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const { - if( !RangeCheckElimination ) return false; + if (!RangeCheckElimination) return false; CountedLoopNode *cl = _head->as_CountedLoop(); // If we unrolled with no intention of doing RCE and we later // changed our minds, we got no pre-loop. Either we need to // make a new pre-loop, or we gotta disallow RCE. - if( cl->is_main_no_pre_loop() ) return false; // Disallowed for now. + if (cl->is_main_no_pre_loop()) return false; // Disallowed for now. Node *trip_counter = cl->phi(); // Check loop body for tests of trip-counter plus loop-invariant vs // loop-invariant. - for( uint i = 0; i < _body.size(); i++ ) { + for (uint i = 0; i < _body.size(); i++) { Node *iff = _body[i]; - if( iff->Opcode() == Op_If ) { // Test? 
+ if (iff->Opcode() == Op_If) { // Test? // Comparing trip+off vs limit Node *bol = iff->in(1); - if( bol->req() != 2 ) continue; // dead constant test + if (bol->req() != 2) continue; // dead constant test if (!bol->is_Bool()) { assert(UseLoopPredicate && bol->Opcode() == Op_Conv2B, "predicate check only"); continue; } + if (bol->as_Bool()->_test._test == BoolTest::ne) + continue; // not RC + Node *cmp = bol->in(1); Node *rc_exp = cmp->in(1); @@ -1064,6 +1072,7 @@ // negative stride use > if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) { + assert(!LoopLimitCheck, "only canonical tests (lt or gt) are expected"); BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt; // Modify pre loop end condition @@ -1090,6 +1099,9 @@ main_head->set_main_loop(); if( peel_only ) main_head->set_main_no_pre_loop(); + // Subtract a trip count for the pre-loop. + main_head->set_trip_count(main_head->trip_count() - 1); + // It's difficult to be precise about the trip-counts // for the pre/post loops. They are usually very short, // so guess that 4 trips is a reasonable value. @@ -1123,9 +1135,9 @@ loop->dump_head(); } else if (TraceLoopOpts) { if (loop_head->trip_count() < (uint)LoopUnrollLimit) { - tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count()); + tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count()); } else { - tty->print("Unroll %d ", loop_head->unrolled_count()*2); + tty->print("Unroll %d ", loop_head->unrolled_count()*2); } loop->dump_head(); } @@ -1141,7 +1153,8 @@ Node *stride = loop_head->stride(); Node *opaq = NULL; - if( adjust_min_trip ) { // If not maximally unrolling, need adjustment + if (adjust_min_trip) { // If not maximally unrolling, need adjustment + // Search for zero-trip guard. 
assert( loop_head->is_main_loop(), "" ); assert( ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "" ); Node *iff = ctrl->in(0); @@ -1151,63 +1164,210 @@ Node *cmp = bol->in(1); assert( cmp->Opcode() == Op_CmpI, "" ); opaq = cmp->in(2); - // Occasionally it's possible for a pre-loop Opaque1 node to be + // Occasionally it's possible for a zero-trip guard Opaque1 node to be // optimized away and then another round of loop opts attempted. // We can not optimize this particular loop in that case. - if( opaq->Opcode() != Op_Opaque1 ) - return; // Cannot find pre-loop! Bail out! + if (opaq->Opcode() != Op_Opaque1) + return; // Cannot find zero-trip guard! Bail out! + // Zero-trip test uses an 'opaque' node which is not shared. + assert(opaq->outcnt() == 1 && opaq->in(1) == limit, ""); } C->set_major_progress(); - // Adjust max trip count. The trip count is intentionally rounded - // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll, - // the main, unrolled, part of the loop will never execute as it is protected - // by the min-trip test. See bug 4834191 for a case where we over-unrolled - // and later determined that part of the unrolled loop was dead. - loop_head->set_trip_count(loop_head->trip_count() / 2); + Node* new_limit = NULL; + if (UnrollLimitCheck) { + int stride_con = stride->get_int(); + int stride_p = (stride_con > 0) ? stride_con : -stride_con; + uint old_trip_count = loop_head->trip_count(); + // Verify that unroll policy result is still valid. + assert(old_trip_count > 1 && + (!adjust_min_trip || stride_p <= (1<<3)*loop_head->unrolled_count()), "sanity"); - // Double the count of original iterations in the unrolled loop body. - loop_head->double_unrolled_count(); + // Adjust loop limit to keep valid iterations number after unroll. + // Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride + // which may overflow. 
+ if (!adjust_min_trip) { + assert(old_trip_count > 1 && (old_trip_count & 1) == 0, + "odd trip count for maximally unroll"); + // Don't need to adjust limit for maximally unroll since trip count is even. + } else if (loop_head->has_exact_trip_count() && init->is_Con()) { + // Loop's limit is constant. Loop's init could be constant when pre-loop + // become peeled iteration. + long init_con = init->get_int(); + // We can keep old loop limit if iterations count stays the same: + // old_trip_count == new_trip_count * 2 + // Note: since old_trip_count >= 2 then new_trip_count >= 1 + // so we also don't need to adjust zero trip test. + long limit_con = limit->get_int(); + // (stride_con*2) not overflow since stride_con <= 8. + int new_stride_con = stride_con * 2; + int stride_m = new_stride_con - (stride_con > 0 ? 1 : -1); + long trip_count = (limit_con - init_con + stride_m)/new_stride_con; + // New trip count should satisfy next conditions. + assert(trip_count > 0 && (julong)trip_count < (julong)max_juint/2, "sanity"); + uint new_trip_count = (uint)trip_count; + adjust_min_trip = (old_trip_count != new_trip_count*2); + } - // ----------- - // Step 2: Cut back the trip counter for an unroll amount of 2. - // Loop will normally trip (limit - init)/stride_con. Since it's a - // CountedLoop this is exact (stride divides limit-init exactly). - // We are going to double the loop body, so we want to knock off any - // odd iteration: (trip_cnt & ~1). Then back compute a new limit. 
- Node *span = new (C, 3) SubINode( limit, init ); - register_new_node( span, ctrl ); - Node *trip = new (C, 3) DivINode( 0, span, stride ); - register_new_node( trip, ctrl ); - Node *mtwo = _igvn.intcon(-2); - set_ctrl(mtwo, C->root()); - Node *rond = new (C, 3) AndINode( trip, mtwo ); - register_new_node( rond, ctrl ); - Node *spn2 = new (C, 3) MulINode( rond, stride ); - register_new_node( spn2, ctrl ); - Node *lim2 = new (C, 3) AddINode( spn2, init ); - register_new_node( lim2, ctrl ); + if (adjust_min_trip) { + // Step 2: Adjust the trip limit if it is called for. + // The adjustment amount is -stride. Need to make sure if the + // adjustment underflows or overflows, then the main loop is skipped. + Node* cmp = loop_end->cmp_node(); + assert(cmp->in(2) == limit, "sanity"); + assert(opaq != NULL && opaq->in(1) == limit, "sanity"); - // Hammer in the new limit - Node *ctrl2 = loop_end->in(0); - Node *cmp2 = new (C, 3) CmpINode( loop_head->incr(), lim2 ); - register_new_node( cmp2, ctrl2 ); - Node *bol2 = new (C, 2) BoolNode( cmp2, loop_end->test_trip() ); - register_new_node( bol2, ctrl2 ); - _igvn.hash_delete(loop_end); - loop_end->set_req(CountedLoopEndNode::TestValue, bol2); + // Verify that policy_unroll result is still valid. + const TypeInt* limit_type = _igvn.type(limit)->is_int(); + assert(stride_con > 0 && ((limit_type->_hi - stride_con) < limit_type->_hi) || + stride_con < 0 && ((limit_type->_lo - stride_con) > limit_type->_lo), "sanity"); - // Step 3: Find the min-trip test guaranteed before a 'main' loop. - // Make it a 1-trip test (means at least 2 trips). - if( adjust_min_trip ) { - // Guard test uses an 'opaque' node which is not shared. Hence I - // can edit it's inputs directly. Hammer in the new limit for the - // minimum-trip guard. 
- assert( opaq->outcnt() == 1, "" ); - _igvn.hash_delete(opaq); - opaq->set_req(1, lim2); - } + if (limit->is_Con()) { + // The check in policy_unroll and the assert above guarantee + // no underflow if limit is constant. + new_limit = _igvn.intcon(limit->get_int() - stride_con); + set_ctrl(new_limit, C->root()); + } else { + // Limit is not constant. + { + // Separate limit by Opaque node in case it is an incremented + // variable from previous loop to avoid using pre-incremented + // value which could increase register pressure. + // Otherwise reorg_offsets() optimization will create a separate + // Opaque node for each use of trip-counter and as result + // zero trip guard limit will be different from loop limit. + assert(has_ctrl(opaq), "should have it"); + Node* opaq_ctrl = get_ctrl(opaq); + limit = new (C, 2) Opaque2Node( C, limit ); + register_new_node( limit, opaq_ctrl ); + } + if (stride_con > 0 && ((limit_type->_lo - stride_con) < limit_type->_lo) || + stride_con < 0 && ((limit_type->_hi - stride_con) > limit_type->_hi)) { + // No underflow. + new_limit = new (C, 3) SubINode(limit, stride); + } else { + // (limit - stride) may underflow. + // Clamp the adjustment value with MININT or MAXINT: + // + // new_limit = limit-stride + // if (stride > 0) + // new_limit = (limit < new_limit) ? MININT : new_limit; + // else + // new_limit = (limit > new_limit) ? MAXINT : new_limit; + // + BoolTest::mask bt = loop_end->test_trip(); + assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected"); + Node* adj_max = _igvn.intcon((stride_con > 0) ? min_jint : max_jint); + set_ctrl(adj_max, C->root()); + Node* old_limit = NULL; + Node* adj_limit = NULL; + Node* bol = limit->is_CMove() ? 
limit->in(CMoveNode::Condition) : NULL; + if (loop_head->unrolled_count() > 1 && + limit->is_CMove() && limit->Opcode() == Op_CMoveI && + limit->in(CMoveNode::IfTrue) == adj_max && + bol->as_Bool()->_test._test == bt && + bol->in(1)->Opcode() == Op_CmpI && + bol->in(1)->in(2) == limit->in(CMoveNode::IfFalse)) { + // Loop was unrolled before. + // Optimize the limit to avoid nested CMove: + // use original limit as old limit. + old_limit = bol->in(1)->in(1); + // Adjust previous adjusted limit. + adj_limit = limit->in(CMoveNode::IfFalse); + adj_limit = new (C, 3) SubINode(adj_limit, stride); + } else { + old_limit = limit; + adj_limit = new (C, 3) SubINode(limit, stride); + } + assert(old_limit != NULL && adj_limit != NULL, ""); + register_new_node( adj_limit, ctrl ); // adjust amount + Node* adj_cmp = new (C, 3) CmpINode(old_limit, adj_limit); + register_new_node( adj_cmp, ctrl ); + Node* adj_bool = new (C, 2) BoolNode(adj_cmp, bt); + register_new_node( adj_bool, ctrl ); + new_limit = new (C, 4) CMoveINode(adj_bool, adj_limit, adj_max, TypeInt::INT); + } + register_new_node(new_limit, ctrl); + } + assert(new_limit != NULL, ""); + // Replace in loop test. + _igvn.hash_delete(cmp); + cmp->set_req(2, new_limit); + + // Step 3: Find the min-trip test guaranteed before a 'main' loop. + // Make it a 1-trip test (means at least 2 trips). + + // Guard test uses an 'opaque' node which is not shared. Hence I + // can edit it's inputs directly. Hammer in the new limit for the + // minimum-trip guard. + assert(opaq->outcnt() == 1, ""); + _igvn.hash_delete(opaq); + opaq->set_req(1, new_limit); + } + + // Adjust max trip count. The trip count is intentionally rounded + // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll, + // the main, unrolled, part of the loop will never execute as it is protected + // by the min-trip test. See bug 4834191 for a case where we over-unrolled + // and later determined that part of the unrolled loop was dead. 
+ loop_head->set_trip_count(old_trip_count / 2); + + // Double the count of original iterations in the unrolled loop body. + loop_head->double_unrolled_count(); + + } else { // LoopLimitCheck + + // Adjust max trip count. The trip count is intentionally rounded + // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll, + // the main, unrolled, part of the loop will never execute as it is protected + // by the min-trip test. See bug 4834191 for a case where we over-unrolled + // and later determined that part of the unrolled loop was dead. + loop_head->set_trip_count(loop_head->trip_count() / 2); + + // Double the count of original iterations in the unrolled loop body. + loop_head->double_unrolled_count(); + + // ----------- + // Step 2: Cut back the trip counter for an unroll amount of 2. + // Loop will normally trip (limit - init)/stride_con. Since it's a + // CountedLoop this is exact (stride divides limit-init exactly). + // We are going to double the loop body, so we want to knock off any + // odd iteration: (trip_cnt & ~1). Then back compute a new limit. 
+ Node *span = new (C, 3) SubINode( limit, init ); + register_new_node( span, ctrl ); + Node *trip = new (C, 3) DivINode( 0, span, stride ); + register_new_node( trip, ctrl ); + Node *mtwo = _igvn.intcon(-2); + set_ctrl(mtwo, C->root()); + Node *rond = new (C, 3) AndINode( trip, mtwo ); + register_new_node( rond, ctrl ); + Node *spn2 = new (C, 3) MulINode( rond, stride ); + register_new_node( spn2, ctrl ); + new_limit = new (C, 3) AddINode( spn2, init ); + register_new_node( new_limit, ctrl ); + + // Hammer in the new limit + Node *ctrl2 = loop_end->in(0); + Node *cmp2 = new (C, 3) CmpINode( loop_head->incr(), new_limit ); + register_new_node( cmp2, ctrl2 ); + Node *bol2 = new (C, 2) BoolNode( cmp2, loop_end->test_trip() ); + register_new_node( bol2, ctrl2 ); + _igvn.hash_delete(loop_end); + loop_end->set_req(CountedLoopEndNode::TestValue, bol2); + + // Step 3: Find the min-trip test guaranteed before a 'main' loop. + // Make it a 1-trip test (means at least 2 trips). + if( adjust_min_trip ) { + assert( new_limit != NULL, "" ); + // Guard test uses an 'opaque' node which is not shared. Hence I + // can edit it's inputs directly. Hammer in the new limit for the + // minimum-trip guard. + assert( opaq->outcnt() == 1, "" ); + _igvn.hash_delete(opaq); + opaq->set_req(1, new_limit); + } + } // LoopLimitCheck // --------- // Step 4: Clone the loop body. Move it inside the loop. This loop body @@ -1263,6 +1423,7 @@ void PhaseIdealLoop::do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new ) { CountedLoopNode *cl = loop->_head->as_CountedLoop(); + assert(cl->has_exact_trip_count(), "trip count is not exact"); assert(cl->trip_count() > 0, ""); #ifndef PRODUCT if (TraceLoopOpts) { @@ -1279,6 +1440,7 @@ // Now its tripping an even number of times remaining. Double loop body. // Do not adjust pre-guards; they are not needed and do not exist. 
if (cl->trip_count() > 0) { + assert((cl->trip_count() & 1) == 0, "missed peeling"); do_unroll(loop, old_new, false); } } @@ -1292,22 +1454,13 @@ } //------------------------------add_constraint--------------------------------- -// Constrain the main loop iterations so the condition: -// scale_con * I + offset < limit +// Constrain the main loop iterations so the conditions: +// low_limit <= scale_con * I + offset < upper_limit // always holds true. That is, either increase the number of iterations in // the pre-loop or the post-loop until the condition holds true in the main // loop. Stride, scale, offset and limit are all loop invariant. Further, // stride and scale are constants (offset and limit often are). -void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) { - - // Compute "I :: (limit-offset)/scale_con" - Node *con = new (C, 3) SubINode( limit, offset ); - register_new_node( con, pre_ctrl ); - Node *scale = _igvn.intcon(scale_con); - set_ctrl(scale, C->root()); - Node *X = new (C, 3) DivINode( 0, con, scale ); - register_new_node( X, pre_ctrl ); - +void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) { // For positive stride, the pre-loop limit always uses a MAX function // and the main loop a MIN function. For negative stride these are // reversed. @@ -1316,48 +1469,143 @@ // pre-loop must check for underflow and the post-loop for overflow. // Negative stride*scale reverses this; pre-loop checks for overflow and // post-loop for underflow. - if( stride_con*scale_con > 0 ) { - // Compute I < (limit-offset)/scale_con - // Adjust main-loop last iteration to be MIN/MAX(main_loop,X) - *main_limit = (stride_con > 0) - ? 
(Node*)(new (C, 3) MinINode( *main_limit, X )) - : (Node*)(new (C, 3) MaxINode( *main_limit, X )); - register_new_node( *main_limit, pre_ctrl ); + if (stride_con*scale_con > 0) { + // The overflow limit: scale*I+offset < upper_limit + // For main-loop compute + // ( if (scale > 0) /* and stride > 0 */ + // I < (upper_limit-offset)/scale + // else /* scale < 0 and stride < 0 */ + // I > (upper_limit-offset)/scale + // ) + // + // (upper_limit-offset) may overflow when offset < 0. + // But it is fine since main loop will either have + // less iterations or will be skipped in such case. + Node *con = new (C, 3) SubINode(upper_limit, offset); + register_new_node(con, pre_ctrl); + Node *scale = _igvn.intcon(scale_con); + set_ctrl(scale, C->root()); + Node *X = new (C, 3) DivINode(0, con, scale); + register_new_node(X, pre_ctrl); - } else { - // Compute (limit-offset)/scale_con + SGN(-scale_con) <= I - // Add the negation of the main-loop constraint to the pre-loop. - // See footnote [++] below for a derivation of the limit expression. - Node *incr = _igvn.intcon(scale_con > 0 ? -1 : 1); - set_ctrl(incr, C->root()); - Node *adj = new (C, 3) AddINode( X, incr ); - register_new_node( adj, pre_ctrl ); - *pre_limit = (scale_con > 0) - ? (Node*)new (C, 3) MinINode( *pre_limit, adj ) - : (Node*)new (C, 3) MaxINode( *pre_limit, adj ); - register_new_node( *pre_limit, pre_ctrl ); + // Adjust main-loop last iteration + Node *loop_limit = *main_limit; + loop_limit = (stride_con > 0) // scale > 0 + ? 
(Node*)(new (C, 3) MinINode(loop_limit, X)) + : (Node*)(new (C, 3) MaxINode(loop_limit, X)); + register_new_node(loop_limit, pre_ctrl); + *main_limit = loop_limit; -// [++] Here's the algebra that justifies the pre-loop limit expression: -// -// NOT( scale_con * I + offset < limit ) -// == -// scale_con * I + offset >= limit -// == -// SGN(scale_con) * I >= (limit-offset)/|scale_con| -// == -// (limit-offset)/|scale_con| <= I * SGN(scale_con) -// == -// (limit-offset)/|scale_con|-1 < I * SGN(scale_con) -// == -// ( if (scale_con > 0) /*common case*/ -// (limit-offset)/scale_con - 1 < I -// else -// (limit-offset)/scale_con + 1 > I -// ) -// ( if (scale_con > 0) /*common case*/ -// (limit-offset)/scale_con + SGN(-scale_con) < I -// else -// (limit-offset)/scale_con + SGN(-scale_con) > I + // The underflow limit: low_limit <= scale*I+offset. + // For pre-loop compute + // NOT(scale*I+offset >= low_limit) + // scale*I+offset < low_limit + // ( if (scale > 0) /* and stride > 0 */ + // I < (low_limit-offset)/scale + // else /* scale < 0 and stride < 0 */ + // I > (low_limit-offset)/scale + // ) + + if (low_limit->get_int() == -max_jint) { + if (!RangeLimitCheck) return; + // We need this guard when scale*pre_limit+offset >= limit + // due to underflow so we need execute pre-loop until + // scale*I+offset >= min_int. But (low_limit-offset) will + // underflow when offset > 0 and X will be > original_limit. + // To avoid it we replace offset = offset > 0 ? 0 : offset + // and add min(pre_limit, original_limit). + Node* shift = _igvn.intcon(31); + set_ctrl(shift, C->root()); + Node *neg_off = new (C, 3) RShiftINode(offset, shift); + register_new_node(neg_off, pre_ctrl); + offset = new (C, 3) AndINode(offset, neg_off); + register_new_node(offset, pre_ctrl); + } else { + assert(low_limit->get_int() == 0, "wrong low limit for range check"); + // The only problem we have here when offset == min_int + // since (0-min_int) == min_int. 
It may be fine for scale > 0 + // but for scale < 0 X will be < original_limit. + } + con = new (C, 3) SubINode(low_limit, offset); + register_new_node(con, pre_ctrl); + scale = _igvn.intcon(scale_con); + set_ctrl(scale, C->root()); + X = new (C, 3) DivINode(0, con, scale); + register_new_node(X, pre_ctrl); + + // Adjust pre-loop last iteration + loop_limit = *pre_limit; + loop_limit = (stride_con > 0) // scale > 0 + ? (Node*)(new (C, 3) MaxINode(loop_limit, X)) + : (Node*)(new (C, 3) MinINode(loop_limit, X)); + register_new_node( loop_limit, pre_ctrl ); + *pre_limit = loop_limit; + + } else { // stride_con*scale_con < 0 + // For negative stride*scale pre-loop checks for overflow and + // post-loop for underflow. + // + // The underflow limit: low_limit <= scale*I+offset. + // For main-loop compute + // scale*I+offset+1 > low_limit + // ( if (scale < 0) /* and stride > 0 */ + // I < (low_limit-(offset+1))/scale + // else /* scale < 0 and stride < 0 */ + // I > (low_limit-(offset+1))/scale + // ) + + if (low_limit->get_int() == -max_jint) { + if (!RangeLimitCheck) return; + } else { + assert(low_limit->get_int() == 0, "wrong low limit for range check"); + } + + Node *one = _igvn.intcon(1); + set_ctrl(one, C->root()); + Node *plus_one = new (C, 3) AddINode(offset, one); + register_new_node( plus_one, pre_ctrl ); + Node *con = new (C, 3) SubINode(low_limit, plus_one); + register_new_node(con, pre_ctrl); + Node *scale = _igvn.intcon(scale_con); + set_ctrl(scale, C->root()); + Node *X = new (C, 3) DivINode(0, con, scale); + register_new_node(X, pre_ctrl); + + // Adjust main-loop last iteration + Node *loop_limit = *main_limit; + loop_limit = (stride_con > 0) // scale < 0 + ? 
(Node*)(new (C, 3) MinINode(loop_limit, X)) + : (Node*)(new (C, 3) MaxINode(loop_limit, X)); + register_new_node(loop_limit, pre_ctrl); + *main_limit = loop_limit; + + // The overflow limit: scale*I+offset < upper_limit + // For pre-loop compute + // NOT(scale*I+offset < upper_limit) + // scale*I+offset >= upper_limit + // scale*I+offset+1 > upper_limit + // ( if (scale < 0) /* and stride > 0 */ + // I < (upper_limit-(offset+1))/scale + // else /* scale < 0 and stride < 0 */ + // I > (upper_limit-(offset+1))/scale + // ) + plus_one = new (C, 3) AddINode(offset, one); + register_new_node( plus_one, pre_ctrl ); + con = new (C, 3) SubINode(upper_limit, plus_one); + register_new_node(con, pre_ctrl); + scale = _igvn.intcon(scale_con); + set_ctrl(scale, C->root()); + X = new (C, 3) DivINode(0, con, scale); + register_new_node(X, pre_ctrl); + + // Adjust pre-loop last iteration + loop_limit = *pre_limit; + loop_limit = (stride_con > 0) // scale < 0 + ? (Node*)(new (C, 3) MaxINode(loop_limit, X)) + : (Node*)(new (C, 3) MinINode(loop_limit, X)); + register_new_node( loop_limit, pre_ctrl ); + *pre_limit = loop_limit; + } } @@ -1488,7 +1736,7 @@ Node *cmpzm = bolzm->in(1); assert(cmpzm->is_Cmp(), ""); Node *opqzm = cmpzm->in(2); - // Can not optimize a loop if pre-loop Opaque1 node is optimized + // Can not optimize a loop if zero-trip Opaque1 node is optimized // away and then another round of loop opts attempted. if (opqzm->Opcode() != Op_Opaque1) return; @@ -1523,8 +1771,11 @@ int stride_con = cl->stride_con(); Node *zero = _igvn.intcon(0); Node *one = _igvn.intcon(1); + // Use symmetrical int range [-max_jint,max_jint] + Node *mini = _igvn.intcon(-max_jint); set_ctrl(zero, C->root()); set_ctrl(one, C->root()); + set_ctrl(mini, C->root()); // Range checks that do not dominate the loop backedge (ie. 
// conditionally executed) can lengthen the pre loop limit beyond @@ -1599,7 +1850,12 @@ if( offset_c == ctrl ) { continue; // Don't rce this check but continue looking for other candidates. } - +#ifdef ASSERT + if (TraceRangeLimitCheck) { + tty->print_cr("RC bool node%s", flip ? " flipped:" : ":"); + bol->dump(2); + } +#endif // At this point we have the expression as: // scale_con * trip_counter + offset :: limit // where scale_con, offset and limit are loop invariant. Trip_counter @@ -1610,17 +1866,16 @@ // Adjust pre and main loop limits to guard the correct iteration set if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests if( b_test._test == BoolTest::lt ) { // Range checks always use lt - // The overflow limit: scale*I+offset < limit - add_constraint( stride_con, scale_con, offset, limit, pre_ctrl, &pre_limit, &main_limit ); - // The underflow limit: 0 <= scale*I+offset. - // Some math yields: -scale*I-(offset+1) < 0 - Node *plus_one = new (C, 3) AddINode( offset, one ); - register_new_node( plus_one, pre_ctrl ); - Node *neg_offset = new (C, 3) SubINode( zero, plus_one ); - register_new_node( neg_offset, pre_ctrl ); - add_constraint( stride_con, -scale_con, neg_offset, zero, pre_ctrl, &pre_limit, &main_limit ); + // The underflow and overflow limits: 0 <= scale*I+offset < limit + add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit ); if (!conditional_rc) { conditional_rc = !loop->dominates_backedge(iff); + // It is also needed if offset->_lo == min_int since + // (0-min_int) == min_int. It may be fine for stride > 0 + // but for stride < 0 pre_limit will be < original_limit. 
+ const TypeInt* offset_t = _igvn.type(offset)->is_int(); + conditional_rc |= RangeLimitCheck && (offset_t->_lo == min_jint) && + (scale_con<0) && (stride_con<0); } } else { #ifndef PRODUCT @@ -1631,21 +1886,35 @@ } } else { // Otherwise work on normal compares switch( b_test._test ) { - case BoolTest::ge: // Convert X >= Y to -X <= -Y + case BoolTest::gt: + // Fall into GE case + case BoolTest::ge: + // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit scale_con = -scale_con; offset = new (C, 3) SubINode( zero, offset ); register_new_node( offset, pre_ctrl ); limit = new (C, 3) SubINode( zero, limit ); register_new_node( limit, pre_ctrl ); // Fall into LE case - case BoolTest::le: // Convert X <= Y to X < Y+1 - limit = new (C, 3) AddINode( limit, one ); - register_new_node( limit, pre_ctrl ); + case BoolTest::le: + if (b_test._test != BoolTest::gt) { + // Convert X <= Y to X < Y+1 + limit = new (C, 3) AddINode( limit, one ); + register_new_node( limit, pre_ctrl ); + } // Fall into LT case case BoolTest::lt: - add_constraint( stride_con, scale_con, offset, limit, pre_ctrl, &pre_limit, &main_limit ); + // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit + add_constraint( stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit ); if (!conditional_rc) { conditional_rc = !loop->dominates_backedge(iff); + // It is also needed if scale*pre_limit+offset >= limit + // due to underflow so we need execute pre-loop until + // scale*I+offset >= min_int. But (low_limit-offset) will + // underflow when offset > 0 and X will be > original_limit. + const TypeInt* offset_t = _igvn.type(offset)->is_int(); + conditional_rc |= RangeLimitCheck && (offset_t->_hi > 0) && + (scale_con>0) && (stride_con>0); } break; default: @@ -1696,7 +1965,8 @@ // Note:: we are making the main loop limit no longer precise; // need to round up based on stride. 
- if( stride_con != 1 && stride_con != -1 ) { // Cutout for common case + cl->set_nonexact_trip_count(); + if (!LoopLimitCheck && stride_con != 1 && stride_con != -1) { // Cutout for common case // "Standard" round-up logic: ([main_limit-init+(y-1)]/y)*y+init // Hopefully, compiler will optimize for powers of 2. Node *ctrl = get_ctrl(main_limit); @@ -1876,7 +2146,19 @@ // iteration. Then the CountedLoopEnd will collapse (backedge never // taken) and all loop-invariant uses of the exit values will be correct. Node *phi = cl->phi(); - Node *final = new (phase->C, 3) SubINode( cl->limit(), cl->stride() ); + Node *exact_limit = phase->exact_limit(this); + if (exact_limit != cl->limit()) { + // We also need to replace the original limit to collapse loop exit. + Node* cmp = cl->loopexit()->cmp_node(); + assert(cl->limit() == cmp->in(2), "sanity"); + phase->_igvn._worklist.push(cmp->in(2)); // put limit on worklist + phase->_igvn.hash_delete(cmp); + cmp->set_req(2, exact_limit); + phase->_igvn._worklist.push(cmp); // put cmp on worklist + } + // Note: the final value after increment should not overflow since + // counted loop has limit check predicate. + Node *final = new (phase->C, 3) SubINode( exact_limit, cl->stride() ); phase->register_new_node(final,cl->in(LoopNode::EntryControl)); phase->_igvn.replace_node(phi,final); phase->C->set_major_progress();
--- a/hotspot/src/share/vm/opto/loopUnswitch.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/opto/loopUnswitch.cpp Sat May 14 15:21:38 2011 -0700 @@ -130,6 +130,11 @@ Node* uniqc = proj_true->unique_ctrl_out(); Node* entry = head->in(LoopNode::EntryControl); Node* predicate = find_predicate(entry); + if (predicate != NULL && LoopLimitCheck && UseLoopPredicate) { + // We may have two predicates, find first. + entry = find_predicate(entry->in(0)->in(0)); + if (entry != NULL) predicate = entry; + } if (predicate != NULL) predicate = predicate->in(0); assert(proj_true->is_IfTrue() && (predicate == NULL && uniqc == head || @@ -217,6 +222,7 @@ ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop, Node_List &old_new) { LoopNode* head = loop->_head->as_Loop(); + bool counted_loop = head->is_CountedLoop(); Node* entry = head->in(LoopNode::EntryControl); _igvn.hash_delete(entry); _igvn._worklist.push(entry); @@ -242,14 +248,14 @@ assert(old_new[head->_idx]->is_Loop(), "" ); // Fast (true) control - Node* iffast_pred = clone_loop_predicates(entry, iffast); + Node* iffast_pred = clone_loop_predicates(entry, iffast, !counted_loop); _igvn.hash_delete(head); head->set_req(LoopNode::EntryControl, iffast_pred); set_idom(head, iffast_pred, dom_depth(head)); _igvn._worklist.push(head); // Slow (false) control - Node* ifslow_pred = move_loop_predicates(entry, ifslow); + Node* ifslow_pred = move_loop_predicates(entry, ifslow, !counted_loop); LoopNode* slow_head = old_new[head->_idx]->as_Loop(); _igvn.hash_delete(slow_head); slow_head->set_req(LoopNode::EntryControl, ifslow_pred);
--- a/hotspot/src/share/vm/opto/loopnode.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/opto/loopnode.cpp Sat May 14 15:21:38 2011 -0700 @@ -206,7 +206,7 @@ // Get backedge compare Node *cmp = test->in(1); int cmp_op = cmp->Opcode(); - if( cmp_op != Op_CmpI ) + if (cmp_op != Op_CmpI) return false; // Avoid pointer & float compares // Find the trip-counter increment & limit. Limit must be loop invariant. @@ -259,7 +259,8 @@ } // Stride must be constant int stride_con = stride->get_int(); - assert(stride_con != 0, "missed some peephole opt"); + if (stride_con == 0) + return false; // missed some peephole opt if (!xphi->is_Phi()) return false; // Too much math on the trip counter @@ -319,7 +320,7 @@ // Count down loop rolls through MAXINT (bt == BoolTest::le || bt == BoolTest::lt) && stride_con < 0 || // Count up loop rolls through MININT - (bt == BoolTest::ge || bt == BoolTest::gt) && stride_con > 0 ) { + (bt == BoolTest::ge || bt == BoolTest::gt) && stride_con > 0) { return false; // Bail out } @@ -341,12 +342,137 @@ // assert(x->Opcode() == Op_Loop, "regular loops only"); C->print_method("Before CountedLoop", 3); + + Node *hook = new (C, 6) Node(6); + + if (LoopLimitCheck) { + + // =================================================== + // Generate loop limit check to avoid integer overflow + // in cases like next (cyclic loops): + // + // for (i=0; i <= max_jint; i++) {} + // for (i=0; i < max_jint; i+=2) {} + // + // + // Limit check predicate depends on the loop test: + // + // for(;i != limit; i++) --> limit <= (max_jint) + // for(;i < limit; i+=stride) --> limit <= (max_jint - stride + 1) + // for(;i <= limit; i+=stride) --> limit <= (max_jint - stride ) + // + + // Check if limit is excluded to do more precise int overflow check. + bool incl_limit = (bt == BoolTest::le || bt == BoolTest::ge); + int stride_m = stride_con - (incl_limit ? 0 : (stride_con > 0 ? 
1 : -1)); + + // If compare points directly to the phi we need to adjust + // the compare so that it points to the incr. Limit have + // to be adjusted to keep trip count the same and the + // adjusted limit should be checked for int overflow. + if (phi_incr != NULL) { + stride_m += stride_con; + } + + if (limit->is_Con()) { + int limit_con = limit->get_int(); + if ((stride_con > 0 && limit_con > (max_jint - stride_m)) || + (stride_con < 0 && limit_con < (min_jint - stride_m))) { + // Bailout: it could be integer overflow. + return false; + } + } else if ((stride_con > 0 && limit_t->_hi <= (max_jint - stride_m)) || + (stride_con < 0 && limit_t->_lo >= (min_jint - stride_m))) { + // Limit's type may satisfy the condition, for example, + // when it is an array length. + } else { + // Generate loop's limit check. + // Loop limit check predicate should be near the loop. + ProjNode *limit_check_proj = find_predicate_insertion_point(init_control, Deoptimization::Reason_loop_limit_check); + if (!limit_check_proj) { + // The limit check predicate is not generated if this method trapped here before. +#ifdef ASSERT + if (TraceLoopLimitCheck) { + tty->print("missing loop limit check:"); + loop->dump_head(); + x->dump(1); + } +#endif + return false; + } + + IfNode* check_iff = limit_check_proj->in(0)->as_If(); + Node* cmp_limit; + Node* bol; + + if (stride_con > 0) { + cmp_limit = new (C, 3) CmpINode(limit, _igvn.intcon(max_jint - stride_m)); + bol = new (C, 2) BoolNode(cmp_limit, BoolTest::le); + } else { + cmp_limit = new (C, 3) CmpINode(limit, _igvn.intcon(min_jint - stride_m)); + bol = new (C, 2) BoolNode(cmp_limit, BoolTest::ge); + } + cmp_limit = _igvn.register_new_node_with_optimizer(cmp_limit); + bol = _igvn.register_new_node_with_optimizer(bol); + set_subtree_ctrl(bol); + + // Replace condition in original predicate but preserve Opaque node + // so that previous predicates could be found. 
+ assert(check_iff->in(1)->Opcode() == Op_Conv2B && + check_iff->in(1)->in(1)->Opcode() == Op_Opaque1, ""); + Node* opq = check_iff->in(1)->in(1); + _igvn.hash_delete(opq); + opq->set_req(1, bol); + // Update ctrl. + set_ctrl(opq, check_iff->in(0)); + set_ctrl(check_iff->in(1), check_iff->in(0)); + #ifndef PRODUCT - if (TraceLoopOpts) { - tty->print("Counted "); - loop->dump_head(); + // report that the loop predication has been actually performed + // for this loop + if (TraceLoopLimitCheck) { + tty->print_cr("Counted Loop Limit Check generated:"); + debug_only( bol->dump(2); ) + } +#endif } -#endif + + if (phi_incr != NULL) { + // If compare points directly to the phi we need to adjust + // the compare so that it points to the incr. Limit have + // to be adjusted to keep trip count the same and we + // should avoid int overflow. + // + // i = init; do {} while(i++ < limit); + // is converted to + // i = init; do {} while(++i < limit+1); + // + limit = gvn->transform(new (C, 3) AddINode(limit, stride)); + } + + // Now we need to canonicalize loop condition. + if (bt == BoolTest::ne) { + assert(stride_con == 1 || stride_con == -1, "simple increment only"); + bt = (stride_con > 0) ? BoolTest::lt : BoolTest::gt; + } + + if (incl_limit) { + // The limit check guaranties that 'limit <= (max_jint - stride)' so + // we can convert 'i <= limit' to 'i < limit+1' since stride != 0. + // + Node* one = (stride_con > 0) ? gvn->intcon( 1) : gvn->intcon(-1); + limit = gvn->transform(new (C, 3) AddINode(limit, one)); + if (bt == BoolTest::le) + bt = BoolTest::lt; + else if (bt == BoolTest::ge) + bt = BoolTest::gt; + else + ShouldNotReachHere(); + } + set_subtree_ctrl( limit ); + + } else { // LoopLimitCheck + // If compare points to incr, we are ok. Otherwise the compare // can directly point to the phi; in this case adjust the compare so that // it points to the incr by adjusting the limit. 
@@ -359,7 +485,6 @@ Node *one_m = gvn->intcon(-1); Node *trip_count = NULL; - Node *hook = new (C, 6) Node(6); switch( bt ) { case BoolTest::eq: ShouldNotReachHere(); @@ -441,6 +566,8 @@ limit = gvn->transform(new (C, 3) AddINode(span,init_trip)); set_subtree_ctrl( limit ); + } // LoopLimitCheck + // Check for SafePoint on backedge and remove Node *sfpt = x->in(LoopNode::LoopBackControl); if (sfpt->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt)) { @@ -531,7 +658,7 @@ // Check for immediately preceding SafePoint and remove Node *sfpt2 = le->in(0); - if( sfpt2->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt2)) + if (sfpt2->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt2)) lazy_replace( sfpt2, sfpt2->in(TypeFunc::Control)); // Free up intermediate goo @@ -541,12 +668,56 @@ assert(l->is_valid_counted_loop(), "counted loop shape is messed up"); assert(l == loop->_head && l->phi() == phi && l->loopexit() == lex, "" ); #endif +#ifndef PRODUCT + if (TraceLoopOpts) { + tty->print("Counted "); + loop->dump_head(); + } +#endif C->print_method("After CountedLoop", 3); return true; } +//----------------------exact_limit------------------------------------------- +Node* PhaseIdealLoop::exact_limit( IdealLoopTree *loop ) { + assert(loop->_head->is_CountedLoop(), ""); + CountedLoopNode *cl = loop->_head->as_CountedLoop(); + + if (!LoopLimitCheck || ABS(cl->stride_con()) == 1 || + cl->limit()->Opcode() == Op_LoopLimit) { + // Old code has exact limit (it could be incorrect in case of int overflow). + // Loop limit is exact with stride == 1. And loop may already have exact limit. + return cl->limit(); + } + Node *limit = NULL; +#ifdef ASSERT + BoolTest::mask bt = cl->loopexit()->test_trip(); + assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected"); +#endif + if (cl->has_exact_trip_count()) { + // Simple case: loop has constant boundaries. + // Use longs to avoid integer overflow. 
+ int stride_con = cl->stride_con(); + long init_con = cl->init_trip()->get_int(); + long limit_con = cl->limit()->get_int(); + julong trip_cnt = cl->trip_count(); + long final_con = init_con + trip_cnt*stride_con; + final_con -= stride_con; + int final_int = (int)final_con; + // The final value should be in integer range since the loop + // is counted and the limit was checked for overflow. + assert(final_con == (long)final_int, "final value should be integer"); + limit = _igvn.intcon(final_int); + } else { + // Create new LoopLimit node to get exact limit (final iv value). + limit = new (C, 4) LoopLimitNode(C, cl->init_trip(), cl->limit(), cl->stride()); + register_new_node(limit, cl->in(LoopNode::EntryControl)); + } + assert(limit != NULL, "sanity"); + return limit; +} //------------------------------Ideal------------------------------------------ // Return a node which is more "ideal" than the current node. @@ -572,14 +743,12 @@ #ifndef PRODUCT void CountedLoopNode::dump_spec(outputStream *st) const { LoopNode::dump_spec(st); - if( stride_is_con() ) { + if (stride_is_con()) { st->print("stride: %d ",stride_con()); - } else { - st->print("stride: not constant "); } - if( is_pre_loop () ) st->print("pre of N%d" , _main_idx ); - if( is_main_loop() ) st->print("main of N%d", _idx ); - if( is_post_loop() ) st->print("post of N%d", _main_idx ); + if (is_pre_loop ()) st->print("pre of N%d" , _main_idx); + if (is_main_loop()) st->print("main of N%d", _idx); + if (is_post_loop()) st->print("post of N%d", _main_idx); } #endif @@ -588,7 +757,130 @@ return stride()->bottom_type()->is_int()->get_con(); } - +//============================================================================= +//------------------------------Value----------------------------------------- +const Type *LoopLimitNode::Value( PhaseTransform *phase ) const { + const Type* init_t = phase->type(in(Init)); + const Type* limit_t = phase->type(in(Limit)); + const Type* stride_t = phase->type(in(Stride)); + 
// Either input is TOP ==> the result is TOP + if (init_t == Type::TOP) return Type::TOP; + if (limit_t == Type::TOP) return Type::TOP; + if (stride_t == Type::TOP) return Type::TOP; + + int stride_con = stride_t->is_int()->get_con(); + if (stride_con == 1) + return NULL; // Identity + + if (init_t->is_int()->is_con() && limit_t->is_int()->is_con()) { + // Use longs to avoid integer overflow. + long init_con = init_t->is_int()->get_con(); + long limit_con = limit_t->is_int()->get_con(); + int stride_m = stride_con - (stride_con > 0 ? 1 : -1); + long trip_count = (limit_con - init_con + stride_m)/stride_con; + long final_con = init_con + stride_con*trip_count; + int final_int = (int)final_con; + // The final value should be in integer range since the loop + // is counted and the limit was checked for overflow. + assert(final_con == (long)final_int, "final value should be integer"); + return TypeInt::make(final_int); + } + + return bottom_type(); // TypeInt::INT +} + +//------------------------------Ideal------------------------------------------ +// Return a node which is more "ideal" than the current node. +Node *LoopLimitNode::Ideal(PhaseGVN *phase, bool can_reshape) { + if (phase->type(in(Init)) == Type::TOP || + phase->type(in(Limit)) == Type::TOP || + phase->type(in(Stride)) == Type::TOP) + return NULL; // Dead + + int stride_con = phase->type(in(Stride))->is_int()->get_con(); + if (stride_con == 1) + return NULL; // Identity + + if (in(Init)->is_Con() && in(Limit)->is_Con()) + return NULL; // Value + + // Delay following optimizations until all loop optimizations + // done to keep Ideal graph simple. 
+ if (!can_reshape || phase->C->major_progress()) + return NULL; + + const TypeInt* init_t = phase->type(in(Init) )->is_int(); + const TypeInt* limit_t = phase->type(in(Limit))->is_int(); + int stride_p; + long lim, ini; + julong max; + if (stride_con > 0) { + stride_p = stride_con; + lim = limit_t->_hi; + ini = init_t->_lo; + max = (julong)max_jint; + } else { + stride_p = -stride_con; + lim = init_t->_hi; + ini = limit_t->_lo; + max = (julong)min_jint; + } + julong range = lim - ini + stride_p; + if (range <= max) { + // Convert to integer expression if it is not overflow. + Node* stride_m = phase->intcon(stride_con - (stride_con > 0 ? 1 : -1)); + Node *range = phase->transform(new (phase->C, 3) SubINode(in(Limit), in(Init))); + Node *bias = phase->transform(new (phase->C, 3) AddINode(range, stride_m)); + Node *trip = phase->transform(new (phase->C, 3) DivINode(0, bias, in(Stride))); + Node *span = phase->transform(new (phase->C, 3) MulINode(trip, in(Stride))); + return new (phase->C, 3) AddINode(span, in(Init)); // exact limit + } + + if (is_power_of_2(stride_p) || // divisor is 2^n + !Matcher::has_match_rule(Op_LoopLimit)) { // or no specialized Mach node? + // Convert to long expression to avoid integer overflow + // and let igvn optimizer convert this division. + // + Node* init = phase->transform( new (phase->C, 2) ConvI2LNode(in(Init))); + Node* limit = phase->transform( new (phase->C, 2) ConvI2LNode(in(Limit))); + Node* stride = phase->longcon(stride_con); + Node* stride_m = phase->longcon(stride_con - (stride_con > 0 ? 1 : -1)); + + Node *range = phase->transform(new (phase->C, 3) SubLNode(limit, init)); + Node *bias = phase->transform(new (phase->C, 3) AddLNode(range, stride_m)); + Node *span; + if (stride_con > 0 && is_power_of_2(stride_p)) { + // bias >= 0 if stride >0, so if stride is 2^n we can use &(-stride) + // and avoid generating rounding for division. 
Zero trip guard should + // guarantee that init < limit but sometimes the guard is missing and + // we can get situation when init > limit. Note, for the empty loop + // optimization zero trip guard is generated explicitly which leaves + // only RCE predicate where exact limit is used and the predicate + // will simply fail forcing recompilation. + Node* neg_stride = phase->longcon(-stride_con); + span = phase->transform(new (phase->C, 3) AndLNode(bias, neg_stride)); + } else { + Node *trip = phase->transform(new (phase->C, 3) DivLNode(0, bias, stride)); + span = phase->transform(new (phase->C, 3) MulLNode(trip, stride)); + } + // Convert back to int + Node *span_int = phase->transform(new (phase->C, 2) ConvL2INode(span)); + return new (phase->C, 3) AddINode(span_int, in(Init)); // exact limit + } + + return NULL; // No progress +} + +//------------------------------Identity--------------------------------------- +// If stride == 1 return limit node. +Node *LoopLimitNode::Identity( PhaseTransform *phase ) { + int stride_con = phase->type(in(Stride))->is_int()->get_con(); + if (stride_con == 1 || stride_con == -1) + return in(Limit); + return this; +} + +//============================================================================= //----------------------match_incr_with_optional_truncation-------------------- // Match increment with optional truncation: // CHAR: (i+1)&0x7fff, BYTE: ((i+1)<<8)>>8, or SHORT: ((i+1)<<16)>>16 @@ -870,7 +1162,7 @@ outer = igvn.register_new_node_with_optimizer(outer, _head); phase->set_created_loop_node(); - Node* pred = phase->clone_loop_predicates(ctl, outer); + Node* pred = phase->clone_loop_predicates(ctl, outer, true); // Outermost loop falls into '_head' loop _head->set_req(LoopNode::EntryControl, pred); _head->del_req(outer_idx); @@ -1440,9 +1732,16 @@ tty->print(" "); tty->print("Loop: N%d/N%d ",_head->_idx,_tail->_idx); if (_irreducible) tty->print(" IRREDUCIBLE"); + Node* entry = _head->in(LoopNode::EntryControl); + if 
(LoopLimitCheck) { + Node* predicate = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check); + if (predicate != NULL ) { + tty->print(" limit_check"); + entry = entry->in(0)->in(0); + } + } if (UseLoopPredicate) { - Node* entry = PhaseIdealLoop::find_predicate_insertion_point(_head->in(LoopNode::EntryControl), - Deoptimization::Reason_predicate); + entry = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); if (entry != NULL) { tty->print(" predicated"); } @@ -1528,10 +1827,15 @@ !loop->tail()->is_top()) { LoopNode* lpn = loop->_head->as_Loop(); Node* entry = lpn->in(LoopNode::EntryControl); - Node* predicate_proj = find_predicate(entry); + Node* predicate_proj = find_predicate(entry); // loop_limit_check first if (predicate_proj != NULL ) { // right pattern that can be used by loop predication assert(entry->in(0)->in(1)->in(1)->Opcode() == Op_Opaque1, "must be"); useful_predicates.push(entry->in(0)->in(1)->in(1)); // good one + entry = entry->in(0)->in(0); + } + predicate_proj = find_predicate(entry); // Predicate + if (predicate_proj != NULL ) { + useful_predicates.push(entry->in(0)->in(1)->in(1)); // good one } } @@ -1542,6 +1846,8 @@ //------------------------eliminate_useless_predicates----------------------------- // Eliminate all inserted predicates if they could not be used by loop predication. +// Note: it will also eliminates loop limits check predicate since it also uses +// Opaque1 node (see Parse::add_predicate()). void PhaseIdealLoop::eliminate_useless_predicates() { if (C->predicate_count() == 0) return; // no predicate left @@ -1731,7 +2037,7 @@ // Some parser-inserted loop predicates could never be used by loop // predication or they were moved away from loop during some optimizations. // For example, peeling. Eliminate them before next loop optimizations. - if (UseLoopPredicate) { + if (UseLoopPredicate || LoopLimitCheck) { eliminate_useless_predicates(); }
--- a/hotspot/src/share/vm/opto/loopnode.hpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/opto/loopnode.hpp Sat May 14 15:21:38 2011 -0700 @@ -289,6 +289,28 @@ inline Node *CountedLoopNode::incr() const { return loopexit() ? loopexit()->incr() : NULL; } inline Node *CountedLoopNode::phi() const { return loopexit() ? loopexit()->phi() : NULL; } +//------------------------------LoopLimitNode----------------------------- +// Counted Loop limit node which represents exact final iterator value: +// trip_count = (limit - init_trip + stride - 1)/stride +// final_value= trip_count * stride + init_trip. +// Use HW instructions to calculate it when it can overflow in integer. +// Note, final_value should fit into integer since counted loop has +// limit check: limit <= max_int-stride. +class LoopLimitNode : public Node { + enum { Init=1, Limit=2, Stride=3 }; + public: + LoopLimitNode( Compile* C, Node *init, Node *limit, Node *stride ) : Node(0,init,limit,stride) { + // Put it on the Macro nodes list to optimize during macro nodes expansion. 
+ init_flags(Flag_is_macro); + C->add_macro_node(this); + } + virtual int Opcode() const; + virtual const Type *bottom_type() const { return TypeInt::INT; } + virtual uint ideal_reg() const { return Op_RegI; } + virtual const Type *Value( PhaseTransform *phase ) const; + virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); + virtual Node *Identity( PhaseTransform *phase ); +}; // -----------------------------IdealLoopTree---------------------------------- class IdealLoopTree : public ResourceObj { @@ -775,6 +797,8 @@ bool is_counted_loop( Node *x, IdealLoopTree *loop ); + Node* exact_limit( IdealLoopTree *loop ); + // Return a post-walked LoopNode IdealLoopTree *get_loop( Node *n ) const { // Dead nodes have no loop, so return the top level loop instead @@ -837,7 +861,6 @@ bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth = 0); // Return true if proj is for "proj->[region->..]call_uct" - // Return true if proj is for "proj->[region->..]call_uct" static bool is_uncommon_trap_proj(ProjNode* proj, Deoptimization::DeoptReason reason); // Return true for "if(test)-> proj -> ... 
// | @@ -860,10 +883,11 @@ PhaseIterGVN* igvn); static Node* clone_loop_predicates(Node* old_entry, Node* new_entry, bool move_predicates, + bool clone_limit_check, PhaseIdealLoop* loop_phase, PhaseIterGVN* igvn); - Node* clone_loop_predicates(Node* old_entry, Node* new_entry); - Node* move_loop_predicates(Node* old_entry, Node* new_entry); + Node* clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check); + Node* move_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check); void eliminate_loop_predicates(Node* entry); static Node* skip_loop_predicates(Node* entry); @@ -873,7 +897,7 @@ // Find a predicate static Node* find_predicate(Node* entry); // Construct a range check for a predicate if - BoolNode* rc_predicate(Node* ctrl, + BoolNode* rc_predicate(IdealLoopTree *loop, Node* ctrl, int scale, Node* offset, Node* init, Node* limit, Node* stride, Node* range, bool upper); @@ -903,11 +927,11 @@ // Range Check Elimination uses this function! // Constrain the main loop iterations so the affine function: - // scale_con * I + offset < limit + // low_limit <= scale_con * I + offset < upper_limit // always holds true. That is, either increase the number of iterations in // the pre-loop or the post-loop until the condition holds true in the main // loop. Scale_con, offset and limit are all loop invariant. - void add_constraint( int stride_con, int scale_con, Node *offset, Node *limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ); + void add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ); // Partially peel loop up through last_peel node. bool partial_peel( IdealLoopTree *loop, Node_List &old_new );
--- a/hotspot/src/share/vm/opto/loopopts.cpp Wed May 11 08:02:44 2011 +0900 +++ b/hotspot/src/share/vm/opto/loopopts.cpp Sat May 14 15:21:38 2011 -0700 @@ -2262,6 +2262,9 @@ // stmt1 // | // v +// loop predicate +// | +// v // stmt2 clone // | // v @@ -2272,9 +2275,6 @@ // : false true // : | | // : | v -// : | loop predicate -// : | | -// : | v // : | newloop<-----+ // : | | | // : | stmt3 clone | @@ -2330,7 +2330,6 @@ } } - Node* entry = head->in(LoopNode::EntryControl); int dd = dom_depth(head); // Step 1: find cut point @@ -2627,8 +2626,6 @@ // Backedge of the surviving new_head (the clone) is original last_peel _igvn.hash_delete(new_head_clone); - Node* new_entry = move_loop_predicates(entry, new_head_clone->in(LoopNode::EntryControl)); - new_head_clone->set_req(LoopNode::EntryControl, new_entry); new_head_clone->set_req(LoopNode::LoopBackControl, last_peel); _igvn._worklist.push(new_head_clone);