OpenJDK / amber / amber
changeset 59270:f051fadd0d06
Merge
author    psadhukhan
date      Mon, 02 Dec 2019 12:01:40 +0530
parents   7637e77c4c8a 5028793d05ff
children  fe5e931830e5
files     src/jdk.zipfs/share/classes/jdk/nio/zipfs/JarFileSystem.java
          src/jdk.zipfs/share/classes/jdk/nio/zipfs/JarFileSystemProvider.java
          test/jdk/jdk/jfr/api/consumer/streaming/TestRepositoryMigration.java
          test/jdk/sun/security/tools/jarsigner/warnings/BadKeyUsageTest.java
          test/langtools/jdk/javadoc/doclet/testDocFiles/pkg/Test.java
          test/langtools/jdk/javadoc/doclet/testDocFiles/pkg/doc-files/test.txt
diffstat  869 files changed, 17882 insertions(+), 9590 deletions(-)
--- a/.hgtags	Mon Dec 02 11:58:14 2019 +0530
+++ b/.hgtags	Mon Dec 02 12:01:40 2019 +0530
@@ -597,3 +597,4 @@
 83810b7d12e7ff761ad3dd91f323a22dad96f108 jdk-14+22
 15936b142f86731afa4b1a2c0fe4a01e806c4944 jdk-14+23
 438337c846fb071900ddb6922bddf8b3e895a514 jdk-14+24
+17d242844fc9e7d18b3eac97426490a9c246119e jdk-14+25
--- a/make/autoconf/flags-cflags.m4	Mon Dec 02 11:58:14 2019 +0530
+++ b/make/autoconf/flags-cflags.m4	Mon Dec 02 12:01:40 2019 +0530
@@ -190,20 +190,7 @@
       WARNINGS_ENABLE_ALL_CXXFLAGS="$WARNINGS_ENABLE_ALL_CFLAGS $WARNINGS_ENABLE_ADDITIONAL_CXX"
 
       DISABLED_WARNINGS="unused-parameter unused"
-
-      # Repeate the check for the BUILD_CC and BUILD_CXX. Need to also reset
-      # CFLAGS since any target specific flags will likely not work with the
-      # build compiler
-      CC_OLD="$CC"
-      CXX_OLD="$CXX"
-      CC="$BUILD_CC"
-      CXX="$BUILD_CXX"
-      CFLAGS_OLD="$CFLAGS"
-      CFLAGS=""
       BUILD_CC_DISABLE_WARNING_PREFIX="-Wno-"
-      CC="$CC_OLD"
-      CXX="$CXX_OLD"
-      CFLAGS="$CFLAGS_OLD"
       ;;
 
     clang)
@@ -420,6 +407,17 @@
   FLAGS_SETUP_CFLAGS_CPU_DEP([TARGET])
 
+  # Repeat the check for the BUILD_CC and BUILD_CXX. Need to also reset CFLAGS
+  # since any target specific flags will likely not work with the build compiler.
+  CC_OLD="$CC"
+  CXX_OLD="$CXX"
+  CFLAGS_OLD="$CFLAGS"
+  CXXFLAGS_OLD="$CXXFLAGS"
+  CC="$BUILD_CC"
+  CXX="$BUILD_CXX"
+  CFLAGS=""
+  CXXFLAGS=""
+
   FLAGS_OS=$OPENJDK_BUILD_OS
   FLAGS_OS_TYPE=$OPENJDK_BUILD_OS_TYPE
   FLAGS_CPU=$OPENJDK_BUILD_CPU
@@ -430,6 +428,11 @@
   FLAGS_CPU_LEGACY_LIB=$OPENJDK_BUILD_CPU_LEGACY_LIB
 
   FLAGS_SETUP_CFLAGS_CPU_DEP([BUILD], [OPENJDK_BUILD_], [BUILD_])
+
+  CC="$CC_OLD"
+  CXX="$CXX_OLD"
+  CFLAGS="$CFLAGS_OLD"
+  CXXFLAGS="$CXXFLAGS_OLD"
 ])
 
 ################################################################################
@@ -529,6 +532,11 @@
   if test "x$TOOLCHAIN_TYPE" = xgcc; then
     TOOLCHAIN_CFLAGS_JVM="$TOOLCHAIN_CFLAGS_JVM -fcheck-new -fstack-protector"
     TOOLCHAIN_CFLAGS_JDK="-pipe -fstack-protector"
+    # reduce lib size on s390x in link step, this needs also special compile flags
+    if test "x$OPENJDK_TARGET_CPU" = xs390x; then
+      TOOLCHAIN_CFLAGS_JVM="$TOOLCHAIN_CFLAGS_JVM -ffunction-sections -fdata-sections"
+      TOOLCHAIN_CFLAGS_JDK="$TOOLCHAIN_CFLAGS_JDK -ffunction-sections -fdata-sections"
+    fi
     # technically NOT for CXX (but since this gives *worse* performance, use
     # no-strict-aliasing everywhere!)
     TOOLCHAIN_CFLAGS_JDK_CONLY="-fno-strict-aliasing"
--- a/make/autoconf/flags-ldflags.m4	Mon Dec 02 11:58:14 2019 +0530
+++ b/make/autoconf/flags-ldflags.m4	Mon Dec 02 12:01:40 2019 +0530
@@ -70,10 +70,14 @@
     fi
 
     # Add -z defs, to forbid undefined symbols in object files.
-    BASIC_LDFLAGS="$BASIC_LDFLAGS -Wl,-z,defs"
+    # add relro (mark relocations read only) for all libs
+    BASIC_LDFLAGS="$BASIC_LDFLAGS -Wl,-z,defs -Wl,-z,relro"
+    # s390x : remove unused code+data in link step
+    if test "x$OPENJDK_TARGET_CPU" = xs390x; then
+      BASIC_LDFLAGS="$BASIC_LDFLAGS -Wl,--gc-sections -Wl,--print-gc-sections"
+    fi
 
-    BASIC_LDFLAGS_JVM_ONLY="-Wl,-O1 -Wl,-z,relro"
-
+    BASIC_LDFLAGS_JVM_ONLY="-Wl,-O1"
 
   elif test "x$TOOLCHAIN_TYPE" = xclang; then
     BASIC_LDFLAGS_JVM_ONLY="-mno-omit-leaf-frame-pointer -mstack-alignment=16 \
@@ -120,9 +124,6 @@
   if test "x$OPENJDK_TARGET_OS" = xlinux; then
     if test x$DEBUG_LEVEL = xrelease; then
       DEBUGLEVEL_LDFLAGS_JDK_ONLY="$DEBUGLEVEL_LDFLAGS_JDK_ONLY -Wl,-O1"
-    else
-      # mark relocations read only on (fast/slow) debug builds
-      DEBUGLEVEL_LDFLAGS_JDK_ONLY="-Wl,-z,relro"
     fi
     if test x$DEBUG_LEVEL = xslowdebug; then
       # do relocations at load
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/data/cacerts/amazonrootca1	Mon Dec 02 12:01:40 2019 +0530
@@ -0,0 +1,27 @@
+Owner: CN=Amazon Root CA 1, O=Amazon, C=US
+Issuer: CN=Amazon Root CA 1, O=Amazon, C=US
+Serial number: 66c9fcf99bf8c0a39e2f0788a43e696365bca
+Valid from: Tue May 26 00:00:00 GMT 2015 until: Sun Jan 17 00:00:00 GMT 2038
+Signature algorithm name: SHA256withRSA
+Subject Public Key Algorithm: 2048-bit RSA key
+Version: 3
+-----BEGIN CERTIFICATE-----
+MIIDQTCCAimgAwIBAgITBmyfz5m/jAo54vB4ikPmljZbyjANBgkqhkiG9w0BAQsF
+ADA5MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6
+b24gUm9vdCBDQSAxMB4XDTE1MDUyNjAwMDAwMFoXDTM4MDExNzAwMDAwMFowOTEL
+MAkGA1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJv
+b3QgQ0EgMTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALJ4gHHKeNXj
+ca9HgFB0fW7Y14h29Jlo91ghYPl0hAEvrAIthtOgQ3pOsqTQNroBvo3bSMgHFzZM
+9O6II8c+6zf1tRn4SWiw3te5djgdYZ6k/oI2peVKVuRF4fn9tBb6dNqcmzU5L/qw
+IFAGbHrQgLKm+a/sRxmPUDgH3KKHOVj4utWp+UhnMJbulHheb4mjUcAwhmahRWa6
+VOujw5H5SNz/0egwLX0tdHA114gk957EWW67c4cX8jJGKLhD+rcdqsq08p8kDi1L
+93FcXmn/6pUCyziKrlA4b9v7LWIbxcceVOF34GfID5yHI9Y/QCB/IIDEgEw+OyQm
+jgSubJrIqg0CAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMC
+AYYwHQYDVR0OBBYEFIQYzIU07LwMlJQuCFmcx7IQTgoIMA0GCSqGSIb3DQEBCwUA
+A4IBAQCY8jdaQZChGsV2USggNiMOruYou6r4lK5IpDB/G/wkjUu0yKGX9rbxenDI
+U5PMCCjjmCXPI6T53iHTfIUJrU6adTrCC2qJeHZERxhlbI1Bjjt/msv0tadQ1wUs
+N+gDS63pYaACbvXy8MWy7Vu33PqUXHeeE6V/Uq2V8viTO96LXFvKWlJbYK8U90vv
+o/ufQJVtMVT8QtPHRh8jrdkPSHCa2XV4cdFyQzR1bldZwgJcJmApzyMZFo6IQ6XU
+5MsI+yMRQ+hDKXJioaldXgjUkK642M4UwtBV8ob2xJNDd2ZhwLnoQdeXeGADbkpy
+rqXRfboQnoZsG4q5WTP468SQvvG5
+-----END CERTIFICATE-----
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/data/cacerts/amazonrootca2	Mon Dec 02 12:01:40 2019 +0530
@@ -0,0 +1,38 @@
+Owner: CN=Amazon Root CA 2, O=Amazon, C=US
+Issuer: CN=Amazon Root CA 2, O=Amazon, C=US
+Serial number: 66c9fd29635869f0a0fe58678f85b26bb8a37
+Valid from: Tue May 26 00:00:00 GMT 2015 until: Sat May 26 00:00:00 GMT 2040
+Signature algorithm name: SHA384withRSA
+Subject Public Key Algorithm: 4096-bit RSA key
+Version: 3
+-----BEGIN CERTIFICATE-----
+MIIFQTCCAymgAwIBAgITBmyf0pY1hp8KD+WGePhbJruKNzANBgkqhkiG9w0BAQwF
+ADA5MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6
+b24gUm9vdCBDQSAyMB4XDTE1MDUyNjAwMDAwMFoXDTQwMDUyNjAwMDAwMFowOTEL
+MAkGA1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJv
+b3QgQ0EgMjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK2Wny2cSkxK
+gXlRmeyKy2tgURO8TW0G/LAIjd0ZEGrHJgw12MBvIITplLGbhQPDW9tK6Mj4kHbZ
+W0/jTOgGNk3Mmqw9DJArktQGGWCsN0R5hYGCrVo34A3MnaZMUnbqQ523BNFQ9lXg
+1dKmSYXpN+nKfq5clU1Imj+uIFptiJXZNLhSGkOQsL9sBbm2eLfq0OQ6PBJTYv9K
+8nu+NQWpEjTj82R0Yiw9AElaKP4yRLuH3WUnAnE72kr3H9rN9yFVkE8P7K6C4Z9r
+2UXTu/Bfh+08LDmG2j/e7HJV63mjrdvdfLC6HM783k81ds8P+HgfajZRRidhW+me
+z/CiVX18JYpvL7TFz4QuK/0NURBs+18bvBt+xa47mAExkv8LV/SasrlX6avvDXbR
+8O70zoan4G7ptGmh32n2M8ZpLpcTnqWHsFcQgTfJU7O7f/aS0ZzQGPSSbtqDT6Zj
+mUyl+17vIWR6IF9sZIUVyzfpYgwLKhbcAS4y2j5L9Z469hdAlO+ekQiG+r5jqFoz
+7Mt0Q5X5bGlSNscpb/xVA1wf+5+9R+vnSUeVC06JIglJ4PVhHvG/LopyboBZ/1c6
++XUyo05f7O0oYtlNc/LMgRdg7c3r3NunysV+Ar3yVAhU/bQtCSwXVEqY0VThUWcI
+0u1ufm8/0i2BWSlmy5A5lREedCf+3euvAgMBAAGjQjBAMA8GA1UdEwEB/wQFMAMB
+Af8wDgYDVR0PAQH/BAQDAgGGMB0GA1UdDgQWBBSwDPBMMPQFWAJI/TPlUq9LhONm
+UjANBgkqhkiG9w0BAQwFAAOCAgEAqqiAjw54o+Ci1M3m9Zh6O+oAA7CXDpO8Wqj2
+LIxyh6mx/H9z/WNxeKWHWc8w4Q0QshNabYL1auaAn6AFC2jkR2vHat+2/XcycuUY
++gn0oJMsXdKMdYV2ZZAMA3m3MSNjrXiDCYZohMr/+c8mmpJ5581LxedhpxfL86kS
+k5Nrp+gvU5LEYFiwzAJRGFuFjWJZY7attN6a+yb3ACfAXVU3dJnJUH/jWS5E4ywl
+7uxMMne0nxrpS10gxdr9HIcWxkPo1LsmmkVwXqkLN1PiRnsn/eBG8om3zEK2yygm
+btmlyTrIQRNg91CMFa6ybRoVGld45pIq2WWQgj9sAq+uEjonljYE1x2igGOpm/Hl
+urR8FLBOybEfdF849lHqm/osohHUqS0nGkWxr7JOcQ3AWEbWaQbLU8uz/mtBzUF+
+fUwPfHJ5elnNXkoOrJupmHN5fLT0zLm4BwyydFy4x2+IoZCn9Kr5v2c69BoVYh63
+n749sSmvZ6ES8lgQGVMDMBu4Gon2nL2XA46jCfMdiyHxtN/kHNGfZQIG6lzWE7OE
+76KlXIx3KadowGuuQNKotOrN8I1LOJwZmhsoVLiJkO/KdYE+HvJkJMcYr07/R54H
+9jVlpNMKVv/1F2Rs76giJUmTtt8AF9pYfl3uxRuw0dFfIRDH+fO6AgonB8Xx1sfT
+4PsJYGw=
+-----END CERTIFICATE-----
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/data/cacerts/amazonrootca3	Mon Dec 02 12:01:40 2019 +0530
@@ -0,0 +1,19 @@
+Owner: CN=Amazon Root CA 3, O=Amazon, C=US
+Issuer: CN=Amazon Root CA 3, O=Amazon, C=US
+Serial number: 66c9fd5749736663f3b0b9ad9e89e7603f24a
+Valid from: Tue May 26 00:00:00 GMT 2015 until: Sat May 26 00:00:00 GMT 2040
+Signature algorithm name: SHA256withECDSA
+Subject Public Key Algorithm: 256-bit EC key
+Version: 3
+-----BEGIN CERTIFICATE-----
+MIIBtjCCAVugAwIBAgITBmyf1XSXNmY/Owua2eiedgPySjAKBggqhkjOPQQDAjA5
+MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6b24g
+Um9vdCBDQSAzMB4XDTE1MDUyNjAwMDAwMFoXDTQwMDUyNjAwMDAwMFowOTELMAkG
+A1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJvb3Qg
+Q0EgMzBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABCmXp8ZBf8ANm+gBG1bG8lKl
+ui2yEujSLtf6ycXYqm0fc4E7O5hrOXwzpcVOho6AF2hiRVd9RFgdszflZwjrZt6j
+QjBAMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgGGMB0GA1UdDgQWBBSr
+ttvXBp43rDCGB5Fwx5zEGbF4wDAKBggqhkjOPQQDAgNJADBGAiEA4IWSoxe3jfkr
+BqWTrBqYaGFy+uGh0PsceGCmQ5nFuMQCIQCcAu/xlJyzlvnrxir4tiz+OpAUFteM
+YyRIHN8wfdVoOw==
+-----END CERTIFICATE-----
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/data/cacerts/amazonrootca4	Mon Dec 02 12:01:40 2019 +0530
@@ -0,0 +1,20 @@
+Owner: CN=Amazon Root CA 4, O=Amazon, C=US
+Issuer: CN=Amazon Root CA 4, O=Amazon, C=US
+Serial number: 66c9fd7c1bb104c2943e5717b7b2cc81ac10e
+Valid from: Tue May 26 00:00:00 GMT 2015 until: Sat May 26 00:00:00 GMT 2040
+Signature algorithm name: SHA384withECDSA
+Subject Public Key Algorithm: 384-bit EC key
+Version: 3
+-----BEGIN CERTIFICATE-----
+MIIB8jCCAXigAwIBAgITBmyf18G7EEwpQ+Vxe3ssyBrBDjAKBggqhkjOPQQDAzA5
+MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6b24g
+Um9vdCBDQSA0MB4XDTE1MDUyNjAwMDAwMFoXDTQwMDUyNjAwMDAwMFowOTELMAkG
+A1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJvb3Qg
+Q0EgNDB2MBAGByqGSM49AgEGBSuBBAAiA2IABNKrijdPo1MN/sGKe0uoe0ZLY7Bi
+9i0b2whxIdIA6GO9mif78DluXeo9pcmBqqNbIJhFXRbb/egQbeOc4OO9X4Ri83Bk
+M6DLJC9wuoihKqB1+IGuYgbEgds5bimwHvouXKNCMEAwDwYDVR0TAQH/BAUwAwEB
+/zAOBgNVHQ8BAf8EBAMCAYYwHQYDVR0OBBYEFNPsxzplbszh2naaVvuc84ZtV+WB
+MAoGCCqGSM49BAMDA2gAMGUCMDqLIfG9fhGt0O9Yli/W651+kI0rz2ZVwyzjKKlw
+CkcO8DdZEv8tmZQoTipPNU0zWgIxAOp1AE47xDqUEpHJWEadIRNyp4iciuRMStuW
+1KyLa2tJElMzrdfkviT8tQp21KW8EA==
+-----END CERTIFICATE-----
--- a/src/hotspot/cpu/aarch64/aarch64.ad	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/aarch64/aarch64.ad	Mon Dec 02 12:01:40 2019 +0530
@@ -5639,7 +5639,6 @@
 operand cmpOpEqNe()
 %{
   match(Bool);
-  match(CmpOp);
   op_cost(0);
   predicate(n->as_Bool()->_test._test == BoolTest::ne
             || n->as_Bool()->_test._test == BoolTest::eq);
@@ -5663,7 +5662,6 @@
 operand cmpOpLtGe()
 %{
   match(Bool);
-  match(CmpOp);
   op_cost(0);
 
   predicate(n->as_Bool()->_test._test == BoolTest::lt
@@ -5688,7 +5686,6 @@
 operand cmpOpUEqNeLtGe()
 %{
   match(Bool);
-  match(CmpOp);
   op_cost(0);
 
   predicate(n->as_Bool()->_test._test == BoolTest::eq
--- a/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -144,7 +144,7 @@
   const size_t max_address_offset_bits = 44; // 16TB
   const size_t address_offset = ZUtils::round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
   const size_t address_offset_bits = log2_intptr(address_offset);
-  return MIN2(MAX2(address_offset_bits, min_address_offset_bits), max_address_offset_bits);
+  return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits);
 }
 
 size_t ZPlatformAddressMetadataShift() {
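Note on the change above: clamp(value, min, max) computes exactly what the MIN2(MAX2(value, min), max) idiom it replaces did, just more readably. A minimal standalone sketch of the equivalence, using plain C++ stand-ins rather than HotSpot's actual globalDefinitions.hpp templates:

#include <cassert>
#include <cstddef>

// Stand-ins for HotSpot's MIN2/MAX2/clamp templates.
template <typename T> T MIN2(T a, T b) { return a < b ? a : b; }
template <typename T> T MAX2(T a, T b) { return a > b ? a : b; }

// clamp(v, lo, hi) pins v into [lo, hi]; identical to MIN2(MAX2(v, lo), hi).
template <typename T> T clamp(T value, T min, T max) {
  assert(min <= max && "invalid clamp range");
  return MIN2(MAX2(value, min), max);
}

int main() {
  const size_t min_bits = 32, max_bits = 44;
  assert(clamp((size_t)20, min_bits, max_bits) == 32); // below range -> min
  assert(clamp((size_t)40, min_bits, max_bits) == 40); // inside range -> unchanged
  assert(clamp((size_t)50, min_bits, max_bits) == 44); // above range -> max
  return 0;
}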
--- a/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -30,6 +30,7 @@
 #include "nativeInst_aarch64.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/handles.hpp"
+#include "runtime/orderAccess.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "utilities/ostream.hpp"
--- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -443,7 +443,6 @@
     Register obj = r0;
     Register mdp = r1;
     Register tmp = r2;
-    __ ldr(mdp, Address(rmethod, Method::method_data_offset()));
     __ profile_return_type(mdp, obj, tmp);
   }
 
@@ -1633,13 +1632,8 @@
   __ mov(rscratch2, true);
   __ strb(rscratch2, do_not_unlock_if_synchronized);
 
-  Label no_mdp;
   Register mdp = r3;
-  __ ldr(mdp, Address(rmethod, Method::method_data_offset()));
-  __ cbz(mdp, no_mdp);
-  __ add(mdp, mdp, in_bytes(MethodData::data_offset()));
   __ profile_parameters_type(mdp, r1, r2);
-  __ bind(no_mdp);
 
   // increment invocation count & check for overflow
   Label invocation_counter_overflow;
--- a/src/hotspot/cpu/arm/arm.ad	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/arm/arm.ad	Mon Dec 02 12:01:40 2019 +0530
@@ -2204,6 +2204,30 @@
   interface(REG_INTER);
 %}
 
+operand R8RegP() %{
+  constraint(ALLOC_IN_RC(R8_regP));
+  match(iRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand R9RegP() %{
+  constraint(ALLOC_IN_RC(R9_regP));
+  match(iRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand R12RegP() %{
+  constraint(ALLOC_IN_RC(R12_regP));
+  match(iRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 operand R2RegP() %{
   constraint(ALLOC_IN_RC(R2_regP));
   match(iRegP);
@@ -2236,6 +2260,14 @@
   interface(REG_INTER);
 %}
 
+operand SPRegP() %{
+  constraint(ALLOC_IN_RC(SP_regP));
+  match(iRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 operand LRRegP() %{
   constraint(ALLOC_IN_RC(LR_regP));
   match(iRegP);
--- a/src/hotspot/cpu/arm/arm_32.ad	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/arm/arm_32.ad	Mon Dec 02 12:01:40 2019 +0530
@@ -232,11 +232,15 @@
 reg_class R1_regP(R_R1);
 reg_class R2_regP(R_R2);
 reg_class R4_regP(R_R4);
+reg_class R8_regP(R_R8);
+reg_class R9_regP(R_R9);
+reg_class R12_regP(R_R12);
 reg_class Rexception_regP(R_Rexception_obj);
 reg_class Ricklass_regP(R_Ricklass);
 reg_class Rmethod_regP(R_Rmethod);
 reg_class Rthread_regP(R_Rthread);
 reg_class IP_regP(R_R12);
+reg_class SP_regP(R_R13);
 reg_class LR_regP(R_R14);
 
 reg_class FP_regP(R_R11);
--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -581,6 +581,7 @@
       base_reg = Rtemp;
       __ str(from_lo, Address(Rtemp));
       if (patch != NULL) {
+        __ nop(); // see comment before patching_epilog for 2nd str
         patching_epilog(patch, lir_patch_low, base_reg, info);
         patch = new PatchingStub(_masm, PatchingStub::access_field_id);
         patch_code = lir_patch_high;
@@ -589,6 +590,7 @@
     } else if (base_reg == from_lo) {
       __ str(from_hi, as_Address_hi(to_addr));
       if (patch != NULL) {
+        __ nop(); // see comment before patching_epilog for 2nd str
         patching_epilog(patch, lir_patch_high, base_reg, info);
         patch = new PatchingStub(_masm, PatchingStub::access_field_id);
         patch_code = lir_patch_low;
@@ -597,6 +599,7 @@
     } else {
       __ str(from_lo, as_Address_lo(to_addr));
       if (patch != NULL) {
+        __ nop(); // see comment before patching_epilog for 2nd str
         patching_epilog(patch, lir_patch_low, base_reg, info);
         patch = new PatchingStub(_masm, PatchingStub::access_field_id);
         patch_code = lir_patch_high;
@@ -640,7 +643,7 @@
   }
 
   if (patch != NULL) {
-    // Offset embeedded into LDR/STR instruction may appear not enough
+    // Offset embedded into LDR/STR instruction may appear not enough
     // to address a field. So, provide a space for one more instruction
     // that will deal with larger offsets.
     __ nop();
@@ -791,6 +794,7 @@
       base_reg = Rtemp;
       __ ldr(to_lo, Address(Rtemp));
       if (patch != NULL) {
+        __ nop(); // see comment before patching_epilog for 2nd ldr
         patching_epilog(patch, lir_patch_low, base_reg, info);
         patch = new PatchingStub(_masm, PatchingStub::access_field_id);
         patch_code = lir_patch_high;
@@ -799,6 +803,7 @@
     } else if (base_reg == to_lo) {
       __ ldr(to_hi, as_Address_hi(addr));
       if (patch != NULL) {
+        __ nop(); // see comment before patching_epilog for 2nd ldr
         patching_epilog(patch, lir_patch_high, base_reg, info);
         patch = new PatchingStub(_masm, PatchingStub::access_field_id);
         patch_code = lir_patch_low;
@@ -807,6 +812,7 @@
     } else {
       __ ldr(to_lo, as_Address_lo(addr));
       if (patch != NULL) {
+        __ nop(); // see comment before patching_epilog for 2nd ldr
         patching_epilog(patch, lir_patch_low, base_reg, info);
         patch = new PatchingStub(_masm, PatchingStub::access_field_id);
         patch_code = lir_patch_high;
@@ -846,7 +852,7 @@
   }
 
   if (patch != NULL) {
-    // Offset embeedded into LDR/STR instruction may appear not enough
+    // Offset embedded into LDR/STR instruction may appear not enough
     // to address a field. So, provide a space for one more instruction
     // that will deal with larger offsets.
     __ nop();
--- a/src/hotspot/cpu/arm/nativeInst_arm_32.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/arm/nativeInst_arm_32.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -28,6 +28,7 @@
 #include "asm/macroAssembler.hpp"
 #include "code/codeCache.hpp"
 #include "runtime/icache.hpp"
+#include "runtime/orderAccess.hpp"
 #include "runtime/os.hpp"
 #include "runtime/thread.hpp"
 #include "register_arm.hpp"
--- a/src/hotspot/cpu/arm/relocInfo_arm.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/arm/relocInfo_arm.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -29,7 +29,6 @@
 #include "nativeInst_arm.hpp"
 #include "oops/compressedOops.inline.hpp"
 #include "oops/oop.hpp"
-#include "runtime/orderAccess.hpp"
 #include "runtime/safepoint.hpp"
 
 void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
--- a/src/hotspot/cpu/arm/stubGenerator_arm.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/arm/stubGenerator_arm.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -437,7 +437,8 @@
   // for which we do not support MP and so membars are not necessary. This ARMv5 code will
   // be removed in the future.
 
-  // Support for jint Atomic::add(jint add_value, volatile jint *dest)
+  // Implementation of atomic_add(jint add_value, volatile jint* dest)
+  // used by Atomic::add(volatile jint* dest, jint add_value)
   //
   // Arguments :
   //
@@ -487,7 +488,8 @@
     return start;
   }
 
-  // Support for jint Atomic::xchg(jint exchange_value, volatile jint *dest)
+  // Implementation of jint atomic_xchg(jint exchange_value, volatile jint* dest)
+  // used by Atomic::add(volatile jint* dest, jint exchange_value)
   //
   // Arguments :
   //
@@ -535,7 +537,8 @@
     return start;
   }
 
-  // Support for jint Atomic::cmpxchg(jint exchange_value, volatile jint *dest, jint compare_value)
+  // Implementation of jint atomic_cmpxchg(jint exchange_value, volatile jint *dest, jint compare_value)
+  // used by Atomic::cmpxchg(volatile jint *dest, jint compare_value, jint exchange_value)
   //
   // Arguments :
   //
--- a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -883,7 +883,7 @@
   //
   // markWord displaced_header = obj->mark().set_unlocked();
   // monitor->lock()->set_displaced_header(displaced_header);
-  // if (Atomic::cmpxchg(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
+  // if (Atomic::cmpxchg(/*addr*/obj->mark_addr(), /*cmp*/displaced_header, /*ex=*/monitor) == displaced_header) {
   //   // We stored the monitor address into the object's mark word.
   // } else if (THREAD->is_lock_owned((address)displaced_header))
   //   // Simple recursive case.
@@ -921,7 +921,7 @@
     std(displaced_header, BasicObjectLock::lock_offset_in_bytes() +
         BasicLock::displaced_header_offset_in_bytes(), monitor);
 
-    // if (Atomic::cmpxchg(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
+    // if (Atomic::cmpxchg(/*addr*/obj->mark_addr(), /*cmp*/displaced_header, /*ex=*/monitor) == displaced_header) {
 
     // Store stack address of the BasicObjectLock (this is monitor) into object.
     addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());
@@ -997,7 +997,7 @@
   // if ((displaced_header = monitor->displaced_header()) == NULL) {
   //   // Recursive unlock. Mark the monitor unlocked by setting the object field to NULL.
   //   monitor->set_obj(NULL);
-  // } else if (Atomic::cmpxchg(displaced_header, obj->mark_addr(), monitor) == monitor) {
+  // } else if (Atomic::cmpxchg(obj->mark_addr(), monitor, displaced_header) == monitor) {
   //   // We swapped the unlocked mark in displaced_header into the object's mark word.
   //   monitor->set_obj(NULL);
   // } else {
@@ -1030,7 +1030,7 @@
     cmpdi(CCR0, displaced_header, 0);
     beq(CCR0, free_slot); // recursive unlock
 
-    // } else if (Atomic::cmpxchg(displaced_header, obj->mark_addr(), monitor) == monitor) {
+    // } else if (Atomic::cmpxchg(obj->mark_addr(), monitor, displaced_header) == monitor) {
     //   // We swapped the unlocked mark in displaced_header into the object's mark word.
     //   monitor->set_obj(NULL);
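Note: the comment updates above (and the many call-site changes in this merge) track the Atomic API reordering: Atomic::cmpxchg now takes (dest, compare_value, exchange_value) instead of (exchange_value, dest, compare_value), and still returns the value observed at dest. A sketch of the new calling convention on top of std::atomic, for illustration only; HotSpot's real Atomic class is implemented with per-platform primitives, not std::atomic:

#include <atomic>
#include <cassert>

// New-style signature: destination first, then compare value, then exchange value.
// Returns the value observed at *dest (equal to compare_value on success).
template <typename T>
T cmpxchg(std::atomic<T>* dest, T compare_value, T exchange_value) {
  T observed = compare_value;
  // On failure, compare_exchange_strong writes the current value into 'observed'.
  dest->compare_exchange_strong(observed, exchange_value);
  return observed;
}

int main() {
  std::atomic<int> word{0};
  assert(cmpxchg(&word, 0, 42) == 0);  // success: returns the old (compare) value
  assert(word.load() == 42);
  assert(cmpxchg(&word, 0, 7) == 42);  // failure: returns the value actually seen
  assert(word.load() == 42);
  return 0;
}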
--- a/src/hotspot/cpu/ppc/nativeInst_ppc.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/ppc/nativeInst_ppc.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -374,7 +374,7 @@
   // Finally patch out the jump.
   volatile juint *jump_addr = (volatile juint*)instr_addr;
   // Release not needed because caller uses invalidate_range after copying the remaining bytes.
-  //OrderAccess::release_store(jump_addr, *((juint*)code_buffer));
+  //Atomic::release_store(jump_addr, *((juint*)code_buffer));
   *jump_addr = *((juint*)code_buffer); // atomically store code over branch instruction
   ICache::ppc64_flush_icache_bytes(instr_addr, NativeGeneralJump::instruction_size);
 }
--- a/src/hotspot/cpu/s390/interp_masm_s390.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/s390/interp_masm_s390.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -976,7 +976,7 @@
   //
   // markWord displaced_header = obj->mark().set_unlocked();
   // monitor->lock()->set_displaced_header(displaced_header);
-  // if (Atomic::cmpxchg(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
+  // if (Atomic::cmpxchg(/*addr*/obj->mark_addr(), /*cmp*/displaced_header, /*ex=*/monitor) == displaced_header) {
   //   // We stored the monitor address into the object's mark word.
   // } else if (THREAD->is_lock_owned((address)displaced_header))
   //   // Simple recursive case.
@@ -1011,7 +1011,7 @@
   z_stg(displaced_header, BasicObjectLock::lock_offset_in_bytes() +
                           BasicLock::displaced_header_offset_in_bytes(), monitor);
 
-  // if (Atomic::cmpxchg(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
+  // if (Atomic::cmpxchg(/*addr*/obj->mark_addr(), /*cmp*/displaced_header, /*ex=*/monitor) == displaced_header) {
 
   // Store stack address of the BasicObjectLock (this is monitor) into object.
   add2reg(object_mark_addr, oopDesc::mark_offset_in_bytes(), object);
@@ -1082,7 +1082,7 @@
   // if ((displaced_header = monitor->displaced_header()) == NULL) {
   //   // Recursive unlock. Mark the monitor unlocked by setting the object field to NULL.
   //   monitor->set_obj(NULL);
-  // } else if (Atomic::cmpxchg(displaced_header, obj->mark_addr(), monitor) == monitor) {
+  // } else if (Atomic::cmpxchg(obj->mark_addr(), monitor, displaced_header) == monitor) {
   //   // We swapped the unlocked mark in displaced_header into the object's mark word.
   //   monitor->set_obj(NULL);
   // } else {
@@ -1123,7 +1123,7 @@
                             BasicLock::displaced_header_offset_in_bytes()));
   z_bre(done); // displaced_header == 0 -> goto done
 
-  // } else if (Atomic::cmpxchg(displaced_header, obj->mark_addr(), monitor) == monitor) {
+  // } else if (Atomic::cmpxchg(obj->mark_addr(), monitor, displaced_header) == monitor) {
   //   // We swapped the unlocked mark in displaced_header into the object's mark word.
   //   monitor->set_obj(NULL);
--- a/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -585,7 +585,8 @@
     return start;
   }
 
-  // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest).
+  // Implementation of jint atomic_xchg(jint exchange_value, volatile jint* dest)
+  // used by Atomic::xchg(volatile jint* dest, jint exchange_value)
   //
   // Arguments:
   //
@@ -622,7 +623,8 @@
   }
 
 
-  // Support for jint Atomic::cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value)
+  // Implementation of jint atomic_cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value)
+  // used by Atomic::cmpxchg(volatile jint* dest, jint compare_value, jint exchange_value)
   //
   // Arguments:
   //
@@ -646,7 +648,8 @@
     return start;
   }
 
-  // Support for jlong Atomic::cmpxchg(jlong exchange_value, volatile jlong *dest, jlong compare_value)
+  // Implementation of jlong atomic_cmpxchg_long(jlong exchange_value, volatile jlong *dest, jlong compare_value)
+  // used by Atomic::cmpxchg(volatile jlong *dest, jlong compare_value, jlong exchange_value)
   //
   // Arguments:
   //
@@ -679,7 +682,8 @@
   }
 
 
-  // Support for jint Atomic::add(jint add_value, volatile jint* dest).
+  // Implementation of jint atomic_add(jint add_value, volatile jint* dest)
+  // used by Atomic::add(volatile jint* dest, jint add_value)
   //
   // Arguments:
   //
--- a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -327,24 +327,42 @@
 #endif
 }
 
+#ifdef _LP64
 void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
   BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
   if (bs_nm == NULL) {
     return;
   }
-#ifndef _LP64
-  ShouldNotReachHere();
-#else
   Label continuation;
-  Register thread = LP64_ONLY(r15_thread);
+  Register thread = r15_thread;
   Address disarmed_addr(thread, in_bytes(bs_nm->thread_disarmed_offset()));
   __ align(8);
   __ cmpl(disarmed_addr, 0);
   __ jcc(Assembler::equal, continuation);
   __ call(RuntimeAddress(StubRoutines::x86::method_entry_barrier()));
   __ bind(continuation);
+}
+#else
+void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
+  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
+  if (bs_nm == NULL) {
+    return;
+  }
+
+  Label continuation;
+
+  Register tmp = rdi;
+  __ push(tmp);
+  __ movptr(tmp, (intptr_t)bs_nm->disarmed_value_address());
+  Address disarmed_addr(tmp, 0);
+  __ align(4);
+  __ cmpl(disarmed_addr, 0);
+  __ pop(tmp);
+  __ jcc(Assembler::equal, continuation);
+  __ call(RuntimeAddress(StubRoutines::x86::method_entry_barrier()));
+  __ bind(continuation);
+}
 #endif
-}
 
 void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
   BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod();
--- a/src/hotspot/cpu/x86/gc/shared/barrierSetNMethod_x86.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/gc/shared/barrierSetNMethod_x86.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,6 +35,7 @@
 
 class NativeNMethodCmpBarrier: public NativeInstruction {
 public:
+#ifdef _LP64
   enum Intel_specific_constants {
     instruction_code = 0x81,
     instruction_size = 8,
@@ -42,6 +43,14 @@
     instruction_rex_prefix = Assembler::REX | Assembler::REX_B,
     instruction_modrm = 0x7f // [r15 + offset]
   };
+#else
+  enum Intel_specific_constants {
+    instruction_code = 0x81,
+    instruction_size = 7,
+    imm_offset = 2,
+    instruction_modrm = 0x3f // [rdi]
+  };
+#endif
 
   address instruction_address() const { return addr_at(0); }
   address immediate_address() const { return addr_at(imm_offset); }
@@ -51,6 +60,7 @@
   void verify() const;
 };
 
+#ifdef _LP64
 void NativeNMethodCmpBarrier::verify() const {
   if (((uintptr_t) instruction_address()) & 0x7) {
     fatal("Not properly aligned");
@@ -77,6 +87,27 @@
     fatal("not a cmp barrier");
   }
 }
+#else
+void NativeNMethodCmpBarrier::verify() const {
+  if (((uintptr_t) instruction_address()) & 0x3) {
+    fatal("Not properly aligned");
+  }
+
+  int inst = ubyte_at(0);
+  if (inst != instruction_code) {
+    tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", p2i(instruction_address()),
+        inst);
+    fatal("not a cmp barrier");
+  }
+
+  int modrm = ubyte_at(1);
+  if (modrm != instruction_modrm) {
+    tty->print_cr("Addr: " INTPTR_FORMAT " mod/rm: 0x%x", p2i(instruction_address()),
+        modrm);
+    fatal("not a cmp barrier");
+  }
+}
+#endif // _LP64
 
 void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) {
   /*
@@ -127,7 +158,7 @@
 // NativeNMethodCmpBarrier::verify() will immediately complain when it does
 // not find the expected native instruction at this offset, which needs updating.
 // Note that this offset is invariant of PreserveFramePointer.
-static const int entry_barrier_offset = -19;
+static const int entry_barrier_offset = LP64_ONLY(-19) NOT_LP64(-18);
 
 static NativeNMethodCmpBarrier* native_nmethod_barrier(nmethod* nm) {
   address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset;
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -395,6 +395,52 @@
   __ block_comment("load_reference_barrier_native { ");
 }
 
+#ifdef _LP64
+void ShenandoahBarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
+  // Use default version
+  BarrierSetAssembler::c2i_entry_barrier(masm);
+}
+#else
+void ShenandoahBarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
+  BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod();
+  if (bs == NULL) {
+    return;
+  }
+
+  Label bad_call;
+  __ cmpptr(rbx, 0); // rbx contains the incoming method for c2i adapters.
+  __ jcc(Assembler::equal, bad_call);
+
+  Register tmp1 = rax;
+  Register tmp2 = rcx;
+
+  __ push(tmp1);
+  __ push(tmp2);
+
+  // Pointer chase to the method holder to find out if the method is concurrently unloading.
+  Label method_live;
+  __ load_method_holder_cld(tmp1, rbx);
+
+  // Is it a strong CLD?
+  __ cmpl(Address(tmp1, ClassLoaderData::keep_alive_offset()), 0);
+  __ jcc(Assembler::greater, method_live);
+
+  // Is it a weak but alive CLD?
+  __ movptr(tmp1, Address(tmp1, ClassLoaderData::holder_offset()));
+  __ resolve_weak_handle(tmp1, tmp2);
+  __ cmpptr(tmp1, 0);
+  __ jcc(Assembler::notEqual, method_live);
+  __ pop(tmp2);
+  __ pop(tmp1);
+
+  __ bind(bad_call);
+  __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
+  __ bind(method_live);
+  __ pop(tmp2);
+  __ pop(tmp1);
+}
+#endif
+
 void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) {
   if (ShenandoahStoreValEnqueueBarrier) {
     storeval_barrier_impl(masm, dst, tmp);
@@ -511,8 +557,12 @@
 
   // 3: apply keep-alive barrier if needed
   if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
-    const Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
+    __ push_IU_state();
+    Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
     assert_different_registers(dst, tmp1, tmp_thread);
+    if (!thread->is_valid()) {
+      thread = rdx;
+    }
     NOT_LP64(__ get_thread(thread));
     // Generate the SATB pre-barrier code to log the value of
     // the referent field in an SATB buffer.
@@ -523,6 +573,7 @@
                                  tmp1 /* tmp */,
                                  true /* tosca_live */,
                                  true /* expand_call */);
+    __ pop_IU_state();
   }
 }
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -86,6 +86,7 @@
                         Address dst, Register val, Register tmp1, Register tmp2);
   virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                              Register obj, Register tmp, Label& slowpath);
+  virtual void c2i_entry_barrier(MacroAssembler* masm);
 
   virtual void barrier_stubs_init();
--- a/src/hotspot/cpu/x86/gc/z/zGlobals_x86.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/gc/z/zGlobals_x86.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -144,7 +144,7 @@
   const size_t max_address_offset_bits = 44; // 16TB
   const size_t address_offset = ZUtils::round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
   const size_t address_offset_bits = log2_intptr(address_offset);
-  return MIN2(MAX2(address_offset_bits, min_address_offset_bits), max_address_offset_bits);
+  return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits);
 }
 
 size_t ZPlatformAddressMetadataShift() {
--- a/src/hotspot/cpu/x86/rdtsc_x86.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/rdtsc_x86.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "rdtsc_x86.hpp"
+#include "runtime/orderAccess.hpp"
 #include "runtime/thread.inline.hpp"
 #include "vm_version_ext_x86.hpp"
--- a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -975,6 +975,9 @@
 
   address c2i_entry = __ pc();
 
+  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->c2i_entry_barrier(masm);
+
   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
 
   __ flush();
@@ -1886,6 +1889,10 @@
   // -2 because return address is already present and so is saved rbp
   __ subptr(rsp, stack_size - 2*wordSize);
 
+
+  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->nmethod_entry_barrier(masm);
+
   // Frame is now completed as far as size and linkage.
   int frame_complete = ((intptr_t)__ pc()) - start;
 
@@ -1921,12 +1928,12 @@
   // if we load it once it is usable thru the entire wrapper
   const Register thread = rdi;
 
-  // We use rsi as the oop handle for the receiver/klass
-  // It is callee save so it survives the call to native
-
-  const Register oop_handle_reg = rsi;
-
-  __ get_thread(thread);
+  // We use rsi as the oop handle for the receiver/klass
+  // It is callee save so it survives the call to native
+
+  const Register oop_handle_reg = rsi;
+
+  __ get_thread(thread);
 
   if (is_critical_native && !Universe::heap()->supports_object_pinning()) {
     check_needs_gc_for_critical_native(masm, thread, stack_slots, total_c_args, total_in_args,
--- a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -27,6 +27,7 @@
 #include "asm/macroAssembler.inline.hpp"
 #include "gc/shared/barrierSet.hpp"
 #include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shared/barrierSetNMethod.hpp"
 #include "interpreter/interpreter.hpp"
 #include "memory/universe.hpp"
 #include "nativeInst_x86.hpp"
@@ -430,7 +431,8 @@
 
   //----------------------------------------------------------------------------------------------------
 
-  // Support for int32_t Atomic::xchg(int32_t exchange_value, volatile int32_t* dest)
+  // Implementation of int32_t atomic_xchg(int32_t exchange_value, volatile int32_t* dest)
+  // used by Atomic::xchg(volatile int32_t* dest, int32_t exchange_value)
   //
   // xchg exists as far back as 8086, lock needed for MP only
   // Stack layout immediately after call:
@@ -3662,6 +3664,68 @@
     __ ret(0);
   }
 
+  address generate_method_entry_barrier() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
+
+    Label deoptimize_label;
+
+    address start = __ pc();
+
+    __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing
+
+    BLOCK_COMMENT("Entry:");
+    __ enter(); // save rbp
+
+    // save rbx, because we want to use that value.
+    // We could do without it but then we depend on the number of slots used by pusha
+    __ push(rbx);
+
+    __ lea(rbx, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for rbx - this should be the return address
+
+    __ pusha();
+
+    // xmm0 and xmm1 may be used for passing float/double arguments
+    const int xmm_size = wordSize * 2;
+    const int xmm_spill_size = xmm_size * 2;
+    __ subptr(rsp, xmm_spill_size);
+    __ movdqu(Address(rsp, xmm_size * 1), xmm1);
+    __ movdqu(Address(rsp, xmm_size * 0), xmm0);
+
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast<int (*)(address*)>(BarrierSetNMethod::nmethod_stub_entry_barrier)), rbx);
+
+    __ movdqu(xmm0, Address(rsp, xmm_size * 0));
+    __ movdqu(xmm1, Address(rsp, xmm_size * 1));
+    __ addptr(rsp, xmm_spill_size);
+
+    __ cmpl(rax, 1); // 1 means deoptimize
+    __ jcc(Assembler::equal, deoptimize_label);
+
+    __ popa();
+    __ pop(rbx);
+
+    __ leave();
+
+    __ addptr(rsp, 1 * wordSize); // cookie
+    __ ret(0);
+
+    __ BIND(deoptimize_label);
+
+    __ popa();
+    __ pop(rbx);
+
+    __ leave();
+
+    // this can be taken out, but is good for verification purposes. getting a SIGSEGV
+    // here while still having a correct stack is valuable
+    __ testptr(rsp, Address(rsp, 0));
+
+    __ movptr(rsp, Address(rsp, 0)); // new rsp was written in the barrier
+    __ jmp(Address(rsp, -1 * wordSize)); // jmp target should be callers verified_entry_point
+
+    return start;
+  }
+
  public:
   // Information about frame layout at time of blocking runtime call.
   // Note that we only have to preserve callee-saved registers since
@@ -3958,6 +4022,11 @@
     StubRoutines::_safefetchN_entry = StubRoutines::_safefetch32_entry;
     StubRoutines::_safefetchN_fault_pc = StubRoutines::_safefetch32_fault_pc;
     StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc;
+
+    BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
+    if (bs_nm != NULL) {
+      StubRoutines::x86::_method_entry_barrier = generate_method_entry_barrier();
+    }
   }
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -552,7 +552,8 @@
     return start;
   }
 
-  // Support for jint atomic::xchg(jint exchange_value, volatile jint* dest)
+  // Implementation of jint atomic_xchg(jint add_value, volatile jint* dest)
+  // used by Atomic::xchg(volatile jint* dest, jint exchange_value)
   //
   // Arguments :
   //    c_rarg0: exchange_value
@@ -571,7 +572,8 @@
     return start;
   }
 
-  // Support for intptr_t atomic::xchg_long(jlong exchange_value, volatile jlong* dest)
+  // Implementation of intptr_t atomic_xchg(jlong add_value, volatile jlong* dest)
+  // used by Atomic::xchg(volatile jlong* dest, jlong exchange_value)
   //
   // Arguments :
   //    c_rarg0: exchange_value
@@ -668,7 +670,8 @@
     return start;
   }
 
-  // Support for jint atomic::add(jint add_value, volatile jint* dest)
+  // Implementation of jint atomic_add(jint add_value, volatile jint* dest)
+  // used by Atomic::add(volatile jint* dest, jint add_value)
  //
   // Arguments :
   //    c_rarg0: add_value
@@ -690,7 +693,8 @@
     return start;
   }
 
-  // Support for intptr_t atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest)
+  // Implementation of intptr_t atomic_add(intptr_t add_value, volatile intptr_t* dest)
+  // used by Atomic::add(volatile intptr_t* dest, intptr_t add_value)
   //
   // Arguments :
   //    c_rarg0: add_value
--- a/src/hotspot/cpu/x86/stubRoutines_x86.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/stubRoutines_x86.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -55,14 +55,8 @@
   static address _double_sign_mask;
   static address _double_sign_flip;
 
-  static address _method_entry_barrier;
-
  public:
 
-  static address method_entry_barrier() {
-    return _method_entry_barrier;
-  }
-
   static address get_previous_fp_entry() {
     return _get_previous_fp_entry;
   }
@@ -121,6 +115,8 @@
   //shuffle mask for big-endian 128-bit integers
   static address _counter_shuffle_mask_addr;
 
+  static address _method_entry_barrier;
+
   // masks and table for CRC32
   static uint64_t _crc_by128_masks[];
   static juint _crc_table[];
@@ -221,6 +217,7 @@
   static address upper_word_mask_addr() { return _upper_word_mask_addr; }
   static address shuffle_byte_flip_mask_addr() { return _shuffle_byte_flip_mask_addr; }
   static address k256_addr() { return _k256_adr; }
+  static address method_entry_barrier() { return _method_entry_barrier; }
 
   static address vector_short_to_byte_mask() {
     return _vector_short_to_byte_mask;
--- a/src/hotspot/cpu/x86/stubRoutines_x86_32.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/stubRoutines_x86_32.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,3 +32,5 @@
 // a description of how to extend it, see the stubRoutines.hpp file.
 
 address StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = NULL;
+address StubRoutines::x86::_method_entry_barrier = NULL;
+
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -367,26 +367,29 @@
     //
     intx saved_useavx = UseAVX;
     intx saved_usesse = UseSSE;
-    // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
-    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
-    __ movl(rax, 0x10000);
-    __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm
-    __ cmpl(rax, 0x10000);
-    __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
-    // check _cpuid_info.xem_xcr0_eax.bits.opmask
-    // check _cpuid_info.xem_xcr0_eax.bits.zmm512
-    // check _cpuid_info.xem_xcr0_eax.bits.zmm32
-    __ movl(rax, 0xE0);
-    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
-    __ cmpl(rax, 0xE0);
-    __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
-    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
-    __ movl(rax, Address(rsi, 0));
-    __ cmpl(rax, 0x50654);              // If it is Skylake
-    __ jcc(Assembler::equal, legacy_setup);
 
     // If UseAVX is unitialized or is set by the user to include EVEX
     if (use_evex) {
+      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
+      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
+      __ movl(rax, 0x10000);
+      __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm
+      __ cmpl(rax, 0x10000);
+      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
+      // check _cpuid_info.xem_xcr0_eax.bits.opmask
+      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
+      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
+      __ movl(rax, 0xE0);
+      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
+      __ cmpl(rax, 0xE0);
+      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
+
+      if (FLAG_IS_DEFAULT(UseAVX)) {
+        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
+        __ movl(rax, Address(rsi, 0));
+        __ cmpl(rax, 0x50654);              // If it is Skylake
+        __ jcc(Assembler::equal, legacy_setup);
+      }
       // EVEX setup: run in lowest evex mode
       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
       UseAVX = 3;
@@ -455,27 +458,28 @@
     VM_Version::set_cpuinfo_cont_addr(__ pc());
     // Returns here after signal. Save xmm0 to check it later.
-    // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
-    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
-    __ movl(rax, 0x10000);
-    __ andl(rax, Address(rsi, 4));
-    __ cmpl(rax, 0x10000);
-    __ jcc(Assembler::notEqual, legacy_save_restore);
-    // check _cpuid_info.xem_xcr0_eax.bits.opmask
-    // check _cpuid_info.xem_xcr0_eax.bits.zmm512
-    // check _cpuid_info.xem_xcr0_eax.bits.zmm32
-    __ movl(rax, 0xE0);
-    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
-    __ cmpl(rax, 0xE0);
-    __ jcc(Assembler::notEqual, legacy_save_restore);
-
-    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
-    __ movl(rax, Address(rsi, 0));
-    __ cmpl(rax, 0x50654);              // If it is Skylake
-    __ jcc(Assembler::equal, legacy_save_restore);
-
     // If UseAVX is unitialized or is set by the user to include EVEX
     if (use_evex) {
+      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
+      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
+      __ movl(rax, 0x10000);
+      __ andl(rax, Address(rsi, 4));
+      __ cmpl(rax, 0x10000);
+      __ jcc(Assembler::notEqual, legacy_save_restore);
+      // check _cpuid_info.xem_xcr0_eax.bits.opmask
+      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
+      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
+      __ movl(rax, 0xE0);
+      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
+      __ cmpl(rax, 0xE0);
+      __ jcc(Assembler::notEqual, legacy_save_restore);
+
+      if (FLAG_IS_DEFAULT(UseAVX)) {
+        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
+        __ movl(rax, Address(rsi, 0));
+        __ cmpl(rax, 0x50654);              // If it is Skylake
+        __ jcc(Assembler::equal, legacy_save_restore);
+      }
       // EVEX check: run in lowest evex mode
       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
       UseAVX = 3;
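Note: the two hunks above change the EVEX probe so that the "treat Skylake (cpuid signature 0x50654) as AVX2" downgrade applies only when UseAVX was left at its default; an explicit -XX:UseAVX=3 now keeps EVEX enabled on Skylake. A hypothetical plain-C++ rendering of that decision (effective_avx_level and its parameters are invented for illustration, not HotSpot names):

#include <cstdint>
#include <cstdio>

// Downgrade to AVX2 on Skylake only if UseAVX was left at its default.
int effective_avx_level(uint32_t cpuid_signature, bool use_avx_is_default,
                        int requested_avx) {
  const uint32_t kSkylakeSig = 0x50654;
  if (use_avx_is_default && cpuid_signature == kSkylakeSig) {
    return 2;  // legacy setup: avoid EVEX on Skylake by default
  }
  return requested_avx;  // an explicit user choice (e.g. -XX:UseAVX=3) wins
}

int main() {
  printf("%d\n", effective_avx_level(0x50654, true, 3));  // prints 2
  printf("%d\n", effective_avx_level(0x50654, false, 3)); // prints 3
  return 0;
}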
--- a/src/hotspot/cpu/x86/x86_32.ad	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/x86_32.ad	Mon Dec 02 12:01:40 2019 +0530
@@ -3917,6 +3917,13 @@
   interface(REG_INTER);
 %}
 
+operand eDXRegP(eRegP reg) %{
+  constraint(ALLOC_IN_RC(edx_reg));
+  match(reg);
+  format %{ "EDX" %}
+  interface(REG_INTER);
+%}
+
 operand eSIRegP(eRegP reg) %{
   constraint(ALLOC_IN_RC(esi_reg));
   match(reg);
@@ -8977,7 +8984,7 @@
 %}
 
   ins_pipe(ialu_reg_reg);
-%}
+%}
 
 //----------Long Instructions------------------------------------------------
 // Add Long Register with Register
--- a/src/hotspot/cpu/x86/x86_64.ad	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/x86_64.ad	Mon Dec 02 12:01:40 2019 +0530
@@ -267,6 +267,9 @@
 // Singleton class for RSI pointer register
 reg_class ptr_rsi_reg(RSI, RSI_H);
 
+// Singleton class for RBP pointer register
+reg_class ptr_rbp_reg(RBP, RBP_H);
+
 // Singleton class for RDI pointer register
 reg_class ptr_rdi_reg(RDI, RDI_H);
 
@@ -3530,6 +3533,16 @@
   interface(REG_INTER);
 %}
 
+operand rbp_RegP()
+%{
+  constraint(ALLOC_IN_RC(ptr_rbp_reg));
+  match(RegP);
+  match(rRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 // Used in rep stosq
 operand rdi_RegP()
 %{
--- a/src/hotspot/cpu/zero/cppInterpreter_zero.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/zero/cppInterpreter_zero.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -38,13 +38,11 @@
 #include "prims/jvmtiExport.hpp"
 #include "prims/jvmtiThreadState.hpp"
 #include "runtime/arguments.hpp"
-#include "runtime/atomic.hpp"
 #include "runtime/deoptimization.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/interfaceSupport.inline.hpp"
 #include "runtime/jniHandles.inline.hpp"
-#include "runtime/orderAccess.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "runtime/synchronizer.hpp"
--- a/src/hotspot/os/aix/os_aix.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os/aix/os_aix.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -60,7 +60,6 @@
 #include "runtime/javaCalls.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/objectMonitor.hpp"
-#include "runtime/orderAccess.hpp"
 #include "runtime/os.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/perfMemory.hpp"
@@ -1084,7 +1083,7 @@
   if (now <= prev) {
     return prev;   // same or retrograde time;
   }
-  jlong obsv = Atomic::cmpxchg(now, &max_real_time, prev);
+  jlong obsv = Atomic::cmpxchg(&max_real_time, prev, now);
   assert(obsv >= prev, "invariant");   // Monotonicity
   // If the CAS succeeded then we're done and return "now".
   // If the CAS failed and the observed value "obsv" is >= now then
@@ -1794,7 +1793,7 @@
   for (;;) {
     for (int i = 0; i < NSIG + 1; i++) {
       jint n = pending_signals[i];
-      if (n > 0 && n == Atomic::cmpxchg(n - 1, &pending_signals[i], n)) {
+      if (n > 0 && n == Atomic::cmpxchg(&pending_signals[i], n, n - 1)) {
         return i;
       }
     }
--- a/src/hotspot/os/bsd/os_bsd.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os/bsd/os_bsd.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -51,7 +51,6 @@
 #include "runtime/javaCalls.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/objectMonitor.hpp"
-#include "runtime/orderAccess.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/perfMemory.hpp"
 #include "runtime/semaphore.hpp"
@@ -931,7 +930,7 @@
   if (now <= prev) {
     return prev;   // same or retrograde time;
   }
-  const uint64_t obsv = Atomic::cmpxchg(now, &Bsd::_max_abstime, prev);
+  const uint64_t obsv = Atomic::cmpxchg(&Bsd::_max_abstime, prev, now);
   assert(obsv >= prev, "invariant");   // Monotonicity
   // If the CAS succeeded then we're done and return "now".
   // If the CAS failed and the observed value "obsv" is >= now then
@@ -1834,7 +1833,7 @@
   for (;;) {
     for (int i = 0; i < NSIG + 1; i++) {
       jint n = pending_signals[i];
-      if (n > 0 && n == Atomic::cmpxchg(n - 1, &pending_signals[i], n)) {
+      if (n > 0 && n == Atomic::cmpxchg(&pending_signals[i], n, n - 1)) {
        return i;
      }
    }
@@ -1895,7 +1894,7 @@
   }
 
   char buf[PATH_MAX + 1];
-  int num = Atomic::add(1, &cnt);
+  int num = Atomic::add(&cnt, 1);
 
   snprintf(buf, PATH_MAX + 1, "%s/hs-vm-%d-%d",
            os::get_temp_directory(), os::current_process_id(), num);
@@ -3209,7 +3208,7 @@
 static volatile int next_processor_id = 0;
 
 static inline volatile int* get_apic_to_processor_mapping() {
-  volatile int* mapping = OrderAccess::load_acquire(&apic_to_processor_mapping);
+  volatile int* mapping = Atomic::load_acquire(&apic_to_processor_mapping);
   if (mapping == NULL) {
     // Calculate possible number space for APIC ids. This space is not necessarily
     // in the range [0, number_of_processors).
@@ -3238,9 +3237,9 @@
       mapping[i] = -1;
     }
 
-    if (!Atomic::replace_if_null(mapping, &apic_to_processor_mapping)) {
+    if (!Atomic::replace_if_null(&apic_to_processor_mapping, mapping)) {
       FREE_C_HEAP_ARRAY(int, mapping);
-      mapping = OrderAccess::load_acquire(&apic_to_processor_mapping);
+      mapping = Atomic::load_acquire(&apic_to_processor_mapping);
     }
   }
 
@@ -3264,12 +3263,14 @@
   int processor_id = Atomic::load(&mapping[apic_id]);
 
   while (processor_id < 0) {
-    if (Atomic::cmpxchg(-2, &mapping[apic_id], -1)) {
-      Atomic::store(Atomic::add(1, &next_processor_id) - 1, &mapping[apic_id]);
+    if (Atomic::cmpxchg(&mapping[apic_id], -1, -2) == -1) {
+      Atomic::store(&mapping[apic_id], Atomic::add(&next_processor_id, 1) - 1);
     }
     processor_id = Atomic::load(&mapping[apic_id]);
   }
 
+  assert(processor_id >= 0 && processor_id < os::processor_count(), "invalid processor id");
+
   return (uint)processor_id;
 }
 #endif
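Note: the apic_to_processor_mapping code above is a once-only lazy publication pattern. Racing initializers each build a candidate table, Atomic::replace_if_null (now taking the destination first) publishes exactly one of them, and the load_acquire pairs with the release in the CAS so readers see a fully initialized table. A standalone sketch of the same pattern using std::atomic; the names and the table size are illustrative, not HotSpot's:

#include <atomic>
#include <cstdio>

static std::atomic<int*> g_mapping{nullptr};

static int* get_mapping() {
  int* mapping = g_mapping.load(std::memory_order_acquire);
  if (mapping == nullptr) {
    int* fresh = new int[256];
    for (int i = 0; i < 256; i++) fresh[i] = -1;   // -1 == "not yet assigned"
    int* expected = nullptr;
    // Publish only if still null; otherwise another thread won the race.
    if (g_mapping.compare_exchange_strong(expected, fresh,
                                          std::memory_order_release,
                                          std::memory_order_acquire)) {
      mapping = fresh;
    } else {
      delete[] fresh;      // lost the race: free ours, use the winner's table
      mapping = expected;  // the failed CAS loaded the published pointer
    }
  }
  return mapping;
}

int main() {
  printf("mapping[0] = %d\n", get_mapping()[0]); // prints -1
  return 0;
}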
--- a/src/hotspot/os/linux/os_linux.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os/linux/os_linux.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -53,7 +53,6 @@
 #include "runtime/javaCalls.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/objectMonitor.hpp"
-#include "runtime/orderAccess.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/perfMemory.hpp"
 #include "runtime/sharedRuntime.hpp"
@@ -2752,7 +2751,7 @@
   for (;;) {
     for (int i = 0; i < NSIG + 1; i++) {
       jint n = pending_signals[i];
-      if (n > 0 && n == Atomic::cmpxchg(n - 1, &pending_signals[i], n)) {
+      if (n > 0 && n == Atomic::cmpxchg(&pending_signals[i], n, n - 1)) {
         return i;
       }
     }
@@ -2813,7 +2812,7 @@
   }
 
   char buf[PATH_MAX+1];
-  int num = Atomic::add(1, &cnt);
+  int num = Atomic::add(&cnt, 1);
 
   snprintf(buf, sizeof(buf), "%s/hs-vm-%d-%d",
            os::get_temp_directory(), os::current_process_id(), num);
--- a/src/hotspot/os/posix/os_posix.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os/posix/os_posix.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -30,6 +30,8 @@
 #include "runtime/frame.inline.hpp"
 #include "runtime/interfaceSupport.inline.hpp"
 #include "services/memTracker.hpp"
+#include "runtime/atomic.hpp"
+#include "runtime/orderAccess.hpp"
 #include "utilities/align.hpp"
 #include "utilities/events.hpp"
 #include "utilities/formatBuffer.hpp"
@@ -1900,7 +1902,7 @@
   // atomically decrement _event
   for (;;) {
     v = _event;
-    if (Atomic::cmpxchg(v - 1, &_event, v) == v) break;
+    if (Atomic::cmpxchg(&_event, v, v - 1) == v) break;
   }
   guarantee(v >= 0, "invariant");
 
@@ -1940,7 +1942,7 @@
   // atomically decrement _event
   for (;;) {
     v = _event;
-    if (Atomic::cmpxchg(v - 1, &_event, v) == v) break;
+    if (Atomic::cmpxchg(&_event, v, v - 1) == v) break;
   }
   guarantee(v >= 0, "invariant");
 
@@ -1998,7 +2000,7 @@
   // but only in the correctly written condition checking loops of ObjectMonitor,
   // Mutex/Monitor, Thread::muxAcquire and JavaThread::sleep
 
-  if (Atomic::xchg(1, &_event) >= 0) return;
+  if (Atomic::xchg(&_event, 1) >= 0) return;
 
   int status = pthread_mutex_lock(_mutex);
   assert_status(status == 0, status, "mutex_lock");
@@ -2046,7 +2048,7 @@
   // Return immediately if a permit is available.
   // We depend on Atomic::xchg() having full barrier semantics
   // since we are doing a lock-free update to _counter.
-  if (Atomic::xchg(0, &_counter) > 0) return;
+  if (Atomic::xchg(&_counter, 0) > 0) return;
 
   Thread* thread = Thread::current();
   assert(thread->is_Java_thread(), "Must be JavaThread");
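Note: the xchg call sites above carry the park/unpark permit protocol: unpark() publishes the permit with a full-barrier exchange and only needs a wakeup when the previous state shows a blocked waiter, while park()'s fast path consumes a pending permit without blocking. A simplified model with std::atomic; the function names are hypothetical and the real code additionally handles the blocking slow path with a mutex and condition variable:

#include <atomic>
#include <cassert>

// Event states, mirroring the PlatformEvent convention:
//   -1: a thread is blocked in park()   0: neutral   1: permit pending
static std::atomic<int> event{0};

// unpark(): set the permit; only the -1 -> 1 transition needs a wakeup/signal.
bool unpark_needs_wakeup() {
  return event.exchange(1) < 0;
}

// park() fast path: consume a pending permit and return without blocking.
// (The slow path would CAS the state to -1 and wait on the condvar.)
bool park_fast_path() {
  int v = event.load();
  return v > 0 && event.compare_exchange_strong(v, 0);
}

int main() {
  assert(!park_fast_path());       // no permit: would fall through to blocking
  assert(!unpark_needs_wakeup());  // 0 -> 1: permit set, nobody to wake
  assert(park_fast_path());        // permit consumed, returns immediately
  return 0;
}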
--- a/src/hotspot/os/solaris/os_solaris.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os/solaris/os_solaris.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -1024,7 +1024,7 @@
   if (now <= prev) {
     return prev;   // same or retrograde time;
   }
-  const hrtime_t obsv = Atomic::cmpxchg(now, &max_hrtime, prev);
+  const hrtime_t obsv = Atomic::cmpxchg(&max_hrtime, prev, now);
   assert(obsv >= prev, "invariant");   // Monotonicity
   // If the CAS succeeded then we're done and return "now".
   // If the CAS failed and the observed value "obsv" is >= now then
@@ -1984,7 +1984,7 @@
   while (true) {
     for (int i = 0; i < Sigexit + 1; i++) {
       jint n = pending_signals[i];
-      if (n > 0 && n == Atomic::cmpxchg(n - 1, &pending_signals[i], n)) {
+      if (n > 0 && n == Atomic::cmpxchg(&pending_signals[i], n, n - 1)) {
        return i;
      }
    }
@@ -4710,7 +4710,7 @@
   int v;
   for (;;) {
     v = _Event;
-    if (Atomic::cmpxchg(v-1, &_Event, v) == v) break;
+    if (Atomic::cmpxchg(&_Event, v, v-1) == v) break;
   }
   guarantee(v >= 0, "invariant");
   if (v == 0) {
@@ -4748,7 +4748,7 @@
   int v;
   for (;;) {
     v = _Event;
-    if (Atomic::cmpxchg(v-1, &_Event, v) == v) break;
+    if (Atomic::cmpxchg(&_Event, v, v-1) == v) break;
   }
   guarantee(v >= 0, "invariant");
   if (v != 0) return OS_OK;
@@ -4797,7 +4797,7 @@
   // from the first park() call after an unpark() call which will help
   // shake out uses of park() and unpark() without condition variables.
 
-  if (Atomic::xchg(1, &_Event) >= 0) return;
+  if (Atomic::xchg(&_Event, 1) >= 0) return;
 
   // If the thread associated with the event was parked, wake it.
   // Wait for the thread assoc with the PlatformEvent to vacate.
@@ -4896,7 +4896,7 @@
   // Return immediately if a permit is available.
   // We depend on Atomic::xchg() having full barrier semantics
   // since we are doing a lock-free update to _counter.
-  if (Atomic::xchg(0, &_counter) > 0) return;
+  if (Atomic::xchg(&_counter, 0) > 0) return;
 
   // Optional fast-exit: Check interrupt before trying to wait
   Thread* thread = Thread::current();
--- a/src/hotspot/os/windows/osThread_windows.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os/windows/osThread_windows.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -23,7 +23,6 @@
  */
 
 // no precompiled headers
-#include "runtime/orderAccess.hpp"
 #include "runtime/os.hpp"
 #include "runtime/osThread.hpp"
--- a/src/hotspot/os/windows/os_windows.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os/windows/os_windows.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -2096,7 +2096,7 @@
   while (true) {
     for (int i = 0; i < NSIG + 1; i++) {
       jint n = pending_signals[i];
-      if (n > 0 && n == Atomic::cmpxchg(n - 1, &pending_signals[i], n)) {
+      if (n > 0 && n == Atomic::cmpxchg(&pending_signals[i], n, n - 1)) {
        return i;
      }
    }
@@ -3747,15 +3747,15 @@
     // The first thread that reached this point, initializes the critical section.
     if (!InitOnceExecuteOnce(&init_once_crit_sect, init_crit_sect_call, &crit_sect, NULL)) {
       warning("crit_sect initialization failed in %s: %d\n", __FILE__, __LINE__);
-    } else if (OrderAccess::load_acquire(&process_exiting) == 0) {
+    } else if (Atomic::load_acquire(&process_exiting) == 0) {
       if (what != EPT_THREAD) {
         // Atomically set process_exiting before the critical section
         // to increase the visibility between racing threads.
-        Atomic::cmpxchg(GetCurrentThreadId(), &process_exiting, (DWORD)0);
+        Atomic::cmpxchg(&process_exiting, (DWORD)0, GetCurrentThreadId());
       }
       EnterCriticalSection(&crit_sect);
 
-      if (what == EPT_THREAD && OrderAccess::load_acquire(&process_exiting) == 0) {
+      if (what == EPT_THREAD && Atomic::load_acquire(&process_exiting) == 0) {
         // Remove from the array those handles of the threads that have completed exiting.
         for (i = 0, j = 0; i < handle_count; ++i) {
           res = WaitForSingleObject(handles[i], 0 /* don't wait */);
@@ -3868,7 +3868,7 @@
     }
 
     if (!registered &&
-        OrderAccess::load_acquire(&process_exiting) != 0 &&
+        Atomic::load_acquire(&process_exiting) != 0 &&
         process_exiting != GetCurrentThreadId()) {
       // Some other thread is about to call exit(), so we don't let
      // the current unregistered thread proceed to exit() or _endthreadex()
@@ -5136,7 +5136,7 @@
   int v;
   for (;;) {
     v = _Event;
-    if (Atomic::cmpxchg(v-1, &_Event, v) == v) break;
+    if (Atomic::cmpxchg(&_Event, v, v-1) == v) break;
   }
   guarantee((v == 0) || (v == 1), "invariant");
   if (v != 0) return OS_OK;
@@ -5198,7 +5198,7 @@
   int v;
   for (;;) {
     v = _Event;
-    if (Atomic::cmpxchg(v-1, &_Event, v) == v) break;
+    if (Atomic::cmpxchg(&_Event, v, v-1) == v) break;
   }
   guarantee((v == 0) || (v == 1), "invariant");
   if (v != 0) return;
@@ -5236,7 +5236,7 @@
   // from the first park() call after an unpark() call which will help
   // shake out uses of park() and unpark() without condition variables.
 
-  if (Atomic::xchg(1, &_Event) >= 0) return;
+  if (Atomic::xchg(&_Event, 1) >= 0) return;
 
   ::SetEvent(_ParkHandle);
 }
--- a/src/hotspot/os/windows/threadCritical_windows.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os/windows/threadCritical_windows.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -56,7 +56,7 @@ if (lock_owner != current_thread) { // Grab the lock before doing anything. - while (Atomic::cmpxchg(0, &lock_count, -1) != -1) { + while (Atomic::cmpxchg(&lock_count, -1, 0) != -1) { if (initialized) { DWORD ret = WaitForSingleObject(lock_event, INFINITE); assert(ret == WAIT_OBJECT_0, "unexpected return value from WaitForSingleObject");
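ThreadCritical acquires its lock by swinging lock_count from -1 (unlocked) to 0. The same handshake with the new destination-first order, sketched in portable C++ rather than the JDK code (a real implementation blocks on lock_event instead of spinning):

    #include <atomic>

    std::atomic<int> lock_count{-1};  // -1 means unlocked, >= 0 means held

    void enter() {
      int unlocked = -1;
      // Try to move -1 -> 0; on failure 'unlocked' is overwritten with the
      // observed value, so reset it before retrying.
      while (!lock_count.compare_exchange_strong(unlocked, 0)) {
        unlocked = -1;
      }
    }

    void leave() {
      lock_count.store(-1, std::memory_order_release);
    }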
--- a/src/hotspot/os_cpu/aix_ppc/atomic_aix_ppc.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/aix_ppc/atomic_aix_ppc.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -30,6 +30,7 @@ #error "Atomic currently only implemented for PPC64" #endif +#include "orderAccess_aix_ppc.hpp" #include "utilities/debug.hpp" // Implementation of class atomic @@ -95,13 +96,13 @@ struct Atomic::PlatformAdd : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> > { - template<typename I, typename D> - D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const; + template<typename D, typename I> + D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const; }; template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(I)); STATIC_ASSERT(4 == sizeof(D)); @@ -126,8 +127,8 @@ template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(I)); STATIC_ASSERT(8 == sizeof(D)); @@ -152,8 +153,8 @@ template<> template<typename T> -inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order order) const { // Note that xchg doesn't necessarily do an acquire // (see synchronizer.cpp). @@ -191,8 +192,8 @@ template<> template<typename T> -inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(T)); // Note that xchg doesn't necessarily do an acquire @@ -231,9 +232,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<1>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(1 == sizeof(T)); @@ -301,9 +302,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(T)); @@ -351,9 +352,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(T)); @@ -399,4 +400,15 @@ return old_value; } +template<size_t byte_size> +struct Atomic::PlatformOrderedLoad<byte_size, X_ACQUIRE> { + template <typename T> + T operator()(const volatile T* p) const { + T t = Atomic::load(p); + // Use twi-isync for load_acquire (faster than lwsync). + __asm__ __volatile__ ("twi 0,%0,0\n isync\n" : : "r" (t) : "memory"); + return t; + } +}; + #endif // OS_CPU_AIX_PPC_ATOMIC_AIX_PPC_HPP
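With this move, the acquire-load specialization is reached through Atomic::load_acquire(p) instead of OrderAccess. A rough portable analogue of what the twi/isync sequence buys (the builtin fence below is the generic form; twi/isync is the cheaper PPC-specific encoding):

    #include <atomic>
    #include <cstdint>

    int32_t load_acquire(const volatile int32_t* p) {
      int32_t t = *p;                                       // plain load
      std::atomic_thread_fence(std::memory_order_acquire);  // later accesses
      return t;                                             // may not float up
    }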
--- a/src/hotspot/os_cpu/aix_ppc/orderAccess_aix_ppc.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/aix_ppc/orderAccess_aix_ppc.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -64,8 +64,6 @@ #define inlasm_lwsync() __asm__ __volatile__ ("lwsync" : : : "memory"); #define inlasm_eieio() __asm__ __volatile__ ("eieio" : : : "memory"); #define inlasm_isync() __asm__ __volatile__ ("isync" : : : "memory"); -// Use twi-isync for load_acquire (faster than lwsync). -#define inlasm_acquire_reg(X) __asm__ __volatile__ ("twi 0,%0,0\n isync\n" : : "r" (X) : "memory"); inline void OrderAccess::loadload() { inlasm_lwsync(); } inline void OrderAccess::storestore() { inlasm_lwsync(); } @@ -78,13 +76,6 @@ inline void OrderAccess::cross_modify_fence() { inlasm_isync(); } -template<size_t byte_size> -struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE> -{ - template <typename T> - T operator()(const volatile T* p) const { T t = Atomic::load(p); inlasm_acquire_reg(t); return t; } -}; - #undef inlasm_sync #undef inlasm_lwsync #undef inlasm_eieio
--- a/src/hotspot/os_cpu/bsd_x86/atomic_bsd_x86.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/bsd_x86/atomic_bsd_x86.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -31,13 +31,13 @@ struct Atomic::PlatformAdd : Atomic::FetchAndAdd<Atomic::PlatformAdd<byte_size> > { - template<typename I, typename D> - D fetch_and_add(I add_value, D volatile* dest, atomic_memory_order /* order */) const; + template<typename D, typename I> + D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order /* order */) const; }; template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<4>::fetch_and_add(I add_value, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<4>::fetch_and_add(D volatile* dest, I add_value, atomic_memory_order /* order */) const { STATIC_ASSERT(4 == sizeof(I)); STATIC_ASSERT(4 == sizeof(D)); @@ -51,8 +51,8 @@ template<> template<typename T> -inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order /* order */) const { STATIC_ASSERT(4 == sizeof(T)); __asm__ volatile ( "xchgl (%2),%0" @@ -64,9 +64,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<1>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order /* order */) const { STATIC_ASSERT(1 == sizeof(T)); __asm__ volatile ( "lock cmpxchgb %1,(%3)" @@ -78,9 +78,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order /* order */) const { STATIC_ASSERT(4 == sizeof(T)); __asm__ volatile ( "lock cmpxchgl %1,(%3)" @@ -92,8 +92,8 @@ #ifdef AMD64 template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<8>::fetch_and_add(I add_value, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<8>::fetch_and_add(D volatile* dest, I add_value, atomic_memory_order /* order */) const { STATIC_ASSERT(8 == sizeof(I)); STATIC_ASSERT(8 == sizeof(D)); @@ -107,8 +107,8 @@ template<> template<typename T> -inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order /* order */) const { STATIC_ASSERT(8 == sizeof(T)); __asm__ __volatile__ ("xchgq (%2),%0" @@ -120,9 +120,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order /* order */) const { STATIC_ASSERT(8 == sizeof(T)); __asm__ __volatile__ ( "lock cmpxchgq %1,(%3)" @@ -142,12 +142,12 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order /* order */) const { STATIC_ASSERT(8 == sizeof(T)); - return cmpxchg_using_helper<int64_t>(_Atomic_cmpxchg_long, exchange_value, dest, compare_value); + return cmpxchg_using_helper<int64_t>(_Atomic_cmpxchg_long, dest, compare_value, exchange_value); } template<> @@ -161,12 +161,62 @@ template<> template<typename T> -inline 
void Atomic::PlatformStore<8>::operator()(T store_value, - T volatile* dest) const { +inline void Atomic::PlatformStore<8>::operator()(T volatile* dest, + T store_value) const { STATIC_ASSERT(8 == sizeof(T)); _Atomic_move_long(reinterpret_cast<const volatile int64_t*>(&store_value), reinterpret_cast<volatile int64_t*>(dest)); } #endif // AMD64 +template<> +struct Atomic::PlatformOrderedStore<1, RELEASE_X_FENCE> +{ + template <typename T> + void operator()(volatile T* p, T v) const { + __asm__ volatile ( "xchgb (%2),%0" + : "=q" (v) + : "0" (v), "r" (p) + : "memory"); + } +}; + +template<> +struct Atomic::PlatformOrderedStore<2, RELEASE_X_FENCE> +{ + template <typename T> + void operator()(volatile T* p, T v) const { + __asm__ volatile ( "xchgw (%2),%0" + : "=r" (v) + : "0" (v), "r" (p) + : "memory"); + } +}; + +template<> +struct Atomic::PlatformOrderedStore<4, RELEASE_X_FENCE> +{ + template <typename T> + void operator()(volatile T* p, T v) const { + __asm__ volatile ( "xchgl (%2),%0" + : "=r" (v) + : "0" (v), "r" (p) + : "memory"); + } +}; + +#ifdef AMD64 +template<> +struct Atomic::PlatformOrderedStore<8, RELEASE_X_FENCE> +{ + template <typename T> + void operator()(volatile T* p, T v) const { + __asm__ volatile ( "xchgq (%2), %0" + : "=r" (v) + : "0" (v), "r" (p) + : "memory"); + } +}; +#endif // AMD64 + #endif // OS_CPU_BSD_X86_ATOMIC_BSD_X86_HPP
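The release-store-with-fence specializations above moved here from orderAccess_bsd_x86.hpp unchanged apart from the (value, pointer) to (pointer, value) parameter swap. On x86 a single xchg both stores the value and acts as a full barrier, which is why no separate mfence appears; a stand-alone sketch of the 4-byte case (GCC/Clang inline asm, x86 only):

    #include <cstdint>

    void release_store_fence(volatile int32_t* p, int32_t v) {
      // xchg with a memory operand is implicitly locked: it publishes 'v'
      // and serializes, replacing a mov + mfence pair.
      __asm__ volatile("xchgl %0,(%1)" : "+r"(v) : "r"(p) : "memory");
    }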
--- a/src/hotspot/os_cpu/bsd_x86/bsd_x86_32.s Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/bsd_x86/bsd_x86_32.s Mon Dec 02 12:01:40 2019 +0530 @@ -633,9 +633,9 @@ ret - # Support for int64_t Atomic::cmpxchg(int64_t exchange_value, - # volatile int64_t* dest, - # int64_t compare_value) + # Support for int64_t Atomic::cmpxchg(volatile int64_t* dest, + # int64_t compare_value, + # int64_t exchange_value) # .p2align 4,,15 ELF_TYPE(_Atomic_cmpxchg_long,@function) @@ -665,4 +665,3 @@ movl 8(%esp), %eax # dest fistpll (%eax) ret -
--- a/src/hotspot/os_cpu/bsd_x86/orderAccess_bsd_x86.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/bsd_x86/orderAccess_bsd_x86.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -64,54 +64,4 @@ __asm__ volatile ("cpuid " : "+a" (idx) : : "ebx", "ecx", "edx", "memory"); } -template<> -struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE> -{ - template <typename T> - void operator()(T v, volatile T* p) const { - __asm__ volatile ( "xchgb (%2),%0" - : "=q" (v) - : "0" (v), "r" (p) - : "memory"); - } -}; - -template<> -struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE> -{ - template <typename T> - void operator()(T v, volatile T* p) const { - __asm__ volatile ( "xchgw (%2),%0" - : "=r" (v) - : "0" (v), "r" (p) - : "memory"); - } -}; - -template<> -struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE> -{ - template <typename T> - void operator()(T v, volatile T* p) const { - __asm__ volatile ( "xchgl (%2),%0" - : "=r" (v) - : "0" (v), "r" (p) - : "memory"); - } -}; - -#ifdef AMD64 -template<> -struct OrderAccess::PlatformOrderedStore<8, RELEASE_X_FENCE> -{ - template <typename T> - void operator()(T v, volatile T* p) const { - __asm__ volatile ( "xchgq (%2), %0" - : "=r" (v) - : "0" (v), "r" (p) - : "memory"); - } -}; -#endif // AMD64 - #endif // OS_CPU_BSD_X86_ORDERACCESS_BSD_X86_HPP
--- a/src/hotspot/os_cpu/bsd_zero/atomic_bsd_zero.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/bsd_zero/atomic_bsd_zero.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -163,22 +163,22 @@ struct Atomic::PlatformAdd : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> > { - template<typename I, typename D> - D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const; + template<typename D, typename I> + D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const; }; template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(I)); STATIC_ASSERT(4 == sizeof(D)); #ifdef ARM - return add_using_helper<int>(arm_add_and_fetch, add_value, dest); + return add_using_helper<int>(arm_add_and_fetch, dest, add_value); #else #ifdef M68K - return add_using_helper<int>(m68k_add_and_fetch, add_value, dest); + return add_using_helper<int>(m68k_add_and_fetch, dest, add_value); #else return __sync_add_and_fetch(dest, add_value); #endif // M68K @@ -186,8 +186,8 @@ } template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(I)); STATIC_ASSERT(8 == sizeof(D)); @@ -197,15 +197,15 @@ template<> template<typename T> -inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(T)); #ifdef ARM - return xchg_using_helper<int>(arm_lock_test_and_set, exchange_value, dest); + return xchg_using_helper<int>(arm_lock_test_and_set, dest, exchange_value); #else #ifdef M68K - return xchg_using_helper<int>(m68k_lock_test_and_set, exchange_value, dest); + return xchg_using_helper<int>(m68k_lock_test_and_set, dest, exchange_value); #else // __sync_lock_test_and_set is a bizarrely named atomic exchange // operation. 
Note that some platforms only support this with the @@ -224,8 +224,8 @@ template<> template<typename T> -inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(T)); T result = __sync_lock_test_and_set (dest, exchange_value); @@ -239,16 +239,16 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(T)); #ifdef ARM - return cmpxchg_using_helper<int>(arm_compare_and_swap, exchange_value, dest, compare_value); + return cmpxchg_using_helper<int>(arm_compare_and_swap, dest, compare_value, exchange_value); #else #ifdef M68K - return cmpxchg_using_helper<int>(m68k_compare_and_swap, exchange_value, dest, compare_value); + return cmpxchg_using_helper<int>(m68k_compare_and_swap, dest, compare_value, exchange_value); #else return __sync_val_compare_and_swap(dest, compare_value, exchange_value); #endif // M68K @@ -257,9 +257,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(T)); return __sync_val_compare_and_swap(dest, compare_value, exchange_value); @@ -276,8 +276,8 @@ template<> template<typename T> -inline void Atomic::PlatformStore<8>::operator()(T store_value, - T volatile* dest) const { +inline void Atomic::PlatformStore<8>::operator()(T volatile* dest, + T store_value) const { STATIC_ASSERT(8 == sizeof(T)); os::atomic_copy64(reinterpret_cast<const volatile int64_t*>(&store_value), reinterpret_cast<volatile int64_t*>(dest)); }
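The zero variant leans entirely on the GCC __sync builtins. A small self-contained demonstration of the two used above; note the explicit full fence after the exchange, mirroring the __sync_synchronize() the 8-byte implementation adds because __sync_lock_test_and_set alone is only an acquire barrier:

    #include <cstdint>
    #include <cstdio>

    int64_t cell = 5;

    int main() {
      int64_t old = __sync_val_compare_and_swap(&cell, 5, 7);  // returns 5, cell becomes 7
      old = __sync_lock_test_and_set(&cell, 9);                // atomic exchange, returns 7
      __sync_synchronize();                                    // full barrier
      std::printf("cell=%lld old=%lld\n", (long long)cell, (long long)old);
      return 0;
    }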
--- a/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -32,16 +32,12 @@ // Note that memory_order_conservative requires a full barrier after atomic stores. // See https://patchwork.kernel.org/patch/3575821/ -#define FULL_MEM_BARRIER __sync_synchronize() -#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); -#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); - template<size_t byte_size> struct Atomic::PlatformAdd : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> > { - template<typename I, typename D> - D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { + template<typename D, typename I> + D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE); FULL_MEM_BARRIER; return res; @@ -50,8 +46,8 @@ template<size_t byte_size> template<typename T> -inline T Atomic::PlatformXchg<byte_size>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<byte_size>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(byte_size == sizeof(T)); T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE); @@ -61,9 +57,9 @@ template<size_t byte_size> template<typename T> -inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(byte_size == sizeof(T)); if (order == memory_order_relaxed) { @@ -81,4 +77,25 @@ } } +template<size_t byte_size> +struct Atomic::PlatformOrderedLoad<byte_size, X_ACQUIRE> +{ + template <typename T> + T operator()(const volatile T* p) const { T data; __atomic_load(const_cast<T*>(p), &data, __ATOMIC_ACQUIRE); return data; } +}; + +template<size_t byte_size> +struct Atomic::PlatformOrderedStore<byte_size, RELEASE_X> +{ + template <typename T> + void operator()(volatile T* p, T v) const { __atomic_store(const_cast<T*>(p), &v, __ATOMIC_RELEASE); } +}; + +template<size_t byte_size> +struct Atomic::PlatformOrderedStore<byte_size, RELEASE_X_FENCE> +{ + template <typename T> + void operator()(volatile T* p, T v) const { release_store(p, v); OrderAccess::fence(); } +}; + #endif // OS_CPU_LINUX_AARCH64_ATOMIC_LINUX_AARCH64_HPP
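aarch64 now spells its ordered operations directly with the __atomic builtins, as the specializations above show. A compact stand-alone equivalent (illustrative, not the JDK code):

    #include <cstdint>

    int64_t value;

    int64_t acquire_load() {
      int64_t d;
      __atomic_load(&value, &d, __ATOMIC_ACQUIRE);
      return d;
    }

    void release_store(int64_t v) {
      __atomic_store(&value, &v, __ATOMIC_RELEASE);
    }

    int64_t conservative_exchange(int64_t v) {
      // RELEASE on the exchange plus a trailing full fence, matching the
      // "full barrier after atomic stores" note at the top of the file.
      int64_t res = __atomic_exchange_n(&value, v, __ATOMIC_RELEASE);
      __atomic_thread_fence(__ATOMIC_SEQ_CST);
      return res;
    }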
--- a/src/hotspot/os_cpu/linux_aarch64/orderAccess_linux_aarch64.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/linux_aarch64/orderAccess_linux_aarch64.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -37,6 +37,10 @@ inline void OrderAccess::loadstore() { acquire(); } inline void OrderAccess::storeload() { fence(); } +#define FULL_MEM_BARRIER __sync_synchronize() +#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); +#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); + inline void OrderAccess::acquire() { READ_MEM_BARRIER; } @@ -51,25 +55,4 @@ inline void OrderAccess::cross_modify_fence() { } -template<size_t byte_size> -struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE> -{ - template <typename T> - T operator()(const volatile T* p) const { T data; __atomic_load(const_cast<T*>(p), &data, __ATOMIC_ACQUIRE); return data; } -}; - -template<size_t byte_size> -struct OrderAccess::PlatformOrderedStore<byte_size, RELEASE_X> -{ - template <typename T> - void operator()(T v, volatile T* p) const { __atomic_store(const_cast<T*>(p), &v, __ATOMIC_RELEASE); } -}; - -template<size_t byte_size> -struct OrderAccess::PlatformOrderedStore<byte_size, RELEASE_X_FENCE> -{ - template <typename T> - void operator()(T v, volatile T* p) const { release_store(p, v); fence(); } -}; - #endif // OS_CPU_LINUX_AARCH64_ORDERACCESS_LINUX_AARCH64_HPP
--- a/src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -54,8 +54,8 @@ template<> template<typename T> -inline void Atomic::PlatformStore<8>::operator()(T store_value, - T volatile* dest) const { +inline void Atomic::PlatformStore<8>::operator()(T volatile* dest, + T store_value) const { STATIC_ASSERT(8 == sizeof(T)); (*os::atomic_store_long_func)( PrimitiveConversions::cast<int64_t>(store_value), reinterpret_cast<volatile int64_t*>(dest)); @@ -70,27 +70,27 @@ struct Atomic::PlatformAdd : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> > { - template<typename I, typename D> - D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const; + template<typename D, typename I> + D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const; }; template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(I)); STATIC_ASSERT(4 == sizeof(D)); - return add_using_helper<int32_t>(os::atomic_add_func, add_value, dest); + return add_using_helper<int32_t>(os::atomic_add_func, dest, add_value); } template<> template<typename T> -inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(T)); - return xchg_using_helper<int32_t>(os::atomic_xchg_func, exchange_value, dest); + return xchg_using_helper<int32_t>(os::atomic_xchg_func, dest, exchange_value); } @@ -119,22 +119,22 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(T)); - return cmpxchg_using_helper<int32_t>(reorder_cmpxchg_func, exchange_value, dest, compare_value); + return cmpxchg_using_helper<int32_t>(reorder_cmpxchg_func, dest, compare_value, exchange_value); } template<> template<typename T> -inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(T)); - return cmpxchg_using_helper<int64_t>(reorder_cmpxchg_long_func, exchange_value, dest, compare_value); + return cmpxchg_using_helper<int64_t>(reorder_cmpxchg_long_func, dest, compare_value, exchange_value); } #endif // OS_CPU_LINUX_ARM_ATOMIC_LINUX_ARM_HPP
--- a/src/hotspot/os_cpu/linux_ppc/atomic_linux_ppc.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/linux_ppc/atomic_linux_ppc.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -30,6 +30,7 @@ #error "Atomic currently only implemented for PPC64" #endif +#include "orderAccess_linux_ppc.hpp" #include "utilities/debug.hpp" // Implementation of class atomic @@ -95,13 +96,13 @@ struct Atomic::PlatformAdd : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> > { - template<typename I, typename D> - D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const; + template<typename D, typename I> + D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const; }; template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(I)); STATIC_ASSERT(4 == sizeof(D)); @@ -126,8 +127,8 @@ template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(I)); STATIC_ASSERT(8 == sizeof(D)); @@ -152,8 +153,8 @@ template<> template<typename T> -inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order order) const { // Note that xchg doesn't necessarily do an acquire // (see synchronizer.cpp). @@ -191,8 +192,8 @@ template<> template<typename T> -inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(T)); // Note that xchg doesn't necessarily do an acquire @@ -231,9 +232,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<1>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(1 == sizeof(T)); @@ -301,9 +302,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(T)); @@ -351,9 +352,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(T)); @@ -399,4 +400,16 @@ return old_value; } +template<size_t byte_size> +struct Atomic::PlatformOrderedLoad<byte_size, X_ACQUIRE> +{ + template <typename T> + T operator()(const volatile T* p) const { + T t = Atomic::load(p); + // Use twi-isync for load_acquire (faster than lwsync). + __asm__ __volatile__ ("twi 0,%0,0\n isync\n" : : "r" (t) : "memory"); + return t; + } +}; + #endif // OS_CPU_LINUX_PPC_ATOMIC_LINUX_PPC_HPP
--- a/src/hotspot/os_cpu/linux_ppc/orderAccess_linux_ppc.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/linux_ppc/orderAccess_linux_ppc.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -68,8 +68,6 @@ #define inlasm_lwsync() __asm__ __volatile__ ("lwsync" : : : "memory"); #define inlasm_eieio() __asm__ __volatile__ ("eieio" : : : "memory"); #define inlasm_isync() __asm__ __volatile__ ("isync" : : : "memory"); -// Use twi-isync for load_acquire (faster than lwsync). -#define inlasm_acquire_reg(X) __asm__ __volatile__ ("twi 0,%0,0\n isync\n" : : "r" (X) : "memory"); inline void OrderAccess::loadload() { inlasm_lwsync(); } inline void OrderAccess::storestore() { inlasm_lwsync(); } @@ -82,17 +80,9 @@ inline void OrderAccess::cross_modify_fence() { inlasm_isync(); } -template<size_t byte_size> -struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE> -{ - template <typename T> - T operator()(const volatile T* p) const { T t = Atomic::load(p); inlasm_acquire_reg(t); return t; } -}; - #undef inlasm_sync #undef inlasm_lwsync #undef inlasm_eieio #undef inlasm_isync -#undef inlasm_acquire_reg #endif // OS_CPU_LINUX_PPC_ORDERACCESS_LINUX_PPC_HPP
--- a/src/hotspot/os_cpu/linux_s390/atomic_linux_s390.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/linux_s390/atomic_linux_s390.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -78,13 +78,13 @@ struct Atomic::PlatformAdd : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> > { - template<typename I, typename D> - D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const; + template<typename D, typename I> + D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const; }; template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<4>::add_and_fetch(I inc, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I inc, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(I)); STATIC_ASSERT(4 == sizeof(D)); @@ -137,8 +137,8 @@ template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<8>::add_and_fetch(I inc, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I inc, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(I)); STATIC_ASSERT(8 == sizeof(D)); @@ -208,8 +208,8 @@ // replacement succeeded. template<> template<typename T> -inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order unused) const { STATIC_ASSERT(4 == sizeof(T)); T old; @@ -232,8 +232,8 @@ template<> template<typename T> -inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order unused) const { STATIC_ASSERT(8 == sizeof(T)); T old; @@ -289,9 +289,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<4>::operator()(T xchg_val, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, T cmp_val, + T xchg_val, atomic_memory_order unused) const { STATIC_ASSERT(4 == sizeof(T)); T old; @@ -313,9 +313,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<8>::operator()(T xchg_val, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, T cmp_val, + T xchg_val, atomic_memory_order unused) const { STATIC_ASSERT(8 == sizeof(T)); T old; @@ -335,4 +335,11 @@ return old; } +template<size_t byte_size> +struct Atomic::PlatformOrderedLoad<byte_size, X_ACQUIRE> +{ + template <typename T> + T operator()(const volatile T* p) const { T t = *p; OrderAccess::acquire(); return t; } +}; + #endif // OS_CPU_LINUX_S390_ATOMIC_LINUX_S390_HPP
--- a/src/hotspot/os_cpu/linux_s390/orderAccess_linux_s390.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/linux_s390/orderAccess_linux_s390.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -76,13 +76,6 @@ inline void OrderAccess::fence() { inlasm_zarch_sync(); } inline void OrderAccess::cross_modify_fence() { inlasm_zarch_sync(); } -template<size_t byte_size> -struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE> -{ - template <typename T> - T operator()(const volatile T* p) const { T t = *p; inlasm_zarch_acquire(); return t; } -}; - #undef inlasm_compiler_barrier #undef inlasm_zarch_sync #undef inlasm_zarch_release
--- a/src/hotspot/os_cpu/linux_sparc/atomic_linux_sparc.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/linux_sparc/atomic_linux_sparc.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -31,13 +31,13 @@ struct Atomic::PlatformAdd : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> > { - template<typename I, typename D> - D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const; + template<typename D, typename I> + D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const; }; template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(I)); STATIC_ASSERT(4 == sizeof(D)); @@ -59,8 +59,8 @@ } template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(I)); STATIC_ASSERT(8 == sizeof(D)); @@ -83,8 +83,8 @@ template<> template<typename T> -inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(T)); T rv = exchange_value; @@ -98,8 +98,8 @@ template<> template<typename T> -inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(T)); T rv = exchange_value; @@ -124,9 +124,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(T)); T rv; @@ -140,9 +140,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(T)); T rv;
--- a/src/hotspot/os_cpu/linux_x86/atomic_linux_x86.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/linux_x86/atomic_linux_x86.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -31,13 +31,13 @@ struct Atomic::PlatformAdd : Atomic::FetchAndAdd<Atomic::PlatformAdd<byte_size> > { - template<typename I, typename D> - D fetch_and_add(I add_value, D volatile* dest, atomic_memory_order order) const; + template<typename D, typename I> + D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const; }; template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<4>::fetch_and_add(I add_value, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<4>::fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(I)); STATIC_ASSERT(4 == sizeof(D)); @@ -51,8 +51,8 @@ template<> template<typename T> -inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(T)); __asm__ volatile ( "xchgl (%2),%0" @@ -64,9 +64,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<1>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order /* order */) const { STATIC_ASSERT(1 == sizeof(T)); __asm__ volatile ("lock cmpxchgb %1,(%3)" @@ -78,9 +78,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order /* order */) const { STATIC_ASSERT(4 == sizeof(T)); __asm__ volatile ("lock cmpxchgl %1,(%3)" @@ -93,8 +93,8 @@ #ifdef AMD64 template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<8>::fetch_and_add(I add_value, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<8>::fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(I)); STATIC_ASSERT(8 == sizeof(D)); @@ -108,7 +108,7 @@ template<> template<typename T> -inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, T volatile* dest, +inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest, T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(T)); __asm__ __volatile__ ("xchgq (%2),%0" @@ -120,9 +120,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order /* order */) const { STATIC_ASSERT(8 == sizeof(T)); __asm__ __volatile__ ("lock cmpxchgq %1,(%3)" @@ -142,12 +142,12 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(T)); - return cmpxchg_using_helper<int64_t>(_Atomic_cmpxchg_long, exchange_value, dest, compare_value); + return cmpxchg_using_helper<int64_t>(_Atomic_cmpxchg_long, dest, compare_value, exchange_value); } template<> @@ -161,12 +161,62 @@ template<> template<typename T> -inline void Atomic::PlatformStore<8>::operator()(T 
store_value, - T volatile* dest) const { +inline void Atomic::PlatformStore<8>::operator()(T volatile* dest, + T store_value) const { STATIC_ASSERT(8 == sizeof(T)); _Atomic_move_long(reinterpret_cast<const volatile int64_t*>(&store_value), reinterpret_cast<volatile int64_t*>(dest)); } #endif // AMD64 +template<> +struct Atomic::PlatformOrderedStore<1, RELEASE_X_FENCE> +{ + template <typename T> + void operator()(volatile T* p, T v) const { + __asm__ volatile ( "xchgb (%2),%0" + : "=q" (v) + : "0" (v), "r" (p) + : "memory"); + } +}; + +template<> +struct Atomic::PlatformOrderedStore<2, RELEASE_X_FENCE> +{ + template <typename T> + void operator()(volatile T* p, T v) const { + __asm__ volatile ( "xchgw (%2),%0" + : "=r" (v) + : "0" (v), "r" (p) + : "memory"); + } +}; + +template<> +struct Atomic::PlatformOrderedStore<4, RELEASE_X_FENCE> +{ + template <typename T> + void operator()(volatile T* p, T v) const { + __asm__ volatile ( "xchgl (%2),%0" + : "=r" (v) + : "0" (v), "r" (p) + : "memory"); + } +}; + +#ifdef AMD64 +template<> +struct Atomic::PlatformOrderedStore<8, RELEASE_X_FENCE> +{ + template <typename T> + void operator()(volatile T* p, T v) const { + __asm__ volatile ( "xchgq (%2), %0" + : "=r" (v) + : "0" (v), "r" (p) + : "memory"); + } +}; +#endif // AMD64 + #endif // OS_CPU_LINUX_X86_ATOMIC_LINUX_X86_HPP
--- a/src/hotspot/os_cpu/linux_x86/linux_x86_32.s Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/linux_x86/linux_x86_32.s Mon Dec 02 12:01:40 2019 +0530 @@ -1,4 +1,4 @@ -# +# # Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # @@ -19,15 +19,15 @@ # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA # or visit www.oracle.com if you need additional information or have any # questions. -# +# - + # NOTE WELL! The _Copy functions are called directly # from server-compiler-generated code via CallLeafNoFP, # which means that they *must* either not use floating # point or use it in the same manner as does the server # compiler. - + .globl _Copy_conjoint_bytes .globl _Copy_arrayof_conjoint_bytes .globl _Copy_conjoint_jshorts_atomic @@ -174,7 +174,7 @@ leal -1(%esi,%ecx),%eax # from + count - 1 jbe acb_CopyRight cmpl %eax,%edi - jbe acb_CopyLeft + jbe acb_CopyLeft # copy from low to high acb_CopyRight: cmpl $3,%ecx @@ -262,7 +262,7 @@ leal -2(%esi,%ecx,2),%eax # from + count*2 - 2 jbe cs_CopyRight cmpl %eax,%edi - jbe cs_CopyLeft + jbe cs_CopyLeft # copy from low to high cs_CopyRight: # align source address at dword address boundary @@ -283,7 +283,7 @@ jbe 2f # <= 32 dwords # copy aligned dwords rep; smovl - jmp 4f + jmp 4f # copy aligned dwords 2: subl %esi,%edi .p2align 4,,15 @@ -349,7 +349,7 @@ leal -2(%esi,%ecx,2),%eax # from + count*2 - 2 jbe acs_CopyRight cmpl %eax,%edi - jbe acs_CopyLeft + jbe acs_CopyLeft acs_CopyRight: movl %ecx,%eax # word count sarl %ecx # dword count @@ -358,10 +358,10 @@ jbe 2f # <= 32 dwords # copy aligned dwords rep; smovl - jmp 4f + jmp 4f # copy aligned dwords .space 5 -2: subl %esi,%edi +2: subl %esi,%edi .p2align 4,,15 3: movl (%esi),%edx movl %edx,(%edi,%esi,1) @@ -428,7 +428,7 @@ leal -4(%esi,%ecx,4),%eax # from + count*4 - 4 jbe ci_CopyRight cmpl %eax,%edi - jbe ci_CopyLeft + jbe ci_CopyLeft ci_CopyRight: cmpl $32,%ecx jbe 2f # <= 32 dwords @@ -471,7 +471,7 @@ popl %edi popl %esi ret - + # Support for void Copy::conjoint_jlongs_atomic(jlong* from, # jlong* to, # size_t count) @@ -537,7 +537,7 @@ je 5f cmpl $33,%ecx jae 3f -1: subl %esi,%edi +1: subl %esi,%edi .p2align 4,,15 2: movl (%esi),%edx movl %edx,(%edi,%esi,1) @@ -545,7 +545,7 @@ subl $1,%ecx jnz 2b addl %esi,%edi - jmp 5f + jmp 5f 3: smovl # align to 8 bytes, we know we are 4 byte aligned to start subl $1,%ecx 4: .p2align 4,,15 @@ -612,9 +612,9 @@ ret - # Support for jlong Atomic::cmpxchg(jlong exchange_value, - # volatile jlong* dest, - # jlong compare_value) + # Support for jlong Atomic::cmpxchg(volatile jlong* dest, + # jlong compare_value, + # jlong exchange_value) # .p2align 4,,15 .type _Atomic_cmpxchg_long,@function @@ -643,4 +643,3 @@ movl 8(%esp), %eax # dest fistpll (%eax) ret -
--- a/src/hotspot/os_cpu/linux_x86/orderAccess_linux_x86.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/linux_x86/orderAccess_linux_x86.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -66,54 +66,4 @@ #endif } -template<> -struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE> -{ - template <typename T> - void operator()(T v, volatile T* p) const { - __asm__ volatile ( "xchgb (%2),%0" - : "=q" (v) - : "0" (v), "r" (p) - : "memory"); - } -}; - -template<> -struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE> -{ - template <typename T> - void operator()(T v, volatile T* p) const { - __asm__ volatile ( "xchgw (%2),%0" - : "=r" (v) - : "0" (v), "r" (p) - : "memory"); - } -}; - -template<> -struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE> -{ - template <typename T> - void operator()(T v, volatile T* p) const { - __asm__ volatile ( "xchgl (%2),%0" - : "=r" (v) - : "0" (v), "r" (p) - : "memory"); - } -}; - -#ifdef AMD64 -template<> -struct OrderAccess::PlatformOrderedStore<8, RELEASE_X_FENCE> -{ - template <typename T> - void operator()(T v, volatile T* p) const { - __asm__ volatile ( "xchgq (%2), %0" - : "=r" (v) - : "0" (v), "r" (p) - : "memory"); - } -}; -#endif // AMD64 - #endif // OS_CPU_LINUX_X86_ORDERACCESS_LINUX_X86_HPP
--- a/src/hotspot/os_cpu/linux_zero/atomic_linux_zero.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/linux_zero/atomic_linux_zero.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -34,13 +34,13 @@ struct Atomic::PlatformAdd : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> > { - template<typename I, typename D> - D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const; + template<typename D, typename I> + D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const; }; template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(I)); STATIC_ASSERT(4 == sizeof(D)); @@ -49,8 +49,8 @@ } template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(I)); STATIC_ASSERT(8 == sizeof(D)); @@ -59,8 +59,8 @@ template<> template<typename T> -inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(T)); // __sync_lock_test_and_set is a bizarrely named atomic exchange @@ -78,8 +78,8 @@ template<> template<typename T> -inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(T)); T result = __sync_lock_test_and_set (dest, exchange_value); @@ -93,9 +93,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(T)); return __sync_val_compare_and_swap(dest, compare_value, exchange_value); @@ -103,9 +103,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(T)); return __sync_val_compare_and_swap(dest, compare_value, exchange_value); @@ -122,8 +122,8 @@ template<> template<typename T> -inline void Atomic::PlatformStore<8>::operator()(T store_value, - T volatile* dest) const { +inline void Atomic::PlatformStore<8>::operator()(T volatile* dest, + T store_value) const { STATIC_ASSERT(8 == sizeof(T)); os::atomic_copy64(reinterpret_cast<const volatile int64_t*>(&store_value), reinterpret_cast<volatile int64_t*>(dest)); }
--- a/src/hotspot/os_cpu/solaris_sparc/atomic_solaris_sparc.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/solaris_sparc/atomic_solaris_sparc.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -30,12 +30,12 @@ // Implement ADD using a CAS loop. template<size_t byte_size> struct Atomic::PlatformAdd { - template<typename I, typename D> - inline D operator()(I add_value, D volatile* dest, atomic_memory_order order) const { + template<typename D, typename I> + inline D operator()(D volatile* dest, I add_value, atomic_memory_order order) const { D old_value = *dest; while (true) { D new_value = old_value + add_value; - D result = cmpxchg(new_value, dest, old_value); + D result = cmpxchg(dest, old_value, new_value); if (result == old_value) break; old_value = result; } @@ -45,8 +45,8 @@ template<> template<typename T> -inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(T)); __asm__ volatile ( "swap [%2],%0" @@ -58,13 +58,13 @@ template<> template<typename T> -inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(T)); T old_value = *dest; while (true) { - T result = cmpxchg(exchange_value, dest, old_value); + T result = cmpxchg(dest, old_value, exchange_value); if (result == old_value) break; old_value = result; } @@ -77,9 +77,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(T)); T rv; @@ -93,9 +93,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(T)); T rv;
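SPARC has no native fetch-add, so PlatformAdd above is a compare-and-swap retry loop. The same shape in portable C++, as a sketch rather than the JDK code:

    #include <atomic>
    #include <cstdint>

    int64_t add_and_fetch(std::atomic<int64_t>& dest, int64_t add_value) {
      int64_t old_value = dest.load(std::memory_order_relaxed);
      int64_t new_value;
      do {
        new_value = old_value + add_value;
        // on failure, old_value is reloaded with the current contents
      } while (!dest.compare_exchange_weak(old_value, new_value));
      return new_value;
    }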
--- a/src/hotspot/os_cpu/solaris_x86/atomic_solaris_x86.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/solaris_x86/atomic_solaris_x86.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -44,14 +44,14 @@ struct Atomic::PlatformAdd : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> > { - template<typename I, typename D> - D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const; + template<typename D, typename I> + D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const; }; // Not using add_using_helper; see comment for cmpxchg. template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(I)); STATIC_ASSERT(4 == sizeof(D)); @@ -62,8 +62,8 @@ // Not using add_using_helper; see comment for cmpxchg. template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(I)); STATIC_ASSERT(8 == sizeof(D)); @@ -74,8 +74,8 @@ template<> template<typename T> -inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(T)); return PrimitiveConversions::cast<T>( @@ -87,8 +87,8 @@ template<> template<typename T> -inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(T)); return PrimitiveConversions::cast<T>( @@ -104,9 +104,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<1>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(1 == sizeof(T)); return PrimitiveConversions::cast<T>( @@ -117,9 +117,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(T)); return PrimitiveConversions::cast<T>( @@ -130,9 +130,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(T)); return PrimitiveConversions::cast<T>(
--- a/src/hotspot/os_cpu/solaris_x86/os_solaris_x86.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/solaris_x86/os_solaris_x86.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -38,7 +38,6 @@ #include "prims/jniFastGetField.hpp" #include "prims/jvm_misc.hpp" #include "runtime/arguments.hpp" -#include "runtime/atomic.hpp" #include "runtime/extendedPC.hpp" #include "runtime/frame.inline.hpp" #include "runtime/interfaceSupport.inline.hpp"
--- a/src/hotspot/os_cpu/solaris_x86/solaris_x86_64.il Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/solaris_x86/solaris_x86_64.il Mon Dec 02 12:01:40 2019 +0530 @@ -49,7 +49,8 @@ orq %rdx, %rax .end - // Support for jint Atomic::add(jint add_value, volatile jint* dest) + // Implementation of jint _Atomic_add(jint add_value, volatile jint* dest) + // used by Atomic::add(volatile jint* dest, jint add_value) .inline _Atomic_add,2 movl %edi, %eax // save add_value for return lock @@ -57,7 +58,8 @@ addl %edi, %eax .end - // Support for jlong Atomic::add(jlong add_value, volatile jlong* dest) + // Implementation of jlong _Atomic_add(jlong add_value, volatile jlong* dest) + // used by Atomic::add(volatile jlong* dest, jlong add_value) .inline _Atomic_add_long,2 movq %rdi, %rax // save add_value for return lock @@ -65,39 +67,41 @@ addq %rdi, %rax .end - // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest). + // Implementation of jint _Atomic_xchg(jint exchange_value, volatile jint* dest) + // used by Atomic::xchg(volatile jint* dest, jint exchange_value) .inline _Atomic_xchg,2 xchgl (%rsi), %edi movl %edi, %eax .end - // Support for jlong Atomic::xchg(jlong exchange_value, volatile jlong* dest). + // Implementation of jlong _Atomic_xchg(jlong exchange_value, volatile jlong* dest) + // used by Atomic::xchg(volatile jlong* dest, jlong exchange_value) .inline _Atomic_xchg_long,2 xchgq (%rsi), %rdi movq %rdi, %rax .end - // Support for jbyte Atomic::cmpxchg(jbyte exchange_value, - // volatile jbyte *dest, - // jbyte compare_value) + // Support for jbyte Atomic::cmpxchg(volatile jbyte *dest, + // jbyte compare_value, + // jbyte exchange_value) .inline _Atomic_cmpxchg_byte,3 movb %dl, %al // compare_value lock cmpxchgb %dil, (%rsi) .end - // Support for jint Atomic::cmpxchg(jint exchange_value, - // volatile jint *dest, - // jint compare_value) + // Support for jint Atomic::cmpxchg(volatile jint *dest, + // jint compare_value, + // jint exchange_value) .inline _Atomic_cmpxchg,3 movl %edx, %eax // compare_value lock cmpxchgl %edi, (%rsi) .end - // Support for jlong Atomic::cmpxchg(jlong exchange_value, - // volatile jlong* dest, - // jlong compare_value) + // Support for jlong Atomic::cmpxchg(volatile jlong* dest, + // jlong compare_value, + // jlong exchange_value) .inline _Atomic_cmpxchg_long,3 movq %rdx, %rax // compare_value lock
--- a/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -27,6 +27,17 @@ #include "runtime/os.hpp" +// Note that in MSVC, volatile memory accesses are explicitly +// guaranteed to have acquire release semantics (w.r.t. compiler +// reordering) and therefore does not even need a compiler barrier +// for normal acquire release accesses. And all generalized +// bound calls like release_store go through Atomic::load +// and Atomic::store which do volatile memory accesses. +template<> inline void ScopedFence<X_ACQUIRE>::postfix() { } +template<> inline void ScopedFence<RELEASE_X>::prefix() { } +template<> inline void ScopedFence<RELEASE_X_FENCE>::prefix() { } +template<> inline void ScopedFence<RELEASE_X_FENCE>::postfix() { OrderAccess::fence(); } + // The following alternative implementations are needed because // Windows 95 doesn't support (some of) the corresponding Windows NT // calls. Furthermore, these versions allow inlining in the caller. @@ -46,33 +57,33 @@ struct Atomic::PlatformAdd : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> > { - template<typename I, typename D> - D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const; + template<typename D, typename I> + D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const; }; #ifdef AMD64 template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { - return add_using_helper<int32_t>(os::atomic_add_func, add_value, dest); + return add_using_helper<int32_t>(os::atomic_add_func, dest, add_value); } template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { - return add_using_helper<int64_t>(os::atomic_add_long_func, add_value, dest); + return add_using_helper<int64_t>(os::atomic_add_long_func, dest, add_value); } #define DEFINE_STUB_XCHG(ByteSize, StubType, StubName) \ template<> \ template<typename T> \ - inline T Atomic::PlatformXchg<ByteSize>::operator()(T exchange_value, \ - T volatile* dest, \ + inline T Atomic::PlatformXchg<ByteSize>::operator()(T volatile* dest, \ + T exchange_value, \ atomic_memory_order order) const { \ STATIC_ASSERT(ByteSize == sizeof(T)); \ - return xchg_using_helper<StubType>(StubName, exchange_value, dest); \ + return xchg_using_helper<StubType>(StubName, dest, exchange_value); \ } DEFINE_STUB_XCHG(4, int32_t, os::atomic_xchg_func) @@ -80,15 +91,15 @@ #undef DEFINE_STUB_XCHG -#define DEFINE_STUB_CMPXCHG(ByteSize, StubType, StubName) \ - template<> \ - template<typename T> \ - inline T Atomic::PlatformCmpxchg<ByteSize>::operator()(T exchange_value, \ - T volatile* dest, \ - T compare_value, \ +#define DEFINE_STUB_CMPXCHG(ByteSize, StubType, StubName) \ + template<> \ + template<typename T> \ + inline T Atomic::PlatformCmpxchg<ByteSize>::operator()(T volatile* dest, \ + T compare_value, \ + T exchange_value, \ atomic_memory_order order) const { \ - STATIC_ASSERT(ByteSize == sizeof(T)); \ - return cmpxchg_using_helper<StubType>(StubName, exchange_value, dest, compare_value); \ + STATIC_ASSERT(ByteSize == sizeof(T)); \ + 
return cmpxchg_using_helper<StubType>(StubName, dest, compare_value, exchange_value); \ } DEFINE_STUB_CMPXCHG(1, int8_t, os::atomic_cmpxchg_byte_func) @@ -100,8 +111,8 @@ #else // !AMD64 template<> -template<typename I, typename D> -inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest, +template<typename D, typename I> +inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(I)); STATIC_ASSERT(4 == sizeof(D)); @@ -116,8 +127,8 @@ template<> template<typename T> -inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(T)); // alternative for InterlockedExchange @@ -130,9 +141,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<1>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(1 == sizeof(T)); // alternative for InterlockedCompareExchange @@ -146,9 +157,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(T)); // alternative for InterlockedCompareExchange @@ -162,9 +173,9 @@ template<> template<typename T> -inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, - T volatile* dest, +inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, T compare_value, + T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(T)); int32_t ex_lo = (int32_t)exchange_value; @@ -202,8 +213,8 @@ template<> template<typename T> -inline void Atomic::PlatformStore<8>::operator()(T store_value, - T volatile* dest) const { +inline void Atomic::PlatformStore<8>::operator()(T volatile* dest, + T store_value) const { STATIC_ASSERT(8 == sizeof(T)); volatile T* src = &store_value; __asm { @@ -218,4 +229,45 @@ #pragma warning(default: 4035) // Enables warnings reporting missing return statement +#ifndef AMD64 +template<> +struct Atomic::PlatformOrderedStore<1, RELEASE_X_FENCE> +{ + template <typename T> + void operator()(volatile T* p, T v) const { + __asm { + mov edx, p; + mov al, v; + xchg al, byte ptr [edx]; + } + } +}; + +template<> +struct Atomic::PlatformOrderedStore<2, RELEASE_X_FENCE> +{ + template <typename T> + void operator()(volatile T* p, T v) const { + __asm { + mov edx, p; + mov ax, v; + xchg ax, word ptr [edx]; + } + } +}; + +template<> +struct Atomic::PlatformOrderedStore<4, RELEASE_X_FENCE> +{ + template <typename T> + void operator()(volatile T* p, T v) const { + __asm { + mov edx, p; + mov eax, v; + xchg eax, dword ptr [edx]; + } + } +}; +#endif // AMD64 + #endif // OS_CPU_WINDOWS_X86_ATOMIC_WINDOWS_X86_HPP
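On AMD64 these templates forward to the os::atomic_*_func stubs. One wrinkle worth noting: the Win32 Interlocked primitives order their arguments (Destination, Exchange, Comparand), while HotSpot's new convention is (dest, compare_value, exchange_value), so any adapter between the two has to swap the last two parameters. A hedged sketch of such an adapter (the actual stub bodies live in os_windows and are not part of this hunk):

    #include <windows.h>

    // HotSpot order: (dest, compare_value, exchange_value)
    // Win32 order:   (Destination, Exchange, Comparand)
    inline long cmpxchg_adapter(volatile long* dest, long compare_value, long exchange_value) {
      return InterlockedCompareExchange(dest, exchange_value, compare_value);
    }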
--- a/src/hotspot/os_cpu/windows_x86/orderAccess_windows_x86.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/os_cpu/windows_x86/orderAccess_windows_x86.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -39,17 +39,6 @@ _ReadWriteBarrier(); } -// Note that in MSVC, volatile memory accesses are explicitly -// guaranteed to have acquire release semantics (w.r.t. compiler -// reordering) and therefore does not even need a compiler barrier -// for normal acquire release accesses. And all generalized -// bound calls like release_store go through OrderAccess::load -// and OrderAccess::store which do volatile memory accesses. -template<> inline void ScopedFence<X_ACQUIRE>::postfix() { } -template<> inline void ScopedFence<RELEASE_X>::prefix() { } -template<> inline void ScopedFence<RELEASE_X_FENCE>::prefix() { } -template<> inline void ScopedFence<RELEASE_X_FENCE>::postfix() { OrderAccess::fence(); } - inline void OrderAccess::loadload() { compiler_barrier(); } inline void OrderAccess::storestore() { compiler_barrier(); } inline void OrderAccess::loadstore() { compiler_barrier(); } @@ -74,45 +63,4 @@ __cpuid(regs, 0); } -#ifndef AMD64 -template<> -struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE> -{ - template <typename T> - void operator()(T v, volatile T* p) const { - __asm { - mov edx, p; - mov al, v; - xchg al, byte ptr [edx]; - } - } -}; - -template<> -struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE> -{ - template <typename T> - void operator()(T v, volatile T* p) const { - __asm { - mov edx, p; - mov ax, v; - xchg ax, word ptr [edx]; - } - } -}; - -template<> -struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE> -{ - template <typename T> - void operator()(T v, volatile T* p) const { - __asm { - mov edx, p; - mov eax, v; - xchg eax, dword ptr [edx]; - } - } -}; -#endif // AMD64 - #endif // OS_CPU_WINDOWS_X86_ORDERACCESS_WINDOWS_X86_HPP
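The PlatformOrderedStore specializations relocated above implement a release store followed by a full fence with a single x86 xchg: the instruction is implicitly locked, so it both publishes the value and orders all earlier and later memory accesses. A portable sketch of the same idea, assuming std::atomic's sequentially consistent exchange lowers to xchg on x86 (which mainstream compilers do):

    #include <atomic>

    // Store v to *p with release semantics plus a trailing full fence,
    // in one instruction on x86: a locked xchg.
    template <typename T>
    void release_store_fence(std::atomic<T>* p, T v) {
      (void)p->exchange(v, std::memory_order_seq_cst);
    }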
--- a/src/hotspot/share/adlc/adlparse.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/adlc/adlparse.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -123,6 +123,7 @@ parse_err(SEMERR, "Did not declare 'register' definitions"); } regBlock->addSpillRegClass(); + regBlock->addDynamicRegClass(); // Done with parsing, check consistency.
--- a/src/hotspot/share/adlc/archDesc.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/adlc/archDesc.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -245,12 +245,12 @@ // Construct chain rules build_chain_rule(op); - MatchRule &mrule = *op->_matrule; - Predicate *pred = op->_predicate; + MatchRule *mrule = op->_matrule; + Predicate *pred = op->_predicate; // Grab the machine type of the operand const char *rootOp = op->_ident; - mrule._machType = rootOp; + mrule->_machType = rootOp; // Check for special cases if (strcmp(rootOp,"Universe")==0) continue; @@ -271,10 +271,13 @@ // Find result type for match. const char *result = op->reduce_result(); - bool has_root = false; - // Construct a MatchList for this entry - buildMatchList(op->_matrule, result, rootOp, pred, cost); + // Construct a MatchList for this entry. + // Iterate over the list to enumerate all match cases for operands with multiple match rules. + for (; mrule != NULL; mrule = mrule->_next) { + mrule->_machType = rootOp; + buildMatchList(mrule, result, rootOp, pred, cost); + } } } @@ -805,6 +808,8 @@ return "RegMask::Empty"; } else if (strcmp(reg_class_name,"stack_slots")==0) { return "(Compile::current()->FIRST_STACK_mask())"; + } else if (strcmp(reg_class_name, "dynamic")==0) { + return "*_opnds[0]->in_RegMask(0)"; } else { char *rc_name = toUpper(reg_class_name); const char *mask = "_mask"; @@ -867,7 +872,7 @@ } // Instructions producing 'Universe' use RegMask::Empty - if( strcmp(result,"Universe")==0 ) { + if (strcmp(result,"Universe") == 0) { return "RegMask::Empty"; }
--- a/src/hotspot/share/adlc/formsopt.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/adlc/formsopt.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -80,6 +80,15 @@ _regClass.Insert(rc_name,reg_class); } +// Called after parsing the Register block. Record the register class +// for operands which are overwritten after matching. +void RegisterForm::addDynamicRegClass() { + const char *rc_name = "dynamic"; + RegClass* reg_class = new RegClass(rc_name); + reg_class->set_stack_version(false); + _rclasses.addName(rc_name); + _regClass.Insert(rc_name,reg_class); +} // Provide iteration over all register definitions // in the order used by the register allocator
--- a/src/hotspot/share/adlc/formsopt.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/adlc/formsopt.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -104,6 +104,7 @@ AllocClass *addAllocClass(char *allocName); void addSpillRegClass(); + void addDynamicRegClass(); // Provide iteration over all register definitions // in the order used by the register allocator
--- a/src/hotspot/share/adlc/output_c.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/adlc/output_c.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -2781,6 +2781,8 @@ // Return the sole RegMask. if (strcmp(first_reg_class, "stack_slots") == 0) { fprintf(fp," return &(Compile::current()->FIRST_STACK_mask());\n"); + } else if (strcmp(first_reg_class, "dynamic") == 0) { + fprintf(fp," return &RegMask::Empty;\n"); } else { const char* first_reg_class_to_upper = toUpper(first_reg_class); fprintf(fp," return &%s_mask();\n", first_reg_class_to_upper);
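archDesc.cpp and output_c.cpp above both special-case the new "dynamic" register class when mapping a class name to a register-mask expression; its mask is not a static table entry but is fetched from operand 0 at runtime. A condensed sketch of that dispatch, with the emitted expressions reduced to plain strings (the fallback spelling is approximate):

    #include <cstring>

    const char* reg_class_to_regmask(const char* name) {
      if (std::strcmp(name, "stack_slots") == 0)
        return "(Compile::current()->FIRST_STACK_mask())";
      if (std::strcmp(name, "dynamic") == 0)
        return "*_opnds[0]->in_RegMask(0)";  // resolved per-operand at runtime
      return "<UPPERCASE(name)>_mask()";     // default: the class's static mask
    }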
--- a/src/hotspot/share/aot/aotCodeHeap.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/aot/aotCodeHeap.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -40,6 +40,7 @@ #include "oops/compressedOops.hpp" #include "oops/klass.inline.hpp" #include "oops/method.inline.hpp" +#include "runtime/atomic.hpp" #include "runtime/deoptimization.hpp" #include "runtime/handles.inline.hpp" #include "runtime/os.hpp" @@ -347,7 +348,7 @@ AOTCompiledMethod *aot = new AOTCompiledMethod(code, mh(), meta, metadata_table, metadata_size, state_adr, this, name, code_id, _aot_id); assert(_code_to_aot[code_id]._aot == NULL, "should be not initialized"); _code_to_aot[code_id]._aot = aot; // Should set this first - if (Atomic::cmpxchg(in_use, &_code_to_aot[code_id]._state, not_set) != not_set) { + if (Atomic::cmpxchg(&_code_to_aot[code_id]._state, not_set, in_use) != not_set) { _code_to_aot[code_id]._aot = NULL; // Clean } else { // success // Publish method @@ -410,7 +411,7 @@ AOTCompiledMethod* aot = new AOTCompiledMethod(entry, NULL, meta, metadata_table, metadata_size, state_adr, this, full_name, code_id, i); assert(_code_to_aot[code_id]._aot == NULL, "should be not initialized"); _code_to_aot[code_id]._aot = aot; - if (Atomic::cmpxchg(in_use, &_code_to_aot[code_id]._state, not_set) != not_set) { + if (Atomic::cmpxchg(&_code_to_aot[code_id]._state, not_set, in_use) != not_set) { fatal("stab '%s' code state is %d", full_name, _code_to_aot[code_id]._state); } // Adjust code buffer boundaries only for stubs because they are last in the buffer. @@ -721,7 +722,7 @@ for (int i = 0; i < methods_cnt; ++i) { int code_id = indexes[i]; // Invalidate aot code. - if (Atomic::cmpxchg(invalid, &_code_to_aot[code_id]._state, not_set) != not_set) { + if (Atomic::cmpxchg(&_code_to_aot[code_id]._state, not_set, invalid) != not_set) { if (_code_to_aot[code_id]._state == in_use) { AOTCompiledMethod* aot = _code_to_aot[code_id]._aot; assert(aot != NULL, "aot should be set");
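The three aotCodeHeap hunks follow a publish-then-claim pattern: the AOT method pointer is stored first, the per-method state is then claimed with a compare-and-swap from not_set, and the store is undone if another thread won the race. A minimal sketch with std::atomic (the state names are from the hunk, everything else is hypothetical):

    #include <atomic>

    enum State { not_set, in_use, invalid };

    struct Slot {
      void* _aot = nullptr;               // payload, published first
      std::atomic<State> _state{not_set};
    };

    // Returns true if this thread claimed the slot for its payload.
    bool publish(Slot& slot, void* aot) {
      slot._aot = aot;                    // "Should set this first"
      State expected = not_set;
      if (!slot._state.compare_exchange_strong(expected, in_use)) {
        slot._aot = nullptr;              // lost the race: clean up
        return false;
      }
      return true;
    }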
--- a/src/hotspot/share/aot/aotCompiledMethod.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/aot/aotCompiledMethod.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -37,6 +37,7 @@ #include "runtime/frame.inline.hpp" #include "runtime/handles.inline.hpp" #include "runtime/java.hpp" +#include "runtime/orderAccess.hpp" #include "runtime/os.hpp" #include "runtime/safepointVerifiers.hpp" #include "runtime/sharedRuntime.hpp"
--- a/src/hotspot/share/asm/assembler.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/asm/assembler.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -29,7 +29,6 @@ #include "gc/shared/collectedHeap.hpp" #include "memory/universe.hpp" #include "oops/compressedOops.hpp" -#include "runtime/atomic.hpp" #include "runtime/icache.hpp" #include "runtime/os.hpp" #include "runtime/thread.hpp"
--- a/src/hotspot/share/c1/c1_GraphBuilder.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/c1/c1_GraphBuilder.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -3793,7 +3793,7 @@ INLINE_BAILOUT("total inlining greater than DesiredMethodLimit"); } // printing - print_inlining(callee); + print_inlining(callee, "inline", /*success*/ true); } // NOTE: Bailouts from this point on, which occur at the @@ -4315,16 +4315,11 @@ void GraphBuilder::print_inlining(ciMethod* callee, const char* msg, bool success) { CompileLog* log = compilation()->log(); if (log != NULL) { + assert(msg != NULL, "inlining msg should not be null!"); if (success) { - if (msg != NULL) - log->inline_success(msg); - else - log->inline_success("receiver is statically known"); + log->inline_success(msg); } else { - if (msg != NULL) - log->inline_fail(msg); - else - log->inline_fail("reason unknown"); + log->inline_fail(msg); } } EventCompilerInlining event;
--- a/src/hotspot/share/c1/c1_GraphBuilder.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/c1/c1_GraphBuilder.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -381,7 +381,7 @@ void append_unsafe_get_and_set_obj(ciMethod* callee, bool is_add); void append_char_access(ciMethod* callee, bool is_store); - void print_inlining(ciMethod* callee, const char* msg = NULL, bool success = true); + void print_inlining(ciMethod* callee, const char* msg, bool success = true); void profile_call(ciMethod* callee, Value recv, ciKlass* predicted_holder, Values* obj_args, bool inlined); void profile_return_type(Value ret, ciMethod* callee, ciMethod* m = NULL, int bci = -1);
--- a/src/hotspot/share/classfile/classLoader.inline.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/classfile/classLoader.inline.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -26,14 +26,14 @@ #define SHARE_CLASSFILE_CLASSLOADER_INLINE_HPP #include "classfile/classLoader.hpp" -#include "runtime/orderAccess.hpp" +#include "runtime/atomic.hpp" // Next entry in class path -inline ClassPathEntry* ClassPathEntry::next() const { return OrderAccess::load_acquire(&_next); } +inline ClassPathEntry* ClassPathEntry::next() const { return Atomic::load_acquire(&_next); } inline void ClassPathEntry::set_next(ClassPathEntry* next) { // may have unlocked readers, so ensure visibility. - OrderAccess::release_store(&_next, next); + Atomic::release_store(&_next, next); } inline ClassPathEntry* ClassLoader::classpath_entry(int n) {
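This hunk only renames the entry points; the lock-free discipline is unchanged: the writer publishes a fully constructed entry with a release store, and readers traverse with acquire loads so they observe the entry's contents. A standalone sketch of that pairing:

    #include <atomic>

    struct Entry {
      int value = 0;
      std::atomic<Entry*> next{nullptr};
    };

    // Writer: release makes *e's fields visible before e is reachable.
    void set_next(Entry* prev, Entry* e) {
      prev->next.store(e, std::memory_order_release);
    }

    // Reader: acquire pairs with the release store above.
    int sum(const Entry* head) {
      int s = 0;
      for (const Entry* e = head; e != nullptr;
           e = e->next.load(std::memory_order_acquire)) {
        s += e->value;
      }
      return s;
    }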
--- a/src/hotspot/share/classfile/classLoaderData.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/classfile/classLoaderData.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -67,7 +67,6 @@ #include "runtime/atomic.hpp" #include "runtime/handles.inline.hpp" #include "runtime/mutex.hpp" -#include "runtime/orderAccess.hpp" #include "runtime/safepoint.hpp" #include "utilities/growableArray.hpp" #include "utilities/macros.hpp" @@ -187,11 +186,11 @@ oop* ClassLoaderData::ChunkedHandleList::add(oop o) { if (_head == NULL || _head->_size == Chunk::CAPACITY) { Chunk* next = new Chunk(_head); - OrderAccess::release_store(&_head, next); + Atomic::release_store(&_head, next); } oop* handle = &_head->_data[_head->_size]; NativeAccess<IS_DEST_UNINITIALIZED>::oop_store(handle, o); - OrderAccess::release_store(&_head->_size, _head->_size + 1); + Atomic::release_store(&_head->_size, _head->_size + 1); return handle; } @@ -214,10 +213,10 @@ } void ClassLoaderData::ChunkedHandleList::oops_do(OopClosure* f) { - Chunk* head = OrderAccess::load_acquire(&_head); + Chunk* head = Atomic::load_acquire(&_head); if (head != NULL) { // Must be careful when reading size of head - oops_do_chunk(f, head, OrderAccess::load_acquire(&head->_size)); + oops_do_chunk(f, head, Atomic::load_acquire(&head->_size)); for (Chunk* c = head->_next; c != NULL; c = c->_next) { oops_do_chunk(f, c, c->_size); } @@ -273,7 +272,7 @@ return; } int new_claim = old_claim & ~claim; - if (Atomic::cmpxchg(new_claim, &_claim, old_claim) == old_claim) { + if (Atomic::cmpxchg(&_claim, old_claim, new_claim) == old_claim) { return; } } @@ -286,7 +285,7 @@ return false; } int new_claim = old_claim | claim; - if (Atomic::cmpxchg(new_claim, &_claim, old_claim) == old_claim) { + if (Atomic::cmpxchg(&_claim, old_claim, new_claim) == old_claim) { return true; } } @@ -326,7 +325,7 @@ void ClassLoaderData::classes_do(KlassClosure* klass_closure) { // Lock-free access requires load_acquire - for (Klass* k = OrderAccess::load_acquire(&_klasses); k != NULL; k = k->next_link()) { + for (Klass* k = Atomic::load_acquire(&_klasses); k != NULL; k = k->next_link()) { klass_closure->do_klass(k); assert(k != k->next_link(), "no loops!"); } @@ -334,7 +333,7 @@ void ClassLoaderData::classes_do(void f(Klass * const)) { // Lock-free access requires load_acquire - for (Klass* k = OrderAccess::load_acquire(&_klasses); k != NULL; k = k->next_link()) { + for (Klass* k = Atomic::load_acquire(&_klasses); k != NULL; k = k->next_link()) { f(k); assert(k != k->next_link(), "no loops!"); } @@ -342,7 +341,7 @@ void ClassLoaderData::methods_do(void f(Method*)) { // Lock-free access requires load_acquire - for (Klass* k = OrderAccess::load_acquire(&_klasses); k != NULL; k = k->next_link()) { + for (Klass* k = Atomic::load_acquire(&_klasses); k != NULL; k = k->next_link()) { if (k->is_instance_klass() && InstanceKlass::cast(k)->is_loaded()) { InstanceKlass::cast(k)->methods_do(f); } @@ -351,7 +350,7 @@ void ClassLoaderData::loaded_classes_do(KlassClosure* klass_closure) { // Lock-free access requires load_acquire - for (Klass* k = OrderAccess::load_acquire(&_klasses); k != NULL; k = k->next_link()) { + for (Klass* k = Atomic::load_acquire(&_klasses); k != NULL; k = k->next_link()) { // Do not filter ArrayKlass oops here... 
if (k->is_array_klass() || (k->is_instance_klass() && InstanceKlass::cast(k)->is_loaded())) { #ifdef ASSERT @@ -366,7 +365,7 @@ void ClassLoaderData::classes_do(void f(InstanceKlass*)) { // Lock-free access requires load_acquire - for (Klass* k = OrderAccess::load_acquire(&_klasses); k != NULL; k = k->next_link()) { + for (Klass* k = Atomic::load_acquire(&_klasses); k != NULL; k = k->next_link()) { if (k->is_instance_klass()) { f(InstanceKlass::cast(k)); } @@ -465,7 +464,7 @@ k->set_next_link(old_value); // Link the new item into the list, making sure the linked class is stable // since the list can be walked without a lock - OrderAccess::release_store(&_klasses, k); + Atomic::release_store(&_klasses, k); if (k->is_array_klass()) { ClassLoaderDataGraph::inc_array_classes(1); } else { @@ -552,7 +551,7 @@ ModuleEntryTable* ClassLoaderData::modules() { // Lazily create the module entry table at first request. // Lock-free access requires load_acquire. - ModuleEntryTable* modules = OrderAccess::load_acquire(&_modules); + ModuleEntryTable* modules = Atomic::load_acquire(&_modules); if (modules == NULL) { MutexLocker m1(Module_lock); // Check if _modules got allocated while we were waiting for this lock. @@ -562,7 +561,7 @@ { MutexLocker m1(metaspace_lock(), Mutex::_no_safepoint_check_flag); // Ensure _modules is stable, since it is examined without a lock - OrderAccess::release_store(&_modules, modules); + Atomic::release_store(&_modules, modules); } } } @@ -752,7 +751,7 @@ // The reason for the delayed allocation is because some class loaders are // simply for delegating with no metadata of their own. // Lock-free access requires load_acquire. - ClassLoaderMetaspace* metaspace = OrderAccess::load_acquire(&_metaspace); + ClassLoaderMetaspace* metaspace = Atomic::load_acquire(&_metaspace); if (metaspace == NULL) { MutexLocker ml(_metaspace_lock, Mutex::_no_safepoint_check_flag); // Check if _metaspace got allocated while we were waiting for this lock. @@ -768,7 +767,7 @@ metaspace = new ClassLoaderMetaspace(_metaspace_lock, Metaspace::StandardMetaspaceType); } // Ensure _metaspace is stable, since it is examined without a lock - OrderAccess::release_store(&_metaspace, metaspace); + Atomic::release_store(&_metaspace, metaspace); } } return metaspace; @@ -969,7 +968,7 @@ bool ClassLoaderData::contains_klass(Klass* klass) { // Lock-free access requires load_acquire - for (Klass* k = OrderAccess::load_acquire(&_klasses); k != NULL; k = k->next_link()) { + for (Klass* k = Atomic::load_acquire(&_klasses); k != NULL; k = k->next_link()) { if (k == klass) return true; } return false;
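ClassLoaderData's claim protocol above sets and clears individual claim bits with a retry loop around cmpxchg, so independent claimants never lose each other's bits. A compact sketch of the same bitmask protocol:

    #include <atomic>

    // Set the claim bit(s); returns true if this thread set them.
    bool try_claim(std::atomic<int>& claims, int claim) {
      for (;;) {
        int old_claim = claims.load();
        if ((old_claim & claim) == claim) return false;  // already claimed
        int new_claim = old_claim | claim;
        if (claims.compare_exchange_weak(old_claim, new_claim)) return true;
      }
    }

    void clear_claim(std::atomic<int>& claims, int claim) {
      for (;;) {
        int old_claim = claims.load();
        if ((old_claim & claim) == 0) return;            // nothing to clear
        int new_claim = old_claim & ~claim;
        if (claims.compare_exchange_weak(old_claim, new_claim)) return;
      }
    }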
--- a/src/hotspot/share/classfile/classLoaderDataGraph.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/classfile/classLoaderDataGraph.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -38,7 +38,6 @@ #include "runtime/atomic.hpp" #include "runtime/handles.inline.hpp" #include "runtime/mutex.hpp" -#include "runtime/orderAccess.hpp" #include "runtime/safepoint.hpp" #include "runtime/safepointVerifiers.hpp" #include "utilities/growableArray.hpp" @@ -59,13 +58,13 @@ // // Any ClassLoaderData added after or during walking the list are prepended to // _head. Their claim mark need not be handled here. - for (ClassLoaderData* cld = OrderAccess::load_acquire(&_head); cld != NULL; cld = cld->next()) { + for (ClassLoaderData* cld = Atomic::load_acquire(&_head); cld != NULL; cld = cld->next()) { cld->clear_claim(); } } void ClassLoaderDataGraph::clear_claimed_marks(int claim) { - for (ClassLoaderData* cld = OrderAccess::load_acquire(&_head); cld != NULL; cld = cld->next()) { + for (ClassLoaderData* cld = Atomic::load_acquire(&_head); cld != NULL; cld = cld->next()) { cld->clear_claim(claim); } } @@ -186,10 +185,7 @@ // GC root of class loader data created. ClassLoaderData* volatile ClassLoaderDataGraph::_head = NULL; ClassLoaderData* ClassLoaderDataGraph::_unloading = NULL; -ClassLoaderData* ClassLoaderDataGraph::_saved_unloading = NULL; -ClassLoaderData* ClassLoaderDataGraph::_saved_head = NULL; -bool ClassLoaderDataGraph::_should_purge = false; bool ClassLoaderDataGraph::_should_clean_deallocate_lists = false; bool ClassLoaderDataGraph::_safepoint_cleanup_needed = false; bool ClassLoaderDataGraph::_metaspace_oom = false; @@ -220,7 +216,7 @@ // First install the new CLD to the Graph. cld->set_next(_head); - OrderAccess::release_store(&_head, cld); + Atomic::release_store(&_head, cld); // Next associate with the class_loader. if (!is_unsafe_anonymous) { @@ -249,9 +245,7 @@ void ClassLoaderDataGraph::cld_unloading_do(CLDClosure* cl) { assert_locked_or_safepoint_weak(ClassLoaderDataGraph_lock); - // Only walk the head until any clds not purged from prior unloading - // (CMS doesn't purge right away). - for (ClassLoaderData* cld = _unloading; cld != _saved_unloading; cld = cld->next()) { + for (ClassLoaderData* cld = _unloading; cld != NULL; cld = cld->next()) { assert(cld->is_unloading(), "invariant"); cl->do_cld(cld); } @@ -381,9 +375,7 @@ void ClassLoaderDataGraph::modules_unloading_do(void f(ModuleEntry*)) { assert_locked_or_safepoint(ClassLoaderDataGraph_lock); - // Only walk the head until any clds not purged from prior unloading - // (CMS doesn't purge right away). - for (ClassLoaderData* cld = _unloading; cld != _saved_unloading; cld = cld->next()) { + for (ClassLoaderData* cld = _unloading; cld != NULL; cld = cld->next()) { assert(cld->is_unloading(), "invariant"); cld->modules_do(f); } @@ -399,9 +391,7 @@ void ClassLoaderDataGraph::packages_unloading_do(void f(PackageEntry*)) { assert_locked_or_safepoint(ClassLoaderDataGraph_lock); - // Only walk the head until any clds not purged from prior unloading - // (CMS doesn't purge right away). 
- for (ClassLoaderData* cld = _unloading; cld != _saved_unloading; cld = cld->next()) { + for (ClassLoaderData* cld = _unloading; cld != NULL; cld = cld->next()) { assert(cld->is_unloading(), "invariant"); cld->packages_do(f); } @@ -424,9 +414,7 @@ void ClassLoaderDataGraph::classes_unloading_do(void f(Klass* const)) { assert_locked_or_safepoint(ClassLoaderDataGraph_lock); - // Only walk the head until any clds not purged from prior unloading - // (CMS doesn't purge right away). - for (ClassLoaderData* cld = _unloading; cld != _saved_unloading; cld = cld->next()) { + for (ClassLoaderData* cld = _unloading; cld != NULL; cld = cld->next()) { assert(cld->is_unloading(), "invariant"); cld->classes_do(f); } @@ -476,32 +464,6 @@ } } -GrowableArray<ClassLoaderData*>* ClassLoaderDataGraph::new_clds() { - assert_locked_or_safepoint(ClassLoaderDataGraph_lock); - assert(_head == NULL || _saved_head != NULL, "remember_new_clds(true) not called?"); - - GrowableArray<ClassLoaderData*>* array = new GrowableArray<ClassLoaderData*>(); - - // The CLDs in [_head, _saved_head] were all added during last call to remember_new_clds(true); - ClassLoaderData* curr = _head; - while (curr != _saved_head) { - if (!curr->claimed(ClassLoaderData::_claim_strong)) { - array->push(curr); - LogTarget(Debug, class, loader, data) lt; - if (lt.is_enabled()) { - LogStream ls(lt); - ls.print("found new CLD: "); - curr->print_value_on(&ls); - ls.cr(); - } - } - - curr = curr->_next; - } - - return array; -} - #ifndef PRODUCT bool ClassLoaderDataGraph::contains_loader_data(ClassLoaderData* loader_data) { assert_locked_or_safepoint(ClassLoaderDataGraph_lock); @@ -544,10 +506,6 @@ uint loaders_processed = 0; uint loaders_removed = 0; - // Save previous _unloading pointer for CMS which may add to unloading list before - // purging and we don't want to rewalk the previously unloaded class loader data. - _saved_unloading = _unloading; - data = _head; while (data != NULL) { if (data->is_alive()) { @@ -676,7 +634,7 @@ while (head != NULL) { Klass* next = next_klass_in_cldg(head); - Klass* old_head = Atomic::cmpxchg(next, &_next_klass, head); + Klass* old_head = Atomic::cmpxchg(&_next_klass, head, next); if (old_head == head) { return head; // Won the CAS.
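The final hunk above is a shared cursor: each consumer reads the current head, computes its successor, and advances the cursor with cmpxchg, keeping the element only if it won the CAS. A sketch (successor() is a hypothetical stand-in for next_klass_in_cldg):

    #include <atomic>

    struct Klass { Klass* next = nullptr; };

    Klass* successor(Klass* k) { return k->next; }

    // Pop the next element off a shared cursor; nullptr when exhausted.
    Klass* next_klass(std::atomic<Klass*>& cursor) {
      Klass* head = cursor.load();
      while (head != nullptr) {
        Klass* next = successor(head);
        if (cursor.compare_exchange_weak(head, next)) {
          return head;  // won the CAS
        }
        // compare_exchange_weak refreshed head on failure; retry.
      }
      return nullptr;
    }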
--- a/src/hotspot/share/classfile/classLoaderDataGraph.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/classfile/classLoaderDataGraph.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -43,10 +43,6 @@ // All CLDs (except the null CLD) can be reached by walking _head->_next->... static ClassLoaderData* volatile _head; static ClassLoaderData* _unloading; - // CMS support. - static ClassLoaderData* _saved_head; - static ClassLoaderData* _saved_unloading; - static bool _should_purge; // Set if there's anything to purge in the deallocate lists or previous versions // during a safepoint after class unloading in a full GC. @@ -115,18 +111,6 @@ static void print_dictionary(outputStream* st); static void print_table_statistics(outputStream* st); - // CMS support. - static void remember_new_clds(bool remember) { _saved_head = (remember ? _head : NULL); } - static GrowableArray<ClassLoaderData*>* new_clds(); - - static void set_should_purge(bool b) { _should_purge = b; } - static bool should_purge_and_reset() { - bool res = _should_purge; - // reset for next time. - set_should_purge(false); - return res; - } - static int resize_dictionaries(); static bool has_metaspace_oom() { return _metaspace_oom; }
--- a/src/hotspot/share/classfile/classLoaderDataGraph.inline.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/classfile/classLoaderDataGraph.inline.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -50,21 +50,21 @@ } void ClassLoaderDataGraph::inc_instance_classes(size_t count) { - Atomic::add(count, &_num_instance_classes); + Atomic::add(&_num_instance_classes, count); } void ClassLoaderDataGraph::dec_instance_classes(size_t count) { assert(count <= _num_instance_classes, "Sanity"); - Atomic::sub(count, &_num_instance_classes); + Atomic::sub(&_num_instance_classes, count); } void ClassLoaderDataGraph::inc_array_classes(size_t count) { - Atomic::add(count, &_num_array_classes); + Atomic::add(&_num_array_classes, count); } void ClassLoaderDataGraph::dec_array_classes(size_t count) { assert(count <= _num_array_classes, "Sanity"); - Atomic::sub(count, &_num_array_classes); + Atomic::sub(&_num_array_classes, count); } bool ClassLoaderDataGraph::should_clean_metaspaces_and_reset() {
--- a/src/hotspot/share/classfile/dictionary.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/classfile/dictionary.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -33,8 +33,6 @@ #include "memory/metaspaceClosure.hpp" #include "memory/resourceArea.hpp" #include "oops/oop.inline.hpp" -#include "runtime/atomic.hpp" -#include "runtime/orderAccess.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/safepointVerifiers.hpp" #include "utilities/hashtable.inline.hpp"
--- a/src/hotspot/share/classfile/stringTable.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/classfile/stringTable.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -214,11 +214,11 @@ } size_t StringTable::item_added() { - return Atomic::add((size_t)1, &_items_count); + return Atomic::add(&_items_count, (size_t)1); } size_t StringTable::add_items_to_clean(size_t ndead) { - size_t total = Atomic::add((size_t)ndead, &_uncleaned_items_count); + size_t total = Atomic::add(&_uncleaned_items_count, (size_t)ndead); log_trace(stringtable)( "Uncleaned items:" SIZE_FORMAT " added: " SIZE_FORMAT " total:" SIZE_FORMAT, _uncleaned_items_count, ndead, total); @@ -226,7 +226,7 @@ } void StringTable::item_removed() { - Atomic::add((size_t)-1, &_items_count); + Atomic::add(&_items_count, (size_t)-1); } double StringTable::get_load_factor() {
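item_removed above decrements an unsigned counter by adding (size_t)-1: unsigned arithmetic wraps modulo 2^N, so adding the all-ones value is the same as subtracting one. A short demonstration:

    #include <atomic>
    #include <cassert>
    #include <cstddef>

    int main() {
      std::atomic<std::size_t> items{10};
      items.fetch_add((std::size_t)-1);  // wraps around: subtracts 1
      assert(items.load() == 9);
    }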
--- a/src/hotspot/share/classfile/symbolTable.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/classfile/symbolTable.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -189,8 +189,8 @@ } } -void SymbolTable::reset_has_items_to_clean() { Atomic::store(false, &_has_items_to_clean); } -void SymbolTable::mark_has_items_to_clean() { Atomic::store(true, &_has_items_to_clean); } +void SymbolTable::reset_has_items_to_clean() { Atomic::store(&_has_items_to_clean, false); } +void SymbolTable::mark_has_items_to_clean() { Atomic::store(&_has_items_to_clean, true); } bool SymbolTable::has_items_to_clean() { return Atomic::load(&_has_items_to_clean); } void SymbolTable::item_added() { @@ -724,7 +724,7 @@ bdt.done(jt); } - Atomic::add(stdc._processed, &_symbols_counted); + Atomic::add(&_symbols_counted, stdc._processed); log_debug(symboltable)("Cleaned " SIZE_FORMAT " of " SIZE_FORMAT, stdd._deleted, stdc._processed);
--- a/src/hotspot/share/classfile/systemDictionary.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/classfile/systemDictionary.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -79,7 +79,6 @@ #include "runtime/java.hpp" #include "runtime/javaCalls.hpp" #include "runtime/mutexLocker.hpp" -#include "runtime/orderAccess.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/signature.hpp" #include "services/classLoadingService.hpp"
--- a/src/hotspot/share/classfile/verifier.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/classfile/verifier.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -50,7 +50,6 @@ #include "runtime/interfaceSupport.inline.hpp" #include "runtime/javaCalls.hpp" #include "runtime/jniHandles.inline.hpp" -#include "runtime/orderAccess.hpp" #include "runtime/os.hpp" #include "runtime/safepointVerifiers.hpp" #include "runtime/thread.hpp"
--- a/src/hotspot/share/code/codeCache.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/code/codeCache.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -47,6 +47,7 @@ #include "oops/oop.inline.hpp" #include "oops/verifyOopClosure.hpp" #include "runtime/arguments.hpp" +#include "runtime/atomic.hpp" #include "runtime/deoptimization.hpp" #include "runtime/handles.inline.hpp" #include "runtime/icache.hpp" @@ -749,7 +750,7 @@ for (;;) { ExceptionCache* purge_list_head = Atomic::load(&_exception_cache_purge_list); entry->set_purge_list_next(purge_list_head); - if (Atomic::cmpxchg(entry, &_exception_cache_purge_list, purge_list_head) == purge_list_head) { + if (Atomic::cmpxchg(&_exception_cache_purge_list, purge_list_head, entry) == purge_list_head) { break; } }
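The purge-list insertion above is a classic lock-free stack push: read the head, link the new entry in front of it, then CAS the head and retry on contention. A standalone sketch:

    #include <atomic>

    struct Node { Node* next = nullptr; };

    void push(std::atomic<Node*>& list, Node* entry) {
      for (;;) {
        Node* head = list.load();
        entry->next = head;  // link before publishing
        if (list.compare_exchange_weak(head, entry)) break;
      }
    }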
--- a/src/hotspot/share/code/compiledMethod.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/code/compiledMethod.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -38,6 +38,7 @@ #include "oops/methodData.hpp" #include "oops/method.inline.hpp" #include "prims/methodHandles.hpp" +#include "runtime/atomic.hpp" #include "runtime/deoptimization.hpp" #include "runtime/handles.inline.hpp" #include "runtime/mutexLocker.hpp" @@ -113,7 +114,7 @@ //----------------------------------------------------------------------------- ExceptionCache* CompiledMethod::exception_cache_acquire() const { - return OrderAccess::load_acquire(&_exception_cache); + return Atomic::load_acquire(&_exception_cache); } void CompiledMethod::add_exception_cache_entry(ExceptionCache* new_entry) { @@ -133,7 +134,7 @@ // next pointers always point at live ExceptionCaches, that are not removed due // to concurrent ExceptionCache cleanup. ExceptionCache* next = ec->next(); - if (Atomic::cmpxchg(next, &_exception_cache, ec) == ec) { + if (Atomic::cmpxchg(&_exception_cache, ec, next) == ec) { CodeCache::release_exception_cache(ec); } continue; @@ -143,7 +144,7 @@ new_entry->set_next(ec); } } - if (Atomic::cmpxchg(new_entry, &_exception_cache, ec) == ec) { + if (Atomic::cmpxchg(&_exception_cache, ec, new_entry) == ec) { return; } } @@ -176,7 +177,7 @@ // Try to clean head; this is contended by concurrent inserts, that // both lazily clean the head, and insert entries at the head. If // the CAS fails, the operation is restarted. - if (Atomic::cmpxchg(next, &_exception_cache, curr) != curr) { + if (Atomic::cmpxchg(&_exception_cache, curr, next) != curr) { prev = NULL; curr = exception_cache_acquire(); continue; @@ -615,7 +616,7 @@ if (md != NULL && md->is_method()) { Method* method = static_cast<Method*>(md); if (!method->method_holder()->is_loader_alive()) { - Atomic::store((Method*)NULL, r->metadata_addr()); + Atomic::store(r->metadata_addr(), (Method*)NULL); if (!r->metadata_is_immediate()) { r->fix_metadata_relocation();
--- a/src/hotspot/share/code/compiledMethod.inline.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/code/compiledMethod.inline.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -27,8 +27,8 @@ #include "code/compiledMethod.hpp" #include "code/nativeInst.hpp" +#include "runtime/atomic.hpp" #include "runtime/frame.hpp" -#include "runtime/orderAccess.hpp" inline bool CompiledMethod::is_deopt_pc(address pc) { return is_deopt_entry(pc) || is_deopt_mh_entry(pc); } @@ -61,7 +61,7 @@ // class ExceptionCache methods -inline int ExceptionCache::count() { return OrderAccess::load_acquire(&_count); } +inline int ExceptionCache::count() { return Atomic::load_acquire(&_count); } address ExceptionCache::pc_at(int index) { assert(index >= 0 && index < count(),""); @@ -74,7 +74,7 @@ } // increment_count is only called under lock, but there may be concurrent readers. -inline void ExceptionCache::increment_count() { OrderAccess::release_store(&_count, _count + 1); } +inline void ExceptionCache::increment_count() { Atomic::release_store(&_count, _count + 1); } #endif // SHARE_CODE_COMPILEDMETHOD_INLINE_HPP
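ExceptionCache pairs a release store in increment_count with an acquire load in count(): the single (locked) writer fills in a new entry and only then releases the larger count, so lock-free readers that see the new count also see the entry. A sketch of this publish-by-count pattern (the fixed-size array and capacity handling are simplifications):

    #include <atomic>

    const int CAPACITY = 16;

    struct Cache {
      long entries[CAPACITY] = {};
      std::atomic<int> count{0};
    };

    // Writer (serialized by a lock in the real code); caller ensures
    // the cache is not full. Entry first, count second.
    void append(Cache& c, long entry) {
      int n = c.count.load(std::memory_order_relaxed);
      c.entries[n] = entry;
      c.count.store(n + 1, std::memory_order_release);
    }

    // Lock-free reader: acquiring count makes entries[0..n) visible.
    bool contains(const Cache& c, long entry) {
      int n = c.count.load(std::memory_order_acquire);
      for (int i = 0; i < n; i++)
        if (c.entries[i] == entry) return true;
      return false;
    }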
--- a/src/hotspot/share/code/dependencyContext.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/code/dependencyContext.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -28,6 +28,7 @@ #include "code/dependencyContext.hpp" #include "memory/resourceArea.hpp" #include "runtime/atomic.hpp" +#include "runtime/orderAccess.hpp" #include "runtime/perfData.hpp" #include "utilities/exceptions.hpp" @@ -101,7 +102,7 @@ for (;;) { nmethodBucket* head = Atomic::load(_dependency_context_addr); new_head->set_next(head); - if (Atomic::cmpxchg(new_head, _dependency_context_addr, head) == head) { + if (Atomic::cmpxchg(_dependency_context_addr, head, new_head) == head) { break; } } @@ -124,7 +125,7 @@ for (;;) { nmethodBucket* purge_list_head = Atomic::load(&_purge_list); b->set_purge_list_next(purge_list_head); - if (Atomic::cmpxchg(b, &_purge_list, purge_list_head) == purge_list_head) { + if (Atomic::cmpxchg(&_purge_list, purge_list_head, b) == purge_list_head) { break; } } @@ -260,7 +261,7 @@ #endif //PRODUCT int nmethodBucket::decrement() { - return Atomic::sub(1, &_count); + return Atomic::sub(&_count, 1); } // We use a monotonically increasing epoch counter to track the last epoch a given @@ -272,7 +273,7 @@ if (last_cleanup >= cleaning_epoch) { return false; } - return Atomic::cmpxchg(cleaning_epoch, _last_cleanup_addr, last_cleanup) == last_cleanup; + return Atomic::cmpxchg(_last_cleanup_addr, last_cleanup, cleaning_epoch) == last_cleanup; } // Retrieve the first nmethodBucket that has a dependent that does not correspond to @@ -281,7 +282,7 @@ nmethodBucket* DependencyContext::dependencies_not_unloading() { for (;;) { // Need acquire becase the read value could come from a concurrent insert. - nmethodBucket* head = OrderAccess::load_acquire(_dependency_context_addr); + nmethodBucket* head = Atomic::load_acquire(_dependency_context_addr); if (head == NULL || !head->get_nmethod()->is_unloading()) { return head; } @@ -291,7 +292,7 @@ // Unstable load of head w.r.t. head->next continue; } - if (Atomic::cmpxchg(head_next, _dependency_context_addr, head) == head) { + if (Atomic::cmpxchg(_dependency_context_addr, head, head_next) == head) { // Release is_unloading entries if unlinking was claimed DependencyContext::release(head); } @@ -300,7 +301,7 @@ // Relaxed accessors void DependencyContext::set_dependencies(nmethodBucket* b) { - Atomic::store(b, _dependency_context_addr); + Atomic::store(_dependency_context_addr, b); } nmethodBucket* DependencyContext::dependencies() { @@ -313,7 +314,7 @@ void DependencyContext::cleaning_start() { assert(SafepointSynchronize::is_at_safepoint(), "must be"); uint64_t epoch = ++_cleaning_epoch_monotonic; - Atomic::store(epoch, &_cleaning_epoch); + Atomic::store(&_cleaning_epoch, epoch); } // The epilogue marks the end of dependency context cleanup by the GC, @@ -323,7 +324,7 @@ // was called. That allows dependency contexts to be cleaned concurrently. void DependencyContext::cleaning_end() { uint64_t epoch = 0; - Atomic::store(epoch, &_cleaning_epoch); + Atomic::store(&_cleaning_epoch, epoch); } // This function skips over nmethodBuckets in the list corresponding to @@ -345,7 +346,7 @@ // Unstable load of next w.r.t. 
next->next continue; } - if (Atomic::cmpxchg(next_next, &_next, next) == next) { + if (Atomic::cmpxchg(&_next, next, next_next) == next) { // Release is_unloading entries if unlinking was claimed DependencyContext::release(next); } @@ -358,7 +359,7 @@ } void nmethodBucket::set_next(nmethodBucket* b) { - Atomic::store(b, &_next); + Atomic::store(&_next, b); } nmethodBucket* nmethodBucket::purge_list_next() { @@ -366,5 +367,5 @@ } void nmethodBucket::set_purge_list_next(nmethodBucket* b) { - Atomic::store(b, &_purge_list_next); + Atomic::store(&_purge_list_next, b); }
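claims_cleanup above allows at most one thread to clean a dependency context per GC epoch: if the recorded last-cleanup epoch is already current it bails out, otherwise it tries to bump the record with cmpxchg and proceeds only on success. A sketch:

    #include <atomic>
    #include <cstdint>

    // Returns true if the caller claimed cleanup for this epoch.
    bool claim_cleanup(std::atomic<uint64_t>& last_cleanup,
                       uint64_t cleaning_epoch) {
      uint64_t last = last_cleanup.load();
      if (last >= cleaning_epoch) {
        return false;  // someone already cleaned during this epoch
      }
      return last_cleanup.compare_exchange_strong(last, cleaning_epoch);
    }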
--- a/src/hotspot/share/code/nmethod.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/code/nmethod.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -315,7 +315,7 @@ } void ExceptionCache::set_next(ExceptionCache *ec) { - Atomic::store(ec, &_next); + Atomic::store(&_next, ec); } //----------------------------------------------------------------------------- @@ -1150,7 +1150,7 @@ // Ensure monotonicity of transitions. return false; } - if (Atomic::cmpxchg(new_state, &_state, old_state) == old_state) { + if (Atomic::cmpxchg(&_state, old_state, new_state) == old_state) { return true; } } @@ -1849,7 +1849,7 @@ assert(SafepointSynchronize::is_at_safepoint(), "only at safepoint"); if ((_oops_do_mark_link == NULL) && - (Atomic::replace_if_null(mark_link(this, claim_weak_request_tag), &_oops_do_mark_link))) { + (Atomic::replace_if_null(&_oops_do_mark_link, mark_link(this, claim_weak_request_tag)))) { oops_do_log_change("oops_do, mark weak request"); return true; } @@ -1863,7 +1863,7 @@ nmethod::oops_do_mark_link* nmethod::oops_do_try_claim_strong_done() { assert(SafepointSynchronize::is_at_safepoint(), "only at safepoint"); - oops_do_mark_link* old_next = Atomic::cmpxchg(mark_link(this, claim_strong_done_tag), &_oops_do_mark_link, mark_link(NULL, claim_weak_request_tag)); + oops_do_mark_link* old_next = Atomic::cmpxchg(&_oops_do_mark_link, mark_link(NULL, claim_weak_request_tag), mark_link(this, claim_strong_done_tag)); if (old_next == NULL) { oops_do_log_change("oops_do, mark strong done"); } @@ -1874,7 +1874,7 @@ assert(SafepointSynchronize::is_at_safepoint(), "only at safepoint"); assert(next == mark_link(this, claim_weak_request_tag), "Should be claimed as weak"); - oops_do_mark_link* old_next = Atomic::cmpxchg(mark_link(this, claim_strong_request_tag), &_oops_do_mark_link, next); + oops_do_mark_link* old_next = Atomic::cmpxchg(&_oops_do_mark_link, next, mark_link(this, claim_strong_request_tag)); if (old_next == next) { oops_do_log_change("oops_do, mark strong request"); } @@ -1885,7 +1885,7 @@ assert(SafepointSynchronize::is_at_safepoint(), "only at safepoint"); assert(extract_state(next) == claim_weak_done_tag, "Should be claimed as weak done"); - oops_do_mark_link* old_next = Atomic::cmpxchg(mark_link(extract_nmethod(next), claim_strong_done_tag), &_oops_do_mark_link, next); + oops_do_mark_link* old_next = Atomic::cmpxchg(&_oops_do_mark_link, next, mark_link(extract_nmethod(next), claim_strong_done_tag)); if (old_next == next) { oops_do_log_change("oops_do, mark weak done -> mark strong done"); return true; @@ -1900,13 +1900,13 @@ extract_state(_oops_do_mark_link) == claim_strong_request_tag, "must be but is nmethod " PTR_FORMAT " %u", p2i(extract_nmethod(_oops_do_mark_link)), extract_state(_oops_do_mark_link)); - nmethod* old_head = Atomic::xchg(this, &_oops_do_mark_nmethods); + nmethod* old_head = Atomic::xchg(&_oops_do_mark_nmethods, this); // Self-loop if needed. if (old_head == NULL) { old_head = this; } // Try to install end of list and weak done tag. 
- if (Atomic::cmpxchg(mark_link(old_head, claim_weak_done_tag), &_oops_do_mark_link, mark_link(this, claim_weak_request_tag)) == mark_link(this, claim_weak_request_tag)) { + if (Atomic::cmpxchg(&_oops_do_mark_link, mark_link(this, claim_weak_request_tag), mark_link(old_head, claim_weak_done_tag)) == mark_link(this, claim_weak_request_tag)) { oops_do_log_change("oops_do, mark weak done"); return NULL; } else { @@ -1917,7 +1917,7 @@ void nmethod::oops_do_add_to_list_as_strong_done() { assert(SafepointSynchronize::is_at_safepoint(), "only at safepoint"); - nmethod* old_head = Atomic::xchg(this, &_oops_do_mark_nmethods); + nmethod* old_head = Atomic::xchg(&_oops_do_mark_nmethods, this); // Self-loop if needed. if (old_head == NULL) { old_head = this;
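The nmethod claim protocol above packs a small state tag into the low bits of _oops_do_mark_link and moves between states with cmpxchg on the combined word, so pointer and state change atomically. A sketch of the tagging idea (the tag names are from the hunk; the 2-bit layout is an assumption):

    #include <atomic>
    #include <cstdint>

    enum { claim_weak_request_tag = 0, claim_weak_done_tag = 1,
           claim_strong_request_tag = 2, claim_strong_done_tag = 3 };

    // Pack a pointer and a 2-bit state into one CAS-able word; assumes
    // the pointee is at least 4-byte aligned so the low bits are free.
    inline uintptr_t mark_link(void* nm, uintptr_t tag) {
      return reinterpret_cast<uintptr_t>(nm) | tag;
    }
    inline void* extract_ptr(uintptr_t link) {
      return reinterpret_cast<void*>(link & ~uintptr_t(3));
    }
    inline uintptr_t extract_state(uintptr_t link) { return link & 3; }

    // One transition from the hunk: weak request -> strong request.
    bool upgrade_to_strong_request(std::atomic<uintptr_t>& link, void* nm) {
      uintptr_t expected = mark_link(nm, claim_weak_request_tag);
      return link.compare_exchange_strong(
          expected, mark_link(nm, claim_strong_request_tag));
    }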
--- a/src/hotspot/share/compiler/compileBroker.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/compiler/compileBroker.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -1479,14 +1479,14 @@ assert(!is_osr, "can't be osr"); // Adapters, native wrappers and method handle intrinsics // should be generated always. - return Atomic::add(1, &_compilation_id); + return Atomic::add(&_compilation_id, 1); } else if (CICountOSR && is_osr) { - id = Atomic::add(1, &_osr_compilation_id); + id = Atomic::add(&_osr_compilation_id, 1); if (CIStartOSR <= id && id < CIStopOSR) { return id; } } else { - id = Atomic::add(1, &_compilation_id); + id = Atomic::add(&_compilation_id, 1); if (CIStart <= id && id < CIStop) { return id; } @@ -1498,7 +1498,7 @@ #else // CICountOSR is a develop flag and set to 'false' by default. In a product built, // only _compilation_id is incremented. - return Atomic::add(1, &_compilation_id); + return Atomic::add(&_compilation_id, 1); #endif }
--- a/src/hotspot/share/compiler/compileBroker.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/compiler/compileBroker.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -29,6 +29,7 @@ #include "compiler/abstractCompiler.hpp" #include "compiler/compileTask.hpp" #include "compiler/compilerDirectives.hpp" +#include "runtime/atomic.hpp" #include "runtime/perfData.hpp" #include "utilities/stack.hpp" #if INCLUDE_JVMCI @@ -335,7 +336,7 @@ static bool should_compile_new_jobs() { return UseCompiler && (_should_compile_new_jobs == run_compilation); } static bool set_should_compile_new_jobs(jint new_state) { // Return success if the current caller set it - jint old = Atomic::cmpxchg(new_state, &_should_compile_new_jobs, 1-new_state); + jint old = Atomic::cmpxchg(&_should_compile_new_jobs, 1-new_state, new_state); bool success = (old == (1-new_state)); if (success) { if (new_state == run_compilation) { @@ -350,7 +351,7 @@ static void disable_compilation_forever() { UseCompiler = false; AlwaysCompileLoopMethods = false; - Atomic::xchg(jint(shutdown_compilation), &_should_compile_new_jobs); + Atomic::xchg(&_should_compile_new_jobs, jint(shutdown_compilation)); } static bool is_compilation_disabled_forever() { @@ -359,7 +360,7 @@ static void handle_full_code_cache(int code_blob_type); // Ensures that warning is only printed once. static bool should_print_compiler_warning() { - jint old = Atomic::cmpxchg(1, &_print_compilation_warning, 0); + jint old = Atomic::cmpxchg(&_print_compilation_warning, 0, 1); return old == 0; } // Return total compilation ticks
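should_print_compiler_warning above is a print-once guard: the first thread to CAS the flag from 0 to 1 gets true, every later caller gets false. A sketch:

    #include <atomic>

    std::atomic<int> print_warning_flag{0};

    bool should_print_compiler_warning() {
      int expected = 0;
      return print_warning_flag.compare_exchange_strong(expected, 1);
    }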
--- a/src/hotspot/share/compiler/disassembler.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/compiler/disassembler.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -414,9 +414,10 @@ _bytes_per_line = Disassembler::pd_instruction_alignment(); _print_file_name = true; - if (_optionsParsed) return; // parse only once - - // parse the global option string: + // parse the global option string + // We need to fill the options buffer for each newly created + // decode_env instance. The hsdis_* library looks for options + // in that buffer. collect_options(Disassembler::pd_cpu_opts()); collect_options(PrintAssemblyOptions); @@ -424,6 +425,8 @@ _print_raw = (strstr(options(), "xml") ? 2 : 1); } + if (_optionsParsed) return; // parse only once + if (strstr(options(), "help")) { _print_help = true; }
--- a/src/hotspot/share/gc/epsilon/epsilonHeap.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/epsilon/epsilonHeap.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -31,6 +31,7 @@ #include "memory/allocation.inline.hpp" #include "memory/resourceArea.hpp" #include "memory/universe.hpp" +#include "runtime/atomic.hpp" #include "runtime/globals.hpp" jint EpsilonHeap::initialize() { @@ -156,7 +157,7 @@ // Allocation successful, update counters { size_t last = _last_counter_update; - if ((used - last >= _step_counter_update) && Atomic::cmpxchg(used, &_last_counter_update, last) == last) { + if ((used - last >= _step_counter_update) && Atomic::cmpxchg(&_last_counter_update, last, used) == last) { _monitoring_support->update_counters(); } } @@ -164,7 +165,7 @@ // ...and print the occupancy line, if needed { size_t last = _last_heap_print; - if ((used - last >= _step_heap_print) && Atomic::cmpxchg(used, &_last_heap_print, last) == last) { + if ((used - last >= _step_heap_print) && Atomic::cmpxchg(&_last_heap_print, last, used) == last) { print_heap_info(used); print_metaspace_info(); } @@ -212,7 +213,7 @@ } // Always honor boundaries - size = MAX2(min_size, MIN2(_max_tlab_size, size)); + size = clamp(size, min_size, _max_tlab_size); // Always honor alignment size = align_up(size, MinObjAlignment);
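Two hunks in this area replace nested MAX2/MIN2 with clamp, which states the intent directly: keep the value within [lo, hi]. A quick demonstration that the two spellings agree (using std::clamp here; HotSpot has its own clamp helper):

    #include <algorithm>
    #include <cassert>
    #include <cstddef>

    int main() {
      std::size_t lo = 8, hi = 64;
      for (std::size_t v : {1u, 8u, 32u, 64u, 100u}) {
        std::size_t old_form = std::max(lo, std::min(hi, v));
        std::size_t new_form = std::clamp(v, lo, hi);
        assert(old_form == new_form);
      }
    }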
--- a/src/hotspot/share/gc/g1/g1Allocator.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1Allocator.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -194,7 +194,7 @@ if (hr == NULL) { return max_tlab; } else { - return MIN2(MAX2(hr->free(), (size_t) MinTLABSize), max_tlab); + return clamp(hr->free(), MinTLABSize, max_tlab); } }
--- a/src/hotspot/share/gc/g1/g1Analytics.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1Analytics.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -28,6 +28,7 @@ #include "runtime/globals.hpp" #include "runtime/os.hpp" #include "utilities/debug.hpp" +#include "utilities/globalDefinitions.hpp" #include "utilities/numberSeq.hpp" // Different defaults for different number of GC threads @@ -44,11 +45,11 @@ }; // all the same -static double young_cards_per_entry_ratio_defaults[] = { +static double young_card_merge_to_scan_ratio_defaults[] = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }; -static double young_only_cost_per_remset_card_ms_defaults[] = { +static double young_only_cost_per_card_scan_ms_defaults[] = { 0.015, 0.01, 0.01, 0.008, 0.008, 0.0055, 0.0055, 0.005 }; @@ -61,7 +62,6 @@ 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0 }; - static double young_other_cost_per_region_ms_defaults[] = { 0.3, 0.2, 0.2, 0.15, 0.15, 0.12, 0.12, 0.1 }; @@ -80,13 +80,13 @@ _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)), _concurrent_refine_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _logged_cards_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)), - _cost_per_logged_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)), - _cost_scan_hcc_seq(new TruncatedSeq(TruncatedSeqLength)), - _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), - _mixed_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), - _young_only_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)), - _mixed_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)), - _cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _young_card_merge_to_scan_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), + _mixed_card_merge_to_scan_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), + _young_cost_per_card_scan_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _mixed_cost_per_card_scan_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _young_cost_per_card_merge_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _mixed_cost_per_card_merge_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _copy_cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _constant_other_time_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _non_young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)), @@ -108,11 +108,10 @@ _concurrent_refine_rate_ms_seq->add(1/cost_per_logged_card_ms_defaults[0]); // Some applications have very low rates for logging cards. 
_logged_cards_rate_ms_seq->add(0.0); - _cost_per_logged_card_ms_seq->add(cost_per_logged_card_ms_defaults[index]); - _cost_scan_hcc_seq->add(0.0); - _young_cards_per_entry_ratio_seq->add(young_cards_per_entry_ratio_defaults[index]); - _young_only_cost_per_remset_card_ms_seq->add(young_only_cost_per_remset_card_ms_defaults[index]); - _cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]); + _young_card_merge_to_scan_ratio_seq->add(young_card_merge_to_scan_ratio_defaults[index]); + _young_cost_per_card_scan_ms_seq->add(young_only_cost_per_card_scan_ms_defaults[index]); + + _copy_cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]); _constant_other_time_ms_seq->add(constant_other_time_ms_defaults[index]); _young_other_cost_per_region_ms_seq->add(young_other_cost_per_region_ms_defaults[index]); _non_young_other_cost_per_region_ms_seq->add(non_young_other_cost_per_region_ms_defaults[index]); @@ -122,12 +121,20 @@ _concurrent_mark_cleanup_times_ms->add(0.20); } -double G1Analytics::get_new_prediction(TruncatedSeq const* seq) const { - return _predictor->get_new_prediction(seq); +bool G1Analytics::enough_samples_available(TruncatedSeq const* seq) const { + return seq->num() >= 3; +} + +double G1Analytics::get_new_unit_prediction(TruncatedSeq const* seq) const { + return _predictor->get_new_unit_prediction(seq); } size_t G1Analytics::get_new_size_prediction(TruncatedSeq const* seq) const { - return (size_t)get_new_prediction(seq); + return (size_t)get_new_lower_zero_bound_prediction(seq); +} + +double G1Analytics::get_new_lower_zero_bound_prediction(TruncatedSeq const* seq) const { + return _predictor->get_new_lower_zero_bound_prediction(seq); } int G1Analytics::num_alloc_rate_ms() const { @@ -144,17 +151,9 @@ void G1Analytics::compute_pause_time_ratio(double interval_ms, double pause_time_ms) { _recent_avg_pause_time_ratio = _recent_gc_times_ms->sum() / interval_ms; - if (_recent_avg_pause_time_ratio < 0.0 || - (_recent_avg_pause_time_ratio - 1.0 > 0.0)) { - // Clip ratio between 0.0 and 1.0, and continue. This will be fixed in - // CR 6902692 by redoing the manner in which the ratio is incrementally computed. - if (_recent_avg_pause_time_ratio < 0.0) { - _recent_avg_pause_time_ratio = 0.0; - } else { - assert(_recent_avg_pause_time_ratio - 1.0 > 0.0, "Ctl-point invariant"); - _recent_avg_pause_time_ratio = 1.0; - } - } + + // Clamp the result to [0.0 ... 1.0] to filter out nonsensical results due to bad input. + _recent_avg_pause_time_ratio = clamp(_recent_avg_pause_time_ratio, 0.0, 1.0); // Compute the ratio of just this last pause time to the entire time range stored // in the vectors. 
Comparing this pause to the entire range, rather than only the @@ -173,27 +172,27 @@ _logged_cards_rate_ms_seq->add(cards_per_ms); } -void G1Analytics::report_cost_per_logged_card_ms(double cost_per_logged_card_ms) { - _cost_per_logged_card_ms_seq->add(cost_per_logged_card_ms); -} - -void G1Analytics::report_cost_scan_hcc(double cost_scan_hcc) { - _cost_scan_hcc_seq->add(cost_scan_hcc); -} - -void G1Analytics::report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc) { +void G1Analytics::report_cost_per_card_scan_ms(double cost_per_card_ms, bool for_young_gc) { if (for_young_gc) { - _young_only_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms); + _young_cost_per_card_scan_ms_seq->add(cost_per_card_ms); } else { - _mixed_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms); + _mixed_cost_per_card_scan_ms_seq->add(cost_per_card_ms); } } -void G1Analytics::report_cards_per_entry_ratio(double cards_per_entry_ratio, bool for_young_gc) { +void G1Analytics::report_cost_per_card_merge_ms(double cost_per_card_ms, bool for_young_gc) { if (for_young_gc) { - _young_cards_per_entry_ratio_seq->add(cards_per_entry_ratio); + _young_cost_per_card_merge_ms_seq->add(cost_per_card_ms); } else { - _mixed_cards_per_entry_ratio_seq->add(cards_per_entry_ratio); + _mixed_cost_per_card_merge_ms_seq->add(cost_per_card_ms); + } +} + +void G1Analytics::report_card_merge_to_scan_ratio(double merge_to_scan_ratio, bool for_young_gc) { + if (for_young_gc) { + _young_card_merge_to_scan_ratio_seq->add(merge_to_scan_ratio); + } else { + _mixed_card_merge_to_scan_ratio_seq->add(merge_to_scan_ratio); } } @@ -205,7 +204,7 @@ if (mark_or_rebuild_in_progress) { _cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms); } else { - _cost_per_byte_ms_seq->add(cost_per_byte_ms); + _copy_cost_per_byte_ms_seq->add(cost_per_byte_ms); } } @@ -230,70 +229,50 @@ } double G1Analytics::predict_alloc_rate_ms() const { - return get_new_prediction(_alloc_rate_ms_seq); + return get_new_lower_zero_bound_prediction(_alloc_rate_ms_seq); } double G1Analytics::predict_concurrent_refine_rate_ms() const { - return get_new_prediction(_concurrent_refine_rate_ms_seq); + return get_new_lower_zero_bound_prediction(_concurrent_refine_rate_ms_seq); } double G1Analytics::predict_logged_cards_rate_ms() const { - return get_new_prediction(_logged_cards_rate_ms_seq); + return get_new_lower_zero_bound_prediction(_logged_cards_rate_ms_seq); } -double G1Analytics::predict_cost_per_logged_card_ms() const { - return get_new_prediction(_cost_per_logged_card_ms_seq); +double G1Analytics::predict_young_card_merge_to_scan_ratio() const { + return get_new_unit_prediction(_young_card_merge_to_scan_ratio_seq); } -double G1Analytics::predict_scan_hcc_ms() const { - return get_new_prediction(_cost_scan_hcc_seq); -} - -double G1Analytics::predict_rs_update_time_ms(size_t pending_cards) const { - return pending_cards * predict_cost_per_logged_card_ms() + predict_scan_hcc_ms(); -} - -double G1Analytics::predict_young_cards_per_entry_ratio() const { - return get_new_prediction(_young_cards_per_entry_ratio_seq); -} - -double G1Analytics::predict_mixed_cards_per_entry_ratio() const { - if (_mixed_cards_per_entry_ratio_seq->num() < 2) { - return predict_young_cards_per_entry_ratio(); +size_t G1Analytics::predict_scan_card_num(size_t rs_length, bool for_young_gc) const { + if (for_young_gc || !enough_samples_available(_mixed_card_merge_to_scan_ratio_seq)) { + return (size_t)(rs_length * predict_young_card_merge_to_scan_ratio()); } else { - return 
get_new_prediction(_mixed_cards_per_entry_ratio_seq); + return (size_t)(rs_length * get_new_unit_prediction(_mixed_card_merge_to_scan_ratio_seq)); } } -size_t G1Analytics::predict_card_num(size_t rs_length, bool for_young_gc) const { - if (for_young_gc) { - return (size_t) (rs_length * predict_young_cards_per_entry_ratio()); +double G1Analytics::predict_card_merge_time_ms(size_t card_num, bool for_young_gc) const { + if (for_young_gc || !enough_samples_available(_mixed_cost_per_card_merge_ms_seq)) { + return card_num * get_new_lower_zero_bound_prediction(_young_cost_per_card_merge_ms_seq); } else { - return (size_t) (rs_length * predict_mixed_cards_per_entry_ratio()); + return card_num * get_new_lower_zero_bound_prediction(_mixed_cost_per_card_merge_ms_seq); } } -double G1Analytics::predict_rs_scan_time_ms(size_t card_num, bool for_young_gc) const { - if (for_young_gc) { - return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq); +double G1Analytics::predict_card_scan_time_ms(size_t card_num, bool for_young_gc) const { + if (for_young_gc || !enough_samples_available(_mixed_cost_per_card_scan_ms_seq)) { + return card_num * get_new_lower_zero_bound_prediction(_young_cost_per_card_scan_ms_seq); } else { - return predict_mixed_rs_scan_time_ms(card_num); - } -} - -double G1Analytics::predict_mixed_rs_scan_time_ms(size_t card_num) const { - if (_mixed_cost_per_remset_card_ms_seq->num() < 3) { - return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq); - } else { - return card_num * get_new_prediction(_mixed_cost_per_remset_card_ms_seq); + return card_num * get_new_lower_zero_bound_prediction(_mixed_cost_per_card_scan_ms_seq); } } double G1Analytics::predict_object_copy_time_ms_during_cm(size_t bytes_to_copy) const { - if (_cost_per_byte_ms_during_cm_seq->num() < 3) { - return (1.1 * bytes_to_copy) * get_new_prediction(_cost_per_byte_ms_seq); + if (!enough_samples_available(_cost_per_byte_ms_during_cm_seq)) { + return (1.1 * bytes_to_copy) * get_new_lower_zero_bound_prediction(_copy_cost_per_byte_ms_seq); } else { - return bytes_to_copy * get_new_prediction(_cost_per_byte_ms_during_cm_seq); + return bytes_to_copy * get_new_lower_zero_bound_prediction(_cost_per_byte_ms_during_cm_seq); } } @@ -301,36 +280,32 @@ if (during_concurrent_mark) { return predict_object_copy_time_ms_during_cm(bytes_to_copy); } else { - return bytes_to_copy * get_new_prediction(_cost_per_byte_ms_seq); + return bytes_to_copy * get_new_lower_zero_bound_prediction(_copy_cost_per_byte_ms_seq); } } -double G1Analytics::predict_cost_per_byte_ms() const { - return get_new_prediction(_cost_per_byte_ms_seq); -} - double G1Analytics::predict_constant_other_time_ms() const { - return get_new_prediction(_constant_other_time_ms_seq); + return get_new_lower_zero_bound_prediction(_constant_other_time_ms_seq); } double G1Analytics::predict_young_other_time_ms(size_t young_num) const { - return young_num * get_new_prediction(_young_other_cost_per_region_ms_seq); + return young_num * get_new_lower_zero_bound_prediction(_young_other_cost_per_region_ms_seq); } double G1Analytics::predict_non_young_other_time_ms(size_t non_young_num) const { - return non_young_num * get_new_prediction(_non_young_other_cost_per_region_ms_seq); + return non_young_num * get_new_lower_zero_bound_prediction(_non_young_other_cost_per_region_ms_seq); } double G1Analytics::predict_remark_time_ms() const { - return get_new_prediction(_concurrent_mark_remark_times_ms); + return 
get_new_lower_zero_bound_prediction(_concurrent_mark_remark_times_ms); } double G1Analytics::predict_cleanup_time_ms() const { - return get_new_prediction(_concurrent_mark_cleanup_times_ms); + return get_new_lower_zero_bound_prediction(_concurrent_mark_cleanup_times_ms); } size_t G1Analytics::predict_rs_length() const { - return get_new_size_prediction(_rs_length_seq) + get_new_prediction(_rs_length_diff_seq); + return get_new_size_prediction(_rs_length_seq) + get_new_size_prediction(_rs_length_diff_seq); } size_t G1Analytics::predict_pending_cards() const {
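The renamed predictors above make their bounds part of the contract: ratio predictions are confined to the unit interval and time/size predictions are floored at zero, where the old get_new_prediction was unbounded. The underlying predictor is not part of this hunk, so the following is only a sketch of what such bounding plausibly looks like (the raw model is a placeholder):

    #include <algorithm>

    // Placeholder for the raw predictor; in HotSpot this comes from
    // G1Predictions over a TruncatedSeq of samples.
    double get_new_prediction_raw(double mean, double stddev) {
      return mean + 2.0 * stddev;
    }

    double get_new_lower_zero_bound_prediction(double mean, double stddev) {
      return std::max(0.0, get_new_prediction_raw(mean, stddev));
    }

    double get_new_unit_prediction(double mean, double stddev) {
      return std::clamp(get_new_prediction_raw(mean, stddev), 0.0, 1.0);
    }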
--- a/src/hotspot/share/gc/g1/g1Analytics.hpp Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1Analytics.hpp Mon Dec 02 12:01:40 2019 +0530
@@ -48,13 +48,21 @@
   TruncatedSeq* _rs_length_diff_seq;
   TruncatedSeq* _concurrent_refine_rate_ms_seq;
   TruncatedSeq* _logged_cards_rate_ms_seq;
-  TruncatedSeq* _cost_per_logged_card_ms_seq;
-  TruncatedSeq* _cost_scan_hcc_seq;
-  TruncatedSeq* _young_cards_per_entry_ratio_seq;
-  TruncatedSeq* _mixed_cards_per_entry_ratio_seq;
-  TruncatedSeq* _young_only_cost_per_remset_card_ms_seq;
-  TruncatedSeq* _mixed_cost_per_remset_card_ms_seq;
-  TruncatedSeq* _cost_per_byte_ms_seq;
+  // The ratio between the number of merged cards and actually scanned cards, for
+  // young-only and mixed gcs.
+  TruncatedSeq* _young_card_merge_to_scan_ratio_seq;
+  TruncatedSeq* _mixed_card_merge_to_scan_ratio_seq;
+
+  // The cost to scan a card during young-only and mixed gcs in ms.
+  TruncatedSeq* _young_cost_per_card_scan_ms_seq;
+  TruncatedSeq* _mixed_cost_per_card_scan_ms_seq;
+
+  // The cost to merge a card during young-only and mixed gcs in ms.
+  TruncatedSeq* _young_cost_per_card_merge_ms_seq;
+  TruncatedSeq* _mixed_cost_per_card_merge_ms_seq;
+
+  // The cost to copy a byte in ms.
+  TruncatedSeq* _copy_cost_per_byte_ms_seq;
   TruncatedSeq* _constant_other_time_ms_seq;
   TruncatedSeq* _young_other_cost_per_region_ms_seq;
   TruncatedSeq* _non_young_other_cost_per_region_ms_seq;
@@ -72,8 +80,13 @@
   double _recent_avg_pause_time_ratio;
   double _last_pause_time_ratio;
 
-  double get_new_prediction(TruncatedSeq const* seq) const;
+  // Returns whether the sequence has enough samples to get a "good" prediction.
+  // The constant used is arbitrary but "small".
+  bool enough_samples_available(TruncatedSeq const* seq) const;
+
+  double get_new_unit_prediction(TruncatedSeq const* seq) const;
   size_t get_new_size_prediction(TruncatedSeq const* seq) const;
+  double get_new_lower_zero_bound_prediction(TruncatedSeq const* seq) const;
 
 public:
   G1Analytics(const G1Predictions* predictor);
@@ -103,10 +116,9 @@
   void report_alloc_rate_ms(double alloc_rate);
   void report_concurrent_refine_rate_ms(double cards_per_ms);
   void report_logged_cards_rate_ms(double cards_per_ms);
-  void report_cost_per_logged_card_ms(double cost_per_logged_card_ms);
-  void report_cost_scan_hcc(double cost_scan_hcc);
-  void report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc);
-  void report_cards_per_entry_ratio(double cards_per_entry_ratio, bool for_young_gc);
+  void report_cost_per_card_scan_ms(double cost_per_remset_card_ms, bool for_young_gc);
+  void report_cost_per_card_merge_ms(double cost_per_card_ms, bool for_young_gc);
+  void report_card_merge_to_scan_ratio(double cards_per_entry_ratio, bool for_young_gc);
   void report_rs_length_diff(double rs_length_diff);
   void report_cost_per_byte_ms(double cost_per_byte_ms, bool mark_or_rebuild_in_progress);
   void report_young_other_cost_per_region_ms(double other_cost_per_region_ms);
@@ -120,21 +132,14 @@
   double predict_concurrent_refine_rate_ms() const;
   double predict_logged_cards_rate_ms() const;
-  double predict_cost_per_logged_card_ms() const;
+  double predict_young_card_merge_to_scan_ratio() const;
 
-  double predict_scan_hcc_ms() const;
+  double predict_mixed_card_merge_to_scan_ratio() const;
 
-  double predict_rs_update_time_ms(size_t pending_cards) const;
+  size_t predict_scan_card_num(size_t rs_length, bool for_young_gc) const;
 
-  double predict_young_cards_per_entry_ratio() const;
-
-  double predict_mixed_cards_per_entry_ratio() const;
-
-  size_t predict_card_num(size_t rs_length, bool for_young_gc) const;
-
-  double predict_rs_scan_time_ms(size_t card_num, bool for_young_gc) const;
-
-  double predict_mixed_rs_scan_time_ms(size_t card_num) const;
+  double predict_card_merge_time_ms(size_t card_num, bool for_young_gc) const;
+  double predict_card_scan_time_ms(size_t card_num, bool for_young_gc) const;
 
   double predict_object_copy_time_ms_during_cm(size_t bytes_to_copy) const;
@@ -153,8 +158,6 @@
   size_t predict_rs_length() const;
   size_t predict_pending_cards() const;
 
-  double predict_cost_per_byte_ms() const;
-
   // Add a new GC of the given duration and end time to the record.
   void update_recent_gc_times(double end_time_sec, double elapsed_ms);
   void compute_pause_time_ratio(double interval_ms, double pause_time_ms);
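Taken together, the renamed members split the old per-remset-card cost into an explicit merge step and a scan step: the merge-to-scan ratio predicts how many of the merged cards actually get scanned, and separate cost sequences price each phase. A sketch of how the pieces compose for a young-only pause (illustrative only; the real arithmetic is spread across g1Analytics.cpp and g1Policy.cpp):

    // Assumed composition: merged cards come from the remset plus log buffers;
    // the merge-to-scan ratio turns the remset length into a scanned-card estimate.
    size_t scan_cards = analytics->predict_scan_card_num(rs_length, true /* for_young_gc */);
    double remset_ms  = analytics->predict_card_merge_time_ms(rs_length + pending_cards, true /* for_young_gc */)
                      + analytics->predict_card_scan_time_ms(scan_cards, true /* for_young_gc */);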
--- a/src/hotspot/share/gc/g1/g1BarrierSet.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1BarrierSet.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -37,6 +37,7 @@ #include "oops/oop.inline.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/mutexLocker.hpp" +#include "runtime/orderAccess.hpp" #include "runtime/thread.inline.hpp" #include "utilities/macros.hpp" #ifdef COMPILER1
--- a/src/hotspot/share/gc/g1/g1BlockOffsetTable.inline.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1BlockOffsetTable.inline.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -55,7 +55,7 @@ } void G1BlockOffsetTable::set_offset_array_raw(size_t index, u_char offset) { - Atomic::store(offset, &_offset_array[index]); + Atomic::store(&_offset_array[index], offset); } void G1BlockOffsetTable::set_offset_array(size_t index, u_char offset) {
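This one-line change is an instance of a migration applied throughout the changeset: HotSpot's Atomic operations move to a destination-first parameter order, and the acquire/release wrappers move from OrderAccess to Atomic. Summarizing the conventions as the hunks below exhibit them (signatures abbreviated; the templates live in runtime/atomic.hpp):

    Atomic::store(&x, v);                       // was: Atomic::store(v, &x)
    Atomic::add(&ctr, 1u);                      // was: Atomic::add(1u, &ctr); returns the updated value
    Atomic::cmpxchg(&slot, compare, exchange);  // was: Atomic::cmpxchg(exchange, &slot, compare); returns the old value
    Atomic::load_acquire(&p);                   // was: OrderAccess::load_acquire(&p)
    Atomic::release_store(&p, q);               // was: OrderAccess::release_store(&p, q)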
--- a/src/hotspot/share/gc/g1/g1CardTable.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1CardTable.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -27,8 +27,6 @@ #include "gc/g1/g1CollectedHeap.inline.hpp" #include "gc/shared/memset_with_concurrent_readers.hpp" #include "logging/log.hpp" -#include "runtime/atomic.hpp" -#include "runtime/orderAccess.hpp" void G1CardTable::g1_mark_as_young(const MemRegion& mr) { CardValue *const first = byte_for(mr.start());
--- a/src/hotspot/share/gc/g1/g1CardTable.hpp Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1CardTable.hpp Mon Dec 02 12:01:40 2019 +0530
@@ -92,12 +92,16 @@
     return pointer_delta(p, _byte_map, sizeof(CardValue));
   }
 
-  // Mark the given card as Dirty if it is Clean.
-  inline void mark_clean_as_dirty(size_t card_index);
+  // Mark the given card as Dirty if it is Clean. Returns the number of cards
+  // newly dirtied (0 or 1). This result may be inaccurate as the check and
+  // the dirtying are not performed atomically.
+  inline size_t mark_clean_as_dirty(size_t card_index);
 
   // Change Clean cards in a (large) area on the card table as Dirty, preserving
   // already scanned cards. Assumes that most cards in that area are Clean.
-  inline void mark_region_dirty(size_t start_card_index, size_t num_cards);
+  // Returns the number of cards newly dirtied. This result may be inaccurate
+  // as the check and the dirtying are not performed atomically.
+  inline size_t mark_region_dirty(size_t start_card_index, size_t num_cards);
 
   // Mark the given range of cards as Scanned. All of these cards must be Dirty.
   inline void mark_as_scanned(size_t start_card_index, size_t num_cards);
--- a/src/hotspot/share/gc/g1/g1CardTable.inline.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1CardTable.inline.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -33,17 +33,21 @@ return (uint)(card_idx >> (HeapRegion::LogOfHRGrainBytes - card_shift)); } -inline void G1CardTable::mark_clean_as_dirty(size_t card_index) { +inline size_t G1CardTable::mark_clean_as_dirty(size_t card_index) { CardValue value = _byte_map[card_index]; if (value == clean_card_val()) { _byte_map[card_index] = dirty_card_val(); + return 1; } + return 0; } -inline void G1CardTable::mark_region_dirty(size_t start_card_index, size_t num_cards) { +inline size_t G1CardTable::mark_region_dirty(size_t start_card_index, size_t num_cards) { assert(is_aligned(start_card_index, sizeof(size_t)), "Start card index must be aligned."); assert(is_aligned(num_cards, sizeof(size_t)), "Number of cards to change must be evenly divisible."); + size_t result = 0; + size_t const num_chunks = num_cards / sizeof(size_t); size_t* cur_word = (size_t*)&_byte_map[start_card_index]; @@ -52,6 +56,7 @@ size_t value = *cur_word; if (value == WordAllClean) { *cur_word = WordAllDirty; + result += sizeof(value); } else if (value == WordAllDirty) { // do nothing. } else { @@ -61,12 +66,15 @@ CardValue value = *cur; if (value == clean_card_val()) { *cur = dirty_card_val(); + result++; } cur++; } } cur_word++; } + + return result; } inline void G1CardTable::mark_as_scanned(size_t start_card_index, size_t num_cards) {
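The new return values let callers attribute how many cards the merge phase newly dirtied. A hypothetical caller pattern (the real call sites are in g1RemSet.cpp, outside this excerpt) feeding the MergeRSDirtyCards work item that g1GCPhaseTimes.cpp wires up further down:

    // Hypothetical sketch: per-worker accounting of newly dirtied cards.
    size_t dirtied = card_table->mark_region_dirty(start_card_index, num_cards);
    phase_times->record_or_add_thread_work_item(G1GCPhaseTimes::MergeRS,
                                                worker_id,
                                                dirtied,
                                                G1GCPhaseTimes::MergeRSDirtyCards);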
--- a/src/hotspot/share/gc/g1/g1CodeCacheRemSet.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1CodeCacheRemSet.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -32,6 +32,7 @@ #include "memory/iterator.hpp" #include "oops/access.inline.hpp" #include "oops/oop.inline.hpp" +#include "runtime/atomic.hpp" #include "utilities/hashtable.inline.hpp" #include "utilities/stack.inline.hpp" @@ -158,19 +159,19 @@ } G1CodeRootSetTable* G1CodeRootSet::load_acquire_table() { - return OrderAccess::load_acquire(&_table); + return Atomic::load_acquire(&_table); } void G1CodeRootSet::allocate_small_table() { G1CodeRootSetTable* temp = new G1CodeRootSetTable(SmallSize); - OrderAccess::release_store(&_table, temp); + Atomic::release_store(&_table, temp); } void G1CodeRootSetTable::purge_list_append(G1CodeRootSetTable* table) { for (;;) { table->_purge_next = _purge_list; - G1CodeRootSetTable* old = Atomic::cmpxchg(table, &_purge_list, table->_purge_next); + G1CodeRootSetTable* old = Atomic::cmpxchg(&_purge_list, table->_purge_next, table); if (old == table->_purge_next) { break; } @@ -194,7 +195,7 @@ G1CodeRootSetTable::purge_list_append(_table); - OrderAccess::release_store(&_table, temp); + Atomic::release_store(&_table, temp); } void G1CodeRootSet::purge() {
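Besides the argument-order swap, this file shows the acquire/release pairing that makes the lock-free table swap safe. Reduced to its essence (a sketch, not the full resize/purge protocol):

    // Publisher: fully construct the table, then release-store the pointer so
    // the table's contents become visible no later than the pointer itself.
    G1CodeRootSetTable* temp = new G1CodeRootSetTable(SmallSize);
    Atomic::release_store(&_table, temp);

    // Consumer: load-acquire pairs with the release above, so dereferencing
    // the result observes a fully initialized table.
    G1CodeRootSetTable* t = Atomic::load_acquire(&_table);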
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -1,4 +1,4 @@ -/* + /* * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -1516,6 +1516,7 @@ _allocator(NULL), _verifier(NULL), _summary_bytes_used(0), + _bytes_used_during_gc(0), _archive_allocator(NULL), _survivor_evac_stats("Young", YoungPLABSize, PLABWeight), _old_evac_stats("Old", OldPLABSize, PLABWeight), @@ -2776,112 +2777,6 @@ G1EagerReclaimHumongousObjects && rem_set->is_empty(); } -class RegisterRegionsWithRegionAttrTableClosure : public HeapRegionClosure { - private: - size_t _total_humongous; - size_t _candidate_humongous; - - bool humongous_region_is_candidate(G1CollectedHeap* g1h, HeapRegion* region) const { - assert(region->is_starts_humongous(), "Must start a humongous object"); - - oop obj = oop(region->bottom()); - - // Dead objects cannot be eager reclaim candidates. Due to class - // unloading it is unsafe to query their classes so we return early. - if (g1h->is_obj_dead(obj, region)) { - return false; - } - - // If we do not have a complete remembered set for the region, then we can - // not be sure that we have all references to it. - if (!region->rem_set()->is_complete()) { - return false; - } - // Candidate selection must satisfy the following constraints - // while concurrent marking is in progress: - // - // * In order to maintain SATB invariants, an object must not be - // reclaimed if it was allocated before the start of marking and - // has not had its references scanned. Such an object must have - // its references (including type metadata) scanned to ensure no - // live objects are missed by the marking process. Objects - // allocated after the start of concurrent marking don't need to - // be scanned. - // - // * An object must not be reclaimed if it is on the concurrent - // mark stack. Objects allocated after the start of concurrent - // marking are never pushed on the mark stack. - // - // Nominating only objects allocated after the start of concurrent - // marking is sufficient to meet both constraints. This may miss - // some objects that satisfy the constraints, but the marking data - // structures don't support efficiently performing the needed - // additional tests or scrubbing of the mark stack. - // - // However, we presently only nominate is_typeArray() objects. - // A humongous object containing references induces remembered - // set entries on other regions. In order to reclaim such an - // object, those remembered sets would need to be cleaned up. - // - // We also treat is_typeArray() objects specially, allowing them - // to be reclaimed even if allocated before the start of - // concurrent mark. For this we rely on mark stack insertion to - // exclude is_typeArray() objects, preventing reclaiming an object - // that is in the mark stack. We also rely on the metadata for - // such objects to be built-in and so ensured to be kept live. - // Frequent allocation and drop of large binary blobs is an - // important use case for eager reclaim, and this special handling - // may reduce needed headroom. 
- - return obj->is_typeArray() && - g1h->is_potential_eager_reclaim_candidate(region); - } - - public: - RegisterRegionsWithRegionAttrTableClosure() - : _total_humongous(0), - _candidate_humongous(0) { - } - - virtual bool do_heap_region(HeapRegion* r) { - G1CollectedHeap* g1h = G1CollectedHeap::heap(); - - if (!r->is_starts_humongous()) { - g1h->register_region_with_region_attr(r); - return false; - } - - bool is_candidate = humongous_region_is_candidate(g1h, r); - uint rindex = r->hrm_index(); - g1h->set_humongous_reclaim_candidate(rindex, is_candidate); - if (is_candidate) { - g1h->register_humongous_region_with_region_attr(rindex); - _candidate_humongous++; - // We will later handle the remembered sets of these regions. - } else { - g1h->register_region_with_region_attr(r); - } - _total_humongous++; - - return false; - } - - size_t total_humongous() const { return _total_humongous; } - size_t candidate_humongous() const { return _candidate_humongous; } -}; - -void G1CollectedHeap::register_regions_with_region_attr() { - Ticks start = Ticks::now(); - - RegisterRegionsWithRegionAttrTableClosure cl; - heap_region_iterate(&cl); - - phase_times()->record_register_regions((Ticks::now() - start).seconds() * 1000.0, - cl.total_humongous(), - cl.candidate_humongous()); - _has_humongous_reclaim_candidates = cl.candidate_humongous() > 0; -} - #ifndef PRODUCT void G1CollectedHeap::verify_region_attr_remset_update() { class VerifyRegionAttrRemSet : public HeapRegionClosure { @@ -3138,7 +3033,6 @@ collector_state()->yc_type() == Mixed /* all_memory_pools_affected */); G1HeapTransition heap_transition(this); - size_t heap_used_bytes_before_gc = used(); { IsGCActiveMark x; @@ -3211,7 +3105,7 @@ double sample_end_time_sec = os::elapsedTime(); double pause_time_ms = (sample_end_time_sec - sample_start_time_sec) * MILLIUNITS; - policy()->record_collection_pause_end(pause_time_ms, heap_used_bytes_before_gc); + policy()->record_collection_pause_end(pause_time_ms); } verify_after_young_collection(verify_type); @@ -3377,7 +3271,7 @@ BufferNode* next = Atomic::load(&_nodes); while (next != NULL) { BufferNode* node = next; - next = Atomic::cmpxchg(node->next(), &_nodes, node); + next = Atomic::cmpxchg(&_nodes, node, node->next()); if (next == node) { cl->apply_to_buffer(node, buffer_size, worker_id); next = node->next(); @@ -3694,12 +3588,153 @@ } void G1CollectedHeap::merge_per_thread_state_info(G1ParScanThreadStateSet* per_thread_states) { - double merge_pss_time_start = os::elapsedTime(); + Ticks start = Ticks::now(); per_thread_states->flush(); - phase_times()->record_merge_pss_time_ms((os::elapsedTime() - merge_pss_time_start) * 1000.0); + phase_times()->record_or_add_time_secs(G1GCPhaseTimes::MergePSS, 0 /* worker_id */, (Ticks::now() - start).seconds()); } +class G1PrepareEvacuationTask : public AbstractGangTask { + class G1PrepareRegionsClosure : public HeapRegionClosure { + G1CollectedHeap* _g1h; + G1PrepareEvacuationTask* _parent_task; + size_t _worker_humongous_total; + size_t _worker_humongous_candidates; + + bool humongous_region_is_candidate(HeapRegion* region) const { + assert(region->is_starts_humongous(), "Must start a humongous object"); + + oop obj = oop(region->bottom()); + + // Dead objects cannot be eager reclaim candidates. Due to class + // unloading it is unsafe to query their classes so we return early. 
+ if (_g1h->is_obj_dead(obj, region)) { + return false; + } + + // If we do not have a complete remembered set for the region, then we can + // not be sure that we have all references to it. + if (!region->rem_set()->is_complete()) { + return false; + } + // Candidate selection must satisfy the following constraints + // while concurrent marking is in progress: + // + // * In order to maintain SATB invariants, an object must not be + // reclaimed if it was allocated before the start of marking and + // has not had its references scanned. Such an object must have + // its references (including type metadata) scanned to ensure no + // live objects are missed by the marking process. Objects + // allocated after the start of concurrent marking don't need to + // be scanned. + // + // * An object must not be reclaimed if it is on the concurrent + // mark stack. Objects allocated after the start of concurrent + // marking are never pushed on the mark stack. + // + // Nominating only objects allocated after the start of concurrent + // marking is sufficient to meet both constraints. This may miss + // some objects that satisfy the constraints, but the marking data + // structures don't support efficiently performing the needed + // additional tests or scrubbing of the mark stack. + // + // However, we presently only nominate is_typeArray() objects. + // A humongous object containing references induces remembered + // set entries on other regions. In order to reclaim such an + // object, those remembered sets would need to be cleaned up. + // + // We also treat is_typeArray() objects specially, allowing them + // to be reclaimed even if allocated before the start of + // concurrent mark. For this we rely on mark stack insertion to + // exclude is_typeArray() objects, preventing reclaiming an object + // that is in the mark stack. We also rely on the metadata for + // such objects to be built-in and so ensured to be kept live. + // Frequent allocation and drop of large binary blobs is an + // important use case for eager reclaim, and this special handling + // may reduce needed headroom. + + return obj->is_typeArray() && + _g1h->is_potential_eager_reclaim_candidate(region); + } + + public: + G1PrepareRegionsClosure(G1CollectedHeap* g1h, G1PrepareEvacuationTask* parent_task) : + _g1h(g1h), + _parent_task(parent_task), + _worker_humongous_total(0), + _worker_humongous_candidates(0) { } + + ~G1PrepareRegionsClosure() { + _parent_task->add_humongous_candidates(_worker_humongous_candidates); + _parent_task->add_humongous_total(_worker_humongous_total); + } + + virtual bool do_heap_region(HeapRegion* hr) { + // First prepare the region for scanning + _g1h->rem_set()->prepare_region_for_scan(hr); + + // Now check if region is a humongous candidate + if (!hr->is_starts_humongous()) { + _g1h->register_region_with_region_attr(hr); + return false; + } + + uint index = hr->hrm_index(); + if (humongous_region_is_candidate(hr)) { + _g1h->set_humongous_reclaim_candidate(index, true); + _g1h->register_humongous_region_with_region_attr(index); + _worker_humongous_candidates++; + // We will later handle the remembered sets of these regions. 
+ } else { + _g1h->set_humongous_reclaim_candidate(index, false); + _g1h->register_region_with_region_attr(hr); + } + _worker_humongous_total++; + + return false; + } + }; + + G1CollectedHeap* _g1h; + HeapRegionClaimer _claimer; + volatile size_t _humongous_total; + volatile size_t _humongous_candidates; +public: + G1PrepareEvacuationTask(G1CollectedHeap* g1h) : + AbstractGangTask("Prepare Evacuation"), + _g1h(g1h), + _claimer(_g1h->workers()->active_workers()), + _humongous_total(0), + _humongous_candidates(0) { } + + ~G1PrepareEvacuationTask() { + _g1h->set_has_humongous_reclaim_candidate(_humongous_candidates > 0); + } + + void work(uint worker_id) { + G1PrepareRegionsClosure cl(_g1h, this); + _g1h->heap_region_par_iterate_from_worker_offset(&cl, &_claimer, worker_id); + } + + void add_humongous_candidates(size_t candidates) { + Atomic::add(&_humongous_candidates, candidates); + } + + void add_humongous_total(size_t total) { + Atomic::add(&_humongous_total, total); + } + + size_t humongous_candidates() { + return _humongous_candidates; + } + + size_t humongous_total() { + return _humongous_total; + } +}; + void G1CollectedHeap::pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* per_thread_states) { + _bytes_used_during_gc = 0; + _expand_heap_after_alloc_failure = true; _evacuation_failed = false; @@ -3716,9 +3751,16 @@ phase_times()->record_prepare_heap_roots_time_ms((Ticks::now() - start).seconds() * 1000.0); } - register_regions_with_region_attr(); + { + G1PrepareEvacuationTask g1_prep_task(this); + Tickspan task_time = run_task(&g1_prep_task); + + phase_times()->record_register_regions(task_time.seconds() * 1000.0, + g1_prep_task.humongous_total(), + g1_prep_task.humongous_candidates()); + } + assert(_verifier->check_region_attr_table(), "Inconsistency in the region attributes table."); - _preserved_marks_set.assert_empty(); #if COMPILER2_OR_JVMCI @@ -3764,9 +3806,6 @@ Tickspan evac_time = (Ticks::now() - start); p->record_or_add_time_secs(objcopy_phase, worker_id, evac_time.seconds() - cl.term_time()); - p->record_or_add_thread_work_item(objcopy_phase, worker_id, pss->lab_waste_words() * HeapWordSize, G1GCPhaseTimes::ObjCopyLABWaste); - p->record_or_add_thread_work_item(objcopy_phase, worker_id, pss->lab_undo_waste_words() * HeapWordSize, G1GCPhaseTimes::ObjCopyLABUndoWaste); - if (termination_phase == G1GCPhaseTimes::Termination) { p->record_time_secs(termination_phase, worker_id, cl.term_time()); p->record_thread_work_item(termination_phase, worker_id, cl.term_attempts()); @@ -3943,6 +3982,8 @@ void G1CollectedHeap::post_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1RedirtyCardsQueueSet* rdcqs, G1ParScanThreadStateSet* per_thread_states) { + G1GCPhaseTimes* p = phase_times(); + rem_set()->cleanup_after_scan_heap_roots(); // Process any discovered reference objects - we have @@ -3955,16 +3996,15 @@ G1STWIsAliveClosure is_alive(this); G1KeepAliveClosure keep_alive(this); - WeakProcessor::weak_oops_do(workers(), &is_alive, &keep_alive, - phase_times()->weak_phase_times()); + WeakProcessor::weak_oops_do(workers(), &is_alive, &keep_alive, p->weak_phase_times()); if (G1StringDedup::is_enabled()) { double string_dedup_time_ms = os::elapsedTime(); - string_dedup_cleaning(&is_alive, &keep_alive, phase_times()); + string_dedup_cleaning(&is_alive, &keep_alive, p); double string_cleanup_time_ms = (os::elapsedTime() - string_dedup_time_ms) * 1000.0; - phase_times()->record_string_deduplication_time(string_cleanup_time_ms); + 
p->record_string_deduplication_time(string_cleanup_time_ms); } _allocator->release_gc_alloc_regions(evacuation_info); @@ -3977,7 +4017,7 @@ double recalculate_used_start = os::elapsedTime(); set_used(recalculate_used()); - phase_times()->record_evac_fail_recalc_used_time((os::elapsedTime() - recalculate_used_start) * 1000.0); + p->record_evac_fail_recalc_used_time((os::elapsedTime() - recalculate_used_start) * 1000.0); if (_archive_allocator != NULL) { _archive_allocator->clear_used(); @@ -3989,8 +4029,8 @@ } } else { // The "used" of the the collection set have already been subtracted - // when they were freed. Add in the bytes evacuated. - increase_used(policy()->bytes_copied_during_gc()); + // when they were freed. Add in the bytes used. + increase_used(_bytes_used_during_gc); } _preserved_marks_set.assert_empty(); @@ -4014,7 +4054,7 @@ record_obj_copy_mem_stats(); evacuation_info.set_collectionset_used_before(collection_set()->bytes_used_before()); - evacuation_info.set_bytes_copied(policy()->bytes_copied_during_gc()); + evacuation_info.set_bytes_used(_bytes_used_during_gc); #if COMPILER2_OR_JVMCI double start = os::elapsedTime(); @@ -4226,7 +4266,7 @@ HeapRegion* r = g1h->region_at(region_idx); assert(!g1h->is_on_master_free_list(r), "sanity"); - Atomic::add(r->rem_set()->occupied_locked(), &_rs_length); + Atomic::add(&_rs_length, r->rem_set()->occupied_locked()); if (!is_young) { g1h->hot_card_cache()->reset_card_counts(r); @@ -4290,7 +4330,7 @@ // Claim serial work. if (_serial_work_claim == 0) { - jint value = Atomic::add(1, &_serial_work_claim) - 1; + jint value = Atomic::add(&_serial_work_claim, 1) - 1; if (value == 0) { double serial_time = os::elapsedTime(); do_serial_work(); @@ -4305,7 +4345,7 @@ bool has_non_young_time = false; while (true) { - size_t end = Atomic::add(chunk_size(), &_parallel_work_claim); + size_t end = Atomic::add(&_parallel_work_claim, chunk_size()); size_t cur = end - chunk_size(); if (cur >= _num_work_items) { @@ -4786,7 +4826,7 @@ void G1CollectedHeap::retire_gc_alloc_region(HeapRegion* alloc_region, size_t allocated_bytes, G1HeapRegionAttr dest) { - policy()->record_bytes_copied_during_gc(allocated_bytes); + _bytes_used_during_gc += allocated_bytes; if (dest.is_old()) { old_set_add(alloc_region); } else {
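The G1PrepareEvacuationTask that replaces the serial RegisterRegionsWithRegionAttrTableClosure follows a common HotSpot parallelization shape: each worker's closure counts locally in plain fields and flushes once, in its destructor, into the task's shared counters. The idiom in isolation (illustrative names, not HotSpot declarations):

    class WorkerLocalCounter {
      volatile size_t* _shared;  // task-level total, shared by all workers
      size_t _local;             // per-worker; no synchronization on the hot path
    public:
      WorkerLocalCounter(volatile size_t* shared) : _shared(shared), _local(0) {}
      void note_candidate() { _local++; }                      // cheap, uncontended
      ~WorkerLocalCounter() { Atomic::add(_shared, _local); }  // one atomic per worker
    };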
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -214,6 +214,10 @@ void set_used(size_t bytes); + // Number of bytes used in all regions during GC. Typically changed when + // retiring a GC alloc region. + size_t _bytes_used_during_gc; + // Class that handles archive allocation ranges. G1ArchiveAllocator* _archive_allocator; @@ -589,6 +593,7 @@ // These are only valid for starts_humongous regions. inline void set_humongous_reclaim_candidate(uint region, bool value); inline bool is_humongous_reclaim_candidate(uint region); + inline void set_has_humongous_reclaim_candidate(bool value); // Remove from the reclaim candidate set. Also remove from the // collection set so that later encounters avoid the slow path. @@ -596,8 +601,7 @@ // Register the given region to be part of the collection set. inline void register_humongous_region_with_region_attr(uint index); - // Update region attributes table with information about all regions. - void register_regions_with_region_attr(); + // We register a region with the fast "in collection set" test. We // simply set to true the array slot corresponding to this region. void register_young_region_with_region_attr(HeapRegion* r) {
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -34,7 +34,6 @@ #include "gc/g1/heapRegionRemSet.hpp" #include "gc/g1/heapRegionSet.inline.hpp" #include "gc/shared/taskqueue.inline.hpp" -#include "runtime/orderAccess.hpp" G1GCPhaseTimes* G1CollectedHeap::phase_times() const { return _policy->phase_times(); @@ -181,7 +180,7 @@ void G1CollectedHeap::register_old_region_with_region_attr(HeapRegion* r) { _region_attr.set_in_old(r->hrm_index(), r->rem_set()->is_tracked()); - _rem_set->prepare_for_scan_heap_roots(r->hrm_index()); + _rem_set->exclude_region_from_scan(r->hrm_index()); } void G1CollectedHeap::register_optional_region_with_region_attr(HeapRegion* r) { @@ -299,6 +298,10 @@ return _humongous_reclaim_candidates.is_candidate(region); } +inline void G1CollectedHeap::set_has_humongous_reclaim_candidate(bool value) { + _has_humongous_reclaim_candidates = value; +} + inline void G1CollectedHeap::set_humongous_is_live(oop obj) { uint region = addr_to_region((HeapWord*)obj); // Clear the flag in the humongous_reclaim_candidates table. Also
--- a/src/hotspot/share/gc/g1/g1CollectionSet.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1CollectionSet.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -27,12 +27,14 @@ #include "gc/g1/g1CollectionSet.hpp" #include "gc/g1/g1CollectionSetCandidates.hpp" #include "gc/g1/g1CollectorState.hpp" +#include "gc/g1/g1HotCardCache.hpp" #include "gc/g1/g1ParScanThreadState.hpp" #include "gc/g1/g1Policy.hpp" #include "gc/g1/heapRegion.inline.hpp" #include "gc/g1/heapRegionRemSet.hpp" #include "gc/g1/heapRegionSet.hpp" #include "logging/logStream.hpp" +#include "runtime/orderAccess.hpp" #include "utilities/debug.hpp" #include "utilities/globalDefinitions.hpp" #include "utilities/quickSort.hpp" @@ -276,19 +278,6 @@ assert(hr->is_young(), "invariant"); assert(_inc_build_state == Active, "Precondition"); - size_t collection_set_length = _collection_set_cur_length; - // We use UINT_MAX as "invalid" marker in verification. - assert(collection_set_length < (UINT_MAX - 1), - "Collection set is too large with " SIZE_FORMAT " entries", collection_set_length); - hr->set_young_index_in_cset((uint)collection_set_length + 1); - - _collection_set_regions[collection_set_length] = hr->hrm_index(); - // Concurrent readers must observe the store of the value in the array before an - // update to the length field. - OrderAccess::storestore(); - _collection_set_cur_length++; - assert(_collection_set_cur_length <= _collection_set_max_length, "Collection set larger than maximum allowed."); - // This routine is used when: // * adding survivor regions to the incremental cset at the end of an // evacuation pause or @@ -323,6 +312,19 @@ assert(!hr->in_collection_set(), "invariant"); _g1h->register_young_region_with_region_attr(hr); + + size_t collection_set_length = _collection_set_cur_length; + // We use UINT_MAX as "invalid" marker in verification. + assert(collection_set_length < (UINT_MAX - 1), + "Collection set is too large with " SIZE_FORMAT " entries", collection_set_length); + hr->set_young_index_in_cset((uint)collection_set_length + 1); + + _collection_set_regions[collection_set_length] = hr->hrm_index(); + // Concurrent readers must observe the store of the value in the array before an + // update to the length field. + OrderAccess::storestore(); + _collection_set_cur_length++; + assert(_collection_set_cur_length <= _collection_set_max_length, "Collection set larger than maximum allowed."); } void G1CollectionSet::add_survivor_regions(HeapRegion* hr) { @@ -409,7 +411,7 @@ guarantee(target_pause_time_ms > 0.0, "target_pause_time_ms = %1.6lf should be positive", target_pause_time_ms); - size_t pending_cards = _policy->pending_cards_at_gc_start(); + size_t pending_cards = _policy->pending_cards_at_gc_start() + _g1h->hot_card_cache()->num_entries(); double base_time_ms = _policy->predict_base_elapsed_time_ms(pending_cards); double time_remaining_ms = MAX2(target_pause_time_ms - base_time_ms, 0.0);
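The block moved inside add_young_region_common() carries a small publication protocol with it, now executed after the region attribute registration. The protocol in isolation:

    _collection_set_regions[len] = hr->hrm_index();  // 1. fill the slot
    OrderAccess::storestore();                       // 2. order the slot write before...
    _collection_set_cur_length = len + 1;            // 3. ...the length that publishes it

Concurrent readers iterate indices [0, length) only, so any slot they can reach is guaranteed to be filled.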
--- a/src/hotspot/share/gc/g1/g1CollectionSetChooser.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1CollectionSetChooser.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -112,7 +112,7 @@ // Claim a new chunk, returning its bounds [from, to[. void claim_chunk(uint& from, uint& to) { - uint result = Atomic::add(_chunk_size, &_cur_claim_idx); + uint result = Atomic::add(&_cur_claim_idx, _chunk_size); assert(_max_size > result - 1, "Array too small, is %u should be %u with chunk size %u.", _max_size, result, _chunk_size); @@ -214,8 +214,8 @@ void update_totals(uint num_regions, size_t reclaimable_bytes) { if (num_regions > 0) { assert(reclaimable_bytes > 0, "invariant"); - Atomic::add(num_regions, &_num_regions_added); - Atomic::add(reclaimable_bytes, &_reclaimable_bytes_added); + Atomic::add(&_num_regions_added, num_regions); + Atomic::add(&_reclaimable_bytes_added, reclaimable_bytes); } else { assert(reclaimable_bytes == 0, "invariant"); }
--- a/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -62,6 +62,7 @@ #include "runtime/atomic.hpp" #include "runtime/handles.inline.hpp" #include "runtime/java.hpp" +#include "runtime/orderAccess.hpp" #include "runtime/prefetch.inline.hpp" #include "services/memTracker.hpp" #include "utilities/align.hpp" @@ -207,7 +208,7 @@ return NULL; } - size_t cur_idx = Atomic::add(1u, &_hwm) - 1; + size_t cur_idx = Atomic::add(&_hwm, 1u) - 1; if (cur_idx >= _chunk_capacity) { return NULL; } @@ -280,7 +281,7 @@ void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) { assert_at_safepoint(); - size_t idx = Atomic::add((size_t)1, &_num_root_regions) - 1; + size_t idx = Atomic::add(&_num_root_regions, (size_t)1) - 1; assert(idx < _max_regions, "Trying to add more root MemRegions than there is space " SIZE_FORMAT, _max_regions); assert(start != NULL && end != NULL && start <= end, "Start (" PTR_FORMAT ") should be less or equal to " "end (" PTR_FORMAT ")", p2i(start), p2i(end)); @@ -308,7 +309,7 @@ return NULL; } - size_t claimed_index = Atomic::add((size_t)1, &_claimed_root_regions) - 1; + size_t claimed_index = Atomic::add(&_claimed_root_regions, (size_t)1) - 1; if (claimed_index < _num_root_regions) { return &_root_regions[claimed_index]; } @@ -1121,7 +1122,7 @@ virtual void work(uint worker_id) { G1UpdateRemSetTrackingBeforeRebuild update_cl(_g1h, _cm, &_cl); _g1h->heap_region_par_iterate_from_worker_offset(&update_cl, &_hrclaimer, worker_id); - Atomic::add(update_cl.num_selected_for_rebuild(), &_total_selected_for_rebuild); + Atomic::add(&_total_selected_for_rebuild, update_cl.num_selected_for_rebuild()); } uint total_selected_for_rebuild() const { return _total_selected_for_rebuild; } @@ -1611,7 +1612,7 @@ // we utilize all the worker threads we can. bool processing_is_mt = rp->processing_is_mt(); uint active_workers = (processing_is_mt ? _g1h->workers()->active_workers() : 1U); - active_workers = MAX2(MIN2(active_workers, _max_num_tasks), 1U); + active_workers = clamp(active_workers, 1u, _max_num_tasks); // Parallel processing task executor. G1CMRefProcTaskExecutor par_task_executor(_g1h, this, @@ -1906,7 +1907,7 @@ HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords; // Is the gap between reading the finger and doing the CAS too long? - HeapWord* res = Atomic::cmpxchg(end, &_finger, finger); + HeapWord* res = Atomic::cmpxchg(&_finger, finger, end); if (res == finger && curr_region != NULL) { // we succeeded HeapWord* bottom = curr_region->bottom(); @@ -2587,7 +2588,8 @@ // and do_marking_step() is not being called serially. bool do_stealing = do_termination && !is_serial; - double diff_prediction_ms = _g1h->policy()->predictor().get_new_prediction(&_marking_step_diff_ms); + G1Predictions const& predictor = _g1h->policy()->predictor(); + double diff_prediction_ms = predictor.get_new_lower_zero_bound_prediction(&_marking_step_diff_ms); _time_target_ms = time_target_ms - diff_prediction_ms; // set up the variables that are used in the work-based scheme to
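For context, the finger cmpxchg above implements an optimistic claim: read the shared finger, compute where the claimed range would end, and try to swing the finger forward; only the winner owns the range. A simplified sketch:

    HeapWord* finger = _finger;                       // racy read of the shared finger
    HeapWord* end = finger + HeapRegion::GrainWords;  // simplified claim granularity
    HeapWord* res = Atomic::cmpxchg(&_finger, finger, end);
    if (res == finger) {
      // CAS won: this thread owns [finger, end) for marking.
    } else {
      // CAS lost: res holds the newer finger value; retry from there.
    }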
--- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -34,13 +34,14 @@ #include "gc/g1/heapRegionRemSet.hpp" #include "gc/shared/suspendibleThreadSet.hpp" #include "gc/shared/workgroup.hpp" -#include "runtime/atomic.hpp" #include "runtime/flags/flagSetting.hpp" #include "runtime/mutexLocker.hpp" +#include "runtime/orderAccess.hpp" #include "runtime/os.hpp" #include "runtime/safepoint.hpp" #include "runtime/thread.inline.hpp" #include "runtime/threadSMR.hpp" +#include "utilities/quickSort.hpp" G1DirtyCardQueue::G1DirtyCardQueue(G1DirtyCardQueueSet* qset) : // Dirty card queues are always active, so we create them with their @@ -226,21 +227,127 @@ return result; } +class G1RefineBufferedCards : public StackObj { + BufferNode* const _node; + CardTable::CardValue** const _node_buffer; + const size_t _node_buffer_size; + const uint _worker_id; + size_t* _total_refined_cards; + G1RemSet* const _g1rs; + + static inline int compare_card(const CardTable::CardValue* p1, + const CardTable::CardValue* p2) { + return p2 - p1; + } + + // Sorts the cards from start_index to _node_buffer_size in *decreasing* + // address order. Tests showed that this order is preferable to not sorting + // or increasing address order. + void sort_cards(size_t start_index) { + QuickSort::sort(&_node_buffer[start_index], + _node_buffer_size - start_index, + compare_card, + false); + } + + // Returns the index to the first clean card in the buffer. + size_t clean_cards() { + const size_t start = _node->index(); + assert(start <= _node_buffer_size, "invariant"); + + // Two-fingered compaction algorithm similar to the filtering mechanism in + // SATBMarkQueue. The main difference is that clean_card_before_refine() + // could change the buffer element in-place. + // We don't check for SuspendibleThreadSet::should_yield(), because + // cleaning and redirtying the cards is fast. + CardTable::CardValue** src = &_node_buffer[start]; + CardTable::CardValue** dst = &_node_buffer[_node_buffer_size]; + assert(src <= dst, "invariant"); + for ( ; src < dst; ++src) { + // Search low to high for a card to keep. + if (_g1rs->clean_card_before_refine(src)) { + // Found keeper. Search high to low for a card to discard. + while (src < --dst) { + if (!_g1rs->clean_card_before_refine(dst)) { + *dst = *src; // Replace discard with keeper. + break; + } + } + // If discard search failed (src == dst), the outer loop will also end. + } + } + + // dst points to the first retained clean card, or the end of the buffer + // if all the cards were discarded. + const size_t first_clean = dst - _node_buffer; + assert(first_clean >= start && first_clean <= _node_buffer_size, "invariant"); + // Discarded cards are considered as refined. 
+    *_total_refined_cards += first_clean - start;
+    return first_clean;
+  }
+
+  bool refine_cleaned_cards(size_t start_index) {
+    bool result = true;
+    size_t i = start_index;
+    for ( ; i < _node_buffer_size; ++i) {
+      if (SuspendibleThreadSet::should_yield()) {
+        redirty_unrefined_cards(i);
+        result = false;
+        break;
+      }
+      _g1rs->refine_card_concurrently(_node_buffer[i], _worker_id);
+    }
+    _node->set_index(i);
+    *_total_refined_cards += i - start_index;
+    return result;
+  }
+
+  void redirty_unrefined_cards(size_t start) {
+    for ( ; start < _node_buffer_size; ++start) {
+      *_node_buffer[start] = G1CardTable::dirty_card_val();
+    }
+  }
+
+public:
+  G1RefineBufferedCards(BufferNode* node,
+                        size_t node_buffer_size,
+                        uint worker_id,
+                        size_t* total_refined_cards) :
+    _node(node),
+    _node_buffer(reinterpret_cast<CardTable::CardValue**>(BufferNode::make_buffer_from_node(node))),
+    _node_buffer_size(node_buffer_size),
+    _worker_id(worker_id),
+    _total_refined_cards(total_refined_cards),
+    _g1rs(G1CollectedHeap::heap()->rem_set()) {}
+
+  bool refine() {
+    size_t first_clean_index = clean_cards();
+    if (first_clean_index == _node_buffer_size) {
+      _node->set_index(first_clean_index);
+      return true;
+    }
+    // This fence serves two purposes. First, the cards must be cleaned
+    // before processing the contents. Second, we can't proceed with
+    // processing a region until after the read of the region's top in
+    // collect_and_clean_cards(), for synchronization with possibly concurrent
+    // humongous object allocation (see comment at the StoreStore fence before
+    // setting the regions' tops in the humongous allocation path).
+    // It's okay that reading the region's top and reading the region's type
+    // are racy with respect to each other. We need both set, in any order,
+    // to proceed.
+    OrderAccess::fence();
+    sort_cards(first_clean_index);
+    return refine_cleaned_cards(first_clean_index);
+  }
+};
+
 bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node,
                                         uint worker_id,
                                         size_t* total_refined_cards) {
-  G1RemSet* rem_set = G1CollectedHeap::heap()->rem_set();
-  size_t size = buffer_size();
-  void** buffer = BufferNode::make_buffer_from_node(node);
-  size_t i = node->index();
-  assert(i <= size, "invariant");
-  for ( ; (i < size) && !SuspendibleThreadSet::should_yield(); ++i) {
-    CardTable::CardValue* cp = static_cast<CardTable::CardValue*>(buffer[i]);
-    rem_set->refine_card_concurrently(cp, worker_id);
-  }
-  *total_refined_cards += (i - node->index());
-  node->set_index(i);
-  return i == size;
+  G1RefineBufferedCards buffered_cards(node,
+                                       buffer_size(),
+                                       worker_id,
+                                       total_refined_cards);
+  return buffered_cards.refine();
 }
 
 #ifndef ASSERT
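The two-fingered compaction in clean_cards() reads more easily stripped of the card-table details. As a generic partition (a sketch; keep() stands in for clean_card_before_refine(), and order within each side is not preserved):

    // Partition buffer[0, n) so kept elements end up in [result, n),
    // in one pass and with no extra memory.
    template <typename T, typename Keep>
    size_t two_finger_partition(T* buffer, size_t n, Keep keep) {
      T* src = buffer;      // low finger: scans for elements to keep
      T* dst = buffer + n;  // high finger: scans for slots to reuse
      for ( ; src < dst; ++src) {
        if (keep(*src)) {
          while (src < --dst) {
            if (!keep(*dst)) {
              *dst = *src;  // move the keeper into a discarded slot
              break;
            }
          }
        }
      }
      return dst - buffer;  // index of the first kept element (n if none)
    }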
--- a/src/hotspot/share/gc/g1/g1EvacStats.inline.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1EvacStats.inline.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -29,17 +29,17 @@ #include "runtime/atomic.hpp" inline void G1EvacStats::add_direct_allocated(size_t value) { - Atomic::add(value, &_direct_allocated); + Atomic::add(&_direct_allocated, value); } inline void G1EvacStats::add_region_end_waste(size_t value) { - Atomic::add(value, &_region_end_waste); + Atomic::add(&_region_end_waste, value); Atomic::inc(&_regions_filled); } inline void G1EvacStats::add_failure_used_and_waste(size_t used, size_t waste) { - Atomic::add(used, &_failure_used); - Atomic::add(waste, &_failure_waste); + Atomic::add(&_failure_used, used); + Atomic::add(&_failure_waste, waste); } #endif // SHARE_GC_G1_G1EVACSTATS_INLINE_HPP
--- a/src/hotspot/share/gc/g1/g1EvacuationInfo.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1EvacuationInfo.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -33,13 +33,14 @@ size_t _collectionset_used_before; size_t _collectionset_used_after; size_t _alloc_regions_used_before; - size_t _bytes_copied; + size_t _bytes_used; uint _regions_freed; public: - G1EvacuationInfo() : _collectionset_regions(0), _allocation_regions(0), _collectionset_used_before(0), - _collectionset_used_after(0), _alloc_regions_used_before(0), - _bytes_copied(0), _regions_freed(0) { } + G1EvacuationInfo() : + _collectionset_regions(0), _allocation_regions(0), _collectionset_used_before(0), + _collectionset_used_after(0), _alloc_regions_used_before(0), + _bytes_used(0), _regions_freed(0) { } void set_collectionset_regions(uint collectionset_regions) { _collectionset_regions = collectionset_regions; @@ -61,8 +62,8 @@ _alloc_regions_used_before = used; } - void set_bytes_copied(size_t copied) { - _bytes_copied = copied; + void set_bytes_used(size_t used) { + _bytes_used = used; } void set_regions_freed(uint freed) { @@ -74,7 +75,7 @@ size_t collectionset_used_before() { return _collectionset_used_before; } size_t collectionset_used_after() { return _collectionset_used_after; } size_t alloc_regions_used_before() { return _alloc_regions_used_before; } - size_t bytes_copied() { return _bytes_copied; } + size_t bytes_used() { return _bytes_used; } uint regions_freed() { return _regions_freed; } };
--- a/src/hotspot/share/gc/g1/g1FreeIdSet.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1FreeIdSet.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -79,7 +79,7 @@ index = head_index(old_head); assert(index < _size, "invariant"); uintx new_head = make_head(_next[index], old_head); - new_head = Atomic::cmpxchg(new_head, &_head, old_head); + new_head = Atomic::cmpxchg(&_head, old_head, new_head); if (new_head == old_head) break; old_head = new_head; } @@ -95,7 +95,7 @@ while (true) { _next[index] = head_index(old_head); uintx new_head = make_head(index, old_head); - new_head = Atomic::cmpxchg(new_head, &_head, old_head); + new_head = Atomic::cmpxchg(&_head, old_head, new_head); if (new_head == old_head) break; old_head = new_head; }
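Both loops CAS a packed head word rather than a bare list index; packing a version counter next to the index is what defeats the ABA problem when an id is released and immediately re-claimed between a reader's load and its cmpxchg. A generic sketch of such an encoding (field width illustrative; the real shift and mask are private to G1FreeIdSet):

    const uintx IndexBits = 16;
    const uintx IndexMask = (uintx(1) << IndexBits) - 1;

    uintx head_index(uintx head) { return head & IndexMask; }

    uintx make_head(uintx index, uintx old_head) {
      // Keep the old version bits, advance them one tick, install the index;
      // a recycled index therefore never reproduces an earlier head word.
      return ((old_head & ~IndexMask) + (IndexMask + 1)) | index;
    }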
--- a/src/hotspot/share/gc/g1/g1FullGCAdjustTask.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1FullGCAdjustTask.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -101,7 +101,7 @@ // Adjust the weak roots. - if (Atomic::add(1u, &_references_done) == 1u) { // First incr claims task. + if (Atomic::add(&_references_done, 1u) == 1u) { // First incr claims task. G1CollectedHeap::heap()->ref_processor_stw()->weak_oops_do(&_adjust); }
--- a/src/hotspot/share/gc/g1/g1FullGCCompactTask.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1FullGCCompactTask.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -53,7 +53,7 @@ assert(current->is_empty(), "Should have been cleared in phase 2."); } } - current->reset_during_compaction(); + current->reset_humongous_during_compaction(); } return false; }
--- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -72,6 +72,8 @@ _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_fine, MergeRSMergedFine); _merge_rs_merged_coarse = new WorkerDataArray<size_t>(max_gc_threads, "Merged Coarse:"); _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_coarse, MergeRSMergedCoarse); + _merge_rs_dirty_cards = new WorkerDataArray<size_t>(max_gc_threads, "Dirty Cards:"); + _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_dirty_cards, MergeRSDirtyCards); _gc_par_phases[OptMergeRS] = new WorkerDataArray<double>(max_gc_threads, "Optional Remembered Sets (ms):"); _opt_merge_rs_merged_sparse = new WorkerDataArray<size_t>(max_gc_threads, "Merged Sparse:"); @@ -80,6 +82,8 @@ _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_fine, MergeRSMergedFine); _opt_merge_rs_merged_coarse = new WorkerDataArray<size_t>(max_gc_threads, "Merged Coarse:"); _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_coarse, MergeRSMergedCoarse); + _opt_merge_rs_dirty_cards = new WorkerDataArray<size_t>(max_gc_threads, "Dirty Cards:"); + _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_dirty_cards, MergeRSDirtyCards); _gc_par_phases[MergeLB] = new WorkerDataArray<double>(max_gc_threads, "Log Buffers (ms):"); if (G1HotCardCache::default_use_cache()) { @@ -128,15 +132,14 @@ _merge_lb_skipped_cards = new WorkerDataArray<size_t>(max_gc_threads, "Skipped Cards:"); _gc_par_phases[MergeLB]->link_thread_work_items(_merge_lb_skipped_cards, MergeLBSkippedCards); - _obj_copy_lab_waste = new WorkerDataArray<size_t>(max_gc_threads, "LAB Waste"); - _gc_par_phases[ObjCopy]->link_thread_work_items(_obj_copy_lab_waste, ObjCopyLABWaste); - _obj_copy_lab_undo_waste = new WorkerDataArray<size_t>(max_gc_threads, "LAB Undo Waste"); - _gc_par_phases[ObjCopy]->link_thread_work_items(_obj_copy_lab_undo_waste, ObjCopyLABUndoWaste); + _gc_par_phases[MergePSS] = new WorkerDataArray<double>(1, "Merge Per-Thread State", true /* is_serial */); - _opt_obj_copy_lab_waste = new WorkerDataArray<size_t>(max_gc_threads, "LAB Waste"); - _gc_par_phases[OptObjCopy]->link_thread_work_items(_obj_copy_lab_waste, ObjCopyLABWaste); - _opt_obj_copy_lab_undo_waste = new WorkerDataArray<size_t>(max_gc_threads, "LAB Undo Waste"); - _gc_par_phases[OptObjCopy]->link_thread_work_items(_obj_copy_lab_undo_waste, ObjCopyLABUndoWaste); + _merge_pss_copied_bytes = new WorkerDataArray<size_t>(max_gc_threads, "Copied Bytes"); + _gc_par_phases[MergePSS]->link_thread_work_items(_merge_pss_copied_bytes, MergePSSCopiedBytes); + _merge_pss_lab_waste_bytes = new WorkerDataArray<size_t>(max_gc_threads, "LAB Waste"); + _gc_par_phases[MergePSS]->link_thread_work_items(_merge_pss_lab_waste_bytes, MergePSSLABWasteBytes); + _merge_pss_lab_undo_waste_bytes = new WorkerDataArray<size_t>(max_gc_threads, "LAB Undo Waste"); + _gc_par_phases[MergePSS]->link_thread_work_items(_merge_pss_lab_undo_waste_bytes, MergePSSLABUndoWasteBytes); _termination_attempts = new WorkerDataArray<size_t>(max_gc_threads, "Termination Attempts:"); _gc_par_phases[Termination]->link_thread_work_items(_termination_attempts); @@ -189,7 +192,6 @@ _recorded_non_young_cset_choice_time_ms = 0.0; _recorded_redirty_logged_cards_time_ms = 0.0; _recorded_preserve_cm_referents_time_ms = 0.0; - _recorded_merge_pss_time_ms = 0.0; _recorded_start_new_cset_time_ms = 0.0; _recorded_total_free_cset_time_ms = 
0.0; _recorded_serial_free_cset_time_ms = 0.0; @@ -306,10 +308,16 @@ // return the average time for a phase in milliseconds double G1GCPhaseTimes::average_time_ms(GCParPhases phase) { + if (_gc_par_phases[phase] == NULL) { + return 0.0; + } return _gc_par_phases[phase]->average() * 1000.0; } size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase, uint index) { + if (_gc_par_phases[phase] == NULL) { + return 0; + } assert(_gc_par_phases[phase]->thread_work_items(index) != NULL, "No sub count"); return _gc_par_phases[phase]->thread_work_items(index)->sum(); } @@ -464,13 +472,15 @@ double G1GCPhaseTimes::print_post_evacuate_collection_set() const { const double evac_fail_handling = _cur_evac_fail_recalc_used + _cur_evac_fail_remove_self_forwards; + assert(_gc_par_phases[MergePSS]->get(0) != WorkerDataArray<double>::uninitialized(), "must be set"); + const double merge_pss = _gc_par_phases[MergePSS]->get(0) * MILLIUNITS; const double sum_ms = evac_fail_handling + _cur_collection_code_root_fixup_time_ms + _recorded_preserve_cm_referents_time_ms + _cur_ref_proc_time_ms + (_weak_phase_times.total_time_sec() * MILLIUNITS) + _cur_clear_ct_time_ms + - _recorded_merge_pss_time_ms + + merge_pss + _cur_strong_code_root_purge_time_ms + _recorded_redirty_logged_cards_time_ms + _recorded_total_free_cset_time_ms + @@ -500,7 +510,7 @@ trace_time("Remove Self Forwards",_cur_evac_fail_remove_self_forwards); } - debug_time("Merge Per-Thread State", _recorded_merge_pss_time_ms); + debug_phase(_gc_par_phases[MergePSS], 0); debug_time("Code Roots Purge", _cur_strong_code_root_purge_time_ms); debug_time("Redirty Cards", _recorded_redirty_logged_cards_time_ms); @@ -585,7 +595,8 @@ "StringDedupTableFixup", "RedirtyCards", "YoungFreeCSet", - "NonYoungFreeCSet" + "NonYoungFreeCSet", + "MergePSS" //GCParPhasesSentinel only used to tell end of enum };
--- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -78,6 +78,7 @@ RedirtyCards, YoungFreeCSet, NonYoungFreeCSet, + MergePSS, GCParPhasesSentinel }; @@ -87,7 +88,8 @@ enum GCMergeRSWorkTimes { MergeRSMergedSparse, MergeRSMergedFine, - MergeRSMergedCoarse + MergeRSMergedCoarse, + MergeRSDirtyCards }; enum GCScanHRWorkItems { @@ -108,9 +110,10 @@ MergeLBSkippedCards }; - enum GCObjCopyWorkItems { - ObjCopyLABWaste, - ObjCopyLABUndoWaste + enum GCMergePSSWorkItems { + MergePSSCopiedBytes, + MergePSSLABWasteBytes, + MergePSSLABUndoWasteBytes }; private: @@ -122,6 +125,7 @@ WorkerDataArray<size_t>* _merge_rs_merged_sparse; WorkerDataArray<size_t>* _merge_rs_merged_fine; WorkerDataArray<size_t>* _merge_rs_merged_coarse; + WorkerDataArray<size_t>* _merge_rs_dirty_cards; WorkerDataArray<size_t>* _merge_hcc_dirty_cards; WorkerDataArray<size_t>* _merge_hcc_skipped_cards; @@ -136,6 +140,7 @@ WorkerDataArray<size_t>* _opt_merge_rs_merged_sparse; WorkerDataArray<size_t>* _opt_merge_rs_merged_fine; WorkerDataArray<size_t>* _opt_merge_rs_merged_coarse; + WorkerDataArray<size_t>* _opt_merge_rs_dirty_cards; WorkerDataArray<size_t>* _opt_scan_hr_scanned_cards; WorkerDataArray<size_t>* _opt_scan_hr_scanned_blocks; @@ -143,11 +148,9 @@ WorkerDataArray<size_t>* _opt_scan_hr_scanned_opt_refs; WorkerDataArray<size_t>* _opt_scan_hr_used_memory; - WorkerDataArray<size_t>* _obj_copy_lab_waste; - WorkerDataArray<size_t>* _obj_copy_lab_undo_waste; - - WorkerDataArray<size_t>* _opt_obj_copy_lab_waste; - WorkerDataArray<size_t>* _opt_obj_copy_lab_undo_waste; + WorkerDataArray<size_t>* _merge_pss_copied_bytes; + WorkerDataArray<size_t>* _merge_pss_lab_waste_bytes; + WorkerDataArray<size_t>* _merge_pss_lab_undo_waste_bytes; WorkerDataArray<size_t>* _termination_attempts; @@ -224,7 +227,9 @@ template <class T> void details(T* phase, const char* indent) const; + void log_work_items(WorkerDataArray<double>* phase, uint indent, outputStream* out) const; void log_phase(WorkerDataArray<double>* phase, uint indent, outputStream* out, bool print_sum) const; + void debug_serial_phase(WorkerDataArray<double>* phase, uint extra_indent = 0) const; void debug_phase(WorkerDataArray<double>* phase, uint extra_indent = 0) const; void trace_phase(WorkerDataArray<double>* phase, bool print_sum = true) const; @@ -269,8 +274,6 @@ size_t sum_thread_work_items(GCParPhases phase, uint index = 0); - public: - void record_prepare_tlab_time_ms(double ms) { _cur_prepare_tlab_time_ms = ms; } @@ -378,10 +381,6 @@ _recorded_preserve_cm_referents_time_ms = time_ms; } - void record_merge_pss_time_ms(double time_ms) { - _recorded_merge_pss_time_ms = time_ms; - } - void record_start_new_cset_time_ms(double time_ms) { _recorded_start_new_cset_time_ms = time_ms; }
--- a/src/hotspot/share/gc/g1/g1HeterogeneousHeapPolicy.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1HeterogeneousHeapPolicy.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -39,8 +39,8 @@ } // After a collection pause, young list target length is updated. So we need to make sure we have enough regions in dram for young gen. -void G1HeterogeneousHeapPolicy::record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc) { - G1Policy::record_collection_pause_end(pause_time_ms, heap_used_bytes_before_gc); +void G1HeterogeneousHeapPolicy::record_collection_pause_end(double pause_time_ms) { + G1Policy::record_collection_pause_end(pause_time_ms); _manager->adjust_dram_regions((uint)young_list_target_length(), G1CollectedHeap::heap()->workers()); }
--- a/src/hotspot/share/gc/g1/g1HeterogeneousHeapPolicy.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1HeterogeneousHeapPolicy.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -38,7 +38,7 @@ // initialize policy virtual void init(G1CollectedHeap* g1h, G1CollectionSet* collection_set); // Record end of an evacuation pause. - virtual void record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc); + virtual void record_collection_pause_end(double pause_time_ms); // Record the end of full collection. virtual void record_full_collection_end();
--- a/src/hotspot/share/gc/g1/g1HotCardCache.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1HotCardCache.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -32,7 +32,7 @@ G1HotCardCache::G1HotCardCache(G1CollectedHeap *g1h): _g1h(g1h), _use_cache(false), _card_counts(g1h), _hot_cache(NULL), _hot_cache_size(0), _hot_cache_par_chunk_size(0), - _hot_cache_idx(0), _hot_cache_par_claimed_idx(0) + _hot_cache_idx(0), _hot_cache_par_claimed_idx(0), _cache_wrapped_around(false) {} void G1HotCardCache::initialize(G1RegionToSpaceMapper* card_counts_storage) { @@ -48,6 +48,8 @@ _hot_cache_par_chunk_size = ClaimChunkSize; _hot_cache_par_claimed_idx = 0; + _cache_wrapped_around = false; + _card_counts.initialize(card_counts_storage); } } @@ -68,7 +70,12 @@ return card_ptr; } // Otherwise, the card is hot. - size_t index = Atomic::add(1u, &_hot_cache_idx) - 1; + size_t index = Atomic::add(&_hot_cache_idx, 1u) - 1; + if (index == _hot_cache_size) { + // Can use relaxed store because all racing threads are writing the same + // value and there aren't any concurrent readers. + Atomic::store(&_cache_wrapped_around, true); + } size_t masked_index = index & (_hot_cache_size - 1); CardValue* current_ptr = _hot_cache[masked_index]; @@ -78,9 +85,9 @@ // card_ptr in favor of the other option, which would be starting over. This // should be OK since card_ptr will likely be the older card already when/if // this ever happens. - CardValue* previous_ptr = Atomic::cmpxchg(card_ptr, - &_hot_cache[masked_index], - current_ptr); + CardValue* previous_ptr = Atomic::cmpxchg(&_hot_cache[masked_index], + current_ptr, + card_ptr); return (previous_ptr == current_ptr) ? previous_ptr : card_ptr; } @@ -91,8 +98,8 @@ assert(!use_cache(), "cache should be disabled"); while (_hot_cache_par_claimed_idx < _hot_cache_size) { - size_t end_idx = Atomic::add(_hot_cache_par_chunk_size, - &_hot_cache_par_claimed_idx); + size_t end_idx = Atomic::add(&_hot_cache_par_claimed_idx, + _hot_cache_par_chunk_size); size_t start_idx = end_idx - _hot_cache_par_chunk_size; // The current worker has successfully claimed the chunk [start_idx..end_idx) end_idx = MIN2(end_idx, _hot_cache_size);
--- a/src/hotspot/share/gc/g1/g1HotCardCache.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1HotCardCache.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -81,6 +81,11 @@ char _pad_after[DEFAULT_CACHE_LINE_SIZE]; + // Records whether insertion overflowed the hot card cache at least once. This + // avoids the need for a separate atomic counter of how many valid entries are + // in the HCC. + volatile bool _cache_wrapped_around; + // The number of cached cards a thread claims when flushing the cache static const int ClaimChunkSize = 32; @@ -125,13 +130,17 @@ assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint"); assert(Thread::current()->is_VM_thread(), "Current thread should be the VMthread"); if (default_use_cache()) { - reset_hot_cache_internal(); + reset_hot_cache_internal(); } } // Zeros the values in the card counts table for the given region void reset_card_counts(HeapRegion* hr); + // Number of entries in the HCC. + size_t num_entries() const { + return _cache_wrapped_around ? _hot_cache_size : _hot_cache_idx + 1; + } private: void reset_hot_cache_internal() { assert(_hot_cache != NULL, "Logic"); @@ -139,6 +148,7 @@ for (size_t i = 0; i < _hot_cache_size; i++) { _hot_cache[i] = NULL; } + _cache_wrapped_around = false; } };
--- a/src/hotspot/share/gc/g1/g1IHOPControl.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1IHOPControl.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -113,6 +113,10 @@ ); } +double G1AdaptiveIHOPControl::get_new_prediction(TruncatedSeq const* seq) const { + return _predictor->get_new_lower_zero_bound_prediction(seq); +} + bool G1AdaptiveIHOPControl::have_enough_data_for_prediction() const { return ((size_t)_marking_times_s.num() >= G1AdaptiveIHOPNumInitialSamples) && ((size_t)_allocation_rate_s.num() >= G1AdaptiveIHOPNumInitialSamples); @@ -120,8 +124,8 @@ size_t G1AdaptiveIHOPControl::get_conc_mark_start_threshold() { if (have_enough_data_for_prediction()) { - double pred_marking_time = _predictor->get_new_prediction(&_marking_times_s); - double pred_promotion_rate = _predictor->get_new_prediction(&_allocation_rate_s); + double pred_marking_time = get_new_prediction(&_marking_times_s); + double pred_promotion_rate = get_new_prediction(&_allocation_rate_s); size_t pred_promotion_size = (size_t)(pred_marking_time * pred_promotion_rate); size_t predicted_needed_bytes_during_marking = @@ -168,8 +172,8 @@ actual_target, G1CollectedHeap::heap()->used(), _last_unrestrained_young_size, - _predictor->get_new_prediction(&_allocation_rate_s), - _predictor->get_new_prediction(&_marking_times_s) * 1000.0, + get_new_prediction(&_allocation_rate_s), + get_new_prediction(&_marking_times_s) * 1000.0, have_enough_data_for_prediction() ? "true" : "false"); } @@ -179,7 +183,7 @@ actual_target_threshold(), G1CollectedHeap::heap()->used(), _last_unrestrained_young_size, - _predictor->get_new_prediction(&_allocation_rate_s), - _predictor->get_new_prediction(&_marking_times_s), + get_new_prediction(&_allocation_rate_s), + get_new_prediction(&_marking_times_s), have_enough_data_for_prediction()); }
--- a/src/hotspot/share/gc/g1/g1IHOPControl.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1IHOPControl.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -123,6 +123,9 @@ // as there is no marking or mixed gc that could impact its size too much. size_t _last_unrestrained_young_size; + // Get a new prediction bounded below by zero from the given sequence. + double get_new_prediction(TruncatedSeq const* seq) const; + bool have_enough_data_for_prediction() const; // The "actual" target threshold the algorithm wants to keep during and at the
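With the wrapper in place, the threshold computation in get_conc_mark_start_threshold() (partially visible above) reduces to one relation. A sketch of the arithmetic, under the assumption that the needed headroom is the predicted promotion during marking plus the last unrestrained young gen size:

    double marking_time_s  = get_new_prediction(&_marking_times_s);   // seconds
    double promo_rate      = get_new_prediction(&_allocation_rate_s); // bytes/second
    size_t needed_bytes    = (size_t)(marking_time_s * promo_rate) + _last_unrestrained_young_size;
    size_t target          = actual_target_threshold();
    size_t start_threshold = (needed_bytes < target) ? target - needed_bytes : 0;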
--- a/src/hotspot/share/gc/g1/g1PageBasedVirtualSpace.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1PageBasedVirtualSpace.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -261,7 +261,7 @@ virtual void work(uint worker_id) { size_t const actual_chunk_size = MAX2(chunk_size(), _page_size); while (true) { - char* touch_addr = Atomic::add(actual_chunk_size, &_cur_addr) - actual_chunk_size; + char* touch_addr = Atomic::add(&_cur_addr, actual_chunk_size) - actual_chunk_size; if (touch_addr < _start_addr || touch_addr >= _end_addr) { break; }
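The pretouch task claims work with the same fetch-and-add idiom the hot card cache drain above uses: Atomic::add returns the updated cursor, so subtracting the chunk size yields this worker's exclusive range.

    // Each worker claims a disjoint [start, end) chunk from a shared cursor.
    char* end = Atomic::add(&_cur_addr, actual_chunk_size);  // new cursor value
    char* start = end - actual_chunk_size;                   // this worker's chunk
    if (start >= _end_addr) {
      // Cursor ran past the end: nothing left to claim.
    }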
--- a/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp Mon Dec 02 12:01:40 2019 +0530
@@ -56,6 +56,9 @@
     _stack_trim_upper_threshold(GCDrainStackTargetSize * 2 + 1),
     _stack_trim_lower_threshold(GCDrainStackTargetSize),
     _trim_ticks(),
+    _surviving_young_words_base(NULL),
+    _surviving_young_words(NULL),
+    _surviving_words_length(young_cset_length + 1),
     _old_gen_is_full(false),
     _num_optional_regions(optional_cset_length),
     _numa(g1h->numa()),
@@ -65,11 +68,10 @@
   // entries, since entry 0 keeps track of surviving bytes for non-young regions.
   // We also add a few elements at the beginning and at the end in
   // an attempt to eliminate cache contention
-  size_t real_length = young_cset_length + 1;
-  size_t array_length = PADDING_ELEM_NUM + real_length + PADDING_ELEM_NUM;
+  size_t array_length = PADDING_ELEM_NUM + _surviving_words_length + PADDING_ELEM_NUM;
   _surviving_young_words_base = NEW_C_HEAP_ARRAY(size_t, array_length, mtGC);
   _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM;
-  memset(_surviving_young_words, 0, real_length * sizeof(size_t));
+  memset(_surviving_young_words, 0, _surviving_words_length * sizeof(size_t));
 
   _plab_allocator = new G1PLABAllocator(_g1h->allocator());
 
@@ -85,18 +87,19 @@
   initialize_numa_stats();
 }
 
-// Pass locally gathered statistics to global state.
-void G1ParScanThreadState::flush(size_t* surviving_young_words) {
+size_t G1ParScanThreadState::flush(size_t* surviving_young_words) {
   _rdcq.flush();
+  flush_numa_stats();
   // Update allocation statistics.
   _plab_allocator->flush_and_retire_stats();
   _g1h->policy()->record_age_table(&_age_table);
 
-  uint length = _g1h->collection_set()->young_region_length() + 1;
-  for (uint i = 0; i < length; i++) {
+  size_t sum = 0;
+  for (uint i = 0; i < _surviving_words_length; i++) {
     surviving_young_words[i] += _surviving_young_words[i];
+    sum += _surviving_young_words[i];
   }
-  flush_numa_stats();
+  return sum;
 }
 
 G1ParScanThreadState::~G1ParScanThreadState() {
@@ -357,16 +360,27 @@
 void G1ParScanThreadStateSet::flush() {
   assert(!_flushed, "thread local state from the per thread states should be flushed once");
 
-  for (uint worker_index = 0; worker_index < _n_workers; ++worker_index) {
-    G1ParScanThreadState* pss = _states[worker_index];
+  for (uint worker_id = 0; worker_id < _n_workers; ++worker_id) {
+    G1ParScanThreadState* pss = _states[worker_id];
 
     if (pss == NULL) {
      continue;
    }
 
-    pss->flush(_surviving_young_words_total);
+    G1GCPhaseTimes* p = _g1h->phase_times();
+
+    // Need to get the following two before the call to G1ParScanThreadState::flush()
+    // because it resets the PLAB allocator where we get this info from.
+    size_t lab_waste_bytes = pss->lab_waste_words() * HeapWordSize;
+    size_t lab_undo_waste_bytes = pss->lab_undo_waste_words() * HeapWordSize;
+    size_t copied_bytes = pss->flush(_surviving_young_words_total) * HeapWordSize;
+
+    p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, copied_bytes, G1GCPhaseTimes::MergePSSCopiedBytes);
+    p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, lab_waste_bytes, G1GCPhaseTimes::MergePSSLABWasteBytes);
+    p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, lab_undo_waste_bytes, G1GCPhaseTimes::MergePSSLABUndoWasteBytes);
+
     delete pss;
-    _states[worker_index] = NULL;
+    _states[worker_id] = NULL;
   }
   _flushed = true;
 }
--- a/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -74,7 +74,8 @@ size_t* _surviving_young_words_base; // this points into the array, as we use the first few entries for padding size_t* _surviving_young_words; - + // Number of elements in the array above. + size_t _surviving_words_length; // Indicates whether in the last generation (old) there is no more space // available for allocation. bool _old_gen_is_full; @@ -152,7 +153,9 @@ size_t lab_waste_words() const; size_t lab_undo_waste_words() const; - void flush(size_t* surviving_young_words); + // Pass locally gathered statistics to global state. Returns the total number of + // HeapWords copied. + size_t flush(size_t* surviving_young_words); private: #define G1_PARTIAL_ARRAY_MASK 0x2
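The new _surviving_words_length field simply caches the usable element count of the padded array declared just above it. The padding scheme itself, PADDING_ELEM_NUM unused elements on both sides of each per-thread array so neighbouring threads' counters are less likely to share cache lines, is sketched below; apart from that idea and the PADDING_ELEM_NUM name, everything here is hypothetical.

    #include <cstddef>
    #include <cstring>

    static const size_t kPaddingElems = 8;  // stand-in for PADDING_ELEM_NUM

    struct PaddedCounters {
      size_t* base;      // start of the raw allocation
      size_t* counters;  // what the thread actually uses
      size_t  length;    // number of usable counters

      explicit PaddedCounters(size_t n)
        : base(new size_t[kPaddingElems + n + kPaddingElems]),
          counters(base + kPaddingElems),
          length(n) {
        std::memset(counters, 0, length * sizeof(size_t));
      }
      PaddedCounters(const PaddedCounters&) = delete;
      PaddedCounters& operator=(const PaddedCounters&) = delete;
      ~PaddedCounters() { delete[] base; }
    };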
--- a/src/hotspot/share/gc/g1/g1ParallelCleaning.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1ParallelCleaning.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "gc/g1/g1ParallelCleaning.hpp" +#include "runtime/atomic.hpp" #if INCLUDE_JVMCI #include "jvmci/jvmci.hpp" #endif @@ -39,7 +40,7 @@ return false; } - return Atomic::cmpxchg(1, &_cleaning_claimed, 0) == 0; + return Atomic::cmpxchg(&_cleaning_claimed, 0, 1) == 0; } void JVMCICleaningTask::work(bool unloading_occurred) {
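Same API migration as elsewhere in this change: Atomic::cmpxchg now takes (destination, compare_value, exchange_value) and still returns the value it found, so comparing against 0 identifies the single winning thread. A self-contained sketch of this one-shot claim, using std::atomic in place of HotSpot's Atomic (names hypothetical):

    #include <atomic>

    static std::atomic<int> cleaning_claimed{0};

    // The first thread to flip 0 -> 1 wins; every other thread sees a failed
    // exchange and returns false.
    bool claim_cleaning_task() {
      int expected = 0;
      return cleaning_claimed.compare_exchange_strong(expected, 1);
    }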
--- a/src/hotspot/share/gc/g1/g1Policy.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1Policy.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -78,7 +78,6 @@ _bytes_allocated_in_old_since_last_gc(0), _initial_mark_to_mixed(), _collection_set(NULL), - _bytes_copied_during_gc(0), _g1h(NULL), _phase_times(new G1GCPhaseTimes(gc_timer, ParallelGCThreads)), _mark_remark_start_sec(0), @@ -330,9 +329,8 @@ const double target_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0; const double survivor_regions_evac_time = predict_survivor_regions_evac_time(); const size_t pending_cards = _analytics->predict_pending_cards(); - const size_t scanned_cards = _analytics->predict_card_num(rs_length, true /* for_young_gc */); const double base_time_ms = - predict_base_elapsed_time_ms(pending_cards, scanned_cards) + + predict_base_elapsed_time_ms(pending_cards, rs_length) + survivor_regions_evac_time; const uint available_free_regions = _free_regions_at_end_of_collection; const uint base_free_regions = @@ -553,7 +551,6 @@ record_concurrent_refinement_data(false /* is_full_collection */); _collection_set->reset_bytes_used_before(); - _bytes_copied_during_gc = 0; // do that for any other surv rate groups _short_lived_surv_rate_group->stop_adding_regions(); @@ -647,13 +644,11 @@ // Anything below that is considered to be zero #define MIN_TIMER_GRANULARITY 0.0000001 -void G1Policy::record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc) { +void G1Policy::record_collection_pause_end(double pause_time_ms) { G1GCPhaseTimes* p = phase_times(); double end_time_sec = os::elapsedTime(); - assert_used_and_recalculate_used_equal(_g1h); - size_t cur_used_bytes = _g1h->used(); bool this_pause_included_initial_mark = false; bool this_pause_was_young_only = collector_state()->in_young_only_phase(); @@ -717,73 +712,62 @@ } _short_lived_surv_rate_group->start_adding_regions(); - // Do that for any other surv rate groups - double scan_hcc_time_ms = G1HotCardCache::default_use_cache() ? average_time_ms(G1GCPhaseTimes::MergeHCC) : 0.0; + double merge_hcc_time_ms = average_time_ms(G1GCPhaseTimes::MergeHCC); + if (update_stats) { + size_t const total_log_buffer_cards = p->sum_thread_work_items(G1GCPhaseTimes::MergeHCC, G1GCPhaseTimes::MergeHCCDirtyCards) + + p->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards); + // Update prediction for card merge; MergeRSDirtyCards includes the cards from the Eager Reclaim phase. + size_t const total_cards_merged = p->sum_thread_work_items(G1GCPhaseTimes::MergeRS, G1GCPhaseTimes::MergeRSDirtyCards) + + p->sum_thread_work_items(G1GCPhaseTimes::OptMergeRS, G1GCPhaseTimes::MergeRSDirtyCards) + + total_log_buffer_cards; - if (update_stats) { - double cost_per_logged_card = 0.0; - size_t const pending_logged_cards = p->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards); - if (pending_logged_cards > 0) { - cost_per_logged_card = logged_cards_processing_time() / pending_logged_cards; - _analytics->report_cost_per_logged_card_ms(cost_per_logged_card); + // The threshold for the number of cards in a given sampling which we consider + // large enough so that the impact from setup and other costs is negligible. 
+ size_t const CardsNumSamplingThreshold = 10; + + if (total_cards_merged > CardsNumSamplingThreshold) { + double avg_time_merge_cards = average_time_ms(G1GCPhaseTimes::MergeER) + + average_time_ms(G1GCPhaseTimes::MergeRS) + + average_time_ms(G1GCPhaseTimes::MergeHCC) + + average_time_ms(G1GCPhaseTimes::MergeLB) + + average_time_ms(G1GCPhaseTimes::OptMergeRS); + _analytics->report_cost_per_card_merge_ms(avg_time_merge_cards / total_cards_merged, this_pause_was_young_only); } - _analytics->report_cost_scan_hcc(scan_hcc_time_ms); + // Update prediction for card scan size_t const total_cards_scanned = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) + p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards); - size_t remset_cards_scanned = 0; - // There might have been duplicate log buffer entries in the queues which could - // increase this value beyond the cards scanned. In this case attribute all cards - // to the log buffers. - if (pending_logged_cards <= total_cards_scanned) { - remset_cards_scanned = total_cards_scanned - pending_logged_cards; + + if (total_cards_scanned > CardsNumSamplingThreshold) { + double avg_time_dirty_card_scan = average_time_ms(G1GCPhaseTimes::ScanHR) + + average_time_ms(G1GCPhaseTimes::OptScanHR); + + _analytics->report_cost_per_card_scan_ms(avg_time_dirty_card_scan / total_cards_scanned, this_pause_was_young_only); } - double cost_per_remset_card_ms = 0.0; - if (remset_cards_scanned > 10) { - double avg_time_remset_scan = ((average_time_ms(G1GCPhaseTimes::ScanHR) + average_time_ms(G1GCPhaseTimes::OptScanHR)) * - remset_cards_scanned / total_cards_scanned) + - average_time_ms(G1GCPhaseTimes::MergeER) + - average_time_ms(G1GCPhaseTimes::MergeRS) + - average_time_ms(G1GCPhaseTimes::OptMergeRS); + // Update prediction for the ratio between cards from the remembered + // sets and actually scanned cards from the remembered sets. + // Cards from the remembered sets are all cards not duplicated by cards from + // the logs. + // Due to duplicates in the log buffers, the number of actually scanned cards + // can be smaller than the cards in the log buffers. + const size_t from_rs_length_cards = (total_cards_scanned > total_log_buffer_cards) ? total_cards_scanned - total_log_buffer_cards : 0; + double merge_to_scan_ratio = 0.0; + if (total_cards_scanned > 0) { + merge_to_scan_ratio = (double) from_rs_length_cards / total_cards_scanned; + } + _analytics->report_card_merge_to_scan_ratio(merge_to_scan_ratio, this_pause_was_young_only); - cost_per_remset_card_ms = avg_time_remset_scan / remset_cards_scanned; - _analytics->report_cost_per_remset_card_ms(cost_per_remset_card_ms, this_pause_was_young_only); - } + const size_t recorded_rs_length = _collection_set->recorded_rs_length(); + const size_t rs_length_diff = _rs_length > recorded_rs_length ? _rs_length - recorded_rs_length : 0; + _analytics->report_rs_length_diff(rs_length_diff); - if (_rs_length > 0) { - double cards_per_entry_ratio = - (double) remset_cards_scanned / (double) _rs_length; - _analytics->report_cards_per_entry_ratio(cards_per_entry_ratio, this_pause_was_young_only); - } - - // This is defensive. For a while _rs_length could get - // smaller than _recorded_rs_length which was causing - // rs_length_diff to get very large and mess up the RSet length - // predictions. The reason was unsafe concurrent updates to the - // _inc_cset_recorded_rs_length field which the code below guards - // against (see CR 7118202). 
This bug has now been fixed (see CR - // 7119027). However, I'm still worried that - // _inc_cset_recorded_rs_length might still end up somewhat - // inaccurate. The concurrent refinement thread calculates an - // RSet's length concurrently with other CR threads updating it - // which might cause it to calculate the length incorrectly (if, - // say, it's in mid-coarsening). So I'll leave in the defensive - // conditional below just in case. - size_t rs_length_diff = 0; - size_t recorded_rs_length = _collection_set->recorded_rs_length(); - if (_rs_length > recorded_rs_length) { - rs_length_diff = _rs_length - recorded_rs_length; - } - _analytics->report_rs_length_diff((double) rs_length_diff); - - size_t freed_bytes = heap_used_bytes_before_gc - cur_used_bytes; - size_t copied_bytes = _collection_set->bytes_used_before() - freed_bytes; - double cost_per_byte_ms = 0.0; + // Update prediction for copy cost per byte + size_t copied_bytes = p->sum_thread_work_items(G1GCPhaseTimes::MergePSS, G1GCPhaseTimes::MergePSSCopiedBytes); if (copied_bytes > 0) { - cost_per_byte_ms = (average_time_ms(G1GCPhaseTimes::ObjCopy) + average_time_ms(G1GCPhaseTimes::OptObjCopy)) / (double) copied_bytes; + double cost_per_byte_ms = (average_time_ms(G1GCPhaseTimes::ObjCopy) + average_time_ms(G1GCPhaseTimes::OptObjCopy)) / copied_bytes; _analytics->report_cost_per_byte_ms(cost_per_byte_ms, collector_state()->mark_or_rebuild_in_progress()); } @@ -848,21 +832,21 @@ // Note that _mmu_tracker->max_gc_time() returns the time in seconds. double scan_logged_cards_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0; - if (scan_logged_cards_time_goal_ms < scan_hcc_time_ms) { + if (scan_logged_cards_time_goal_ms < merge_hcc_time_ms) { log_debug(gc, ergo, refine)("Adjust concurrent refinement thresholds (scanning the HCC expected to take longer than Update RS time goal)." "Logged Cards Scan time goal: %1.2fms Scan HCC time: %1.2fms", - scan_logged_cards_time_goal_ms, scan_hcc_time_ms); + scan_logged_cards_time_goal_ms, merge_hcc_time_ms); scan_logged_cards_time_goal_ms = 0; } else { - scan_logged_cards_time_goal_ms -= scan_hcc_time_ms; + scan_logged_cards_time_goal_ms -= merge_hcc_time_ms; } _pending_cards_at_prev_gc_end = _g1h->pending_card_num(); double const logged_cards_time = logged_cards_processing_time(); log_debug(gc, ergo, refine)("Concurrent refinement times: Logged Cards Scan time goal: %1.2fms Logged Cards Scan time: %1.2fms HCC time: %1.2fms", - scan_logged_cards_time_goal_ms, logged_cards_time, scan_hcc_time_ms); + scan_logged_cards_time_goal_ms, logged_cards_time, merge_hcc_time_ms); _g1h->concurrent_refine()->adjust(logged_cards_time, phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards), @@ -930,11 +914,7 @@ double G1Policy::predict_yg_surv_rate(int age, SurvRateGroup* surv_rate_group) const { TruncatedSeq* seq = surv_rate_group->get_seq(age); guarantee(seq->num() > 0, "There should be some young gen survivor samples available. 
Tried to access with age %d", age); - double pred = _predictor.get_new_prediction(seq); - if (pred > 1.0) { - pred = 1.0; - } - return pred; + return _predictor.get_new_unit_prediction(seq); } double G1Policy::accum_yg_surv_rate_pred(int age) const { @@ -942,17 +922,17 @@ } double G1Policy::predict_base_elapsed_time_ms(size_t pending_cards, - size_t scanned_cards) const { + size_t rs_length) const { + size_t effective_scanned_cards = _analytics->predict_scan_card_num(rs_length, collector_state()->in_young_only_phase()); return - _analytics->predict_rs_update_time_ms(pending_cards) + - _analytics->predict_rs_scan_time_ms(scanned_cards, collector_state()->in_young_only_phase()) + + _analytics->predict_card_merge_time_ms(pending_cards + rs_length, collector_state()->in_young_only_phase()) + + _analytics->predict_card_scan_time_ms(effective_scanned_cards, collector_state()->in_young_only_phase()) + _analytics->predict_constant_other_time_ms(); } double G1Policy::predict_base_elapsed_time_ms(size_t pending_cards) const { size_t rs_length = _analytics->predict_rs_length(); - size_t card_num = _analytics->predict_card_num(rs_length, collector_state()->in_young_only_phase()); - return predict_base_elapsed_time_ms(pending_cards, card_num); + return predict_base_elapsed_time_ms(pending_cards, rs_length); } size_t G1Policy::predict_bytes_to_copy(HeapRegion* hr) const { @@ -971,13 +951,13 @@ double G1Policy::predict_region_elapsed_time_ms(HeapRegion* hr, bool for_young_gc) const { size_t rs_length = hr->rem_set()->occupied(); - // Predicting the number of cards is based on which type of GC - // we're predicting for. - size_t card_num = _analytics->predict_card_num(rs_length, for_young_gc); + size_t scan_card_num = _analytics->predict_scan_card_num(rs_length, for_young_gc); + size_t bytes_to_copy = predict_bytes_to_copy(hr); double region_elapsed_time_ms = - _analytics->predict_rs_scan_time_ms(card_num, collector_state()->in_young_only_phase()) + + _analytics->predict_card_merge_time_ms(rs_length, collector_state()->in_young_only_phase()) + + _analytics->predict_card_scan_time_ms(scan_card_num, collector_state()->in_young_only_phase()) + _analytics->predict_object_copy_time_ms(bytes_to_copy, collector_state()->mark_or_rebuild_in_progress()); // The prediction of the "other" time for this region is based
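The g1Policy.cpp changes above replace the old cost-per-remset-card model with separate card-merge and card-scan costs plus a merge-to-scan ratio sampled each pause. A hedged restatement of how one ratio sample is computed, following the names in the diff (an illustration, not the HotSpot sources): cards attributed to the remembered sets are those not already accounted to the log buffers, and a zero scan count yields a zero ratio.

    #include <cstddef>

    double merge_to_scan_ratio_sample(size_t total_cards_scanned,
                                      size_t total_log_buffer_cards) {
      // Duplicates in the log buffers can make the scanned count smaller than
      // the log-buffer count, so clamp the difference at zero.
      size_t from_rs_length_cards =
          (total_cards_scanned > total_log_buffer_cards)
              ? total_cards_scanned - total_log_buffer_cards
              : 0;
      return (total_cards_scanned > 0)
          ? (double)from_rs_length_cards / total_cards_scanned
          : 0.0;
    }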
--- a/src/hotspot/share/gc/g1/g1Policy.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1Policy.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -140,9 +140,9 @@ _rs_length = rs_length; } - double predict_base_elapsed_time_ms(size_t pending_cards) const; - double predict_base_elapsed_time_ms(size_t pending_cards, - size_t scanned_cards) const; + double predict_base_elapsed_time_ms(size_t num_pending_cards) const; + double predict_base_elapsed_time_ms(size_t num_pending_cards, + size_t rs_length) const; size_t predict_bytes_to_copy(HeapRegion* hr) const; double predict_region_elapsed_time_ms(HeapRegion* hr, bool for_young_gc) const; @@ -184,9 +184,6 @@ G1CollectionSetChooser* cset_chooser() const; - // The number of bytes copied during the GC. - size_t _bytes_copied_during_gc; - // Stash a pointer to the g1 heap. G1CollectedHeap* _g1h; @@ -320,7 +317,7 @@ // Record the start and end of an evacuation pause. void record_collection_pause_start(double start_time_sec); - virtual void record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc); + virtual void record_collection_pause_end(double pause_time_ms); // Record the start and end of a full collection. void record_full_collection_start(); @@ -339,17 +336,6 @@ void print_phases(); - // Record how much space we copied during a GC. This is typically - // called when a GC alloc region is being retired. - void record_bytes_copied_during_gc(size_t bytes) { - _bytes_copied_during_gc += bytes; - } - - // The amount of space we copied during a GC. - size_t bytes_copied_during_gc() const { - return _bytes_copied_during_gc; - } - bool next_gc_should_be_mixed(const char* true_action_str, const char* false_action_str) const;
--- a/src/hotspot/share/gc/g1/g1Predictions.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1Predictions.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -57,6 +57,14 @@ double get_new_prediction(TruncatedSeq const* seq) const { return seq->davg() + _sigma * stddev_estimate(seq); } + + double get_new_unit_prediction(TruncatedSeq const* seq) const { + return clamp(get_new_prediction(seq), 0.0, 1.0); + } + + double get_new_lower_zero_bound_prediction(TruncatedSeq const* seq) const { + return MAX2(get_new_prediction(seq), 0.0); + } }; #endif // SHARE_GC_G1_G1PREDICTIONS_HPP
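The two helpers added here encode bounds that callers previously applied by hand: survivor rates are probabilities, so their predictions are clamped to [0, 1], while time and size predictions only need a floor at zero. Roughly equivalent standard-library behaviour, with raw standing in for get_new_prediction(seq) (a sketch, not the HotSpot code):

    #include <algorithm>

    // Probabilities must stay within the unit interval.
    double get_new_unit_prediction(double raw) {
      return std::clamp(raw, 0.0, 1.0);
    }

    // Times and sizes only need to be non-negative.
    double get_new_lower_zero_bound_prediction(double raw) {
      return std::max(raw, 0.0);
    }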
--- a/src/hotspot/share/gc/g1/g1RedirtyCardsQueue.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1RedirtyCardsQueue.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -129,7 +129,7 @@ void G1RedirtyCardsQueueSet::enqueue_completed_buffer(BufferNode* node) { assert(_collecting, "precondition"); - Atomic::add(buffer_size() - node->index(), &_entry_count); + Atomic::add(&_entry_count, buffer_size() - node->index()); _list.push(*node); update_tail(node); } @@ -139,7 +139,7 @@ const G1BufferNodeList from = src->take_all_completed_buffers(); if (from._head != NULL) { assert(from._tail != NULL, "invariant"); - Atomic::add(from._entry_count, &_entry_count); + Atomic::add(&_entry_count, from._entry_count); _list.prepend(*from._head, *from._tail); update_tail(from._tail); }
--- a/src/hotspot/share/gc/g1/g1RegionMarkStatsCache.inline.hpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1RegionMarkStatsCache.inline.hpp Mon Dec 02 12:01:40 2019 +0530 @@ -46,7 +46,7 @@ inline void G1RegionMarkStatsCache::evict(uint idx) { G1RegionMarkStatsCacheEntry* cur = &_cache[idx]; if (cur->_stats._live_words != 0) { - Atomic::add(cur->_stats._live_words, &_target[cur->_region_idx]._live_words); + Atomic::add(&_target[cur->_region_idx]._live_words, cur->_stats._live_words); } cur->clear(); }
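As above, only the Atomic::add argument order changes here; the eviction logic itself is untouched. For reference, a standalone sketch of that eviction pattern, folding a per-worker cache entry into shared per-region totals with one atomic addition (std::atomic standing in for HotSpot's Atomic; the types are hypothetical):

    #include <atomic>
    #include <cstddef>

    struct Entry { size_t region_idx; size_t live_words; };

    void evict(Entry& e, std::atomic<size_t>* target_live_words) {
      if (e.live_words != 0) {
        // One atomic add publishes this worker's contribution for the region.
        target_live_words[e.region_idx].fetch_add(e.live_words);
      }
      e.live_words = 0;  // corresponds to clear() in the real code
    }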
--- a/src/hotspot/share/gc/g1/g1RemSet.cpp Mon Dec 02 11:58:14 2019 +0530 +++ b/src/hotspot/share/gc/g1/g1RemSet.cpp Mon Dec 02 12:01:40 2019 +0530 @@ -49,6 +49,7 @@ #include "memory/resourceArea.hpp" #include "oops/access.inline.hpp" #include "oops/oop.inline.hpp" +#include "runtime/atomic.hpp" #include "runtime/os.hpp" #include "utilities/align.hpp" #include "utilities/globalDefinitions.hpp" @@ -177,9 +178,9 @@ return; } - bool marked_as_dirty = Atomic::cmpxchg(true, &_contains[region], false) == false; + bool marked_as_dirty = Atomic::cmpxchg(&_contains[region], false, true) == false; if (marked_as_dirty) { - uint allocated = Atomic::add(1u, &_cur_idx) - 1; + uint allocated = Atomic::add(&_cur_idx, 1u) - 1; _buffer[allocated] = region; } } @@ -196,30 +197,6 @@ } }; - // Creates a snapshot of the current _top values at the start of collection to - // filter out card marks that we do not want to scan. - class G1ResetScanTopClosure : public HeapRegionClosure { - G1RemSetScanState* _scan_state; - - public: - G1ResetScanTopClosure(G1RemSetScanState* scan_state) : _scan_state(scan_state) { } - - virtual bool do_heap_region(HeapRegion* r) { - uint hrm_index = r->hrm_index(); - if (r->in_collection_set()) { - // Young regions had their card table marked as young at their allocation; - // we need to make sure that these marks are cleared at the end of GC, *but* - // they should not be scanned for cards. - // So directly add them to the "all_dirty_regions". - // Same for regions in the (initial) collection set: they may contain cards from - // the log buffers, make sure they are cleaned. - _scan_state->add_all_dirty_region(hrm_index); - } else if (r->is_old_or_humongous_or_archive()) { - _scan_state->set_scan_top(hrm_index, r->top()); - } - return false; - } - }; // For each region, contains the maximum top() value to be used during this garbage // collection. Subsumes common checks like filtering out everything but old and // humongous regions outside the collection set. @@ -255,7 +232,7 @@ void work(uint worker_id) { while (_cur_dirty_regions < _regions->size()) { - uint next = Atomic::add(_chunk_length, &_cur_dirty_regions) - _chunk_length; + uint next = Atomic::add(&_cur_dirty_regions, _chunk_length) - _chunk_length; uint max = MIN2(next + _chunk_length, _regions->size()); for (uint i = next; i < max; i++) { @@ -328,16 +305,8 @@ } void prepare() { - for (size_t i = 0; i < _max_regions; i++) { - _collection_set_iter_state[i] = false; - clear_scan_top((uint)i); - } - _all_dirty_regions = new G1DirtyRegions(_max_regions); _next_dirty_regions = new G1DirtyRegions(_max_regions); - - G1ResetScanTopClosure cl(this); - G1CollectedHeap::heap()->heap_region_iterate(&cl); } void prepare_for_merge_heap_roots() { @@ -430,6 +399,10 @@ } while (cur != start_pos); } + void reset_region_claim(uint region_idx) { + _collection_set_iter_state[region_idx] = false; + } + // Attempt to claim the given region in the collection set for iteration. Returns true // if this call caused the transition from Unclaimed to Claimed. 
inline bool claim_collection_set_region(uint region) { @@ -437,7 +410,7 @@ if (_collection_set_iter_state[region]) { return false; } - return !Atomic::cmpxchg(true, &_collection_set_iter_state[region], false); + return !Atomic::cmpxchg(&_collection_set_iter_state[region], false, true); } bool has_cards_to_scan(uint region) { @@ -447,7 +420,7 @@ uint claim_cards_to_scan(uint region, uint increment) { assert(region < _max_regions, "Tried to access invalid region %u", region); - return Atomic::add(increment, &_card_table_scan_state[region]) - increment