changeset 59270:f051fadd0d06

Merge
author psadhukhan
date Mon, 02 Dec 2019 12:01:40 +0530
parents 7637e77c4c8a 5028793d05ff
children fe5e931830e5
files src/jdk.zipfs/share/classes/jdk/nio/zipfs/JarFileSystem.java src/jdk.zipfs/share/classes/jdk/nio/zipfs/JarFileSystemProvider.java test/jdk/jdk/jfr/api/consumer/streaming/TestRepositoryMigration.java test/jdk/sun/security/tools/jarsigner/warnings/BadKeyUsageTest.java test/langtools/jdk/javadoc/doclet/testDocFiles/pkg/Test.java test/langtools/jdk/javadoc/doclet/testDocFiles/pkg/doc-files/test.txt
diffstat 869 files changed, 17882 insertions(+), 9590 deletions(-)
--- a/.hgtags	Mon Dec 02 11:58:14 2019 +0530
+++ b/.hgtags	Mon Dec 02 12:01:40 2019 +0530
@@ -597,3 +597,4 @@
 83810b7d12e7ff761ad3dd91f323a22dad96f108 jdk-14+22
 15936b142f86731afa4b1a2c0fe4a01e806c4944 jdk-14+23
 438337c846fb071900ddb6922bddf8b3e895a514 jdk-14+24
+17d242844fc9e7d18b3eac97426490a9c246119e jdk-14+25
--- a/make/autoconf/flags-cflags.m4	Mon Dec 02 11:58:14 2019 +0530
+++ b/make/autoconf/flags-cflags.m4	Mon Dec 02 12:01:40 2019 +0530
@@ -190,20 +190,7 @@
       WARNINGS_ENABLE_ALL_CXXFLAGS="$WARNINGS_ENABLE_ALL_CFLAGS $WARNINGS_ENABLE_ADDITIONAL_CXX"
 
       DISABLED_WARNINGS="unused-parameter unused"
-
-      # Repeate the check for the BUILD_CC and BUILD_CXX. Need to also reset
-      # CFLAGS since any target specific flags will likely not work with the
-      # build compiler
-      CC_OLD="$CC"
-      CXX_OLD="$CXX"
-      CC="$BUILD_CC"
-      CXX="$BUILD_CXX"
-      CFLAGS_OLD="$CFLAGS"
-      CFLAGS=""
       BUILD_CC_DISABLE_WARNING_PREFIX="-Wno-"
-      CC="$CC_OLD"
-      CXX="$CXX_OLD"
-      CFLAGS="$CFLAGS_OLD"
       ;;
 
     clang)
@@ -420,6 +407,17 @@
 
   FLAGS_SETUP_CFLAGS_CPU_DEP([TARGET])
 
+  # Repeat the check for the BUILD_CC and BUILD_CXX. Need to also reset CFLAGS
+  # since any target specific flags will likely not work with the build compiler.
+  CC_OLD="$CC"
+  CXX_OLD="$CXX"
+  CFLAGS_OLD="$CFLAGS"
+  CXXFLAGS_OLD="$CXXFLAGS"
+  CC="$BUILD_CC"
+  CXX="$BUILD_CXX"
+  CFLAGS=""
+  CXXFLAGS=""
+
   FLAGS_OS=$OPENJDK_BUILD_OS
   FLAGS_OS_TYPE=$OPENJDK_BUILD_OS_TYPE
   FLAGS_CPU=$OPENJDK_BUILD_CPU
@@ -430,6 +428,11 @@
   FLAGS_CPU_LEGACY_LIB=$OPENJDK_BUILD_CPU_LEGACY_LIB
 
   FLAGS_SETUP_CFLAGS_CPU_DEP([BUILD], [OPENJDK_BUILD_], [BUILD_])
+
+  CC="$CC_OLD"
+  CXX="$CXX_OLD"
+  CFLAGS="$CFLAGS_OLD"
+  CXXFLAGS="$CXXFLAGS_OLD"
 ])
 
 ################################################################################
@@ -529,6 +532,11 @@
   if test "x$TOOLCHAIN_TYPE" = xgcc; then
     TOOLCHAIN_CFLAGS_JVM="$TOOLCHAIN_CFLAGS_JVM -fcheck-new -fstack-protector"
     TOOLCHAIN_CFLAGS_JDK="-pipe -fstack-protector"
+    # Reduce lib size on s390x in the link step; this also needs special compile flags
+    if test "x$OPENJDK_TARGET_CPU" = xs390x; then
+      TOOLCHAIN_CFLAGS_JVM="$TOOLCHAIN_CFLAGS_JVM -ffunction-sections -fdata-sections"
+      TOOLCHAIN_CFLAGS_JDK="$TOOLCHAIN_CFLAGS_JDK -ffunction-sections -fdata-sections"
+    fi
     # technically NOT for CXX (but since this gives *worse* performance, use
     # no-strict-aliasing everywhere!)
     TOOLCHAIN_CFLAGS_JDK_CONLY="-fno-strict-aliasing"
--- a/make/autoconf/flags-ldflags.m4	Mon Dec 02 11:58:14 2019 +0530
+++ b/make/autoconf/flags-ldflags.m4	Mon Dec 02 12:01:40 2019 +0530
@@ -70,10 +70,14 @@
     fi
 
     # Add -z defs, to forbid undefined symbols in object files.
-    BASIC_LDFLAGS="$BASIC_LDFLAGS -Wl,-z,defs"
+    # add relro (mark relocations read only) for all libs
+    BASIC_LDFLAGS="$BASIC_LDFLAGS -Wl,-z,defs -Wl,-z,relro"
+    # s390x : remove unused code+data in link step
+    if test "x$OPENJDK_TARGET_CPU" = xs390x; then
+      BASIC_LDFLAGS="$BASIC_LDFLAGS -Wl,--gc-sections -Wl,--print-gc-sections"
+    fi
 
-    BASIC_LDFLAGS_JVM_ONLY="-Wl,-O1 -Wl,-z,relro"
-
+    BASIC_LDFLAGS_JVM_ONLY="-Wl,-O1"
 
   elif test "x$TOOLCHAIN_TYPE" = xclang; then
     BASIC_LDFLAGS_JVM_ONLY="-mno-omit-leaf-frame-pointer -mstack-alignment=16 \
@@ -120,9 +124,6 @@
     if test "x$OPENJDK_TARGET_OS" = xlinux; then
       if test x$DEBUG_LEVEL = xrelease; then
         DEBUGLEVEL_LDFLAGS_JDK_ONLY="$DEBUGLEVEL_LDFLAGS_JDK_ONLY -Wl,-O1"
-      else
-        # mark relocations read only on (fast/slow) debug builds
-        DEBUGLEVEL_LDFLAGS_JDK_ONLY="-Wl,-z,relro"
       fi
       if test x$DEBUG_LEVEL = xslowdebug; then
         # do relocations at load
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/data/cacerts/amazonrootca1	Mon Dec 02 12:01:40 2019 +0530
@@ -0,0 +1,27 @@
+Owner: CN=Amazon Root CA 1, O=Amazon, C=US
+Issuer: CN=Amazon Root CA 1, O=Amazon, C=US
+Serial number: 66c9fcf99bf8c0a39e2f0788a43e696365bca
+Valid from: Tue May 26 00:00:00 GMT 2015 until: Sun Jan 17 00:00:00 GMT 2038
+Signature algorithm name: SHA256withRSA
+Subject Public Key Algorithm: 2048-bit RSA key
+Version: 3
+-----BEGIN CERTIFICATE-----
+MIIDQTCCAimgAwIBAgITBmyfz5m/jAo54vB4ikPmljZbyjANBgkqhkiG9w0BAQsF
+ADA5MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6
+b24gUm9vdCBDQSAxMB4XDTE1MDUyNjAwMDAwMFoXDTM4MDExNzAwMDAwMFowOTEL
+MAkGA1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJv
+b3QgQ0EgMTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALJ4gHHKeNXj
+ca9HgFB0fW7Y14h29Jlo91ghYPl0hAEvrAIthtOgQ3pOsqTQNroBvo3bSMgHFzZM
+9O6II8c+6zf1tRn4SWiw3te5djgdYZ6k/oI2peVKVuRF4fn9tBb6dNqcmzU5L/qw
+IFAGbHrQgLKm+a/sRxmPUDgH3KKHOVj4utWp+UhnMJbulHheb4mjUcAwhmahRWa6
+VOujw5H5SNz/0egwLX0tdHA114gk957EWW67c4cX8jJGKLhD+rcdqsq08p8kDi1L
+93FcXmn/6pUCyziKrlA4b9v7LWIbxcceVOF34GfID5yHI9Y/QCB/IIDEgEw+OyQm
+jgSubJrIqg0CAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMC
+AYYwHQYDVR0OBBYEFIQYzIU07LwMlJQuCFmcx7IQTgoIMA0GCSqGSIb3DQEBCwUA
+A4IBAQCY8jdaQZChGsV2USggNiMOruYou6r4lK5IpDB/G/wkjUu0yKGX9rbxenDI
+U5PMCCjjmCXPI6T53iHTfIUJrU6adTrCC2qJeHZERxhlbI1Bjjt/msv0tadQ1wUs
+N+gDS63pYaACbvXy8MWy7Vu33PqUXHeeE6V/Uq2V8viTO96LXFvKWlJbYK8U90vv
+o/ufQJVtMVT8QtPHRh8jrdkPSHCa2XV4cdFyQzR1bldZwgJcJmApzyMZFo6IQ6XU
+5MsI+yMRQ+hDKXJioaldXgjUkK642M4UwtBV8ob2xJNDd2ZhwLnoQdeXeGADbkpy
+rqXRfboQnoZsG4q5WTP468SQvvG5
+-----END CERTIFICATE-----
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/data/cacerts/amazonrootca2	Mon Dec 02 12:01:40 2019 +0530
@@ -0,0 +1,38 @@
+Owner: CN=Amazon Root CA 2, O=Amazon, C=US
+Issuer: CN=Amazon Root CA 2, O=Amazon, C=US
+Serial number: 66c9fd29635869f0a0fe58678f85b26bb8a37
+Valid from: Tue May 26 00:00:00 GMT 2015 until: Sat May 26 00:00:00 GMT 2040
+Signature algorithm name: SHA384withRSA
+Subject Public Key Algorithm: 4096-bit RSA key
+Version: 3
+-----BEGIN CERTIFICATE-----
+MIIFQTCCAymgAwIBAgITBmyf0pY1hp8KD+WGePhbJruKNzANBgkqhkiG9w0BAQwF
+ADA5MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6
+b24gUm9vdCBDQSAyMB4XDTE1MDUyNjAwMDAwMFoXDTQwMDUyNjAwMDAwMFowOTEL
+MAkGA1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJv
+b3QgQ0EgMjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK2Wny2cSkxK
+gXlRmeyKy2tgURO8TW0G/LAIjd0ZEGrHJgw12MBvIITplLGbhQPDW9tK6Mj4kHbZ
+W0/jTOgGNk3Mmqw9DJArktQGGWCsN0R5hYGCrVo34A3MnaZMUnbqQ523BNFQ9lXg
+1dKmSYXpN+nKfq5clU1Imj+uIFptiJXZNLhSGkOQsL9sBbm2eLfq0OQ6PBJTYv9K
+8nu+NQWpEjTj82R0Yiw9AElaKP4yRLuH3WUnAnE72kr3H9rN9yFVkE8P7K6C4Z9r
+2UXTu/Bfh+08LDmG2j/e7HJV63mjrdvdfLC6HM783k81ds8P+HgfajZRRidhW+me
+z/CiVX18JYpvL7TFz4QuK/0NURBs+18bvBt+xa47mAExkv8LV/SasrlX6avvDXbR
+8O70zoan4G7ptGmh32n2M8ZpLpcTnqWHsFcQgTfJU7O7f/aS0ZzQGPSSbtqDT6Zj
+mUyl+17vIWR6IF9sZIUVyzfpYgwLKhbcAS4y2j5L9Z469hdAlO+ekQiG+r5jqFoz
+7Mt0Q5X5bGlSNscpb/xVA1wf+5+9R+vnSUeVC06JIglJ4PVhHvG/LopyboBZ/1c6
++XUyo05f7O0oYtlNc/LMgRdg7c3r3NunysV+Ar3yVAhU/bQtCSwXVEqY0VThUWcI
+0u1ufm8/0i2BWSlmy5A5lREedCf+3euvAgMBAAGjQjBAMA8GA1UdEwEB/wQFMAMB
+Af8wDgYDVR0PAQH/BAQDAgGGMB0GA1UdDgQWBBSwDPBMMPQFWAJI/TPlUq9LhONm
+UjANBgkqhkiG9w0BAQwFAAOCAgEAqqiAjw54o+Ci1M3m9Zh6O+oAA7CXDpO8Wqj2
+LIxyh6mx/H9z/WNxeKWHWc8w4Q0QshNabYL1auaAn6AFC2jkR2vHat+2/XcycuUY
++gn0oJMsXdKMdYV2ZZAMA3m3MSNjrXiDCYZohMr/+c8mmpJ5581LxedhpxfL86kS
+k5Nrp+gvU5LEYFiwzAJRGFuFjWJZY7attN6a+yb3ACfAXVU3dJnJUH/jWS5E4ywl
+7uxMMne0nxrpS10gxdr9HIcWxkPo1LsmmkVwXqkLN1PiRnsn/eBG8om3zEK2yygm
+btmlyTrIQRNg91CMFa6ybRoVGld45pIq2WWQgj9sAq+uEjonljYE1x2igGOpm/Hl
+urR8FLBOybEfdF849lHqm/osohHUqS0nGkWxr7JOcQ3AWEbWaQbLU8uz/mtBzUF+
+fUwPfHJ5elnNXkoOrJupmHN5fLT0zLm4BwyydFy4x2+IoZCn9Kr5v2c69BoVYh63
+n749sSmvZ6ES8lgQGVMDMBu4Gon2nL2XA46jCfMdiyHxtN/kHNGfZQIG6lzWE7OE
+76KlXIx3KadowGuuQNKotOrN8I1LOJwZmhsoVLiJkO/KdYE+HvJkJMcYr07/R54H
+9jVlpNMKVv/1F2Rs76giJUmTtt8AF9pYfl3uxRuw0dFfIRDH+fO6AgonB8Xx1sfT
+4PsJYGw=
+-----END CERTIFICATE-----
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/data/cacerts/amazonrootca3	Mon Dec 02 12:01:40 2019 +0530
@@ -0,0 +1,19 @@
+Owner: CN=Amazon Root CA 3, O=Amazon, C=US
+Issuer: CN=Amazon Root CA 3, O=Amazon, C=US
+Serial number: 66c9fd5749736663f3b0b9ad9e89e7603f24a
+Valid from: Tue May 26 00:00:00 GMT 2015 until: Sat May 26 00:00:00 GMT 2040
+Signature algorithm name: SHA256withECDSA
+Subject Public Key Algorithm: 256-bit EC key
+Version: 3
+-----BEGIN CERTIFICATE-----
+MIIBtjCCAVugAwIBAgITBmyf1XSXNmY/Owua2eiedgPySjAKBggqhkjOPQQDAjA5
+MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6b24g
+Um9vdCBDQSAzMB4XDTE1MDUyNjAwMDAwMFoXDTQwMDUyNjAwMDAwMFowOTELMAkG
+A1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJvb3Qg
+Q0EgMzBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABCmXp8ZBf8ANm+gBG1bG8lKl
+ui2yEujSLtf6ycXYqm0fc4E7O5hrOXwzpcVOho6AF2hiRVd9RFgdszflZwjrZt6j
+QjBAMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgGGMB0GA1UdDgQWBBSr
+ttvXBp43rDCGB5Fwx5zEGbF4wDAKBggqhkjOPQQDAgNJADBGAiEA4IWSoxe3jfkr
+BqWTrBqYaGFy+uGh0PsceGCmQ5nFuMQCIQCcAu/xlJyzlvnrxir4tiz+OpAUFteM
+YyRIHN8wfdVoOw==
+-----END CERTIFICATE-----
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/data/cacerts/amazonrootca4	Mon Dec 02 12:01:40 2019 +0530
@@ -0,0 +1,20 @@
+Owner: CN=Amazon Root CA 4, O=Amazon, C=US
+Issuer: CN=Amazon Root CA 4, O=Amazon, C=US
+Serial number: 66c9fd7c1bb104c2943e5717b7b2cc81ac10e
+Valid from: Tue May 26 00:00:00 GMT 2015 until: Sat May 26 00:00:00 GMT 2040
+Signature algorithm name: SHA384withECDSA
+Subject Public Key Algorithm: 384-bit EC key
+Version: 3
+-----BEGIN CERTIFICATE-----
+MIIB8jCCAXigAwIBAgITBmyf18G7EEwpQ+Vxe3ssyBrBDjAKBggqhkjOPQQDAzA5
+MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6b24g
+Um9vdCBDQSA0MB4XDTE1MDUyNjAwMDAwMFoXDTQwMDUyNjAwMDAwMFowOTELMAkG
+A1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJvb3Qg
+Q0EgNDB2MBAGByqGSM49AgEGBSuBBAAiA2IABNKrijdPo1MN/sGKe0uoe0ZLY7Bi
+9i0b2whxIdIA6GO9mif78DluXeo9pcmBqqNbIJhFXRbb/egQbeOc4OO9X4Ri83Bk
+M6DLJC9wuoihKqB1+IGuYgbEgds5bimwHvouXKNCMEAwDwYDVR0TAQH/BAUwAwEB
+/zAOBgNVHQ8BAf8EBAMCAYYwHQYDVR0OBBYEFNPsxzplbszh2naaVvuc84ZtV+WB
+MAoGCCqGSM49BAMDA2gAMGUCMDqLIfG9fhGt0O9Yli/W651+kI0rz2ZVwyzjKKlw
+CkcO8DdZEv8tmZQoTipPNU0zWgIxAOp1AE47xDqUEpHJWEadIRNyp4iciuRMStuW
+1KyLa2tJElMzrdfkviT8tQp21KW8EA==
+-----END CERTIFICATE-----
--- a/src/hotspot/cpu/aarch64/aarch64.ad	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/aarch64/aarch64.ad	Mon Dec 02 12:01:40 2019 +0530
@@ -5639,7 +5639,6 @@
 operand cmpOpEqNe()
 %{
   match(Bool);
-  match(CmpOp);
   op_cost(0);
   predicate(n->as_Bool()->_test._test == BoolTest::ne
             || n->as_Bool()->_test._test == BoolTest::eq);
@@ -5663,7 +5662,6 @@
 operand cmpOpLtGe()
 %{
   match(Bool);
-  match(CmpOp);
   op_cost(0);
 
   predicate(n->as_Bool()->_test._test == BoolTest::lt
@@ -5688,7 +5686,6 @@
 operand cmpOpUEqNeLtGe()
 %{
   match(Bool);
-  match(CmpOp);
   op_cost(0);
 
   predicate(n->as_Bool()->_test._test == BoolTest::eq
--- a/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -144,7 +144,7 @@
   const size_t max_address_offset_bits = 44; // 16TB
   const size_t address_offset = ZUtils::round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
   const size_t address_offset_bits = log2_intptr(address_offset);
-  return MIN2(MAX2(address_offset_bits, min_address_offset_bits), max_address_offset_bits);
+  return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits);
 }
 
 size_t ZPlatformAddressMetadataShift() {
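
For context on the zGlobals hunks in this changeset: clamp(value, min, max) is equivalent to the MIN2(MAX2(value, min), max) pattern it replaces. A minimal, self-contained sketch of that equivalence, using illustrative stand-ins for the HotSpot utilities (not the real templates):

#include <cassert>
#include <cstddef>

// Illustrative stand-ins for HotSpot's MIN2/MAX2/clamp utilities.
template <typename T> T MIN2(T a, T b) { return a < b ? a : b; }
template <typename T> T MAX2(T a, T b) { return a > b ? a : b; }
template <typename T> T clamp(T value, T min, T max) {
  assert(min <= max);                  // degenerate ranges are a caller error
  return MIN2(MAX2(value, min), max);  // same result as the replaced pattern
}

int main() {
  const size_t min_bits = 42, max_bits = 44;
  assert(clamp((size_t)40, min_bits, max_bits) == 42);  // below range -> min
  assert(clamp((size_t)43, min_bits, max_bits) == 43);  // inside range -> unchanged
  assert(clamp((size_t)50, min_bits, max_bits) == 44);  // above range -> max
  return 0;
}
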
--- a/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -30,6 +30,7 @@
 #include "nativeInst_aarch64.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/handles.hpp"
+#include "runtime/orderAccess.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "utilities/ostream.hpp"
--- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -443,7 +443,6 @@
     Register obj = r0;
     Register mdp = r1;
     Register tmp = r2;
-    __ ldr(mdp, Address(rmethod, Method::method_data_offset()));
     __ profile_return_type(mdp, obj, tmp);
   }
 
@@ -1633,13 +1632,8 @@
   __ mov(rscratch2, true);
   __ strb(rscratch2, do_not_unlock_if_synchronized);
 
-  Label no_mdp;
   Register mdp = r3;
-  __ ldr(mdp, Address(rmethod, Method::method_data_offset()));
-  __ cbz(mdp, no_mdp);
-  __ add(mdp, mdp, in_bytes(MethodData::data_offset()));
   __ profile_parameters_type(mdp, r1, r2);
-  __ bind(no_mdp);
 
   // increment invocation count & check for overflow
   Label invocation_counter_overflow;
--- a/src/hotspot/cpu/arm/arm.ad	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/arm/arm.ad	Mon Dec 02 12:01:40 2019 +0530
@@ -2204,6 +2204,30 @@
   interface(REG_INTER);
 %}
 
+operand R8RegP() %{
+  constraint(ALLOC_IN_RC(R8_regP));
+  match(iRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand R9RegP() %{
+  constraint(ALLOC_IN_RC(R9_regP));
+  match(iRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand R12RegP() %{
+  constraint(ALLOC_IN_RC(R12_regP));
+  match(iRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 operand R2RegP() %{
   constraint(ALLOC_IN_RC(R2_regP));
   match(iRegP);
@@ -2236,6 +2260,14 @@
   interface(REG_INTER);
 %}
 
+operand SPRegP() %{
+  constraint(ALLOC_IN_RC(SP_regP));
+  match(iRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 operand LRRegP() %{
   constraint(ALLOC_IN_RC(LR_regP));
   match(iRegP);
--- a/src/hotspot/cpu/arm/arm_32.ad	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/arm/arm_32.ad	Mon Dec 02 12:01:40 2019 +0530
@@ -232,11 +232,15 @@
 reg_class R1_regP(R_R1);
 reg_class R2_regP(R_R2);
 reg_class R4_regP(R_R4);
+reg_class R8_regP(R_R8);
+reg_class R9_regP(R_R9);
+reg_class R12_regP(R_R12);
 reg_class Rexception_regP(R_Rexception_obj);
 reg_class Ricklass_regP(R_Ricklass);
 reg_class Rmethod_regP(R_Rmethod);
 reg_class Rthread_regP(R_Rthread);
 reg_class IP_regP(R_R12);
+reg_class SP_regP(R_R13);
 reg_class LR_regP(R_R14);
 
 reg_class FP_regP(R_R11);
--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -581,6 +581,7 @@
         base_reg = Rtemp;
         __ str(from_lo, Address(Rtemp));
         if (patch != NULL) {
+          __ nop(); // see comment before patching_epilog for 2nd str
           patching_epilog(patch, lir_patch_low, base_reg, info);
           patch = new PatchingStub(_masm, PatchingStub::access_field_id);
           patch_code = lir_patch_high;
@@ -589,6 +590,7 @@
       } else if (base_reg == from_lo) {
         __ str(from_hi, as_Address_hi(to_addr));
         if (patch != NULL) {
+          __ nop(); // see comment before patching_epilog for 2nd str
           patching_epilog(patch, lir_patch_high, base_reg, info);
           patch = new PatchingStub(_masm, PatchingStub::access_field_id);
           patch_code = lir_patch_low;
@@ -597,6 +599,7 @@
       } else {
         __ str(from_lo, as_Address_lo(to_addr));
         if (patch != NULL) {
+          __ nop(); // see comment before patching_epilog for 2nd str
           patching_epilog(patch, lir_patch_low, base_reg, info);
           patch = new PatchingStub(_masm, PatchingStub::access_field_id);
           patch_code = lir_patch_high;
@@ -640,7 +643,7 @@
   }
 
   if (patch != NULL) {
-    // Offset embeedded into LDR/STR instruction may appear not enough
+    // Offset embedded into LDR/STR instruction may appear not enough
     // to address a field. So, provide a space for one more instruction
     // that will deal with larger offsets.
     __ nop();
@@ -791,6 +794,7 @@
         base_reg = Rtemp;
         __ ldr(to_lo, Address(Rtemp));
         if (patch != NULL) {
+          __ nop(); // see comment before patching_epilog for 2nd ldr
           patching_epilog(patch, lir_patch_low, base_reg, info);
           patch = new PatchingStub(_masm, PatchingStub::access_field_id);
           patch_code = lir_patch_high;
@@ -799,6 +803,7 @@
       } else if (base_reg == to_lo) {
         __ ldr(to_hi, as_Address_hi(addr));
         if (patch != NULL) {
+          __ nop(); // see comment before patching_epilog for 2nd ldr
           patching_epilog(patch, lir_patch_high, base_reg, info);
           patch = new PatchingStub(_masm, PatchingStub::access_field_id);
           patch_code = lir_patch_low;
@@ -807,6 +812,7 @@
       } else {
         __ ldr(to_lo, as_Address_lo(addr));
         if (patch != NULL) {
+          __ nop(); // see comment before patching_epilog for 2nd ldr
           patching_epilog(patch, lir_patch_low, base_reg, info);
           patch = new PatchingStub(_masm, PatchingStub::access_field_id);
           patch_code = lir_patch_high;
@@ -846,7 +852,7 @@
   }
 
   if (patch != NULL) {
-    // Offset embeedded into LDR/STR instruction may appear not enough
+    // Offset embedded into LDR/STR instruction may appear not enough
     // to address a field. So, provide a space for one more instruction
     // that will deal with larger offsets.
     __ nop();
--- a/src/hotspot/cpu/arm/nativeInst_arm_32.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/arm/nativeInst_arm_32.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -28,6 +28,7 @@
 #include "asm/macroAssembler.hpp"
 #include "code/codeCache.hpp"
 #include "runtime/icache.hpp"
+#include "runtime/orderAccess.hpp"
 #include "runtime/os.hpp"
 #include "runtime/thread.hpp"
 #include "register_arm.hpp"
--- a/src/hotspot/cpu/arm/relocInfo_arm.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/arm/relocInfo_arm.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -29,7 +29,6 @@
 #include "nativeInst_arm.hpp"
 #include "oops/compressedOops.inline.hpp"
 #include "oops/oop.hpp"
-#include "runtime/orderAccess.hpp"
 #include "runtime/safepoint.hpp"
 
 void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
--- a/src/hotspot/cpu/arm/stubGenerator_arm.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/arm/stubGenerator_arm.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -437,7 +437,8 @@
   // for which we do not support MP and so membars are not necessary. This ARMv5 code will
   // be removed in the future.
 
-  // Support for jint Atomic::add(jint add_value, volatile jint *dest)
+  // Implementation of atomic_add(jint add_value, volatile jint* dest)
+  // used by Atomic::add(volatile jint* dest, jint add_value)
   //
   // Arguments :
   //
@@ -487,7 +488,8 @@
     return start;
   }
 
-  // Support for jint Atomic::xchg(jint exchange_value, volatile jint *dest)
+  // Implementation of jint atomic_xchg(jint exchange_value, volatile jint* dest)
+  // used by Atomic::xchg(volatile jint* dest, jint exchange_value)
   //
   // Arguments :
   //
@@ -535,7 +537,8 @@
     return start;
   }
 
-  // Support for jint Atomic::cmpxchg(jint exchange_value, volatile jint *dest, jint compare_value)
+  // Implementation of jint atomic_cmpxchg(jint exchange_value, volatile jint *dest, jint compare_value)
+  // used by Atomic::cmpxchg(volatile jint *dest, jint compare_value, jint exchange_value)
   //
   // Arguments :
   //
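
The comment rewrites above (and the matching ones in the sparc and x86 stub generators below) track the new HotSpot Atomic parameter order, in which the destination pointer comes first. A minimal sketch of the convention, using std::atomic as a stand-in for the HotSpot-internal Atomic class (the wrappers below are illustrative, not the real implementation):

#include <atomic>
#include <cassert>

// Stand-ins showing the argument order the updated comments refer to:
//   Atomic::add(dest, add_value)                          -> returns the new value
//   Atomic::xchg(dest, exchange_value)                    -> returns the old value
//   Atomic::cmpxchg(dest, compare_value, exchange_value)  -> returns the observed value
namespace Atomic {
  template <typename T> T add(std::atomic<T>* dest, T add_value) {
    return dest->fetch_add(add_value) + add_value;
  }
  template <typename T> T xchg(std::atomic<T>* dest, T exchange_value) {
    return dest->exchange(exchange_value);
  }
  template <typename T> T cmpxchg(std::atomic<T>* dest, T compare_value, T exchange_value) {
    dest->compare_exchange_strong(compare_value, exchange_value);
    return compare_value;  // old value on success, current value on failure
  }
}

int main() {
  std::atomic<int> v(0);
  assert(Atomic::add(&v, 5) == 5);                         // 0 + 5
  assert(Atomic::xchg(&v, 7) == 5);                        // old value returned
  assert(Atomic::cmpxchg(&v, 7, 9) == 7 && v.load() == 9); // CAS succeeds
  assert(Atomic::cmpxchg(&v, 7, 1) == 9 && v.load() == 9); // CAS fails, value unchanged
  return 0;
}
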
--- a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -883,7 +883,7 @@
     //
     // markWord displaced_header = obj->mark().set_unlocked();
     // monitor->lock()->set_displaced_header(displaced_header);
-    // if (Atomic::cmpxchg(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
+    // if (Atomic::cmpxchg(/*addr*/obj->mark_addr(), /*cmp*/displaced_header, /*ex=*/monitor) == displaced_header) {
     //   // We stored the monitor address into the object's mark word.
     // } else if (THREAD->is_lock_owned((address)displaced_header))
     //   // Simple recursive case.
@@ -921,7 +921,7 @@
     std(displaced_header, BasicObjectLock::lock_offset_in_bytes() +
         BasicLock::displaced_header_offset_in_bytes(), monitor);
 
-    // if (Atomic::cmpxchg(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
+    // if (Atomic::cmpxchg(/*addr*/obj->mark_addr(), /*cmp*/displaced_header, /*ex=*/monitor) == displaced_header) {
 
     // Store stack address of the BasicObjectLock (this is monitor) into object.
     addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());
@@ -997,7 +997,7 @@
     // if ((displaced_header = monitor->displaced_header()) == NULL) {
     //   // Recursive unlock. Mark the monitor unlocked by setting the object field to NULL.
     //   monitor->set_obj(NULL);
-    // } else if (Atomic::cmpxchg(displaced_header, obj->mark_addr(), monitor) == monitor) {
+    // } else if (Atomic::cmpxchg(obj->mark_addr(), monitor, displaced_header) == monitor) {
     //   // We swapped the unlocked mark in displaced_header into the object's mark word.
     //   monitor->set_obj(NULL);
     // } else {
@@ -1030,7 +1030,7 @@
     cmpdi(CCR0, displaced_header, 0);
     beq(CCR0, free_slot); // recursive unlock
 
-    // } else if (Atomic::cmpxchg(displaced_header, obj->mark_addr(), monitor) == monitor) {
+    // } else if (Atomic::cmpxchg(obj->mark_addr(), monitor, displaced_header) == monitor) {
     //   // We swapped the unlocked mark in displaced_header into the object's mark word.
     //   monitor->set_obj(NULL);
 
--- a/src/hotspot/cpu/ppc/nativeInst_ppc.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/ppc/nativeInst_ppc.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -374,7 +374,7 @@
   // Finally patch out the jump.
   volatile juint *jump_addr = (volatile juint*)instr_addr;
   // Release not needed because caller uses invalidate_range after copying the remaining bytes.
-  //OrderAccess::release_store(jump_addr, *((juint*)code_buffer));
+  //Atomic::release_store(jump_addr, *((juint*)code_buffer));
   *jump_addr = *((juint*)code_buffer); // atomically store code over branch instruction
   ICache::ppc64_flush_icache_bytes(instr_addr, NativeGeneralJump::instruction_size);
 }
--- a/src/hotspot/cpu/s390/interp_masm_s390.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/s390/interp_masm_s390.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -976,7 +976,7 @@
   //
   // markWord displaced_header = obj->mark().set_unlocked();
   // monitor->lock()->set_displaced_header(displaced_header);
-  // if (Atomic::cmpxchg(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
+  // if (Atomic::cmpxchg(/*addr*/obj->mark_addr(), /*cmp*/displaced_header, /*ex=*/monitor) == displaced_header) {
   //   // We stored the monitor address into the object's mark word.
   // } else if (THREAD->is_lock_owned((address)displaced_header))
   //   // Simple recursive case.
@@ -1011,7 +1011,7 @@
   z_stg(displaced_header, BasicObjectLock::lock_offset_in_bytes() +
                           BasicLock::displaced_header_offset_in_bytes(), monitor);
 
-  // if (Atomic::cmpxchg(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
+  // if (Atomic::cmpxchg(/*addr*/obj->mark_addr(), /*cmp*/displaced_header, /*ex=*/monitor) == displaced_header) {
 
   // Store stack address of the BasicObjectLock (this is monitor) into object.
   add2reg(object_mark_addr, oopDesc::mark_offset_in_bytes(), object);
@@ -1082,7 +1082,7 @@
   // if ((displaced_header = monitor->displaced_header()) == NULL) {
   //   // Recursive unlock. Mark the monitor unlocked by setting the object field to NULL.
   //   monitor->set_obj(NULL);
-  // } else if (Atomic::cmpxchg(displaced_header, obj->mark_addr(), monitor) == monitor) {
+  // } else if (Atomic::cmpxchg(obj->mark_addr(), monitor, displaced_header) == monitor) {
   //   // We swapped the unlocked mark in displaced_header into the object's mark word.
   //   monitor->set_obj(NULL);
   // } else {
@@ -1123,7 +1123,7 @@
                                                       BasicLock::displaced_header_offset_in_bytes()));
   z_bre(done); // displaced_header == 0 -> goto done
 
-  // } else if (Atomic::cmpxchg(displaced_header, obj->mark_addr(), monitor) == monitor) {
+  // } else if (Atomic::cmpxchg(obj->mark_addr(), monitor, displaced_header) == monitor) {
   //   // We swapped the unlocked mark in displaced_header into the object's mark word.
   //   monitor->set_obj(NULL);
 
--- a/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -585,7 +585,8 @@
     return start;
   }
 
-  // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest).
+  // Implementation of jint atomic_xchg(jint exchange_value, volatile jint* dest)
+  // used by Atomic::xchg(volatile jint* dest, jint exchange_value)
   //
   // Arguments:
   //
@@ -622,7 +623,8 @@
   }
 
 
-  // Support for jint Atomic::cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value)
+  // Implementation of jint atomic_cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value)
+  // used by Atomic::cmpxchg(volatile jint* dest, jint compare_value, jint exchange_value)
   //
   // Arguments:
   //
@@ -646,7 +648,8 @@
     return start;
   }
 
-  // Support for jlong Atomic::cmpxchg(jlong exchange_value, volatile jlong *dest, jlong compare_value)
+  // Implementation of jlong atomic_cmpxchg_long(jlong exchange_value, volatile jlong *dest, jlong compare_value)
+  // used by Atomic::cmpxchg(volatile jlong *dest, jlong compare_value, jlong exchange_value)
   //
   // Arguments:
   //
@@ -679,7 +682,8 @@
   }
 
 
-  // Support for jint Atomic::add(jint add_value, volatile jint* dest).
+  // Implementation of jint atomic_add(jint add_value, volatile jint* dest)
+  // used by Atomic::add(volatile jint* dest, jint add_value)
   //
   // Arguments:
   //
--- a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -327,24 +327,42 @@
 #endif
 }
 
+#ifdef _LP64
 void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
   BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
   if (bs_nm == NULL) {
     return;
   }
-#ifndef _LP64
-  ShouldNotReachHere();
-#else
   Label continuation;
-  Register thread = LP64_ONLY(r15_thread);
+  Register thread = r15_thread;
   Address disarmed_addr(thread, in_bytes(bs_nm->thread_disarmed_offset()));
   __ align(8);
   __ cmpl(disarmed_addr, 0);
   __ jcc(Assembler::equal, continuation);
   __ call(RuntimeAddress(StubRoutines::x86::method_entry_barrier()));
   __ bind(continuation);
+}
+#else
+void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
+  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
+  if (bs_nm == NULL) {
+    return;
+  }
+
+  Label continuation;
+
+  Register tmp = rdi;
+  __ push(tmp);
+  __ movptr(tmp, (intptr_t)bs_nm->disarmed_value_address());
+  Address disarmed_addr(tmp, 0);
+  __ align(4);
+  __ cmpl(disarmed_addr, 0);
+  __ pop(tmp);
+  __ jcc(Assembler::equal, continuation);
+  __ call(RuntimeAddress(StubRoutines::x86::method_entry_barrier()));
+  __ bind(continuation);
+}
 #endif
-}
 
 void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
   BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod();
--- a/src/hotspot/cpu/x86/gc/shared/barrierSetNMethod_x86.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/gc/shared/barrierSetNMethod_x86.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,6 +35,7 @@
 
 class NativeNMethodCmpBarrier: public NativeInstruction {
 public:
+#ifdef _LP64
   enum Intel_specific_constants {
     instruction_code        = 0x81,
     instruction_size        = 8,
@@ -42,6 +43,14 @@
     instruction_rex_prefix  = Assembler::REX | Assembler::REX_B,
     instruction_modrm       = 0x7f  // [r15 + offset]
   };
+#else
+  enum Intel_specific_constants {
+    instruction_code        = 0x81,
+    instruction_size        = 7,
+    imm_offset              = 2,
+    instruction_modrm       = 0x3f  // [rdi]
+  };
+#endif
 
   address instruction_address() const { return addr_at(0); }
   address immediate_address() const { return addr_at(imm_offset); }
@@ -51,6 +60,7 @@
   void verify() const;
 };
 
+#ifdef _LP64
 void NativeNMethodCmpBarrier::verify() const {
   if (((uintptr_t) instruction_address()) & 0x7) {
     fatal("Not properly aligned");
@@ -77,6 +87,27 @@
     fatal("not a cmp barrier");
   }
 }
+#else
+void NativeNMethodCmpBarrier::verify() const {
+  if (((uintptr_t) instruction_address()) & 0x3) {
+    fatal("Not properly aligned");
+  }
+
+  int inst = ubyte_at(0);
+  if (inst != instruction_code) {
+    tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", p2i(instruction_address()),
+        inst);
+    fatal("not a cmp barrier");
+  }
+
+  int modrm = ubyte_at(1);
+  if (modrm != instruction_modrm) {
+    tty->print_cr("Addr: " INTPTR_FORMAT " mod/rm: 0x%x", p2i(instruction_address()),
+        modrm);
+    fatal("not a cmp barrier");
+  }
+}
+#endif // _LP64
 
 void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) {
   /*
@@ -127,7 +158,7 @@
 // NativeNMethodCmpBarrier::verify() will immediately complain when it does
 // not find the expected native instruction at this offset, which needs updating.
 // Note that this offset is invariant of PreserveFramePointer.
-static const int entry_barrier_offset = -19;
+static const int entry_barrier_offset = LP64_ONLY(-19) NOT_LP64(-18);
 
 static NativeNMethodCmpBarrier* native_nmethod_barrier(nmethod* nm) {
   address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset;
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -395,6 +395,52 @@
   __ block_comment("load_reference_barrier_native { ");
 }
 
+#ifdef _LP64
+void ShenandoahBarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
+  // Use default version
+  BarrierSetAssembler::c2i_entry_barrier(masm);
+}
+#else
+void ShenandoahBarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
+  BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod();
+  if (bs == NULL) {
+    return;
+  }
+
+  Label bad_call;
+  __ cmpptr(rbx, 0); // rbx contains the incoming method for c2i adapters.
+  __ jcc(Assembler::equal, bad_call);
+
+  Register tmp1 = rax;
+  Register tmp2 = rcx;
+
+  __ push(tmp1);
+  __ push(tmp2);
+
+  // Pointer chase to the method holder to find out if the method is concurrently unloading.
+  Label method_live;
+  __ load_method_holder_cld(tmp1, rbx);
+
+   // Is it a strong CLD?
+  __ cmpl(Address(tmp1, ClassLoaderData::keep_alive_offset()), 0);
+  __ jcc(Assembler::greater, method_live);
+
+   // Is it a weak but alive CLD?
+  __ movptr(tmp1, Address(tmp1, ClassLoaderData::holder_offset()));
+  __ resolve_weak_handle(tmp1, tmp2);
+  __ cmpptr(tmp1, 0);
+  __ jcc(Assembler::notEqual, method_live);
+  __ pop(tmp2);
+  __ pop(tmp1);
+
+  __ bind(bad_call);
+  __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
+  __ bind(method_live);
+  __ pop(tmp2);
+  __ pop(tmp1);
+}
+#endif
+
 void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) {
   if (ShenandoahStoreValEnqueueBarrier) {
     storeval_barrier_impl(masm, dst, tmp);
@@ -511,8 +557,12 @@
 
   // 3: apply keep-alive barrier if needed
   if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
-    const Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
+    __ push_IU_state();
+    Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
     assert_different_registers(dst, tmp1, tmp_thread);
+    if (!thread->is_valid()) {
+      thread = rdx;
+    }
     NOT_LP64(__ get_thread(thread));
     // Generate the SATB pre-barrier code to log the value of
     // the referent field in an SATB buffer.
@@ -523,6 +573,7 @@
                                  tmp1 /* tmp */,
                                  true /* tosca_live */,
                                  true /* expand_call */);
+    __ pop_IU_state();
   }
 }
 
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -86,6 +86,7 @@
                         Address dst, Register val, Register tmp1, Register tmp2);
   virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                              Register obj, Register tmp, Label& slowpath);
+  virtual void c2i_entry_barrier(MacroAssembler* masm);
 
   virtual void barrier_stubs_init();
 
--- a/src/hotspot/cpu/x86/gc/z/zGlobals_x86.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/gc/z/zGlobals_x86.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -144,7 +144,7 @@
   const size_t max_address_offset_bits = 44; // 16TB
   const size_t address_offset = ZUtils::round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
   const size_t address_offset_bits = log2_intptr(address_offset);
-  return MIN2(MAX2(address_offset_bits, min_address_offset_bits), max_address_offset_bits);
+  return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits);
 }
 
 size_t ZPlatformAddressMetadataShift() {
--- a/src/hotspot/cpu/x86/rdtsc_x86.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/rdtsc_x86.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "rdtsc_x86.hpp"
+#include "runtime/orderAccess.hpp"
 #include "runtime/thread.inline.hpp"
 #include "vm_version_ext_x86.hpp"
 
--- a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -975,6 +975,9 @@
 
   address c2i_entry = __ pc();
 
+  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->c2i_entry_barrier(masm);
+
   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
 
   __ flush();
@@ -1886,6 +1889,10 @@
   // -2 because return address is already present and so is saved rbp
   __ subptr(rsp, stack_size - 2*wordSize);
 
+
+  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->nmethod_entry_barrier(masm);
+
   // Frame is now completed as far as size and linkage.
   int frame_complete = ((intptr_t)__ pc()) - start;
 
@@ -1921,12 +1928,12 @@
   // if we load it once it is usable thru the entire wrapper
   const Register thread = rdi;
 
-  // We use rsi as the oop handle for the receiver/klass
-  // It is callee save so it survives the call to native
-
-  const Register oop_handle_reg = rsi;
-
-  __ get_thread(thread);
+   // We use rsi as the oop handle for the receiver/klass
+   // It is callee save so it survives the call to native
+
+   const Register oop_handle_reg = rsi;
+
+   __ get_thread(thread);
 
   if (is_critical_native && !Universe::heap()->supports_object_pinning()) {
     check_needs_gc_for_critical_native(masm, thread, stack_slots, total_c_args, total_in_args,
--- a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -27,6 +27,7 @@
 #include "asm/macroAssembler.inline.hpp"
 #include "gc/shared/barrierSet.hpp"
 #include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shared/barrierSetNMethod.hpp"
 #include "interpreter/interpreter.hpp"
 #include "memory/universe.hpp"
 #include "nativeInst_x86.hpp"
@@ -430,7 +431,8 @@
 
 
   //----------------------------------------------------------------------------------------------------
-  // Support for int32_t Atomic::xchg(int32_t exchange_value, volatile int32_t* dest)
+  // Implementation of int32_t atomic_xchg(int32_t exchange_value, volatile int32_t* dest)
+  // used by Atomic::xchg(volatile int32_t* dest, int32_t exchange_value)
   //
   // xchg exists as far back as 8086, lock needed for MP only
   // Stack layout immediately after call:
@@ -3662,6 +3664,68 @@
     __ ret(0);
   }
 
+  address generate_method_entry_barrier() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
+
+    Label deoptimize_label;
+
+    address start = __ pc();
+
+    __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing
+
+    BLOCK_COMMENT("Entry:");
+    __ enter(); // save rbp
+
+    // save rbx, because we want to use that value.
+    // We could do without it but then we depend on the number of slots used by pusha
+    __ push(rbx);
+
+    __ lea(rbx, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for rbx - this should be the return address
+
+    __ pusha();
+
+    // xmm0 and xmm1 may be used for passing float/double arguments
+    const int xmm_size = wordSize * 2;
+    const int xmm_spill_size = xmm_size * 2;
+    __ subptr(rsp, xmm_spill_size);
+    __ movdqu(Address(rsp, xmm_size * 1), xmm1);
+    __ movdqu(Address(rsp, xmm_size * 0), xmm0);
+
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast<int (*)(address*)>(BarrierSetNMethod::nmethod_stub_entry_barrier)), rbx);
+
+    __ movdqu(xmm0, Address(rsp, xmm_size * 0));
+    __ movdqu(xmm1, Address(rsp, xmm_size * 1));
+    __ addptr(rsp, xmm_spill_size);
+
+    __ cmpl(rax, 1); // 1 means deoptimize
+    __ jcc(Assembler::equal, deoptimize_label);
+
+    __ popa();
+    __ pop(rbx);
+
+    __ leave();
+
+    __ addptr(rsp, 1 * wordSize); // cookie
+    __ ret(0);
+
+    __ BIND(deoptimize_label);
+
+    __ popa();
+    __ pop(rbx);
+
+    __ leave();
+
+    // this can be taken out, but is good for verification purposes. getting a SIGSEGV
+    // here while still having a correct stack is valuable
+    __ testptr(rsp, Address(rsp, 0));
+
+    __ movptr(rsp, Address(rsp, 0)); // new rsp was written in the barrier
+    __ jmp(Address(rsp, -1 * wordSize)); // jmp target should be callers verified_entry_point
+
+    return start;
+  }
+
  public:
   // Information about frame layout at time of blocking runtime call.
   // Note that we only have to preserve callee-saved registers since
@@ -3958,6 +4022,11 @@
     StubRoutines::_safefetchN_entry           = StubRoutines::_safefetch32_entry;
     StubRoutines::_safefetchN_fault_pc        = StubRoutines::_safefetch32_fault_pc;
     StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc;
+
+    BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
+    if (bs_nm != NULL) {
+      StubRoutines::x86::_method_entry_barrier = generate_method_entry_barrier();
+    }
   }
 
 
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -552,7 +552,8 @@
     return start;
   }
 
-  // Support for jint atomic::xchg(jint exchange_value, volatile jint* dest)
+  // Implementation of jint atomic_xchg(jint exchange_value, volatile jint* dest)
+  // used by Atomic::xchg(volatile jint* dest, jint exchange_value)
   //
   // Arguments :
   //    c_rarg0: exchange_value
@@ -571,7 +572,8 @@
     return start;
   }
 
-  // Support for intptr_t atomic::xchg_long(jlong exchange_value, volatile jlong* dest)
+  // Implementation of intptr_t atomic_xchg(jlong exchange_value, volatile jlong* dest)
+  // used by Atomic::xchg(volatile jlong* dest, jlong exchange_value)
   //
   // Arguments :
   //    c_rarg0: exchange_value
@@ -668,7 +670,8 @@
     return start;
   }
 
-  // Support for jint atomic::add(jint add_value, volatile jint* dest)
+  // Implementation of jint atomic_add(jint add_value, volatile jint* dest)
+  // used by Atomic::add(volatile jint* dest, jint add_value)
   //
   // Arguments :
   //    c_rarg0: add_value
@@ -690,7 +693,8 @@
     return start;
   }
 
-  // Support for intptr_t atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest)
+  // Implementation of intptr_t atomic_add(intptr_t add_value, volatile intptr_t* dest)
+  // used by Atomic::add(volatile intptr_t* dest, intptr_t add_value)
   //
   // Arguments :
   //    c_rarg0: add_value
--- a/src/hotspot/cpu/x86/stubRoutines_x86.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/stubRoutines_x86.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -55,14 +55,8 @@
   static address _double_sign_mask;
   static address _double_sign_flip;
 
-  static address _method_entry_barrier;
-
  public:
 
-  static address method_entry_barrier() {
-    return _method_entry_barrier;
-  }
-
   static address get_previous_fp_entry() {
     return _get_previous_fp_entry;
   }
@@ -121,6 +115,8 @@
   //shuffle mask for big-endian 128-bit integers
   static address _counter_shuffle_mask_addr;
 
+  static address _method_entry_barrier;
+
   // masks and table for CRC32
   static uint64_t _crc_by128_masks[];
   static juint    _crc_table[];
@@ -221,6 +217,7 @@
   static address upper_word_mask_addr() { return _upper_word_mask_addr; }
   static address shuffle_byte_flip_mask_addr() { return _shuffle_byte_flip_mask_addr; }
   static address k256_addr()      { return _k256_adr; }
+  static address method_entry_barrier() { return _method_entry_barrier; }
 
   static address vector_short_to_byte_mask() {
     return _vector_short_to_byte_mask;
--- a/src/hotspot/cpu/x86/stubRoutines_x86_32.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/stubRoutines_x86_32.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,3 +32,5 @@
 // a description of how to extend it, see the stubRoutines.hpp file.
 
 address StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = NULL;
+address StubRoutines::x86::_method_entry_barrier = NULL;
+
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -367,26 +367,29 @@
     //
     intx saved_useavx = UseAVX;
     intx saved_usesse = UseSSE;
-    // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
-    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
-    __ movl(rax, 0x10000);
-    __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm
-    __ cmpl(rax, 0x10000);
-    __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
-    // check _cpuid_info.xem_xcr0_eax.bits.opmask
-    // check _cpuid_info.xem_xcr0_eax.bits.zmm512
-    // check _cpuid_info.xem_xcr0_eax.bits.zmm32
-    __ movl(rax, 0xE0);
-    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
-    __ cmpl(rax, 0xE0);
-    __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 
-    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
-    __ movl(rax, Address(rsi, 0));
-    __ cmpl(rax, 0x50654);              // If it is Skylake
-    __ jcc(Assembler::equal, legacy_setup);
     // If UseAVX is unitialized or is set by the user to include EVEX
     if (use_evex) {
+      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
+      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
+      __ movl(rax, 0x10000);
+      __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm
+      __ cmpl(rax, 0x10000);
+      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
+      // check _cpuid_info.xem_xcr0_eax.bits.opmask
+      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
+      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
+      __ movl(rax, 0xE0);
+      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
+      __ cmpl(rax, 0xE0);
+      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
+
+      if (FLAG_IS_DEFAULT(UseAVX)) {
+        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
+        __ movl(rax, Address(rsi, 0));
+        __ cmpl(rax, 0x50654);              // If it is Skylake
+        __ jcc(Assembler::equal, legacy_setup);
+      }
       // EVEX setup: run in lowest evex mode
       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
       UseAVX = 3;
@@ -455,27 +458,28 @@
     VM_Version::set_cpuinfo_cont_addr(__ pc());
     // Returns here after signal. Save xmm0 to check it later.
 
-    // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
-    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
-    __ movl(rax, 0x10000);
-    __ andl(rax, Address(rsi, 4));
-    __ cmpl(rax, 0x10000);
-    __ jcc(Assembler::notEqual, legacy_save_restore);
-    // check _cpuid_info.xem_xcr0_eax.bits.opmask
-    // check _cpuid_info.xem_xcr0_eax.bits.zmm512
-    // check _cpuid_info.xem_xcr0_eax.bits.zmm32
-    __ movl(rax, 0xE0);
-    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
-    __ cmpl(rax, 0xE0);
-    __ jcc(Assembler::notEqual, legacy_save_restore);
-
-    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
-    __ movl(rax, Address(rsi, 0));
-    __ cmpl(rax, 0x50654);              // If it is Skylake
-    __ jcc(Assembler::equal, legacy_save_restore);
-
     // If UseAVX is unitialized or is set by the user to include EVEX
     if (use_evex) {
+      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
+      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
+      __ movl(rax, 0x10000);
+      __ andl(rax, Address(rsi, 4));
+      __ cmpl(rax, 0x10000);
+      __ jcc(Assembler::notEqual, legacy_save_restore);
+      // check _cpuid_info.xem_xcr0_eax.bits.opmask
+      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
+      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
+      __ movl(rax, 0xE0);
+      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
+      __ cmpl(rax, 0xE0);
+      __ jcc(Assembler::notEqual, legacy_save_restore);
+
+      if (FLAG_IS_DEFAULT(UseAVX)) {
+        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
+        __ movl(rax, Address(rsi, 0));
+        __ cmpl(rax, 0x50654);              // If it is Skylake
+        __ jcc(Assembler::equal, legacy_save_restore);
+      }
       // EVEX check: run in lowest evex mode
       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
       UseAVX = 3;
--- a/src/hotspot/cpu/x86/x86_32.ad	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/x86_32.ad	Mon Dec 02 12:01:40 2019 +0530
@@ -3917,6 +3917,13 @@
   interface(REG_INTER);
 %}
 
+operand eDXRegP(eRegP reg) %{
+  constraint(ALLOC_IN_RC(edx_reg));
+  match(reg);
+  format %{ "EDX" %}
+  interface(REG_INTER);
+%}
+
 operand eSIRegP(eRegP reg) %{
   constraint(ALLOC_IN_RC(esi_reg));
   match(reg);
@@ -8977,7 +8984,7 @@
   %}
 
   ins_pipe(ialu_reg_reg);
-%} 
+%}
 
 //----------Long Instructions------------------------------------------------
 // Add Long Register with Register
--- a/src/hotspot/cpu/x86/x86_64.ad	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/x86/x86_64.ad	Mon Dec 02 12:01:40 2019 +0530
@@ -267,6 +267,9 @@
 // Singleton class for RSI pointer register
 reg_class ptr_rsi_reg(RSI, RSI_H);
 
+// Singleton class for RBP pointer register
+reg_class ptr_rbp_reg(RBP, RBP_H);
+
 // Singleton class for RDI pointer register
 reg_class ptr_rdi_reg(RDI, RDI_H);
 
@@ -3530,6 +3533,16 @@
   interface(REG_INTER);
 %}
 
+operand rbp_RegP()
+%{
+  constraint(ALLOC_IN_RC(ptr_rbp_reg));
+  match(RegP);
+  match(rRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 // Used in rep stosq
 operand rdi_RegP()
 %{
--- a/src/hotspot/cpu/zero/cppInterpreter_zero.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/cpu/zero/cppInterpreter_zero.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -38,13 +38,11 @@
 #include "prims/jvmtiExport.hpp"
 #include "prims/jvmtiThreadState.hpp"
 #include "runtime/arguments.hpp"
-#include "runtime/atomic.hpp"
 #include "runtime/deoptimization.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/interfaceSupport.inline.hpp"
 #include "runtime/jniHandles.inline.hpp"
-#include "runtime/orderAccess.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "runtime/synchronizer.hpp"
--- a/src/hotspot/os/aix/os_aix.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os/aix/os_aix.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -60,7 +60,6 @@
 #include "runtime/javaCalls.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/objectMonitor.hpp"
-#include "runtime/orderAccess.hpp"
 #include "runtime/os.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/perfMemory.hpp"
@@ -1084,7 +1083,7 @@
     if (now <= prev) {
       return prev;   // same or retrograde time;
     }
-    jlong obsv = Atomic::cmpxchg(now, &max_real_time, prev);
+    jlong obsv = Atomic::cmpxchg(&max_real_time, prev, now);
     assert(obsv >= prev, "invariant");   // Monotonicity
     // If the CAS succeeded then we're done and return "now".
     // If the CAS failed and the observed value "obsv" is >= now then
@@ -1794,7 +1793,7 @@
   for (;;) {
     for (int i = 0; i < NSIG + 1; i++) {
       jint n = pending_signals[i];
-      if (n > 0 && n == Atomic::cmpxchg(n - 1, &pending_signals[i], n)) {
+      if (n > 0 && n == Atomic::cmpxchg(&pending_signals[i], n, n - 1)) {
         return i;
       }
     }
--- a/src/hotspot/os/bsd/os_bsd.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os/bsd/os_bsd.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -51,7 +51,6 @@
 #include "runtime/javaCalls.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/objectMonitor.hpp"
-#include "runtime/orderAccess.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/perfMemory.hpp"
 #include "runtime/semaphore.hpp"
@@ -931,7 +930,7 @@
   if (now <= prev) {
     return prev;   // same or retrograde time;
   }
-  const uint64_t obsv = Atomic::cmpxchg(now, &Bsd::_max_abstime, prev);
+  const uint64_t obsv = Atomic::cmpxchg(&Bsd::_max_abstime, prev, now);
   assert(obsv >= prev, "invariant");   // Monotonicity
   // If the CAS succeeded then we're done and return "now".
   // If the CAS failed and the observed value "obsv" is >= now then
@@ -1834,7 +1833,7 @@
   for (;;) {
     for (int i = 0; i < NSIG + 1; i++) {
       jint n = pending_signals[i];
-      if (n > 0 && n == Atomic::cmpxchg(n - 1, &pending_signals[i], n)) {
+      if (n > 0 && n == Atomic::cmpxchg(&pending_signals[i], n, n - 1)) {
         return i;
       }
     }
@@ -1895,7 +1894,7 @@
   }
 
   char buf[PATH_MAX + 1];
-  int num = Atomic::add(1, &cnt);
+  int num = Atomic::add(&cnt, 1);
 
   snprintf(buf, PATH_MAX + 1, "%s/hs-vm-%d-%d",
            os::get_temp_directory(), os::current_process_id(), num);
@@ -3209,7 +3208,7 @@
 static volatile int next_processor_id = 0;
 
 static inline volatile int* get_apic_to_processor_mapping() {
-  volatile int* mapping = OrderAccess::load_acquire(&apic_to_processor_mapping);
+  volatile int* mapping = Atomic::load_acquire(&apic_to_processor_mapping);
   if (mapping == NULL) {
     // Calculate possible number space for APIC ids. This space is not necessarily
     // in the range [0, number_of_processors).
@@ -3238,9 +3237,9 @@
       mapping[i] = -1;
     }
 
-    if (!Atomic::replace_if_null(mapping, &apic_to_processor_mapping)) {
+    if (!Atomic::replace_if_null(&apic_to_processor_mapping, mapping)) {
       FREE_C_HEAP_ARRAY(int, mapping);
-      mapping = OrderAccess::load_acquire(&apic_to_processor_mapping);
+      mapping = Atomic::load_acquire(&apic_to_processor_mapping);
     }
   }
 
@@ -3264,12 +3263,14 @@
   int processor_id = Atomic::load(&mapping[apic_id]);
 
   while (processor_id < 0) {
-    if (Atomic::cmpxchg(-2, &mapping[apic_id], -1)) {
-      Atomic::store(Atomic::add(1, &next_processor_id) - 1, &mapping[apic_id]);
+    if (Atomic::cmpxchg(&mapping[apic_id], -1, -2) == -1) {
+      Atomic::store(&mapping[apic_id], Atomic::add(&next_processor_id, 1) - 1);
     }
     processor_id = Atomic::load(&mapping[apic_id]);
   }
 
+  assert(processor_id >= 0 && processor_id < os::processor_count(), "invalid processor id");
+
   return (uint)processor_id;
 }
 #endif
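
The same modernization shows up above as OrderAccess::load_acquire becoming Atomic::load_acquire, and Atomic::replace_if_null taking the destination first. A minimal stand-in sketch with std::atomic (illustrative only; the real accessors live in HotSpot's runtime code):

#include <atomic>
#include <cassert>

// Stand-ins for the accessors referenced in the hunks above:
//   Atomic::load_acquire(addr), Atomic::release_store(addr, value),
//   Atomic::replace_if_null(addr, value) -> true if the CAS from NULL succeeded.
namespace Atomic {
  template <typename T> T load_acquire(const std::atomic<T>* addr) {
    return addr->load(std::memory_order_acquire);
  }
  template <typename T> void release_store(std::atomic<T>* addr, T value) {
    addr->store(value, std::memory_order_release);
  }
  template <typename T> bool replace_if_null(std::atomic<T*>* addr, T* value) {
    T* expected = nullptr;
    return addr->compare_exchange_strong(expected, value);
  }
}

int main() {
  static std::atomic<int*> mapping(nullptr);
  static int table[4] = {0};
  if (!Atomic::replace_if_null(&mapping, table)) {
    // Another thread won the race: re-read the pointer it published.
    assert(Atomic::load_acquire(&mapping) != nullptr);
  }
  static std::atomic<int> ready(0);
  Atomic::release_store(&ready, 1);           // publish
  assert(Atomic::load_acquire(&ready) == 1);  // observe with acquire semantics
  assert(Atomic::load_acquire(&mapping) == table);
  return 0;
}
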
--- a/src/hotspot/os/linux/os_linux.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os/linux/os_linux.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -53,7 +53,6 @@
 #include "runtime/javaCalls.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/objectMonitor.hpp"
-#include "runtime/orderAccess.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/perfMemory.hpp"
 #include "runtime/sharedRuntime.hpp"
@@ -2752,7 +2751,7 @@
   for (;;) {
     for (int i = 0; i < NSIG + 1; i++) {
       jint n = pending_signals[i];
-      if (n > 0 && n == Atomic::cmpxchg(n - 1, &pending_signals[i], n)) {
+      if (n > 0 && n == Atomic::cmpxchg(&pending_signals[i], n, n - 1)) {
         return i;
       }
     }
@@ -2813,7 +2812,7 @@
   }
 
   char buf[PATH_MAX+1];
-  int num = Atomic::add(1, &cnt);
+  int num = Atomic::add(&cnt, 1);
 
   snprintf(buf, sizeof(buf), "%s/hs-vm-%d-%d",
            os::get_temp_directory(), os::current_process_id(), num);
--- a/src/hotspot/os/posix/os_posix.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os/posix/os_posix.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -30,6 +30,8 @@
 #include "runtime/frame.inline.hpp"
 #include "runtime/interfaceSupport.inline.hpp"
 #include "services/memTracker.hpp"
+#include "runtime/atomic.hpp"
+#include "runtime/orderAccess.hpp"
 #include "utilities/align.hpp"
 #include "utilities/events.hpp"
 #include "utilities/formatBuffer.hpp"
@@ -1900,7 +1902,7 @@
   // atomically decrement _event
   for (;;) {
     v = _event;
-    if (Atomic::cmpxchg(v - 1, &_event, v) == v) break;
+    if (Atomic::cmpxchg(&_event, v, v - 1) == v) break;
   }
   guarantee(v >= 0, "invariant");
 
@@ -1940,7 +1942,7 @@
   // atomically decrement _event
   for (;;) {
     v = _event;
-    if (Atomic::cmpxchg(v - 1, &_event, v) == v) break;
+    if (Atomic::cmpxchg(&_event, v, v - 1) == v) break;
   }
   guarantee(v >= 0, "invariant");
 
@@ -1998,7 +2000,7 @@
   // but only in the correctly written condition checking loops of ObjectMonitor,
   // Mutex/Monitor, Thread::muxAcquire and JavaThread::sleep
 
-  if (Atomic::xchg(1, &_event) >= 0) return;
+  if (Atomic::xchg(&_event, 1) >= 0) return;
 
   int status = pthread_mutex_lock(_mutex);
   assert_status(status == 0, status, "mutex_lock");
@@ -2046,7 +2048,7 @@
   // Return immediately if a permit is available.
   // We depend on Atomic::xchg() having full barrier semantics
   // since we are doing a lock-free update to _counter.
-  if (Atomic::xchg(0, &_counter) > 0) return;
+  if (Atomic::xchg(&_counter, 0) > 0) return;
 
   Thread* thread = Thread::current();
   assert(thread->is_Java_thread(), "Must be JavaThread");
--- a/src/hotspot/os/solaris/os_solaris.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os/solaris/os_solaris.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -1024,7 +1024,7 @@
   if (now <= prev) {
     return prev;   // same or retrograde time;
   }
-  const hrtime_t obsv = Atomic::cmpxchg(now, &max_hrtime, prev);
+  const hrtime_t obsv = Atomic::cmpxchg(&max_hrtime, prev, now);
   assert(obsv >= prev, "invariant");   // Monotonicity
   // If the CAS succeeded then we're done and return "now".
   // If the CAS failed and the observed value "obsv" is >= now then
@@ -1984,7 +1984,7 @@
   while (true) {
     for (int i = 0; i < Sigexit + 1; i++) {
       jint n = pending_signals[i];
-      if (n > 0 && n == Atomic::cmpxchg(n - 1, &pending_signals[i], n)) {
+      if (n > 0 && n == Atomic::cmpxchg(&pending_signals[i], n, n - 1)) {
         return i;
       }
     }
@@ -4710,7 +4710,7 @@
   int v;
   for (;;) {
     v = _Event;
-    if (Atomic::cmpxchg(v-1, &_Event, v) == v) break;
+    if (Atomic::cmpxchg(&_Event, v, v-1) == v) break;
   }
   guarantee(v >= 0, "invariant");
   if (v == 0) {
@@ -4748,7 +4748,7 @@
   int v;
   for (;;) {
     v = _Event;
-    if (Atomic::cmpxchg(v-1, &_Event, v) == v) break;
+    if (Atomic::cmpxchg(&_Event, v, v-1) == v) break;
   }
   guarantee(v >= 0, "invariant");
   if (v != 0) return OS_OK;
@@ -4797,7 +4797,7 @@
   // from the first park() call after an unpark() call which will help
   // shake out uses of park() and unpark() without condition variables.
 
-  if (Atomic::xchg(1, &_Event) >= 0) return;
+  if (Atomic::xchg(&_Event, 1) >= 0) return;
 
   // If the thread associated with the event was parked, wake it.
   // Wait for the thread assoc with the PlatformEvent to vacate.
@@ -4896,7 +4896,7 @@
   // Return immediately if a permit is available.
   // We depend on Atomic::xchg() having full barrier semantics
   // since we are doing a lock-free update to _counter.
-  if (Atomic::xchg(0, &_counter) > 0) return;
+  if (Atomic::xchg(&_counter, 0) > 0) return;
 
   // Optional fast-exit: Check interrupt before trying to wait
   Thread* thread = Thread::current();
--- a/src/hotspot/os/windows/osThread_windows.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os/windows/osThread_windows.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -23,7 +23,6 @@
  */
 
 // no precompiled headers
-#include "runtime/orderAccess.hpp"
 #include "runtime/os.hpp"
 #include "runtime/osThread.hpp"
 
--- a/src/hotspot/os/windows/os_windows.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os/windows/os_windows.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -2096,7 +2096,7 @@
   while (true) {
     for (int i = 0; i < NSIG + 1; i++) {
       jint n = pending_signals[i];
-      if (n > 0 && n == Atomic::cmpxchg(n - 1, &pending_signals[i], n)) {
+      if (n > 0 && n == Atomic::cmpxchg(&pending_signals[i], n, n - 1)) {
         return i;
       }
     }
@@ -3747,15 +3747,15 @@
     // The first thread that reached this point, initializes the critical section.
     if (!InitOnceExecuteOnce(&init_once_crit_sect, init_crit_sect_call, &crit_sect, NULL)) {
       warning("crit_sect initialization failed in %s: %d\n", __FILE__, __LINE__);
-    } else if (OrderAccess::load_acquire(&process_exiting) == 0) {
+    } else if (Atomic::load_acquire(&process_exiting) == 0) {
       if (what != EPT_THREAD) {
         // Atomically set process_exiting before the critical section
         // to increase the visibility between racing threads.
-        Atomic::cmpxchg(GetCurrentThreadId(), &process_exiting, (DWORD)0);
+        Atomic::cmpxchg(&process_exiting, (DWORD)0, GetCurrentThreadId());
       }
       EnterCriticalSection(&crit_sect);
 
-      if (what == EPT_THREAD && OrderAccess::load_acquire(&process_exiting) == 0) {
+      if (what == EPT_THREAD && Atomic::load_acquire(&process_exiting) == 0) {
         // Remove from the array those handles of the threads that have completed exiting.
         for (i = 0, j = 0; i < handle_count; ++i) {
           res = WaitForSingleObject(handles[i], 0 /* don't wait */);
@@ -3868,7 +3868,7 @@
     }
 
     if (!registered &&
-        OrderAccess::load_acquire(&process_exiting) != 0 &&
+        Atomic::load_acquire(&process_exiting) != 0 &&
         process_exiting != GetCurrentThreadId()) {
       // Some other thread is about to call exit(), so we don't let
       // the current unregistered thread proceed to exit() or _endthreadex()
@@ -5136,7 +5136,7 @@
   int v;
   for (;;) {
     v = _Event;
-    if (Atomic::cmpxchg(v-1, &_Event, v) == v) break;
+    if (Atomic::cmpxchg(&_Event, v, v-1) == v) break;
   }
   guarantee((v == 0) || (v == 1), "invariant");
   if (v != 0) return OS_OK;
@@ -5198,7 +5198,7 @@
   int v;
   for (;;) {
     v = _Event;
-    if (Atomic::cmpxchg(v-1, &_Event, v) == v) break;
+    if (Atomic::cmpxchg(&_Event, v, v-1) == v) break;
   }
   guarantee((v == 0) || (v == 1), "invariant");
   if (v != 0) return;
@@ -5236,7 +5236,7 @@
   // from the first park() call after an unpark() call which will help
   // shake out uses of park() and unpark() without condition variables.
 
-  if (Atomic::xchg(1, &_Event) >= 0) return;
+  if (Atomic::xchg(&_Event, 1) >= 0) return;
 
   ::SetEvent(_ParkHandle);
 }
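The os_windows hunks also show the second theme of this changeset: acquire loads move from OrderAccess::load_acquire to Atomic::load_acquire with unchanged semantics, which is why the runtime/orderAccess.hpp include becomes unnecessary in several files. A hedged caller-side sketch, using a hypothetical flag rather than process_exiting:

    static volatile DWORD exiting_flag = 0;  // hypothetical stand-in

    bool is_exit_in_progress() {
      // Was OrderAccess::load_acquire(&exiting_flag); the ordered load now
      // lives on Atomic.
      return Atomic::load_acquire(&exiting_flag) != 0;
    }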
--- a/src/hotspot/os/windows/threadCritical_windows.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os/windows/threadCritical_windows.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -56,7 +56,7 @@
 
   if (lock_owner != current_thread) {
     // Grab the lock before doing anything.
-    while (Atomic::cmpxchg(0, &lock_count, -1) != -1) {
+    while (Atomic::cmpxchg(&lock_count, -1, 0) != -1) {
       if (initialized) {
         DWORD ret = WaitForSingleObject(lock_event,  INFINITE);
         assert(ret == WAIT_OBJECT_0, "unexpected return value from WaitForSingleObject");
--- a/src/hotspot/os_cpu/aix_ppc/atomic_aix_ppc.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/aix_ppc/atomic_aix_ppc.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -30,6 +30,7 @@
 #error "Atomic currently only implemented for PPC64"
 #endif
 
+#include "orderAccess_aix_ppc.hpp"
 #include "utilities/debug.hpp"
 
 // Implementation of class atomic
@@ -95,13 +96,13 @@
 struct Atomic::PlatformAdd
   : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
 {
-  template<typename I, typename D>
-  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
+  template<typename D, typename I>
+  D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const;
 };
 
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value,
                                                atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(I));
   STATIC_ASSERT(4 == sizeof(D));
@@ -126,8 +127,8 @@
 
 
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value,
                                                atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(I));
   STATIC_ASSERT(8 == sizeof(D));
@@ -152,8 +153,8 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order order) const {
   // Note that xchg doesn't necessarily do an acquire
   // (see synchronizer.cpp).
@@ -191,8 +192,8 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(T));
   // Note that xchg doesn't necessarily do an acquire
@@ -231,9 +232,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<1>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(1 == sizeof(T));
 
@@ -301,9 +302,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(T));
 
@@ -351,9 +352,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(T));
 
@@ -399,4 +400,15 @@
   return old_value;
 }
 
+template<size_t byte_size>
+struct Atomic::PlatformOrderedLoad<byte_size, X_ACQUIRE> {
+  template <typename T>
+  T operator()(const volatile T* p) const {
+    T t = Atomic::load(p);
+    // Use twi-isync for load_acquire (faster than lwsync).
+    __asm__ __volatile__ ("twi 0,%0,0\n isync\n" : : "r" (t) : "memory");
+    return t;
+  }
+};
+
 #endif // OS_CPU_AIX_PPC_ATOMIC_AIX_PPC_HPP
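With PlatformOrderedLoad now specialized inside Atomic, the twi/isync acquire sequence above is reached through Atomic::load_acquire rather than OrderAccess::load_acquire. A minimal caller-side sketch with a hypothetical published field:

    static volatile intptr_t _published = 0;  // hypothetical

    intptr_t read_published_acquire() {
      // Selects the PlatformOrderedLoad<sizeof(intptr_t), X_ACQUIRE>
      // specialization above: a plain load followed by "twi 0,reg,0; isync".
      return Atomic::load_acquire(&_published);
    }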
--- a/src/hotspot/os_cpu/aix_ppc/orderAccess_aix_ppc.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/aix_ppc/orderAccess_aix_ppc.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -64,8 +64,6 @@
 #define inlasm_lwsync()   __asm__ __volatile__ ("lwsync" : : : "memory");
 #define inlasm_eieio()    __asm__ __volatile__ ("eieio"  : : : "memory");
 #define inlasm_isync()    __asm__ __volatile__ ("isync"  : : : "memory");
-// Use twi-isync for load_acquire (faster than lwsync).
-#define inlasm_acquire_reg(X) __asm__ __volatile__ ("twi 0,%0,0\n isync\n" : : "r" (X) : "memory");
 
 inline void OrderAccess::loadload()   { inlasm_lwsync(); }
 inline void OrderAccess::storestore() { inlasm_lwsync(); }
@@ -78,13 +76,6 @@
 inline void OrderAccess::cross_modify_fence()
                                       { inlasm_isync();  }
 
-template<size_t byte_size>
-struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
-{
-  template <typename T>
-  T operator()(const volatile T* p) const { T t = Atomic::load(p); inlasm_acquire_reg(t); return t; }
-};
-
 #undef inlasm_sync
 #undef inlasm_lwsync
 #undef inlasm_eieio
--- a/src/hotspot/os_cpu/bsd_x86/atomic_bsd_x86.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/bsd_x86/atomic_bsd_x86.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -31,13 +31,13 @@
 struct Atomic::PlatformAdd
   : Atomic::FetchAndAdd<Atomic::PlatformAdd<byte_size> >
 {
-  template<typename I, typename D>
-  D fetch_and_add(I add_value, D volatile* dest, atomic_memory_order /* order */) const;
+  template<typename D, typename I>
+  D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order /* order */) const;
 };
 
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<4>::fetch_and_add(I add_value, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<4>::fetch_and_add(D volatile* dest, I add_value,
                                                atomic_memory_order /* order */) const {
   STATIC_ASSERT(4 == sizeof(I));
   STATIC_ASSERT(4 == sizeof(D));
@@ -51,8 +51,8 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order /* order */) const {
   STATIC_ASSERT(4 == sizeof(T));
   __asm__ volatile (  "xchgl (%2),%0"
@@ -64,9 +64,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<1>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order /* order */) const {
   STATIC_ASSERT(1 == sizeof(T));
   __asm__ volatile (  "lock cmpxchgb %1,(%3)"
@@ -78,9 +78,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order /* order */) const {
   STATIC_ASSERT(4 == sizeof(T));
   __asm__ volatile (  "lock cmpxchgl %1,(%3)"
@@ -92,8 +92,8 @@
 
 #ifdef AMD64
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<8>::fetch_and_add(I add_value, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<8>::fetch_and_add(D volatile* dest, I add_value,
                                                atomic_memory_order /* order */) const {
   STATIC_ASSERT(8 == sizeof(I));
   STATIC_ASSERT(8 == sizeof(D));
@@ -107,8 +107,8 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order /* order */) const {
   STATIC_ASSERT(8 == sizeof(T));
   __asm__ __volatile__ ("xchgq (%2),%0"
@@ -120,9 +120,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order /* order */) const {
   STATIC_ASSERT(8 == sizeof(T));
   __asm__ __volatile__ (  "lock cmpxchgq %1,(%3)"
@@ -142,12 +142,12 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order /* order */) const {
   STATIC_ASSERT(8 == sizeof(T));
-  return cmpxchg_using_helper<int64_t>(_Atomic_cmpxchg_long, exchange_value, dest, compare_value);
+  return cmpxchg_using_helper<int64_t>(_Atomic_cmpxchg_long, dest, compare_value, exchange_value);
 }
 
 template<>
@@ -161,12 +161,62 @@
 
 template<>
 template<typename T>
-inline void Atomic::PlatformStore<8>::operator()(T store_value,
-                                                 T volatile* dest) const {
+inline void Atomic::PlatformStore<8>::operator()(T volatile* dest,
+                                                 T store_value) const {
   STATIC_ASSERT(8 == sizeof(T));
   _Atomic_move_long(reinterpret_cast<const volatile int64_t*>(&store_value), reinterpret_cast<volatile int64_t*>(dest));
 }
 
 #endif // AMD64
 
+template<>
+struct Atomic::PlatformOrderedStore<1, RELEASE_X_FENCE>
+{
+  template <typename T>
+  void operator()(volatile T* p, T v) const {
+    __asm__ volatile (  "xchgb (%2),%0"
+                      : "=q" (v)
+                      : "0" (v), "r" (p)
+                      : "memory");
+  }
+};
+
+template<>
+struct Atomic::PlatformOrderedStore<2, RELEASE_X_FENCE>
+{
+  template <typename T>
+  void operator()(volatile T* p, T v) const {
+    __asm__ volatile (  "xchgw (%2),%0"
+                      : "=r" (v)
+                      : "0" (v), "r" (p)
+                      : "memory");
+  }
+};
+
+template<>
+struct Atomic::PlatformOrderedStore<4, RELEASE_X_FENCE>
+{
+  template <typename T>
+  void operator()(volatile T* p, T v) const {
+    __asm__ volatile (  "xchgl (%2),%0"
+                      : "=r" (v)
+                      : "0" (v), "r" (p)
+                      : "memory");
+  }
+};
+
+#ifdef AMD64
+template<>
+struct Atomic::PlatformOrderedStore<8, RELEASE_X_FENCE>
+{
+  template <typename T>
+  void operator()(volatile T* p, T v) const {
+    __asm__ volatile (  "xchgq (%2), %0"
+                      : "=r" (v)
+                      : "0" (v), "r" (p)
+                      : "memory");
+  }
+};
+#endif // AMD64
+
 #endif // OS_CPU_BSD_X86_ATOMIC_BSD_X86_HPP
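The xchg-based PlatformOrderedStore specializations added above now take (pointer, value) and back Atomic's release-store-with-fence path on x86. A hedged sketch of the caller side, with a hypothetical `_state` field:

    static volatile int _state = 0;  // hypothetical

    void publish_state(int v) {
      // Expands to the size-matched xchg form above: the exchange gives the
      // store release semantics and also acts as the trailing full fence.
      Atomic::release_store_fence(&_state, v);
    }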
--- a/src/hotspot/os_cpu/bsd_x86/bsd_x86_32.s	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/bsd_x86/bsd_x86_32.s	Mon Dec 02 12:01:40 2019 +0530
@@ -633,9 +633,9 @@
         ret
 
 
-        # Support for int64_t Atomic::cmpxchg(int64_t exchange_value,
+        # Support for int64_t Atomic::cmpxchg(int64_t compare_value,
         #                                     volatile int64_t* dest,
-        #                                     int64_t compare_value)
+        #                                     int64_t exchange_value)
         #
         .p2align 4,,15
         ELF_TYPE(_Atomic_cmpxchg_long,@function)
@@ -665,4 +665,3 @@
         movl     8(%esp), %eax   # dest
         fistpll   (%eax)
         ret
-
--- a/src/hotspot/os_cpu/bsd_x86/orderAccess_bsd_x86.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/bsd_x86/orderAccess_bsd_x86.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -64,54 +64,4 @@
   __asm__ volatile ("cpuid " : "+a" (idx) : : "ebx", "ecx", "edx", "memory");
 }
 
-template<>
-struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE>
-{
-  template <typename T>
-  void operator()(T v, volatile T* p) const {
-    __asm__ volatile (  "xchgb (%2),%0"
-                      : "=q" (v)
-                      : "0" (v), "r" (p)
-                      : "memory");
-  }
-};
-
-template<>
-struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE>
-{
-  template <typename T>
-  void operator()(T v, volatile T* p) const {
-    __asm__ volatile (  "xchgw (%2),%0"
-                      : "=r" (v)
-                      : "0" (v), "r" (p)
-                      : "memory");
-  }
-};
-
-template<>
-struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE>
-{
-  template <typename T>
-  void operator()(T v, volatile T* p) const {
-    __asm__ volatile (  "xchgl (%2),%0"
-                      : "=r" (v)
-                      : "0" (v), "r" (p)
-                      : "memory");
-  }
-};
-
-#ifdef AMD64
-template<>
-struct OrderAccess::PlatformOrderedStore<8, RELEASE_X_FENCE>
-{
-  template <typename T>
-  void operator()(T v, volatile T* p) const {
-    __asm__ volatile (  "xchgq (%2), %0"
-                      : "=r" (v)
-                      : "0" (v), "r" (p)
-                      : "memory");
-  }
-};
-#endif // AMD64
-
 #endif // OS_CPU_BSD_X86_ORDERACCESS_BSD_X86_HPP
--- a/src/hotspot/os_cpu/bsd_zero/atomic_bsd_zero.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/bsd_zero/atomic_bsd_zero.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -163,22 +163,22 @@
 struct Atomic::PlatformAdd
   : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
 {
-  template<typename I, typename D>
-  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
+  template<typename D, typename I>
+  D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const;
 };
 
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value,
                                                atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(I));
   STATIC_ASSERT(4 == sizeof(D));
 
 #ifdef ARM
-  return add_using_helper<int>(arm_add_and_fetch, add_value, dest);
+  return add_using_helper<int>(arm_add_and_fetch, dest, add_value);
 #else
 #ifdef M68K
-  return add_using_helper<int>(m68k_add_and_fetch, add_value, dest);
+  return add_using_helper<int>(m68k_add_and_fetch, dest, add_value);
 #else
   return __sync_add_and_fetch(dest, add_value);
 #endif // M68K
@@ -186,8 +186,8 @@
 }
 
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value,
                                                atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(I));
   STATIC_ASSERT(8 == sizeof(D));
@@ -197,15 +197,15 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(T));
 #ifdef ARM
-  return xchg_using_helper<int>(arm_lock_test_and_set, exchange_value, dest);
+  return xchg_using_helper<int>(arm_lock_test_and_set, dest, exchange_value);
 #else
 #ifdef M68K
-  return xchg_using_helper<int>(m68k_lock_test_and_set, exchange_value, dest);
+  return xchg_using_helper<int>(m68k_lock_test_and_set, dest, exchange_value);
 #else
   // __sync_lock_test_and_set is a bizarrely named atomic exchange
   // operation.  Note that some platforms only support this with the
@@ -224,8 +224,8 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(T));
   T result = __sync_lock_test_and_set (dest, exchange_value);
@@ -239,16 +239,16 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(T));
 #ifdef ARM
-  return cmpxchg_using_helper<int>(arm_compare_and_swap, exchange_value, dest, compare_value);
+  return cmpxchg_using_helper<int>(arm_compare_and_swap, dest, compare_value, exchange_value);
 #else
 #ifdef M68K
-  return cmpxchg_using_helper<int>(m68k_compare_and_swap, exchange_value, dest, compare_value);
+  return cmpxchg_using_helper<int>(m68k_compare_and_swap, dest, compare_value, exchange_value);
 #else
   return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
 #endif // M68K
@@ -257,9 +257,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(T));
   return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
@@ -276,8 +276,8 @@
 
 template<>
 template<typename T>
-inline void Atomic::PlatformStore<8>::operator()(T store_value,
-                                                 T volatile* dest) const {
+inline void Atomic::PlatformStore<8>::operator()(T volatile* dest,
+                                                 T store_value) const {
   STATIC_ASSERT(8 == sizeof(T));
   os::atomic_copy64(reinterpret_cast<const volatile int64_t*>(&store_value), reinterpret_cast<volatile int64_t*>(dest));
 }
--- a/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -32,16 +32,12 @@
 // Note that memory_order_conservative requires a full barrier after atomic stores.
 // See https://patchwork.kernel.org/patch/3575821/
 
-#define FULL_MEM_BARRIER  __sync_synchronize()
-#define READ_MEM_BARRIER  __atomic_thread_fence(__ATOMIC_ACQUIRE);
-#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);
-
 template<size_t byte_size>
 struct Atomic::PlatformAdd
   : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
 {
-  template<typename I, typename D>
-  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const {
+  template<typename D, typename I>
+  D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const {
     D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE);
     FULL_MEM_BARRIER;
     return res;
@@ -50,8 +46,8 @@
 
 template<size_t byte_size>
 template<typename T>
-inline T Atomic::PlatformXchg<byte_size>::operator()(T exchange_value,
-                                                     T volatile* dest,
+inline T Atomic::PlatformXchg<byte_size>::operator()(T volatile* dest,
+                                                     T exchange_value,
                                                      atomic_memory_order order) const {
   STATIC_ASSERT(byte_size == sizeof(T));
   T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE);
@@ -61,9 +57,9 @@
 
 template<size_t byte_size>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T exchange_value,
-                                                        T volatile* dest,
+inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T volatile* dest,
                                                         T compare_value,
+                                                        T exchange_value,
                                                         atomic_memory_order order) const {
   STATIC_ASSERT(byte_size == sizeof(T));
   if (order == memory_order_relaxed) {
@@ -81,4 +77,25 @@
   }
 }
 
+template<size_t byte_size>
+struct Atomic::PlatformOrderedLoad<byte_size, X_ACQUIRE>
+{
+  template <typename T>
+  T operator()(const volatile T* p) const { T data; __atomic_load(const_cast<T*>(p), &data, __ATOMIC_ACQUIRE); return data; }
+};
+
+template<size_t byte_size>
+struct Atomic::PlatformOrderedStore<byte_size, RELEASE_X>
+{
+  template <typename T>
+  void operator()(volatile T* p, T v) const { __atomic_store(const_cast<T*>(p), &v, __ATOMIC_RELEASE); }
+};
+
+template<size_t byte_size>
+struct Atomic::PlatformOrderedStore<byte_size, RELEASE_X_FENCE>
+{
+  template <typename T>
+  void operator()(volatile T* p, T v) const { release_store(p, v); OrderAccess::fence(); }
+};
+
 #endif // OS_CPU_LINUX_AARCH64_ATOMIC_LINUX_AARCH64_HPP
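On AArch64 the ordered load and store specializations above are thin wrappers over the GCC/Clang __atomic builtins. A standalone sketch of the same idea outside HotSpot, using a hypothetical slot variable:

    #include <stdint.h>

    static volatile int64_t slot = 0;  // hypothetical

    int64_t acquire_load_slot() {
      int64_t v;
      // Same shape as PlatformOrderedLoad<8, X_ACQUIRE> above.
      __atomic_load(const_cast<int64_t*>(&slot), &v, __ATOMIC_ACQUIRE);
      return v;
    }

    void release_store_slot(int64_t v) {
      // Same shape as PlatformOrderedStore<8, RELEASE_X> above.
      __atomic_store(const_cast<int64_t*>(&slot), &v, __ATOMIC_RELEASE);
    }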
--- a/src/hotspot/os_cpu/linux_aarch64/orderAccess_linux_aarch64.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/linux_aarch64/orderAccess_linux_aarch64.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -37,6 +37,10 @@
 inline void OrderAccess::loadstore()  { acquire(); }
 inline void OrderAccess::storeload()  { fence(); }
 
+#define FULL_MEM_BARRIER  __sync_synchronize()
+#define READ_MEM_BARRIER  __atomic_thread_fence(__ATOMIC_ACQUIRE);
+#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);
+
 inline void OrderAccess::acquire() {
   READ_MEM_BARRIER;
 }
@@ -51,25 +55,4 @@
 
 inline void OrderAccess::cross_modify_fence() { }
 
-template<size_t byte_size>
-struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
-{
-  template <typename T>
-  T operator()(const volatile T* p) const { T data; __atomic_load(const_cast<T*>(p), &data, __ATOMIC_ACQUIRE); return data; }
-};
-
-template<size_t byte_size>
-struct OrderAccess::PlatformOrderedStore<byte_size, RELEASE_X>
-{
-  template <typename T>
-  void operator()(T v, volatile T* p) const { __atomic_store(const_cast<T*>(p), &v, __ATOMIC_RELEASE); }
-};
-
-template<size_t byte_size>
-struct OrderAccess::PlatformOrderedStore<byte_size, RELEASE_X_FENCE>
-{
-  template <typename T>
-  void operator()(T v, volatile T* p) const { release_store(p, v); fence(); }
-};
-
 #endif // OS_CPU_LINUX_AARCH64_ORDERACCESS_LINUX_AARCH64_HPP
--- a/src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -54,8 +54,8 @@
 
 template<>
 template<typename T>
-inline void Atomic::PlatformStore<8>::operator()(T store_value,
-                                                 T volatile* dest) const {
+inline void Atomic::PlatformStore<8>::operator()(T volatile* dest,
+                                                 T store_value) const {
   STATIC_ASSERT(8 == sizeof(T));
   (*os::atomic_store_long_func)(
     PrimitiveConversions::cast<int64_t>(store_value), reinterpret_cast<volatile int64_t*>(dest));
@@ -70,27 +70,27 @@
 struct Atomic::PlatformAdd
   : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
 {
-  template<typename I, typename D>
-  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
+  template<typename D, typename I>
+  D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const;
 };
 
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value,
                                                atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(I));
   STATIC_ASSERT(4 == sizeof(D));
-  return add_using_helper<int32_t>(os::atomic_add_func, add_value, dest);
+  return add_using_helper<int32_t>(os::atomic_add_func, dest, add_value);
 }
 
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(T));
-  return xchg_using_helper<int32_t>(os::atomic_xchg_func, exchange_value, dest);
+  return xchg_using_helper<int32_t>(os::atomic_xchg_func, dest, exchange_value);
 }
 
 
@@ -119,22 +119,22 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(T));
-  return cmpxchg_using_helper<int32_t>(reorder_cmpxchg_func, exchange_value, dest, compare_value);
+  return cmpxchg_using_helper<int32_t>(reorder_cmpxchg_func, dest, compare_value, exchange_value);
 }
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(T));
-  return cmpxchg_using_helper<int64_t>(reorder_cmpxchg_long_func, exchange_value, dest, compare_value);
+  return cmpxchg_using_helper<int64_t>(reorder_cmpxchg_long_func, dest, compare_value, exchange_value);
 }
 
 #endif // OS_CPU_LINUX_ARM_ATOMIC_LINUX_ARM_HPP
--- a/src/hotspot/os_cpu/linux_ppc/atomic_linux_ppc.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/linux_ppc/atomic_linux_ppc.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -30,6 +30,7 @@
 #error "Atomic currently only implemented for PPC64"
 #endif
 
+#include "orderAccess_linux_ppc.hpp"
 #include "utilities/debug.hpp"
 
 // Implementation of class atomic
@@ -95,13 +96,13 @@
 struct Atomic::PlatformAdd
   : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
 {
-  template<typename I, typename D>
-  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
+  template<typename D, typename I>
+  D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const;
 };
 
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value,
                                                atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(I));
   STATIC_ASSERT(4 == sizeof(D));
@@ -126,8 +127,8 @@
 
 
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value,
                                                atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(I));
   STATIC_ASSERT(8 == sizeof(D));
@@ -152,8 +153,8 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order order) const {
   // Note that xchg doesn't necessarily do an acquire
   // (see synchronizer.cpp).
@@ -191,8 +192,8 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(T));
   // Note that xchg doesn't necessarily do an acquire
@@ -231,9 +232,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<1>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(1 == sizeof(T));
 
@@ -301,9 +302,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(T));
 
@@ -351,9 +352,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(T));
 
@@ -399,4 +400,16 @@
   return old_value;
 }
 
+template<size_t byte_size>
+struct Atomic::PlatformOrderedLoad<byte_size, X_ACQUIRE>
+{
+  template <typename T>
+  T operator()(const volatile T* p) const {
+    T t = Atomic::load(p);
+    // Use twi-isync for load_acquire (faster than lwsync).
+    __asm__ __volatile__ ("twi 0,%0,0\n isync\n" : : "r" (t) : "memory");
+    return t;
+  }
+};
+
 #endif // OS_CPU_LINUX_PPC_ATOMIC_LINUX_PPC_HPP
--- a/src/hotspot/os_cpu/linux_ppc/orderAccess_linux_ppc.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/linux_ppc/orderAccess_linux_ppc.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -68,8 +68,6 @@
 #define inlasm_lwsync()   __asm__ __volatile__ ("lwsync" : : : "memory");
 #define inlasm_eieio()    __asm__ __volatile__ ("eieio"  : : : "memory");
 #define inlasm_isync()    __asm__ __volatile__ ("isync"  : : : "memory");
-// Use twi-isync for load_acquire (faster than lwsync).
-#define inlasm_acquire_reg(X) __asm__ __volatile__ ("twi 0,%0,0\n isync\n" : : "r" (X) : "memory");
 
 inline void   OrderAccess::loadload()   { inlasm_lwsync(); }
 inline void   OrderAccess::storestore() { inlasm_lwsync(); }
@@ -82,17 +80,9 @@
 inline void   OrderAccess::cross_modify_fence()
                                         { inlasm_isync();  }
 
-template<size_t byte_size>
-struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
-{
-  template <typename T>
-  T operator()(const volatile T* p) const { T t = Atomic::load(p); inlasm_acquire_reg(t); return t; }
-};
-
 #undef inlasm_sync
 #undef inlasm_lwsync
 #undef inlasm_eieio
 #undef inlasm_isync
-#undef inlasm_acquire_reg
 
 #endif // OS_CPU_LINUX_PPC_ORDERACCESS_LINUX_PPC_HPP
--- a/src/hotspot/os_cpu/linux_s390/atomic_linux_s390.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/linux_s390/atomic_linux_s390.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -78,13 +78,13 @@
 struct Atomic::PlatformAdd
   : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
 {
-  template<typename I, typename D>
-  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
+  template<typename D, typename I>
+  D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const;
 };
 
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<4>::add_and_fetch(I inc, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I inc,
                                                atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(I));
   STATIC_ASSERT(4 == sizeof(D));
@@ -137,8 +137,8 @@
 
 
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<8>::add_and_fetch(I inc, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I inc,
                                                atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(I));
   STATIC_ASSERT(8 == sizeof(D));
@@ -208,8 +208,8 @@
 // replacement succeeded.
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order unused) const {
   STATIC_ASSERT(4 == sizeof(T));
   T old;
@@ -232,8 +232,8 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order unused) const {
   STATIC_ASSERT(8 == sizeof(T));
   T old;
@@ -289,9 +289,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<4>::operator()(T xchg_val,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
                                                 T cmp_val,
+                                                T xchg_val,
                                                 atomic_memory_order unused) const {
   STATIC_ASSERT(4 == sizeof(T));
   T old;
@@ -313,9 +313,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<8>::operator()(T xchg_val,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
                                                 T cmp_val,
+                                                T xchg_val,
                                                 atomic_memory_order unused) const {
   STATIC_ASSERT(8 == sizeof(T));
   T old;
@@ -335,4 +335,11 @@
   return old;
 }
 
+template<size_t byte_size>
+struct Atomic::PlatformOrderedLoad<byte_size, X_ACQUIRE>
+{
+  template <typename T>
+  T operator()(const volatile T* p) const { T t = *p; OrderAccess::acquire(); return t; }
+};
+
 #endif // OS_CPU_LINUX_S390_ATOMIC_LINUX_S390_HPP
--- a/src/hotspot/os_cpu/linux_s390/orderAccess_linux_s390.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/linux_s390/orderAccess_linux_s390.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -76,13 +76,6 @@
 inline void OrderAccess::fence()      { inlasm_zarch_sync(); }
 inline void OrderAccess::cross_modify_fence() { inlasm_zarch_sync(); }
 
-template<size_t byte_size>
-struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
-{
-  template <typename T>
-  T operator()(const volatile T* p) const { T t = *p; inlasm_zarch_acquire(); return t; }
-};
-
 #undef inlasm_compiler_barrier
 #undef inlasm_zarch_sync
 #undef inlasm_zarch_release
--- a/src/hotspot/os_cpu/linux_sparc/atomic_linux_sparc.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/linux_sparc/atomic_linux_sparc.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -31,13 +31,13 @@
 struct Atomic::PlatformAdd
   : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
 {
-  template<typename I, typename D>
-  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
+  template<typename D, typename I>
+  D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const;
 };
 
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value,
                                                atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(I));
   STATIC_ASSERT(4 == sizeof(D));
@@ -59,8 +59,8 @@
 }
 
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value,
                                                atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(I));
   STATIC_ASSERT(8 == sizeof(D));
@@ -83,8 +83,8 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(T));
   T rv = exchange_value;
@@ -98,8 +98,8 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(T));
   T rv = exchange_value;
@@ -124,9 +124,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(T));
   T rv;
@@ -140,9 +140,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(T));
   T rv;
--- a/src/hotspot/os_cpu/linux_x86/atomic_linux_x86.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/linux_x86/atomic_linux_x86.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -31,13 +31,13 @@
 struct Atomic::PlatformAdd
   : Atomic::FetchAndAdd<Atomic::PlatformAdd<byte_size> >
 {
-  template<typename I, typename D>
-  D fetch_and_add(I add_value, D volatile* dest, atomic_memory_order order) const;
+  template<typename D, typename I>
+  D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const;
 };
 
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<4>::fetch_and_add(I add_value, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<4>::fetch_and_add(D volatile* dest, I add_value,
                                                atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(I));
   STATIC_ASSERT(4 == sizeof(D));
@@ -51,8 +51,8 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(T));
   __asm__ volatile (  "xchgl (%2),%0"
@@ -64,9 +64,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<1>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order /* order */) const {
   STATIC_ASSERT(1 == sizeof(T));
   __asm__ volatile ("lock cmpxchgb %1,(%3)"
@@ -78,9 +78,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order /* order */) const {
   STATIC_ASSERT(4 == sizeof(T));
   __asm__ volatile ("lock cmpxchgl %1,(%3)"
@@ -93,8 +93,8 @@
 #ifdef AMD64
 
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<8>::fetch_and_add(I add_value, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<8>::fetch_and_add(D volatile* dest, I add_value,
                                                atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(I));
   STATIC_ASSERT(8 == sizeof(D));
@@ -108,7 +108,7 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, T volatile* dest,
+inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest, T exchange_value,
                                              atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(T));
   __asm__ __volatile__ ("xchgq (%2),%0"
@@ -120,9 +120,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order /* order */) const {
   STATIC_ASSERT(8 == sizeof(T));
   __asm__ __volatile__ ("lock cmpxchgq %1,(%3)"
@@ -142,12 +142,12 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(T));
-  return cmpxchg_using_helper<int64_t>(_Atomic_cmpxchg_long, exchange_value, dest, compare_value);
+  return cmpxchg_using_helper<int64_t>(_Atomic_cmpxchg_long, dest, compare_value, exchange_value);
 }
 
 template<>
@@ -161,12 +161,62 @@
 
 template<>
 template<typename T>
-inline void Atomic::PlatformStore<8>::operator()(T store_value,
-                                                 T volatile* dest) const {
+inline void Atomic::PlatformStore<8>::operator()(T volatile* dest,
+                                                 T store_value) const {
   STATIC_ASSERT(8 == sizeof(T));
   _Atomic_move_long(reinterpret_cast<const volatile int64_t*>(&store_value), reinterpret_cast<volatile int64_t*>(dest));
 }
 
 #endif // AMD64
 
+template<>
+struct Atomic::PlatformOrderedStore<1, RELEASE_X_FENCE>
+{
+  template <typename T>
+  void operator()(volatile T* p, T v) const {
+    __asm__ volatile (  "xchgb (%2),%0"
+                      : "=q" (v)
+                      : "0" (v), "r" (p)
+                      : "memory");
+  }
+};
+
+template<>
+struct Atomic::PlatformOrderedStore<2, RELEASE_X_FENCE>
+{
+  template <typename T>
+  void operator()(volatile T* p, T v) const {
+    __asm__ volatile (  "xchgw (%2),%0"
+                      : "=r" (v)
+                      : "0" (v), "r" (p)
+                      : "memory");
+  }
+};
+
+template<>
+struct Atomic::PlatformOrderedStore<4, RELEASE_X_FENCE>
+{
+  template <typename T>
+  void operator()(volatile T* p, T v) const {
+    __asm__ volatile (  "xchgl (%2),%0"
+                      : "=r" (v)
+                      : "0" (v), "r" (p)
+                      : "memory");
+  }
+};
+
+#ifdef AMD64
+template<>
+struct Atomic::PlatformOrderedStore<8, RELEASE_X_FENCE>
+{
+  template <typename T>
+  void operator()(volatile T* p, T v) const {
+    __asm__ volatile (  "xchgq (%2), %0"
+                      : "=r" (v)
+                      : "0" (v), "r" (p)
+                      : "memory");
+  }
+};
+#endif // AMD64
+
 #endif // OS_CPU_LINUX_X86_ATOMIC_LINUX_X86_HPP
--- a/src/hotspot/os_cpu/linux_x86/linux_x86_32.s	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/linux_x86/linux_x86_32.s	Mon Dec 02 12:01:40 2019 +0530
@@ -1,4 +1,4 @@
-# 
+#
 # Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
@@ -19,15 +19,15 @@
 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 # or visit www.oracle.com if you need additional information or have any
 # questions.
-# 
+#
 
-	
+
         # NOTE WELL!  The _Copy functions are called directly
 	# from server-compiler-generated code via CallLeafNoFP,
 	# which means that they *must* either not use floating
 	# point or use it in the same manner as does the server
 	# compiler.
-	
+
         .globl _Copy_conjoint_bytes
         .globl _Copy_arrayof_conjoint_bytes
         .globl _Copy_conjoint_jshorts_atomic
@@ -174,7 +174,7 @@
         leal     -1(%esi,%ecx),%eax   # from + count - 1
         jbe      acb_CopyRight
         cmpl     %eax,%edi
-        jbe      acb_CopyLeft 
+        jbe      acb_CopyLeft
         # copy from low to high
 acb_CopyRight:
         cmpl     $3,%ecx
@@ -262,7 +262,7 @@
         leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
         jbe      cs_CopyRight
         cmpl     %eax,%edi
-        jbe      cs_CopyLeft 
+        jbe      cs_CopyLeft
         # copy from low to high
 cs_CopyRight:
         # align source address at dword address boundary
@@ -283,7 +283,7 @@
         jbe      2f                   # <= 32 dwords
         # copy aligned dwords
         rep;     smovl
-        jmp      4f 
+        jmp      4f
         # copy aligned dwords
 2:      subl     %esi,%edi
         .p2align 4,,15
@@ -349,7 +349,7 @@
         leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
         jbe      acs_CopyRight
         cmpl     %eax,%edi
-        jbe      acs_CopyLeft 
+        jbe      acs_CopyLeft
 acs_CopyRight:
         movl     %ecx,%eax            # word count
         sarl     %ecx                 # dword count
@@ -358,10 +358,10 @@
         jbe      2f                   # <= 32 dwords
         # copy aligned dwords
         rep;     smovl
-        jmp      4f 
+        jmp      4f
         # copy aligned dwords
         .space 5
-2:      subl     %esi,%edi 
+2:      subl     %esi,%edi
         .p2align 4,,15
 3:      movl     (%esi),%edx
         movl     %edx,(%edi,%esi,1)
@@ -428,7 +428,7 @@
         leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
         jbe      ci_CopyRight
         cmpl     %eax,%edi
-        jbe      ci_CopyLeft 
+        jbe      ci_CopyLeft
 ci_CopyRight:
         cmpl     $32,%ecx
         jbe      2f                   # <= 32 dwords
@@ -471,7 +471,7 @@
         popl     %edi
         popl     %esi
         ret
-	
+
         # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
         #                                               jlong* to,
         #                                               size_t count)
@@ -537,7 +537,7 @@
         je       5f
         cmpl     $33,%ecx
         jae      3f
-1:      subl     %esi,%edi 
+1:      subl     %esi,%edi
         .p2align 4,,15
 2:      movl     (%esi),%edx
         movl     %edx,(%edi,%esi,1)
@@ -545,7 +545,7 @@
         subl     $1,%ecx
         jnz      2b
         addl     %esi,%edi
-        jmp      5f 
+        jmp      5f
 3:      smovl # align to 8 bytes, we know we are 4 byte aligned to start
         subl     $1,%ecx
 4:      .p2align 4,,15
@@ -612,9 +612,9 @@
         ret
 
 
-        # Support for jlong Atomic::cmpxchg(jlong exchange_value,
-        #                                   volatile jlong* dest,
-        #                                   jlong compare_value)
+        # Support for jlong Atomic::cmpxchg(volatile jlong* dest,
+        #                                   jlong compare_value,
+        #                                   jlong exchange_value)
         #
         .p2align 4,,15
 	.type    _Atomic_cmpxchg_long,@function
@@ -643,4 +643,3 @@
         movl     8(%esp), %eax   # dest
         fistpll   (%eax)
         ret
-
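
The rewritten comment above documents the new parameter order used throughout this changeset: Atomic::cmpxchg now takes the destination first, then the compare value, then the exchange value. A minimal standalone sketch of that calling convention, using std::atomic purely as a stand-in for HotSpot's Atomic class (the wrapper below is illustrative, not part of the patch):

#include <atomic>
#include <cstdint>

// Hypothetical wrapper mirroring the reordered signature:
// cmpxchg(dest, compare_value, exchange_value) returns the old value at *dest.
template <typename T>
T cmpxchg(std::atomic<T>* dest, T compare_value, T exchange_value) {
  T expected = compare_value;
  // Writes exchange_value only if *dest == compare_value; either way,
  // 'expected' ends up holding the value that was in *dest beforehand.
  dest->compare_exchange_strong(expected, exchange_value,
                                std::memory_order_seq_cst);
  return expected;
}

int main() {
  std::atomic<int64_t> state{0};
  int64_t old = cmpxchg<int64_t>(&state, 0, 42);   // succeeds, returns 0
  return (old == 0 && state.load() == 42) ? 0 : 1;
}
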
--- a/src/hotspot/os_cpu/linux_x86/orderAccess_linux_x86.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/linux_x86/orderAccess_linux_x86.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -66,54 +66,4 @@
 #endif
 }
 
-template<>
-struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE>
-{
-  template <typename T>
-  void operator()(T v, volatile T* p) const {
-    __asm__ volatile (  "xchgb (%2),%0"
-                      : "=q" (v)
-                      : "0" (v), "r" (p)
-                      : "memory");
-  }
-};
-
-template<>
-struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE>
-{
-  template <typename T>
-  void operator()(T v, volatile T* p) const {
-    __asm__ volatile (  "xchgw (%2),%0"
-                      : "=r" (v)
-                      : "0" (v), "r" (p)
-                      : "memory");
-  }
-};
-
-template<>
-struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE>
-{
-  template <typename T>
-  void operator()(T v, volatile T* p) const {
-    __asm__ volatile (  "xchgl (%2),%0"
-                      : "=r" (v)
-                      : "0" (v), "r" (p)
-                      : "memory");
-  }
-};
-
-#ifdef AMD64
-template<>
-struct OrderAccess::PlatformOrderedStore<8, RELEASE_X_FENCE>
-{
-  template <typename T>
-  void operator()(T v, volatile T* p) const {
-    __asm__ volatile (  "xchgq (%2), %0"
-                      : "=r" (v)
-                      : "0" (v), "r" (p)
-                      : "memory");
-  }
-};
-#endif // AMD64
-
 #endif // OS_CPU_LINUX_X86_ORDERACCESS_LINUX_X86_HPP
--- a/src/hotspot/os_cpu/linux_zero/atomic_linux_zero.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/linux_zero/atomic_linux_zero.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -34,13 +34,13 @@
 struct Atomic::PlatformAdd
   : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
 {
-  template<typename I, typename D>
-  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
+  template<typename D, typename I>
+  D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const;
 };
 
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value,
                                                atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(I));
   STATIC_ASSERT(4 == sizeof(D));
@@ -49,8 +49,8 @@
 }
 
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value,
                                                atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(I));
   STATIC_ASSERT(8 == sizeof(D));
@@ -59,8 +59,8 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(T));
   // __sync_lock_test_and_set is a bizarrely named atomic exchange
@@ -78,8 +78,8 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(T));
   T result = __sync_lock_test_and_set (dest, exchange_value);
@@ -93,9 +93,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(T));
   return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
@@ -103,9 +103,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(T));
   return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
@@ -122,8 +122,8 @@
 
 template<>
 template<typename T>
-inline void Atomic::PlatformStore<8>::operator()(T store_value,
-                                                 T volatile* dest) const {
+inline void Atomic::PlatformStore<8>::operator()(T volatile* dest,
+                                                 T store_value) const {
   STATIC_ASSERT(8 == sizeof(T));
   os::atomic_copy64(reinterpret_cast<const volatile int64_t*>(&store_value), reinterpret_cast<volatile int64_t*>(dest));
 }
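
The zero port above shows the same destination-first convention applied to add, xchg, and store. A short sketch of what call sites look like after the reorder; std::atomic stands in for the platform templates and the helper names below are hypothetical:

#include <atomic>
#include <cstddef>

// Destination-first helpers in the spirit of the reordered Atomic API.
template <typename T>
T add_and_fetch(std::atomic<T>* dest, T add_value) {
  // fetch_add returns the old value, so add again for add_and_fetch semantics.
  return dest->fetch_add(add_value, std::memory_order_seq_cst) + add_value;
}

template <typename T>
T xchg(std::atomic<T>* dest, T exchange_value) {
  return dest->exchange(exchange_value, std::memory_order_seq_cst);
}

int main() {
  std::atomic<size_t> items{0};
  add_and_fetch<size_t>(&items, 1);        // previously add(1, &items)
  size_t prev = xchg<size_t>(&items, 0);   // previously xchg(0, &items)
  return (prev == 1 && items.load() == 0) ? 0 : 1;
}
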
--- a/src/hotspot/os_cpu/solaris_sparc/atomic_solaris_sparc.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/solaris_sparc/atomic_solaris_sparc.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -30,12 +30,12 @@
 // Implement ADD using a CAS loop.
 template<size_t byte_size>
 struct Atomic::PlatformAdd {
-  template<typename I, typename D>
-  inline D operator()(I add_value, D volatile* dest, atomic_memory_order order) const {
+  template<typename D, typename I>
+  inline D operator()(D volatile* dest, I add_value, atomic_memory_order order) const {
     D old_value = *dest;
     while (true) {
       D new_value = old_value + add_value;
-      D result = cmpxchg(new_value, dest, old_value);
+      D result = cmpxchg(dest, old_value, new_value);
       if (result == old_value) break;
       old_value = result;
     }
@@ -45,8 +45,8 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(T));
   __asm__ volatile (  "swap [%2],%0"
@@ -58,13 +58,13 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(T));
   T old_value = *dest;
   while (true) {
-    T result = cmpxchg(exchange_value, dest, old_value);
+    T result = cmpxchg(dest, old_value, exchange_value);
     if (result == old_value) break;
     old_value = result;
   }
@@ -77,9 +77,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(T));
   T rv;
@@ -93,9 +93,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(T));
   T rv;
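
The SPARC port above builds add (and the 8-byte xchg) out of a cmpxchg retry loop rather than a dedicated instruction. The same loop in portable C++, offered only as a rough sketch of the pattern, with std::atomic standing in for the platform cmpxchg:

#include <atomic>
#include <cstdint>

// add implemented with a compare-and-swap loop: retry until the value the
// sum was based on is still the value in memory.
int64_t cas_loop_add(std::atomic<int64_t>* dest, int64_t add_value) {
  int64_t old_value = dest->load(std::memory_order_relaxed);
  while (true) {
    int64_t new_value = old_value + add_value;
    // On failure, compare_exchange_weak reloads old_value for the next attempt.
    if (dest->compare_exchange_weak(old_value, new_value,
                                    std::memory_order_seq_cst)) {
      return new_value;   // add_and_fetch semantics: return the updated value
    }
  }
}

int main() {
  std::atomic<int64_t> v{40};
  return (cas_loop_add(&v, 2) == 42) ? 0 : 1;
}
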
--- a/src/hotspot/os_cpu/solaris_x86/atomic_solaris_x86.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/solaris_x86/atomic_solaris_x86.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -44,14 +44,14 @@
 struct Atomic::PlatformAdd
   : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
 {
-  template<typename I, typename D>
-  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
+  template<typename D, typename I>
+  D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const;
 };
 
 // Not using add_using_helper; see comment for cmpxchg.
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value,
                                                atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(I));
   STATIC_ASSERT(4 == sizeof(D));
@@ -62,8 +62,8 @@
 
 // Not using add_using_helper; see comment for cmpxchg.
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value,
                                                atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(I));
   STATIC_ASSERT(8 == sizeof(D));
@@ -74,8 +74,8 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(T));
   return PrimitiveConversions::cast<T>(
@@ -87,8 +87,8 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(T));
   return PrimitiveConversions::cast<T>(
@@ -104,9 +104,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<1>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(1 == sizeof(T));
   return PrimitiveConversions::cast<T>(
@@ -117,9 +117,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(T));
   return PrimitiveConversions::cast<T>(
@@ -130,9 +130,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(T));
   return PrimitiveConversions::cast<T>(
--- a/src/hotspot/os_cpu/solaris_x86/os_solaris_x86.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/solaris_x86/os_solaris_x86.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -38,7 +38,6 @@
 #include "prims/jniFastGetField.hpp"
 #include "prims/jvm_misc.hpp"
 #include "runtime/arguments.hpp"
-#include "runtime/atomic.hpp"
 #include "runtime/extendedPC.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/interfaceSupport.inline.hpp"
--- a/src/hotspot/os_cpu/solaris_x86/solaris_x86_64.il	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/solaris_x86/solaris_x86_64.il	Mon Dec 02 12:01:40 2019 +0530
@@ -49,7 +49,8 @@
       orq      %rdx, %rax
       .end
 
-  // Support for jint Atomic::add(jint add_value, volatile jint* dest)
+  // Implementation of jint _Atomic_add(jint add_value, volatile jint* dest)
+  // used by Atomic::add(volatile jint* dest, jint add_value)
       .inline _Atomic_add,2
       movl     %edi, %eax      // save add_value for return
       lock
@@ -57,7 +58,8 @@
       addl     %edi, %eax
       .end
 
-  // Support for jlong Atomic::add(jlong add_value, volatile jlong* dest)
+  // Implementation of jlong _Atomic_add(jlong add_value, volatile jlong* dest)
+  // used by Atomic::add(volatile jlong* dest, jlong add_value)
       .inline _Atomic_add_long,2
       movq     %rdi, %rax      // save add_value for return
       lock
@@ -65,39 +67,41 @@
       addq     %rdi, %rax
       .end
 
-  // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest).
+  // Implementation of jint _Atomic_xchg(jint exchange_value, volatile jint* dest)
+  // used by Atomic::xchg(volatile jint* dest, jint exchange_value)
       .inline _Atomic_xchg,2
       xchgl    (%rsi), %edi
       movl     %edi, %eax
       .end
 
-  // Support for jlong Atomic::xchg(jlong exchange_value, volatile jlong* dest).
+  // Implementation of jlong _Atomic_xchg(jlong exchange_value, volatile jlong* dest)
+  // used by Atomic::xchg(volatile jlong* dest, jlong exchange_value)
       .inline _Atomic_xchg_long,2
       xchgq    (%rsi), %rdi
       movq     %rdi, %rax
       .end
 
-  // Support for jbyte Atomic::cmpxchg(jbyte exchange_value,
-  //                                   volatile jbyte *dest,
-  //                                   jbyte compare_value)
+  // Support for jbyte Atomic::cmpxchg(volatile jbyte *dest,
+  //                                   jbyte compare_value,
+  //                                   jbyte exchange_value)
       .inline _Atomic_cmpxchg_byte,3
       movb     %dl, %al      // compare_value
       lock
       cmpxchgb %dil, (%rsi)
       .end
 
-  // Support for jint Atomic::cmpxchg(jint exchange_value,
-  //                                  volatile jint *dest,
-  //                                  jint compare_value)
+  // Support for jint Atomic::cmpxchg(volatile jint *dest,
+  //                                  jint compare_value,
+  //                                  jint exchange_value)
       .inline _Atomic_cmpxchg,3
       movl     %edx, %eax      // compare_value
       lock
       cmpxchgl %edi, (%rsi)
       .end
 
-  // Support for jlong Atomic::cmpxchg(jlong exchange_value,
-  //                                   volatile jlong* dest,
-  //                                   jlong compare_value)
+  // Support for jlong Atomic::cmpxchg(volatile jlong* dest,
+  //                                   jlong compare_value,
+  //                                   jlong exchange_value)
       .inline _Atomic_cmpxchg_long,3
       movq     %rdx, %rax      // compare_value
       lock
--- a/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -27,6 +27,17 @@
 
 #include "runtime/os.hpp"
 
+// Note that in MSVC, volatile memory accesses are explicitly
+// guaranteed to have acquire release semantics (w.r.t. compiler
+// reordering) and therefore do not even need a compiler barrier
+// for normal acquire release accesses. And all generalized
+// bound calls like release_store go through Atomic::load
+// and Atomic::store which do volatile memory accesses.
+template<> inline void ScopedFence<X_ACQUIRE>::postfix()       { }
+template<> inline void ScopedFence<RELEASE_X>::prefix()        { }
+template<> inline void ScopedFence<RELEASE_X_FENCE>::prefix()  { }
+template<> inline void ScopedFence<RELEASE_X_FENCE>::postfix() { OrderAccess::fence(); }
+
 // The following alternative implementations are needed because
 // Windows 95 doesn't support (some of) the corresponding Windows NT
 // calls. Furthermore, these versions allow inlining in the caller.
@@ -46,33 +57,33 @@
 struct Atomic::PlatformAdd
   : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
 {
-  template<typename I, typename D>
-  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
+  template<typename D, typename I>
+  D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const;
 };
 
 #ifdef AMD64
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value,
                                                atomic_memory_order order) const {
-  return add_using_helper<int32_t>(os::atomic_add_func, add_value, dest);
+  return add_using_helper<int32_t>(os::atomic_add_func, dest, add_value);
 }
 
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value,
                                                atomic_memory_order order) const {
-  return add_using_helper<int64_t>(os::atomic_add_long_func, add_value, dest);
+  return add_using_helper<int64_t>(os::atomic_add_long_func, dest, add_value);
 }
 
 #define DEFINE_STUB_XCHG(ByteSize, StubType, StubName)                  \
   template<>                                                            \
   template<typename T>                                                  \
-  inline T Atomic::PlatformXchg<ByteSize>::operator()(T exchange_value, \
-                                                      T volatile* dest, \
+  inline T Atomic::PlatformXchg<ByteSize>::operator()(T volatile* dest, \
+                                                      T exchange_value, \
                                                       atomic_memory_order order) const { \
     STATIC_ASSERT(ByteSize == sizeof(T));                               \
-    return xchg_using_helper<StubType>(StubName, exchange_value, dest); \
+    return xchg_using_helper<StubType>(StubName, dest, exchange_value); \
   }
 
 DEFINE_STUB_XCHG(4, int32_t, os::atomic_xchg_func)
@@ -80,15 +91,15 @@
 
 #undef DEFINE_STUB_XCHG
 
-#define DEFINE_STUB_CMPXCHG(ByteSize, StubType, StubName)               \
-  template<>                                                            \
-  template<typename T>                                                  \
-  inline T Atomic::PlatformCmpxchg<ByteSize>::operator()(T exchange_value, \
-                                                         T volatile* dest, \
-                                                         T compare_value, \
+#define DEFINE_STUB_CMPXCHG(ByteSize, StubType, StubName)                  \
+  template<>                                                               \
+  template<typename T>                                                     \
+  inline T Atomic::PlatformCmpxchg<ByteSize>::operator()(T volatile* dest, \
+                                                         T compare_value,  \
+                                                         T exchange_value, \
                                                          atomic_memory_order order) const { \
-    STATIC_ASSERT(ByteSize == sizeof(T));                               \
-    return cmpxchg_using_helper<StubType>(StubName, exchange_value, dest, compare_value); \
+    STATIC_ASSERT(ByteSize == sizeof(T));                                  \
+    return cmpxchg_using_helper<StubType>(StubName, dest, compare_value, exchange_value); \
   }
 
 DEFINE_STUB_CMPXCHG(1, int8_t,  os::atomic_cmpxchg_byte_func)
@@ -100,8 +111,8 @@
 #else // !AMD64
 
 template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
+template<typename D, typename I>
+inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value,
                                                atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(I));
   STATIC_ASSERT(4 == sizeof(D));
@@ -116,8 +127,8 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
-                                             T volatile* dest,
+inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest,
+                                             T exchange_value,
                                              atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(T));
   // alternative for InterlockedExchange
@@ -130,9 +141,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<1>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(1 == sizeof(T));
   // alternative for InterlockedCompareExchange
@@ -146,9 +157,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(T));
   // alternative for InterlockedCompareExchange
@@ -162,9 +173,9 @@
 
 template<>
 template<typename T>
-inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
-                                                T volatile* dest,
+inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
                                                 T compare_value,
+                                                T exchange_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(T));
   int32_t ex_lo  = (int32_t)exchange_value;
@@ -202,8 +213,8 @@
 
 template<>
 template<typename T>
-inline void Atomic::PlatformStore<8>::operator()(T store_value,
-                                                 T volatile* dest) const {
+inline void Atomic::PlatformStore<8>::operator()(T volatile* dest,
+                                                 T store_value) const {
   STATIC_ASSERT(8 == sizeof(T));
   volatile T* src = &store_value;
   __asm {
@@ -218,4 +229,45 @@
 
 #pragma warning(default: 4035) // Enables warnings reporting missing return statement
 
+#ifndef AMD64
+template<>
+struct Atomic::PlatformOrderedStore<1, RELEASE_X_FENCE>
+{
+  template <typename T>
+  void operator()(volatile T* p, T v) const {
+    __asm {
+      mov edx, p;
+      mov al, v;
+      xchg al, byte ptr [edx];
+    }
+  }
+};
+
+template<>
+struct Atomic::PlatformOrderedStore<2, RELEASE_X_FENCE>
+{
+  template <typename T>
+  void operator()(volatile T* p, T v) const {
+    __asm {
+      mov edx, p;
+      mov ax, v;
+      xchg ax, word ptr [edx];
+    }
+  }
+};
+
+template<>
+struct Atomic::PlatformOrderedStore<4, RELEASE_X_FENCE>
+{
+  template <typename T>
+  void operator()(volatile T* p, T v) const {
+    __asm {
+      mov edx, p;
+      mov eax, v;
+      xchg eax, dword ptr [edx];
+    }
+  }
+};
+#endif // AMD64
+
 #endif // OS_CPU_WINDOWS_X86_ATOMIC_WINDOWS_X86_HPP
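
The xchg-based PlatformOrderedStore specializations moved here rely on the fact that on x86 a locked exchange is both the store and a full memory barrier, so a release store followed by a fence collapses into a single instruction. A minimal portable illustration of that idea; the helper name is made up and std::atomic is used only as a stand-in:

#include <atomic>

// Publish a value with full-fence semantics. On x86 this typically compiles
// to a single xchg, which is what the hand-written specializations above do.
inline void release_store_fence(std::atomic<int>* p, int v) {
  p->exchange(v, std::memory_order_seq_cst);
}

int main() {
  std::atomic<int> ready{0};
  release_store_fence(&ready, 1);
  return (ready.load(std::memory_order_acquire) == 1) ? 0 : 1;
}
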
--- a/src/hotspot/os_cpu/windows_x86/orderAccess_windows_x86.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/os_cpu/windows_x86/orderAccess_windows_x86.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -39,17 +39,6 @@
   _ReadWriteBarrier();
 }
 
-// Note that in MSVC, volatile memory accesses are explicitly
-// guaranteed to have acquire release semantics (w.r.t. compiler
-// reordering) and therefore does not even need a compiler barrier
-// for normal acquire release accesses. And all generalized
-// bound calls like release_store go through OrderAccess::load
-// and OrderAccess::store which do volatile memory accesses.
-template<> inline void ScopedFence<X_ACQUIRE>::postfix()       { }
-template<> inline void ScopedFence<RELEASE_X>::prefix()        { }
-template<> inline void ScopedFence<RELEASE_X_FENCE>::prefix()  { }
-template<> inline void ScopedFence<RELEASE_X_FENCE>::postfix() { OrderAccess::fence(); }
-
 inline void OrderAccess::loadload()   { compiler_barrier(); }
 inline void OrderAccess::storestore() { compiler_barrier(); }
 inline void OrderAccess::loadstore()  { compiler_barrier(); }
@@ -74,45 +63,4 @@
   __cpuid(regs, 0);
 }
 
-#ifndef AMD64
-template<>
-struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE>
-{
-  template <typename T>
-  void operator()(T v, volatile T* p) const {
-    __asm {
-      mov edx, p;
-      mov al, v;
-      xchg al, byte ptr [edx];
-    }
-  }
-};
-
-template<>
-struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE>
-{
-  template <typename T>
-  void operator()(T v, volatile T* p) const {
-    __asm {
-      mov edx, p;
-      mov ax, v;
-      xchg ax, word ptr [edx];
-    }
-  }
-};
-
-template<>
-struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE>
-{
-  template <typename T>
-  void operator()(T v, volatile T* p) const {
-    __asm {
-      mov edx, p;
-      mov eax, v;
-      xchg eax, dword ptr [edx];
-    }
-  }
-};
-#endif // AMD64
-
 #endif // OS_CPU_WINDOWS_X86_ORDERACCESS_WINDOWS_X86_HPP
--- a/src/hotspot/share/adlc/adlparse.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/adlc/adlparse.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -123,6 +123,7 @@
     parse_err(SEMERR, "Did not declare 'register' definitions");
   }
   regBlock->addSpillRegClass();
+  regBlock->addDynamicRegClass();
 
   // Done with parsing, check consistency.
 
--- a/src/hotspot/share/adlc/archDesc.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/adlc/archDesc.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -245,12 +245,12 @@
     // Construct chain rules
     build_chain_rule(op);
 
-    MatchRule &mrule = *op->_matrule;
-    Predicate *pred  =  op->_predicate;
+    MatchRule *mrule = op->_matrule;
+    Predicate *pred  = op->_predicate;
 
     // Grab the machine type of the operand
     const char  *rootOp    = op->_ident;
-    mrule._machType  = rootOp;
+    mrule->_machType  = rootOp;
 
     // Check for special cases
     if (strcmp(rootOp,"Universe")==0) continue;
@@ -271,10 +271,13 @@
 
     // Find result type for match.
     const char *result      = op->reduce_result();
-    bool        has_root    = false;
 
-    // Construct a MatchList for this entry
-    buildMatchList(op->_matrule, result, rootOp, pred, cost);
+    // Construct a MatchList for this entry.
+    // Iterate over the list to enumerate all match cases for operands with multiple match rules.
+    for (; mrule != NULL; mrule = mrule->_next) {
+      mrule->_machType = rootOp;
+      buildMatchList(mrule, result, rootOp, pred, cost);
+    }
   }
 }
 
@@ -805,6 +808,8 @@
     return "RegMask::Empty";
   } else if (strcmp(reg_class_name,"stack_slots")==0) {
     return "(Compile::current()->FIRST_STACK_mask())";
+  } else if (strcmp(reg_class_name, "dynamic")==0) {
+    return "*_opnds[0]->in_RegMask(0)";
   } else {
     char       *rc_name = toUpper(reg_class_name);
     const char *mask    = "_mask";
@@ -867,7 +872,7 @@
   }
 
   // Instructions producing 'Universe' use RegMask::Empty
-  if( strcmp(result,"Universe")==0 ) {
+  if (strcmp(result,"Universe") == 0) {
     return "RegMask::Empty";
   }
 
--- a/src/hotspot/share/adlc/formsopt.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/adlc/formsopt.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -80,6 +80,15 @@
   _regClass.Insert(rc_name,reg_class);
 }
 
+// Called after parsing the Register block.  Record the register class
+// for operands which are overwritten after matching.
+void RegisterForm::addDynamicRegClass() {
+  const char *rc_name = "dynamic";
+  RegClass* reg_class = new RegClass(rc_name);
+  reg_class->set_stack_version(false);
+  _rclasses.addName(rc_name);
+  _regClass.Insert(rc_name,reg_class);
+}
 
 // Provide iteration over all register definitions
 // in the order used by the register allocator
--- a/src/hotspot/share/adlc/formsopt.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/adlc/formsopt.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -104,6 +104,7 @@
 
   AllocClass *addAllocClass(char *allocName);
   void        addSpillRegClass();
+  void        addDynamicRegClass();
 
   // Provide iteration over all register definitions
   // in the order used by the register allocator
--- a/src/hotspot/share/adlc/output_c.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/adlc/output_c.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -2781,6 +2781,8 @@
       // Return the sole RegMask.
       if (strcmp(first_reg_class, "stack_slots") == 0) {
         fprintf(fp,"  return &(Compile::current()->FIRST_STACK_mask());\n");
+      } else if (strcmp(first_reg_class, "dynamic") == 0) {
+        fprintf(fp,"  return &RegMask::Empty;\n");
       } else {
         const char* first_reg_class_to_upper = toUpper(first_reg_class);
         fprintf(fp,"  return &%s_mask();\n", first_reg_class_to_upper);
--- a/src/hotspot/share/aot/aotCodeHeap.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/aot/aotCodeHeap.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -40,6 +40,7 @@
 #include "oops/compressedOops.hpp"
 #include "oops/klass.inline.hpp"
 #include "oops/method.inline.hpp"
+#include "runtime/atomic.hpp"
 #include "runtime/deoptimization.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/os.hpp"
@@ -347,7 +348,7 @@
   AOTCompiledMethod *aot = new AOTCompiledMethod(code, mh(), meta, metadata_table, metadata_size, state_adr, this, name, code_id, _aot_id);
   assert(_code_to_aot[code_id]._aot == NULL, "should be not initialized");
   _code_to_aot[code_id]._aot = aot; // Should set this first
-  if (Atomic::cmpxchg(in_use, &_code_to_aot[code_id]._state, not_set) != not_set) {
+  if (Atomic::cmpxchg(&_code_to_aot[code_id]._state, not_set, in_use) != not_set) {
     _code_to_aot[code_id]._aot = NULL; // Clean
   } else { // success
     // Publish method
@@ -410,7 +411,7 @@
     AOTCompiledMethod* aot = new AOTCompiledMethod(entry, NULL, meta, metadata_table, metadata_size, state_adr, this, full_name, code_id, i);
     assert(_code_to_aot[code_id]._aot  == NULL, "should be not initialized");
     _code_to_aot[code_id]._aot  = aot;
-    if (Atomic::cmpxchg(in_use, &_code_to_aot[code_id]._state, not_set) != not_set) {
+    if (Atomic::cmpxchg(&_code_to_aot[code_id]._state, not_set, in_use) != not_set) {
       fatal("stab '%s' code state is %d", full_name, _code_to_aot[code_id]._state);
     }
     // Adjust code buffer boundaries only for stubs because they are last in the buffer.
@@ -721,7 +722,7 @@
   for (int i = 0; i < methods_cnt; ++i) {
     int code_id = indexes[i];
     // Invalidate aot code.
-    if (Atomic::cmpxchg(invalid, &_code_to_aot[code_id]._state, not_set) != not_set) {
+    if (Atomic::cmpxchg(&_code_to_aot[code_id]._state, not_set, invalid) != not_set) {
       if (_code_to_aot[code_id]._state == in_use) {
         AOTCompiledMethod* aot = _code_to_aot[code_id]._aot;
         assert(aot != NULL, "aot should be set");
--- a/src/hotspot/share/aot/aotCompiledMethod.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/aot/aotCompiledMethod.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -37,6 +37,7 @@
 #include "runtime/frame.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/java.hpp"
+#include "runtime/orderAccess.hpp"
 #include "runtime/os.hpp"
 #include "runtime/safepointVerifiers.hpp"
 #include "runtime/sharedRuntime.hpp"
--- a/src/hotspot/share/asm/assembler.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/asm/assembler.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -29,7 +29,6 @@
 #include "gc/shared/collectedHeap.hpp"
 #include "memory/universe.hpp"
 #include "oops/compressedOops.hpp"
-#include "runtime/atomic.hpp"
 #include "runtime/icache.hpp"
 #include "runtime/os.hpp"
 #include "runtime/thread.hpp"
--- a/src/hotspot/share/c1/c1_GraphBuilder.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/c1/c1_GraphBuilder.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -3793,7 +3793,7 @@
       INLINE_BAILOUT("total inlining greater than DesiredMethodLimit");
     }
     // printing
-    print_inlining(callee);
+    print_inlining(callee, "inline", /*success*/ true);
   }
 
   // NOTE: Bailouts from this point on, which occur at the
@@ -4315,16 +4315,11 @@
 void GraphBuilder::print_inlining(ciMethod* callee, const char* msg, bool success) {
   CompileLog* log = compilation()->log();
   if (log != NULL) {
+    assert(msg != NULL, "inlining msg should not be null!");
     if (success) {
-      if (msg != NULL)
-        log->inline_success(msg);
-      else
-        log->inline_success("receiver is statically known");
+      log->inline_success(msg);
     } else {
-      if (msg != NULL)
-        log->inline_fail(msg);
-      else
-        log->inline_fail("reason unknown");
+      log->inline_fail(msg);
     }
   }
   EventCompilerInlining event;
--- a/src/hotspot/share/c1/c1_GraphBuilder.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/c1/c1_GraphBuilder.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -381,7 +381,7 @@
   void append_unsafe_get_and_set_obj(ciMethod* callee, bool is_add);
   void append_char_access(ciMethod* callee, bool is_store);
 
-  void print_inlining(ciMethod* callee, const char* msg = NULL, bool success = true);
+  void print_inlining(ciMethod* callee, const char* msg, bool success = true);
 
   void profile_call(ciMethod* callee, Value recv, ciKlass* predicted_holder, Values* obj_args, bool inlined);
   void profile_return_type(Value ret, ciMethod* callee, ciMethod* m = NULL, int bci = -1);
--- a/src/hotspot/share/classfile/classLoader.inline.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/classfile/classLoader.inline.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -26,14 +26,14 @@
 #define SHARE_CLASSFILE_CLASSLOADER_INLINE_HPP
 
 #include "classfile/classLoader.hpp"
-#include "runtime/orderAccess.hpp"
+#include "runtime/atomic.hpp"
 
 // Next entry in class path
-inline ClassPathEntry* ClassPathEntry::next() const { return OrderAccess::load_acquire(&_next); }
+inline ClassPathEntry* ClassPathEntry::next() const { return Atomic::load_acquire(&_next); }
 
 inline void ClassPathEntry::set_next(ClassPathEntry* next) {
   // may have unlocked readers, so ensure visibility.
-  OrderAccess::release_store(&_next, next);
+  Atomic::release_store(&_next, next);
 }
 
 inline ClassPathEntry* ClassLoader::classpath_entry(int n) {
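
The ClassPathEntry accessors above are the usual publish/consume pairing: the writer release-stores the new link so that lock-free readers that acquire-load it see a fully initialized entry. A compact standard-library sketch of that pattern (the Node type and function names are illustrative only):

#include <atomic>

struct Node {
  int payload;
  std::atomic<Node*> next{nullptr};
};

// Writer side: everything written to *next_node before this call becomes
// visible to readers that observe the new link (Atomic::release_store).
void set_next(Node* n, Node* next_node) {
  n->next.store(next_node, std::memory_order_release);
}

// Reader side: once the link is seen, the linked node is fully constructed
// (Atomic::load_acquire).
Node* get_next(const Node* n) {
  return n->next.load(std::memory_order_acquire);
}

int main() {
  Node head{0}, tail{7};
  set_next(&head, &tail);
  Node* t = get_next(&head);
  return (t != nullptr && t->payload == 7) ? 0 : 1;
}
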
--- a/src/hotspot/share/classfile/classLoaderData.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/classfile/classLoaderData.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -67,7 +67,6 @@
 #include "runtime/atomic.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/mutex.hpp"
-#include "runtime/orderAccess.hpp"
 #include "runtime/safepoint.hpp"
 #include "utilities/growableArray.hpp"
 #include "utilities/macros.hpp"
@@ -187,11 +186,11 @@
 oop* ClassLoaderData::ChunkedHandleList::add(oop o) {
   if (_head == NULL || _head->_size == Chunk::CAPACITY) {
     Chunk* next = new Chunk(_head);
-    OrderAccess::release_store(&_head, next);
+    Atomic::release_store(&_head, next);
   }
   oop* handle = &_head->_data[_head->_size];
   NativeAccess<IS_DEST_UNINITIALIZED>::oop_store(handle, o);
-  OrderAccess::release_store(&_head->_size, _head->_size + 1);
+  Atomic::release_store(&_head->_size, _head->_size + 1);
   return handle;
 }
 
@@ -214,10 +213,10 @@
 }
 
 void ClassLoaderData::ChunkedHandleList::oops_do(OopClosure* f) {
-  Chunk* head = OrderAccess::load_acquire(&_head);
+  Chunk* head = Atomic::load_acquire(&_head);
   if (head != NULL) {
     // Must be careful when reading size of head
-    oops_do_chunk(f, head, OrderAccess::load_acquire(&head->_size));
+    oops_do_chunk(f, head, Atomic::load_acquire(&head->_size));
     for (Chunk* c = head->_next; c != NULL; c = c->_next) {
       oops_do_chunk(f, c, c->_size);
     }
@@ -273,7 +272,7 @@
       return;
     }
     int new_claim = old_claim & ~claim;
-    if (Atomic::cmpxchg(new_claim, &_claim, old_claim) == old_claim) {
+    if (Atomic::cmpxchg(&_claim, old_claim, new_claim) == old_claim) {
       return;
     }
   }
@@ -286,7 +285,7 @@
       return false;
     }
     int new_claim = old_claim | claim;
-    if (Atomic::cmpxchg(new_claim, &_claim, old_claim) == old_claim) {
+    if (Atomic::cmpxchg(&_claim, old_claim, new_claim) == old_claim) {
       return true;
     }
   }
@@ -326,7 +325,7 @@
 
 void ClassLoaderData::classes_do(KlassClosure* klass_closure) {
   // Lock-free access requires load_acquire
-  for (Klass* k = OrderAccess::load_acquire(&_klasses); k != NULL; k = k->next_link()) {
+  for (Klass* k = Atomic::load_acquire(&_klasses); k != NULL; k = k->next_link()) {
     klass_closure->do_klass(k);
     assert(k != k->next_link(), "no loops!");
   }
@@ -334,7 +333,7 @@
 
 void ClassLoaderData::classes_do(void f(Klass * const)) {
   // Lock-free access requires load_acquire
-  for (Klass* k = OrderAccess::load_acquire(&_klasses); k != NULL; k = k->next_link()) {
+  for (Klass* k = Atomic::load_acquire(&_klasses); k != NULL; k = k->next_link()) {
     f(k);
     assert(k != k->next_link(), "no loops!");
   }
@@ -342,7 +341,7 @@
 
 void ClassLoaderData::methods_do(void f(Method*)) {
   // Lock-free access requires load_acquire
-  for (Klass* k = OrderAccess::load_acquire(&_klasses); k != NULL; k = k->next_link()) {
+  for (Klass* k = Atomic::load_acquire(&_klasses); k != NULL; k = k->next_link()) {
     if (k->is_instance_klass() && InstanceKlass::cast(k)->is_loaded()) {
       InstanceKlass::cast(k)->methods_do(f);
     }
@@ -351,7 +350,7 @@
 
 void ClassLoaderData::loaded_classes_do(KlassClosure* klass_closure) {
   // Lock-free access requires load_acquire
-  for (Klass* k = OrderAccess::load_acquire(&_klasses); k != NULL; k = k->next_link()) {
+  for (Klass* k = Atomic::load_acquire(&_klasses); k != NULL; k = k->next_link()) {
     // Do not filter ArrayKlass oops here...
     if (k->is_array_klass() || (k->is_instance_klass() && InstanceKlass::cast(k)->is_loaded())) {
 #ifdef ASSERT
@@ -366,7 +365,7 @@
 
 void ClassLoaderData::classes_do(void f(InstanceKlass*)) {
   // Lock-free access requires load_acquire
-  for (Klass* k = OrderAccess::load_acquire(&_klasses); k != NULL; k = k->next_link()) {
+  for (Klass* k = Atomic::load_acquire(&_klasses); k != NULL; k = k->next_link()) {
     if (k->is_instance_klass()) {
       f(InstanceKlass::cast(k));
     }
@@ -465,7 +464,7 @@
     k->set_next_link(old_value);
     // Link the new item into the list, making sure the linked class is stable
     // since the list can be walked without a lock
-    OrderAccess::release_store(&_klasses, k);
+    Atomic::release_store(&_klasses, k);
     if (k->is_array_klass()) {
       ClassLoaderDataGraph::inc_array_classes(1);
     } else {
@@ -552,7 +551,7 @@
 ModuleEntryTable* ClassLoaderData::modules() {
   // Lazily create the module entry table at first request.
   // Lock-free access requires load_acquire.
-  ModuleEntryTable* modules = OrderAccess::load_acquire(&_modules);
+  ModuleEntryTable* modules = Atomic::load_acquire(&_modules);
   if (modules == NULL) {
     MutexLocker m1(Module_lock);
     // Check if _modules got allocated while we were waiting for this lock.
@@ -562,7 +561,7 @@
       {
         MutexLocker m1(metaspace_lock(), Mutex::_no_safepoint_check_flag);
         // Ensure _modules is stable, since it is examined without a lock
-        OrderAccess::release_store(&_modules, modules);
+        Atomic::release_store(&_modules, modules);
       }
     }
   }
@@ -752,7 +751,7 @@
   // The reason for the delayed allocation is because some class loaders are
   // simply for delegating with no metadata of their own.
   // Lock-free access requires load_acquire.
-  ClassLoaderMetaspace* metaspace = OrderAccess::load_acquire(&_metaspace);
+  ClassLoaderMetaspace* metaspace = Atomic::load_acquire(&_metaspace);
   if (metaspace == NULL) {
     MutexLocker ml(_metaspace_lock,  Mutex::_no_safepoint_check_flag);
     // Check if _metaspace got allocated while we were waiting for this lock.
@@ -768,7 +767,7 @@
         metaspace = new ClassLoaderMetaspace(_metaspace_lock, Metaspace::StandardMetaspaceType);
       }
       // Ensure _metaspace is stable, since it is examined without a lock
-      OrderAccess::release_store(&_metaspace, metaspace);
+      Atomic::release_store(&_metaspace, metaspace);
     }
   }
   return metaspace;
@@ -969,7 +968,7 @@
 
 bool ClassLoaderData::contains_klass(Klass* klass) {
   // Lock-free access requires load_acquire
-  for (Klass* k = OrderAccess::load_acquire(&_klasses); k != NULL; k = k->next_link()) {
+  for (Klass* k = Atomic::load_acquire(&_klasses); k != NULL; k = k->next_link()) {
     if (k == klass) return true;
   }
   return false;
--- a/src/hotspot/share/classfile/classLoaderDataGraph.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/classfile/classLoaderDataGraph.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -38,7 +38,6 @@
 #include "runtime/atomic.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/mutex.hpp"
-#include "runtime/orderAccess.hpp"
 #include "runtime/safepoint.hpp"
 #include "runtime/safepointVerifiers.hpp"
 #include "utilities/growableArray.hpp"
@@ -59,13 +58,13 @@
   //
   // Any ClassLoaderData added after or during walking the list are prepended to
   // _head. Their claim mark need not be handled here.
-  for (ClassLoaderData* cld = OrderAccess::load_acquire(&_head); cld != NULL; cld = cld->next()) {
+  for (ClassLoaderData* cld = Atomic::load_acquire(&_head); cld != NULL; cld = cld->next()) {
     cld->clear_claim();
   }
 }
 
 void ClassLoaderDataGraph::clear_claimed_marks(int claim) {
- for (ClassLoaderData* cld = OrderAccess::load_acquire(&_head); cld != NULL; cld = cld->next()) {
+ for (ClassLoaderData* cld = Atomic::load_acquire(&_head); cld != NULL; cld = cld->next()) {
     cld->clear_claim(claim);
   }
 }
@@ -186,10 +185,7 @@
 // GC root of class loader data created.
 ClassLoaderData* volatile ClassLoaderDataGraph::_head = NULL;
 ClassLoaderData* ClassLoaderDataGraph::_unloading = NULL;
-ClassLoaderData* ClassLoaderDataGraph::_saved_unloading = NULL;
-ClassLoaderData* ClassLoaderDataGraph::_saved_head = NULL;
 
-bool ClassLoaderDataGraph::_should_purge = false;
 bool ClassLoaderDataGraph::_should_clean_deallocate_lists = false;
 bool ClassLoaderDataGraph::_safepoint_cleanup_needed = false;
 bool ClassLoaderDataGraph::_metaspace_oom = false;
@@ -220,7 +216,7 @@
 
   // First install the new CLD to the Graph.
   cld->set_next(_head);
-  OrderAccess::release_store(&_head, cld);
+  Atomic::release_store(&_head, cld);
 
   // Next associate with the class_loader.
   if (!is_unsafe_anonymous) {
@@ -249,9 +245,7 @@
 
 void ClassLoaderDataGraph::cld_unloading_do(CLDClosure* cl) {
   assert_locked_or_safepoint_weak(ClassLoaderDataGraph_lock);
-  // Only walk the head until any clds not purged from prior unloading
-  // (CMS doesn't purge right away).
-  for (ClassLoaderData* cld = _unloading; cld != _saved_unloading; cld = cld->next()) {
+  for (ClassLoaderData* cld = _unloading; cld != NULL; cld = cld->next()) {
     assert(cld->is_unloading(), "invariant");
     cl->do_cld(cld);
   }
@@ -381,9 +375,7 @@
 
 void ClassLoaderDataGraph::modules_unloading_do(void f(ModuleEntry*)) {
   assert_locked_or_safepoint(ClassLoaderDataGraph_lock);
-  // Only walk the head until any clds not purged from prior unloading
-  // (CMS doesn't purge right away).
-  for (ClassLoaderData* cld = _unloading; cld != _saved_unloading; cld = cld->next()) {
+  for (ClassLoaderData* cld = _unloading; cld != NULL; cld = cld->next()) {
     assert(cld->is_unloading(), "invariant");
     cld->modules_do(f);
   }
@@ -399,9 +391,7 @@
 
 void ClassLoaderDataGraph::packages_unloading_do(void f(PackageEntry*)) {
   assert_locked_or_safepoint(ClassLoaderDataGraph_lock);
-  // Only walk the head until any clds not purged from prior unloading
-  // (CMS doesn't purge right away).
-  for (ClassLoaderData* cld = _unloading; cld != _saved_unloading; cld = cld->next()) {
+  for (ClassLoaderData* cld = _unloading; cld != NULL; cld = cld->next()) {
     assert(cld->is_unloading(), "invariant");
     cld->packages_do(f);
   }
@@ -424,9 +414,7 @@
 
 void ClassLoaderDataGraph::classes_unloading_do(void f(Klass* const)) {
   assert_locked_or_safepoint(ClassLoaderDataGraph_lock);
-  // Only walk the head until any clds not purged from prior unloading
-  // (CMS doesn't purge right away).
-  for (ClassLoaderData* cld = _unloading; cld != _saved_unloading; cld = cld->next()) {
+  for (ClassLoaderData* cld = _unloading; cld != NULL; cld = cld->next()) {
     assert(cld->is_unloading(), "invariant");
     cld->classes_do(f);
   }
@@ -476,32 +464,6 @@
   }
 }
 
-GrowableArray<ClassLoaderData*>* ClassLoaderDataGraph::new_clds() {
-  assert_locked_or_safepoint(ClassLoaderDataGraph_lock);
-  assert(_head == NULL || _saved_head != NULL, "remember_new_clds(true) not called?");
-
-  GrowableArray<ClassLoaderData*>* array = new GrowableArray<ClassLoaderData*>();
-
-  // The CLDs in [_head, _saved_head] were all added during last call to remember_new_clds(true);
-  ClassLoaderData* curr = _head;
-  while (curr != _saved_head) {
-    if (!curr->claimed(ClassLoaderData::_claim_strong)) {
-      array->push(curr);
-      LogTarget(Debug, class, loader, data) lt;
-      if (lt.is_enabled()) {
-        LogStream ls(lt);
-        ls.print("found new CLD: ");
-        curr->print_value_on(&ls);
-        ls.cr();
-      }
-    }
-
-    curr = curr->_next;
-  }
-
-  return array;
-}
-
 #ifndef PRODUCT
 bool ClassLoaderDataGraph::contains_loader_data(ClassLoaderData* loader_data) {
   assert_locked_or_safepoint(ClassLoaderDataGraph_lock);
@@ -544,10 +506,6 @@
   uint loaders_processed = 0;
   uint loaders_removed = 0;
 
-  // Save previous _unloading pointer for CMS which may add to unloading list before
-  // purging and we don't want to rewalk the previously unloaded class loader data.
-  _saved_unloading = _unloading;
-
   data = _head;
   while (data != NULL) {
     if (data->is_alive()) {
@@ -676,7 +634,7 @@
   while (head != NULL) {
     Klass* next = next_klass_in_cldg(head);
 
-    Klass* old_head = Atomic::cmpxchg(next, &_next_klass, head);
+    Klass* old_head = Atomic::cmpxchg(&_next_klass, head, next);
 
     if (old_head == head) {
       return head; // Won the CAS.
--- a/src/hotspot/share/classfile/classLoaderDataGraph.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/classfile/classLoaderDataGraph.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -43,10 +43,6 @@
   // All CLDs (except the null CLD) can be reached by walking _head->_next->...
   static ClassLoaderData* volatile _head;
   static ClassLoaderData* _unloading;
-  // CMS support.
-  static ClassLoaderData* _saved_head;
-  static ClassLoaderData* _saved_unloading;
-  static bool _should_purge;
 
   // Set if there's anything to purge in the deallocate lists or previous versions
   // during a safepoint after class unloading in a full GC.
@@ -115,18 +111,6 @@
   static void print_dictionary(outputStream* st);
   static void print_table_statistics(outputStream* st);
 
-  // CMS support.
-  static void remember_new_clds(bool remember) { _saved_head = (remember ? _head : NULL); }
-  static GrowableArray<ClassLoaderData*>* new_clds();
-
-  static void set_should_purge(bool b) { _should_purge = b; }
-  static bool should_purge_and_reset() {
-    bool res = _should_purge;
-    // reset for next time.
-    set_should_purge(false);
-    return res;
-  }
-
   static int resize_dictionaries();
 
   static bool has_metaspace_oom()           { return _metaspace_oom; }
--- a/src/hotspot/share/classfile/classLoaderDataGraph.inline.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/classfile/classLoaderDataGraph.inline.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -50,21 +50,21 @@
 }
 
 void ClassLoaderDataGraph::inc_instance_classes(size_t count) {
-  Atomic::add(count, &_num_instance_classes);
+  Atomic::add(&_num_instance_classes, count);
 }
 
 void ClassLoaderDataGraph::dec_instance_classes(size_t count) {
   assert(count <= _num_instance_classes, "Sanity");
-  Atomic::sub(count, &_num_instance_classes);
+  Atomic::sub(&_num_instance_classes, count);
 }
 
 void ClassLoaderDataGraph::inc_array_classes(size_t count) {
-  Atomic::add(count, &_num_array_classes);
+  Atomic::add(&_num_array_classes, count);
 }
 
 void ClassLoaderDataGraph::dec_array_classes(size_t count) {
   assert(count <= _num_array_classes, "Sanity");
-  Atomic::sub(count, &_num_array_classes);
+  Atomic::sub(&_num_array_classes, count);
 }
 
 bool ClassLoaderDataGraph::should_clean_metaspaces_and_reset() {
--- a/src/hotspot/share/classfile/dictionary.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/classfile/dictionary.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -33,8 +33,6 @@
 #include "memory/metaspaceClosure.hpp"
 #include "memory/resourceArea.hpp"
 #include "oops/oop.inline.hpp"
-#include "runtime/atomic.hpp"
-#include "runtime/orderAccess.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/safepointVerifiers.hpp"
 #include "utilities/hashtable.inline.hpp"
--- a/src/hotspot/share/classfile/stringTable.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/classfile/stringTable.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -214,11 +214,11 @@
 }
 
 size_t StringTable::item_added() {
-  return Atomic::add((size_t)1, &_items_count);
+  return Atomic::add(&_items_count, (size_t)1);
 }
 
 size_t StringTable::add_items_to_clean(size_t ndead) {
-  size_t total = Atomic::add((size_t)ndead, &_uncleaned_items_count);
+  size_t total = Atomic::add(&_uncleaned_items_count, (size_t)ndead);
   log_trace(stringtable)(
      "Uncleaned items:" SIZE_FORMAT " added: " SIZE_FORMAT " total:" SIZE_FORMAT,
      _uncleaned_items_count, ndead, total);
@@ -226,7 +226,7 @@
 }
 
 void StringTable::item_removed() {
-  Atomic::add((size_t)-1, &_items_count);
+  Atomic::add(&_items_count, (size_t)-1);
 }
 
 double StringTable::get_load_factor() {
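
item_removed() above decrements the counter by adding (size_t)-1; unsigned wrap-around makes the addition behave as a subtraction. A tiny sketch of that behaviour, with std::atomic standing in for Atomic::add:

#include <atomic>
#include <cstddef>

int main() {
  std::atomic<size_t> items{3};
  items.fetch_add((size_t)-1);   // unsigned wrap-around: acts as items -= 1
  return (items.load() == 2) ? 0 : 1;
}
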
--- a/src/hotspot/share/classfile/symbolTable.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/classfile/symbolTable.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -189,8 +189,8 @@
   }
 }
 
-void SymbolTable::reset_has_items_to_clean() { Atomic::store(false, &_has_items_to_clean); }
-void SymbolTable::mark_has_items_to_clean()  { Atomic::store(true, &_has_items_to_clean); }
+void SymbolTable::reset_has_items_to_clean() { Atomic::store(&_has_items_to_clean, false); }
+void SymbolTable::mark_has_items_to_clean()  { Atomic::store(&_has_items_to_clean, true); }
 bool SymbolTable::has_items_to_clean()       { return Atomic::load(&_has_items_to_clean); }
 
 void SymbolTable::item_added() {
@@ -724,7 +724,7 @@
     bdt.done(jt);
   }
 
-  Atomic::add(stdc._processed, &_symbols_counted);
+  Atomic::add(&_symbols_counted, stdc._processed);
 
   log_debug(symboltable)("Cleaned " SIZE_FORMAT " of " SIZE_FORMAT,
                          stdd._deleted, stdc._processed);
--- a/src/hotspot/share/classfile/systemDictionary.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/classfile/systemDictionary.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -79,7 +79,6 @@
 #include "runtime/java.hpp"
 #include "runtime/javaCalls.hpp"
 #include "runtime/mutexLocker.hpp"
-#include "runtime/orderAccess.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/signature.hpp"
 #include "services/classLoadingService.hpp"
--- a/src/hotspot/share/classfile/verifier.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/classfile/verifier.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -50,7 +50,6 @@
 #include "runtime/interfaceSupport.inline.hpp"
 #include "runtime/javaCalls.hpp"
 #include "runtime/jniHandles.inline.hpp"
-#include "runtime/orderAccess.hpp"
 #include "runtime/os.hpp"
 #include "runtime/safepointVerifiers.hpp"
 #include "runtime/thread.hpp"
--- a/src/hotspot/share/code/codeCache.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/code/codeCache.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -47,6 +47,7 @@
 #include "oops/oop.inline.hpp"
 #include "oops/verifyOopClosure.hpp"
 #include "runtime/arguments.hpp"
+#include "runtime/atomic.hpp"
 #include "runtime/deoptimization.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/icache.hpp"
@@ -749,7 +750,7 @@
     for (;;) {
       ExceptionCache* purge_list_head = Atomic::load(&_exception_cache_purge_list);
       entry->set_purge_list_next(purge_list_head);
-      if (Atomic::cmpxchg(entry, &_exception_cache_purge_list, purge_list_head) == purge_list_head) {
+      if (Atomic::cmpxchg(&_exception_cache_purge_list, purge_list_head, entry) == purge_list_head) {
         break;
       }
     }
--- a/src/hotspot/share/code/compiledMethod.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/code/compiledMethod.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -38,6 +38,7 @@
 #include "oops/methodData.hpp"
 #include "oops/method.inline.hpp"
 #include "prims/methodHandles.hpp"
+#include "runtime/atomic.hpp"
 #include "runtime/deoptimization.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/mutexLocker.hpp"
@@ -113,7 +114,7 @@
 //-----------------------------------------------------------------------------
 
 ExceptionCache* CompiledMethod::exception_cache_acquire() const {
-  return OrderAccess::load_acquire(&_exception_cache);
+  return Atomic::load_acquire(&_exception_cache);
 }
 
 void CompiledMethod::add_exception_cache_entry(ExceptionCache* new_entry) {
@@ -133,7 +134,7 @@
         // next pointers always point at live ExceptionCaches, that are not removed due
         // to concurrent ExceptionCache cleanup.
         ExceptionCache* next = ec->next();
-        if (Atomic::cmpxchg(next, &_exception_cache, ec) == ec) {
+        if (Atomic::cmpxchg(&_exception_cache, ec, next) == ec) {
           CodeCache::release_exception_cache(ec);
         }
         continue;
@@ -143,7 +144,7 @@
         new_entry->set_next(ec);
       }
     }
-    if (Atomic::cmpxchg(new_entry, &_exception_cache, ec) == ec) {
+    if (Atomic::cmpxchg(&_exception_cache, ec, new_entry) == ec) {
       return;
     }
   }
@@ -176,7 +177,7 @@
         // Try to clean head; this is contended by concurrent inserts, that
         // both lazily clean the head, and insert entries at the head. If
         // the CAS fails, the operation is restarted.
-        if (Atomic::cmpxchg(next, &_exception_cache, curr) != curr) {
+        if (Atomic::cmpxchg(&_exception_cache, curr, next) != curr) {
           prev = NULL;
           curr = exception_cache_acquire();
           continue;
@@ -615,7 +616,7 @@
       if (md != NULL && md->is_method()) {
         Method* method = static_cast<Method*>(md);
         if (!method->method_holder()->is_loader_alive()) {
-          Atomic::store((Method*)NULL, r->metadata_addr());
+          Atomic::store(r->metadata_addr(), (Method*)NULL);
 
           if (!r->metadata_is_immediate()) {
             r->fix_metadata_relocation();
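
The exception-cache and purge-list updates in this file are all the same lock-free push: read the current head, point the new entry at it, and cmpxchg the head, retrying if another thread won the race. A self-contained sketch of that loop with std::atomic; the Entry type and list name are illustrative only:

#include <atomic>

struct Entry {
  int id;
  Entry* next = nullptr;
};

std::atomic<Entry*> purge_list_head{nullptr};

void push(Entry* e) {
  for (;;) {
    Entry* head = purge_list_head.load(std::memory_order_relaxed);
    e->next = head;                    // link the new entry before publishing it
    // Same shape as: Atomic::cmpxchg(&purge_list_head, head, e) == head
    if (purge_list_head.compare_exchange_weak(head, e,
                                              std::memory_order_release,
                                              std::memory_order_relaxed)) {
      return;                          // we installed e as the new head
    }
  }
}

int main() {
  Entry a{1}, b{2};
  push(&a);
  push(&b);
  return (purge_list_head.load()->id == 2) ? 0 : 1;
}
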
--- a/src/hotspot/share/code/compiledMethod.inline.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/code/compiledMethod.inline.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -27,8 +27,8 @@
 
 #include "code/compiledMethod.hpp"
 #include "code/nativeInst.hpp"
+#include "runtime/atomic.hpp"
 #include "runtime/frame.hpp"
-#include "runtime/orderAccess.hpp"
 
 inline bool CompiledMethod::is_deopt_pc(address pc) { return is_deopt_entry(pc) || is_deopt_mh_entry(pc); }
 
@@ -61,7 +61,7 @@
 
 // class ExceptionCache methods
 
-inline int ExceptionCache::count() { return OrderAccess::load_acquire(&_count); }
+inline int ExceptionCache::count() { return Atomic::load_acquire(&_count); }
 
 address ExceptionCache::pc_at(int index) {
   assert(index >= 0 && index < count(),"");
@@ -74,7 +74,7 @@
 }
 
 // increment_count is only called under lock, but there may be concurrent readers.
-inline void ExceptionCache::increment_count() { OrderAccess::release_store(&_count, _count + 1); }
+inline void ExceptionCache::increment_count() { Atomic::release_store(&_count, _count + 1); }
 
 
 #endif // SHARE_CODE_COMPILEDMETHOD_INLINE_HPP
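
The OrderAccess::load_acquire / release_store calls above move to the Atomic class; the semantics are the usual acquire/release pairing. A minimal sketch with std::atomic, assuming (as the comment above states) that writers are serialized by a lock while readers run concurrently; the names here are illustrative.

#include <atomic>
#include <mutex>

static std::atomic<int> count{0};
static std::mutex write_lock;

int read_count() {
  // Acquire load: data published before the matching release store
  // (e.g. a newly filled cache entry) is visible once the count is seen.
  return count.load(std::memory_order_acquire);
}

void increment_count() {
  std::lock_guard<std::mutex> guard(write_lock);  // only writers are serialized
  int old = count.load(std::memory_order_relaxed);
  // Release store: publishes prior writes before the incremented count.
  count.store(old + 1, std::memory_order_release);
}
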
--- a/src/hotspot/share/code/dependencyContext.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/code/dependencyContext.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -28,6 +28,7 @@
 #include "code/dependencyContext.hpp"
 #include "memory/resourceArea.hpp"
 #include "runtime/atomic.hpp"
+#include "runtime/orderAccess.hpp"
 #include "runtime/perfData.hpp"
 #include "utilities/exceptions.hpp"
 
@@ -101,7 +102,7 @@
   for (;;) {
     nmethodBucket* head = Atomic::load(_dependency_context_addr);
     new_head->set_next(head);
-    if (Atomic::cmpxchg(new_head, _dependency_context_addr, head) == head) {
+    if (Atomic::cmpxchg(_dependency_context_addr, head, new_head) == head) {
       break;
     }
   }
@@ -124,7 +125,7 @@
     for (;;) {
       nmethodBucket* purge_list_head = Atomic::load(&_purge_list);
       b->set_purge_list_next(purge_list_head);
-      if (Atomic::cmpxchg(b, &_purge_list, purge_list_head) == purge_list_head) {
+      if (Atomic::cmpxchg(&_purge_list, purge_list_head, b) == purge_list_head) {
         break;
       }
     }
@@ -260,7 +261,7 @@
 #endif //PRODUCT
 
 int nmethodBucket::decrement() {
-  return Atomic::sub(1, &_count);
+  return Atomic::sub(&_count, 1);
 }
 
 // We use a monotonically increasing epoch counter to track the last epoch a given
@@ -272,7 +273,7 @@
   if (last_cleanup >= cleaning_epoch) {
     return false;
   }
-  return Atomic::cmpxchg(cleaning_epoch, _last_cleanup_addr, last_cleanup) == last_cleanup;
+  return Atomic::cmpxchg(_last_cleanup_addr, last_cleanup, cleaning_epoch) == last_cleanup;
 }
 
 // Retrieve the first nmethodBucket that has a dependent that does not correspond to
@@ -281,7 +282,7 @@
 nmethodBucket* DependencyContext::dependencies_not_unloading() {
   for (;;) {
     // Need acquire because the read value could come from a concurrent insert.
-    nmethodBucket* head = OrderAccess::load_acquire(_dependency_context_addr);
+    nmethodBucket* head = Atomic::load_acquire(_dependency_context_addr);
     if (head == NULL || !head->get_nmethod()->is_unloading()) {
       return head;
     }
@@ -291,7 +292,7 @@
       // Unstable load of head w.r.t. head->next
       continue;
     }
-    if (Atomic::cmpxchg(head_next, _dependency_context_addr, head) == head) {
+    if (Atomic::cmpxchg(_dependency_context_addr, head, head_next) == head) {
       // Release is_unloading entries if unlinking was claimed
       DependencyContext::release(head);
     }
@@ -300,7 +301,7 @@
 
 // Relaxed accessors
 void DependencyContext::set_dependencies(nmethodBucket* b) {
-  Atomic::store(b, _dependency_context_addr);
+  Atomic::store(_dependency_context_addr, b);
 }
 
 nmethodBucket* DependencyContext::dependencies() {
@@ -313,7 +314,7 @@
 void DependencyContext::cleaning_start() {
   assert(SafepointSynchronize::is_at_safepoint(), "must be");
   uint64_t epoch = ++_cleaning_epoch_monotonic;
-  Atomic::store(epoch, &_cleaning_epoch);
+  Atomic::store(&_cleaning_epoch, epoch);
 }
 
 // The epilogue marks the end of dependency context cleanup by the GC,
@@ -323,7 +324,7 @@
 // was called. That allows dependency contexts to be cleaned concurrently.
 void DependencyContext::cleaning_end() {
   uint64_t epoch = 0;
-  Atomic::store(epoch, &_cleaning_epoch);
+  Atomic::store(&_cleaning_epoch, epoch);
 }
 
 // This function skips over nmethodBuckets in the list corresponding to
@@ -345,7 +346,7 @@
       // Unstable load of next w.r.t. next->next
       continue;
     }
-    if (Atomic::cmpxchg(next_next, &_next, next) == next) {
+    if (Atomic::cmpxchg(&_next, next, next_next) == next) {
       // Release is_unloading entries if unlinking was claimed
       DependencyContext::release(next);
     }
@@ -358,7 +359,7 @@
 }
 
 void nmethodBucket::set_next(nmethodBucket* b) {
-  Atomic::store(b, &_next);
+  Atomic::store(&_next, b);
 }
 
 nmethodBucket* nmethodBucket::purge_list_next() {
@@ -366,5 +367,5 @@
 }
 
 void nmethodBucket::set_purge_list_next(nmethodBucket* b) {
-  Atomic::store(b, &_purge_list_next);
+  Atomic::store(&_purge_list_next, b);
 }
--- a/src/hotspot/share/code/nmethod.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/code/nmethod.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -315,7 +315,7 @@
 }
 
 void ExceptionCache::set_next(ExceptionCache *ec) {
-  Atomic::store(ec, &_next);
+  Atomic::store(&_next, ec);
 }
 
 //-----------------------------------------------------------------------------
@@ -1150,7 +1150,7 @@
       // Ensure monotonicity of transitions.
       return false;
     }
-    if (Atomic::cmpxchg(new_state, &_state, old_state) == old_state) {
+    if (Atomic::cmpxchg(&_state, old_state, new_state) == old_state) {
       return true;
     }
   }
@@ -1849,7 +1849,7 @@
   assert(SafepointSynchronize::is_at_safepoint(), "only at safepoint");
 
   if ((_oops_do_mark_link == NULL) &&
-      (Atomic::replace_if_null(mark_link(this, claim_weak_request_tag), &_oops_do_mark_link))) {
+      (Atomic::replace_if_null(&_oops_do_mark_link, mark_link(this, claim_weak_request_tag)))) {
     oops_do_log_change("oops_do, mark weak request");
     return true;
   }
@@ -1863,7 +1863,7 @@
 nmethod::oops_do_mark_link* nmethod::oops_do_try_claim_strong_done() {
   assert(SafepointSynchronize::is_at_safepoint(), "only at safepoint");
 
-  oops_do_mark_link* old_next = Atomic::cmpxchg(mark_link(this, claim_strong_done_tag), &_oops_do_mark_link, mark_link(NULL, claim_weak_request_tag));
+  oops_do_mark_link* old_next = Atomic::cmpxchg(&_oops_do_mark_link, mark_link(NULL, claim_weak_request_tag), mark_link(this, claim_strong_done_tag));
   if (old_next == NULL) {
     oops_do_log_change("oops_do, mark strong done");
   }
@@ -1874,7 +1874,7 @@
   assert(SafepointSynchronize::is_at_safepoint(), "only at safepoint");
   assert(next == mark_link(this, claim_weak_request_tag), "Should be claimed as weak");
 
-  oops_do_mark_link* old_next = Atomic::cmpxchg(mark_link(this, claim_strong_request_tag), &_oops_do_mark_link, next);
+  oops_do_mark_link* old_next = Atomic::cmpxchg(&_oops_do_mark_link, next, mark_link(this, claim_strong_request_tag));
   if (old_next == next) {
     oops_do_log_change("oops_do, mark strong request");
   }
@@ -1885,7 +1885,7 @@
   assert(SafepointSynchronize::is_at_safepoint(), "only at safepoint");
   assert(extract_state(next) == claim_weak_done_tag, "Should be claimed as weak done");
 
-  oops_do_mark_link* old_next = Atomic::cmpxchg(mark_link(extract_nmethod(next), claim_strong_done_tag), &_oops_do_mark_link, next);
+  oops_do_mark_link* old_next = Atomic::cmpxchg(&_oops_do_mark_link, next, mark_link(extract_nmethod(next), claim_strong_done_tag));
   if (old_next == next) {
     oops_do_log_change("oops_do, mark weak done -> mark strong done");
     return true;
@@ -1900,13 +1900,13 @@
          extract_state(_oops_do_mark_link) == claim_strong_request_tag,
          "must be but is nmethod " PTR_FORMAT " %u", p2i(extract_nmethod(_oops_do_mark_link)), extract_state(_oops_do_mark_link));
 
-  nmethod* old_head = Atomic::xchg(this, &_oops_do_mark_nmethods);
+  nmethod* old_head = Atomic::xchg(&_oops_do_mark_nmethods, this);
   // Self-loop if needed.
   if (old_head == NULL) {
     old_head = this;
   }
   // Try to install end of list and weak done tag.
-  if (Atomic::cmpxchg(mark_link(old_head, claim_weak_done_tag), &_oops_do_mark_link, mark_link(this, claim_weak_request_tag)) == mark_link(this, claim_weak_request_tag)) {
+  if (Atomic::cmpxchg(&_oops_do_mark_link, mark_link(this, claim_weak_request_tag), mark_link(old_head, claim_weak_done_tag)) == mark_link(this, claim_weak_request_tag)) {
     oops_do_log_change("oops_do, mark weak done");
     return NULL;
   } else {
@@ -1917,7 +1917,7 @@
 void nmethod::oops_do_add_to_list_as_strong_done() {
   assert(SafepointSynchronize::is_at_safepoint(), "only at safepoint");
 
-  nmethod* old_head = Atomic::xchg(this, &_oops_do_mark_nmethods);
+  nmethod* old_head = Atomic::xchg(&_oops_do_mark_nmethods, this);
   // Self-loop if needed.
   if (old_head == NULL) {
     old_head = this;
--- a/src/hotspot/share/compiler/compileBroker.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/compiler/compileBroker.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -1479,14 +1479,14 @@
     assert(!is_osr, "can't be osr");
     // Adapters, native wrappers and method handle intrinsics
     // should be generated always.
-    return Atomic::add(1, &_compilation_id);
+    return Atomic::add(&_compilation_id, 1);
   } else if (CICountOSR && is_osr) {
-    id = Atomic::add(1, &_osr_compilation_id);
+    id = Atomic::add(&_osr_compilation_id, 1);
     if (CIStartOSR <= id && id < CIStopOSR) {
       return id;
     }
   } else {
-    id = Atomic::add(1, &_compilation_id);
+    id = Atomic::add(&_compilation_id, 1);
     if (CIStart <= id && id < CIStop) {
       return id;
     }
@@ -1498,7 +1498,7 @@
 #else
   // CICountOSR is a develop flag and set to 'false' by default. In a product built,
   // only _compilation_id is incremented.
-  return Atomic::add(1, &_compilation_id);
+  return Atomic::add(&_compilation_id, 1);
 #endif
 }
 
--- a/src/hotspot/share/compiler/compileBroker.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/compiler/compileBroker.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -29,6 +29,7 @@
 #include "compiler/abstractCompiler.hpp"
 #include "compiler/compileTask.hpp"
 #include "compiler/compilerDirectives.hpp"
+#include "runtime/atomic.hpp"
 #include "runtime/perfData.hpp"
 #include "utilities/stack.hpp"
 #if INCLUDE_JVMCI
@@ -335,7 +336,7 @@
   static bool should_compile_new_jobs() { return UseCompiler && (_should_compile_new_jobs == run_compilation); }
   static bool set_should_compile_new_jobs(jint new_state) {
     // Return success if the current caller set it
-    jint old = Atomic::cmpxchg(new_state, &_should_compile_new_jobs, 1-new_state);
+    jint old = Atomic::cmpxchg(&_should_compile_new_jobs, 1-new_state, new_state);
     bool success = (old == (1-new_state));
     if (success) {
       if (new_state == run_compilation) {
@@ -350,7 +351,7 @@
   static void disable_compilation_forever() {
     UseCompiler               = false;
     AlwaysCompileLoopMethods  = false;
-    Atomic::xchg(jint(shutdown_compilation), &_should_compile_new_jobs);
+    Atomic::xchg(&_should_compile_new_jobs, jint(shutdown_compilation));
   }
 
   static bool is_compilation_disabled_forever() {
@@ -359,7 +360,7 @@
   static void handle_full_code_cache(int code_blob_type);
   // Ensures that warning is only printed once.
   static bool should_print_compiler_warning() {
-    jint old = Atomic::cmpxchg(1, &_print_compilation_warning, 0);
+    jint old = Atomic::cmpxchg(&_print_compilation_warning, 0, 1);
     return old == 0;
   }
   // Return total compilation ticks
--- a/src/hotspot/share/compiler/disassembler.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/compiler/disassembler.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -414,9 +414,10 @@
   _bytes_per_line  = Disassembler::pd_instruction_alignment();
   _print_file_name = true;
 
-  if (_optionsParsed) return;  // parse only once
-
-  // parse the global option string:
+  // parse the global option string
+  // We need to fill the options buffer for each newly created
+  // decode_env instance. The hsdis_* library looks for options
+  // in that buffer.
   collect_options(Disassembler::pd_cpu_opts());
   collect_options(PrintAssemblyOptions);
 
@@ -424,6 +425,8 @@
     _print_raw = (strstr(options(), "xml") ? 2 : 1);
   }
 
+  if (_optionsParsed) return;  // parse only once
+
   if (strstr(options(), "help")) {
     _print_help = true;
   }
--- a/src/hotspot/share/gc/epsilon/epsilonHeap.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/epsilon/epsilonHeap.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -31,6 +31,7 @@
 #include "memory/allocation.inline.hpp"
 #include "memory/resourceArea.hpp"
 #include "memory/universe.hpp"
+#include "runtime/atomic.hpp"
 #include "runtime/globals.hpp"
 
 jint EpsilonHeap::initialize() {
@@ -156,7 +157,7 @@
   // Allocation successful, update counters
   {
     size_t last = _last_counter_update;
-    if ((used - last >= _step_counter_update) && Atomic::cmpxchg(used, &_last_counter_update, last) == last) {
+    if ((used - last >= _step_counter_update) && Atomic::cmpxchg(&_last_counter_update, last, used) == last) {
       _monitoring_support->update_counters();
     }
   }
@@ -164,7 +165,7 @@
   // ...and print the occupancy line, if needed
   {
     size_t last = _last_heap_print;
-    if ((used - last >= _step_heap_print) && Atomic::cmpxchg(used, &_last_heap_print, last) == last) {
+    if ((used - last >= _step_heap_print) && Atomic::cmpxchg(&_last_heap_print, last, used) == last) {
       print_heap_info(used);
       print_metaspace_info();
     }
@@ -212,7 +213,7 @@
   }
 
   // Always honor boundaries
-  size = MAX2(min_size, MIN2(_max_tlab_size, size));
+  size = clamp(size, min_size, _max_tlab_size);
 
   // Always honor alignment
   size = align_up(size, MinObjAlignment);
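
The clamp(size, min_size, _max_tlab_size) call above replaces the nested MAX2/MIN2 expression with the same meaning. A throwaway check with std::clamp (same argument order: value, low bound, high bound) just to show the equivalence; the sizes are arbitrary.

#include <algorithm>
#include <cassert>
#include <cstddef>

int main() {
  const size_t min_size = 8;
  const size_t max_tlab_size = 1024;
  const size_t sizes[] = {4, 512, 4096};
  for (size_t size : sizes) {
    size_t old_form = std::max(min_size, std::min(max_tlab_size, size));
    size_t new_form = std::clamp(size, min_size, max_tlab_size);
    assert(old_form == new_form);  // clamp(v, lo, hi) == MAX2(lo, MIN2(hi, v))
  }
  return 0;
}
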
--- a/src/hotspot/share/gc/g1/g1Allocator.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1Allocator.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -194,7 +194,7 @@
   if (hr == NULL) {
     return max_tlab;
   } else {
-    return MIN2(MAX2(hr->free(), (size_t) MinTLABSize), max_tlab);
+    return clamp(hr->free(), MinTLABSize, max_tlab);
   }
 }
 
--- a/src/hotspot/share/gc/g1/g1Analytics.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1Analytics.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -28,6 +28,7 @@
 #include "runtime/globals.hpp"
 #include "runtime/os.hpp"
 #include "utilities/debug.hpp"
+#include "utilities/globalDefinitions.hpp"
 #include "utilities/numberSeq.hpp"
 
 // Different defaults for different number of GC threads
@@ -44,11 +45,11 @@
 };
 
 // all the same
-static double young_cards_per_entry_ratio_defaults[] = {
+static double young_card_merge_to_scan_ratio_defaults[] = {
   1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
 };
 
-static double young_only_cost_per_remset_card_ms_defaults[] = {
+static double young_only_cost_per_card_scan_ms_defaults[] = {
   0.015, 0.01, 0.01, 0.008, 0.008, 0.0055, 0.0055, 0.005
 };
 
@@ -61,7 +62,6 @@
   5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0
 };
 
-
 static double young_other_cost_per_region_ms_defaults[] = {
   0.3, 0.2, 0.2, 0.15, 0.15, 0.12, 0.12, 0.1
 };
@@ -80,13 +80,13 @@
     _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
     _concurrent_refine_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _logged_cards_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _cost_per_logged_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _cost_scan_hcc_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _mixed_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _young_only_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _mixed_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _young_card_merge_to_scan_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _mixed_card_merge_to_scan_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _young_cost_per_card_scan_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _mixed_cost_per_card_scan_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _young_cost_per_card_merge_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _mixed_cost_per_card_merge_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _copy_cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _constant_other_time_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _non_young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
@@ -108,11 +108,10 @@
   _concurrent_refine_rate_ms_seq->add(1/cost_per_logged_card_ms_defaults[0]);
   // Some applications have very low rates for logging cards.
   _logged_cards_rate_ms_seq->add(0.0);
-  _cost_per_logged_card_ms_seq->add(cost_per_logged_card_ms_defaults[index]);
-  _cost_scan_hcc_seq->add(0.0);
-  _young_cards_per_entry_ratio_seq->add(young_cards_per_entry_ratio_defaults[index]);
-  _young_only_cost_per_remset_card_ms_seq->add(young_only_cost_per_remset_card_ms_defaults[index]);
-  _cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]);
+  _young_card_merge_to_scan_ratio_seq->add(young_card_merge_to_scan_ratio_defaults[index]);
+  _young_cost_per_card_scan_ms_seq->add(young_only_cost_per_card_scan_ms_defaults[index]);
+
+  _copy_cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]);
   _constant_other_time_ms_seq->add(constant_other_time_ms_defaults[index]);
   _young_other_cost_per_region_ms_seq->add(young_other_cost_per_region_ms_defaults[index]);
   _non_young_other_cost_per_region_ms_seq->add(non_young_other_cost_per_region_ms_defaults[index]);
@@ -122,12 +121,20 @@
   _concurrent_mark_cleanup_times_ms->add(0.20);
 }
 
-double G1Analytics::get_new_prediction(TruncatedSeq const* seq) const {
-  return _predictor->get_new_prediction(seq);
+bool G1Analytics::enough_samples_available(TruncatedSeq const* seq) const {
+  return seq->num() >= 3;
+}
+
+double G1Analytics::get_new_unit_prediction(TruncatedSeq const* seq) const {
+  return _predictor->get_new_unit_prediction(seq);
 }
 
 size_t G1Analytics::get_new_size_prediction(TruncatedSeq const* seq) const {
-  return (size_t)get_new_prediction(seq);
+  return (size_t)get_new_lower_zero_bound_prediction(seq);
+}
+
+double G1Analytics::get_new_lower_zero_bound_prediction(TruncatedSeq const* seq) const {
+  return _predictor->get_new_lower_zero_bound_prediction(seq);
 }
 
 int G1Analytics::num_alloc_rate_ms() const {
@@ -144,17 +151,9 @@
 
 void G1Analytics::compute_pause_time_ratio(double interval_ms, double pause_time_ms) {
   _recent_avg_pause_time_ratio = _recent_gc_times_ms->sum() / interval_ms;
-  if (_recent_avg_pause_time_ratio < 0.0 ||
-      (_recent_avg_pause_time_ratio - 1.0 > 0.0)) {
-    // Clip ratio between 0.0 and 1.0, and continue. This will be fixed in
-    // CR 6902692 by redoing the manner in which the ratio is incrementally computed.
-    if (_recent_avg_pause_time_ratio < 0.0) {
-      _recent_avg_pause_time_ratio = 0.0;
-    } else {
-      assert(_recent_avg_pause_time_ratio - 1.0 > 0.0, "Ctl-point invariant");
-      _recent_avg_pause_time_ratio = 1.0;
-    }
-  }
+
+  // Clamp the result to [0.0 ... 1.0] to filter out nonsensical results due to bad input.
+  _recent_avg_pause_time_ratio = clamp(_recent_avg_pause_time_ratio, 0.0, 1.0);
 
   // Compute the ratio of just this last pause time to the entire time range stored
   // in the vectors. Comparing this pause to the entire range, rather than only the
@@ -173,27 +172,27 @@
   _logged_cards_rate_ms_seq->add(cards_per_ms);
 }
 
-void G1Analytics::report_cost_per_logged_card_ms(double cost_per_logged_card_ms) {
-  _cost_per_logged_card_ms_seq->add(cost_per_logged_card_ms);
-}
-
-void G1Analytics::report_cost_scan_hcc(double cost_scan_hcc) {
-  _cost_scan_hcc_seq->add(cost_scan_hcc);
-}
-
-void G1Analytics::report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc) {
+void G1Analytics::report_cost_per_card_scan_ms(double cost_per_card_ms, bool for_young_gc) {
   if (for_young_gc) {
-    _young_only_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms);
+    _young_cost_per_card_scan_ms_seq->add(cost_per_card_ms);
   } else {
-    _mixed_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms);
+    _mixed_cost_per_card_scan_ms_seq->add(cost_per_card_ms);
   }
 }
 
-void G1Analytics::report_cards_per_entry_ratio(double cards_per_entry_ratio, bool for_young_gc) {
+void G1Analytics::report_cost_per_card_merge_ms(double cost_per_card_ms, bool for_young_gc) {
   if (for_young_gc) {
-    _young_cards_per_entry_ratio_seq->add(cards_per_entry_ratio);
+    _young_cost_per_card_merge_ms_seq->add(cost_per_card_ms);
   } else {
-    _mixed_cards_per_entry_ratio_seq->add(cards_per_entry_ratio);
+    _mixed_cost_per_card_merge_ms_seq->add(cost_per_card_ms);
+  }
+}
+
+void G1Analytics::report_card_merge_to_scan_ratio(double merge_to_scan_ratio, bool for_young_gc) {
+  if (for_young_gc) {
+    _young_card_merge_to_scan_ratio_seq->add(merge_to_scan_ratio);
+  } else {
+    _mixed_card_merge_to_scan_ratio_seq->add(merge_to_scan_ratio);
   }
 }
 
@@ -205,7 +204,7 @@
   if (mark_or_rebuild_in_progress) {
     _cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms);
   } else {
-    _cost_per_byte_ms_seq->add(cost_per_byte_ms);
+    _copy_cost_per_byte_ms_seq->add(cost_per_byte_ms);
   }
 }
 
@@ -230,70 +229,50 @@
 }
 
 double G1Analytics::predict_alloc_rate_ms() const {
-  return get_new_prediction(_alloc_rate_ms_seq);
+  return get_new_lower_zero_bound_prediction(_alloc_rate_ms_seq);
 }
 
 double G1Analytics::predict_concurrent_refine_rate_ms() const {
-  return get_new_prediction(_concurrent_refine_rate_ms_seq);
+  return get_new_lower_zero_bound_prediction(_concurrent_refine_rate_ms_seq);
 }
 
 double G1Analytics::predict_logged_cards_rate_ms() const {
-  return get_new_prediction(_logged_cards_rate_ms_seq);
+  return get_new_lower_zero_bound_prediction(_logged_cards_rate_ms_seq);
 }
 
-double G1Analytics::predict_cost_per_logged_card_ms() const {
-  return get_new_prediction(_cost_per_logged_card_ms_seq);
+double G1Analytics::predict_young_card_merge_to_scan_ratio() const {
+  return get_new_unit_prediction(_young_card_merge_to_scan_ratio_seq);
 }
 
-double G1Analytics::predict_scan_hcc_ms() const {
-  return get_new_prediction(_cost_scan_hcc_seq);
-}
-
-double G1Analytics::predict_rs_update_time_ms(size_t pending_cards) const {
-  return pending_cards * predict_cost_per_logged_card_ms() + predict_scan_hcc_ms();
-}
-
-double G1Analytics::predict_young_cards_per_entry_ratio() const {
-  return get_new_prediction(_young_cards_per_entry_ratio_seq);
-}
-
-double G1Analytics::predict_mixed_cards_per_entry_ratio() const {
-  if (_mixed_cards_per_entry_ratio_seq->num() < 2) {
-    return predict_young_cards_per_entry_ratio();
+size_t G1Analytics::predict_scan_card_num(size_t rs_length, bool for_young_gc) const {
+  if (for_young_gc || !enough_samples_available(_mixed_card_merge_to_scan_ratio_seq)) {
+    return (size_t)(rs_length * predict_young_card_merge_to_scan_ratio());
   } else {
-    return get_new_prediction(_mixed_cards_per_entry_ratio_seq);
+    return (size_t)(rs_length * get_new_unit_prediction(_mixed_card_merge_to_scan_ratio_seq));
   }
 }
 
-size_t G1Analytics::predict_card_num(size_t rs_length, bool for_young_gc) const {
-  if (for_young_gc) {
-    return (size_t) (rs_length * predict_young_cards_per_entry_ratio());
+double G1Analytics::predict_card_merge_time_ms(size_t card_num, bool for_young_gc) const {
+  if (for_young_gc || !enough_samples_available(_mixed_cost_per_card_merge_ms_seq)) {
+    return card_num * get_new_lower_zero_bound_prediction(_young_cost_per_card_merge_ms_seq);
   } else {
-    return (size_t) (rs_length * predict_mixed_cards_per_entry_ratio());
+    return card_num * get_new_lower_zero_bound_prediction(_mixed_cost_per_card_merge_ms_seq);
   }
 }
 
-double G1Analytics::predict_rs_scan_time_ms(size_t card_num, bool for_young_gc) const {
-  if (for_young_gc) {
-    return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq);
+double G1Analytics::predict_card_scan_time_ms(size_t card_num, bool for_young_gc) const {
+  if (for_young_gc || !enough_samples_available(_mixed_cost_per_card_scan_ms_seq)) {
+    return card_num * get_new_lower_zero_bound_prediction(_young_cost_per_card_scan_ms_seq);
   } else {
-    return predict_mixed_rs_scan_time_ms(card_num);
-  }
-}
-
-double G1Analytics::predict_mixed_rs_scan_time_ms(size_t card_num) const {
-  if (_mixed_cost_per_remset_card_ms_seq->num() < 3) {
-    return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq);
-  } else {
-    return card_num * get_new_prediction(_mixed_cost_per_remset_card_ms_seq);
+    return card_num * get_new_lower_zero_bound_prediction(_mixed_cost_per_card_scan_ms_seq);
   }
 }
 
 double G1Analytics::predict_object_copy_time_ms_during_cm(size_t bytes_to_copy) const {
-  if (_cost_per_byte_ms_during_cm_seq->num() < 3) {
-    return (1.1 * bytes_to_copy) * get_new_prediction(_cost_per_byte_ms_seq);
+  if (!enough_samples_available(_cost_per_byte_ms_during_cm_seq)) {
+    return (1.1 * bytes_to_copy) * get_new_lower_zero_bound_prediction(_copy_cost_per_byte_ms_seq);
   } else {
-    return bytes_to_copy * get_new_prediction(_cost_per_byte_ms_during_cm_seq);
+    return bytes_to_copy * get_new_lower_zero_bound_prediction(_cost_per_byte_ms_during_cm_seq);
   }
 }
 
@@ -301,36 +280,32 @@
   if (during_concurrent_mark) {
     return predict_object_copy_time_ms_during_cm(bytes_to_copy);
   } else {
-    return bytes_to_copy * get_new_prediction(_cost_per_byte_ms_seq);
+    return bytes_to_copy * get_new_lower_zero_bound_prediction(_copy_cost_per_byte_ms_seq);
   }
 }
 
-double G1Analytics::predict_cost_per_byte_ms() const {
-  return get_new_prediction(_cost_per_byte_ms_seq);
-}
-
 double G1Analytics::predict_constant_other_time_ms() const {
-  return get_new_prediction(_constant_other_time_ms_seq);
+  return get_new_lower_zero_bound_prediction(_constant_other_time_ms_seq);
 }
 
 double G1Analytics::predict_young_other_time_ms(size_t young_num) const {
-  return young_num * get_new_prediction(_young_other_cost_per_region_ms_seq);
+  return young_num * get_new_lower_zero_bound_prediction(_young_other_cost_per_region_ms_seq);
 }
 
 double G1Analytics::predict_non_young_other_time_ms(size_t non_young_num) const {
-  return non_young_num * get_new_prediction(_non_young_other_cost_per_region_ms_seq);
+  return non_young_num * get_new_lower_zero_bound_prediction(_non_young_other_cost_per_region_ms_seq);
 }
 
 double G1Analytics::predict_remark_time_ms() const {
-  return get_new_prediction(_concurrent_mark_remark_times_ms);
+  return get_new_lower_zero_bound_prediction(_concurrent_mark_remark_times_ms);
 }
 
 double G1Analytics::predict_cleanup_time_ms() const {
-  return get_new_prediction(_concurrent_mark_cleanup_times_ms);
+  return get_new_lower_zero_bound_prediction(_concurrent_mark_cleanup_times_ms);
 }
 
 size_t G1Analytics::predict_rs_length() const {
-  return get_new_size_prediction(_rs_length_seq) + get_new_prediction(_rs_length_diff_seq);
+  return get_new_size_prediction(_rs_length_seq) + get_new_size_prediction(_rs_length_diff_seq);
 }
 
 size_t G1Analytics::predict_pending_cards() const {
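
The new predict_* functions above share one pattern: for mixed GCs they fall back to the young-only sequence until the mixed sequence has at least three samples (enough_samples_available). A simplified standalone sketch; Seq, its averaging predictor and the function name are stand-ins, and only the threshold and the fallback logic mirror the diff.

#include <cstddef>
#include <vector>

struct Seq {
  std::vector<double> samples;
  size_t num() const { return samples.size(); }
  double predict() const {  // stand-in for the real predictor
    if (samples.empty()) return 0.0;
    double sum = 0.0;
    for (double s : samples) sum += s;
    return sum / samples.size();
  }
};

static bool enough_samples_available(const Seq& seq) {
  return seq.num() >= 3;
}

double predict_card_scan_time_ms(const Seq& young_seq, const Seq& mixed_seq,
                                 size_t card_num, bool for_young_gc) {
  // Use the mixed-gc sequence only once it has enough samples; otherwise
  // fall back to the young-only sequence, as the diff above does.
  const Seq& seq = (for_young_gc || !enough_samples_available(mixed_seq))
                       ? young_seq : mixed_seq;
  return card_num * seq.predict();
}
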
--- a/src/hotspot/share/gc/g1/g1Analytics.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1Analytics.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -48,13 +48,21 @@
   TruncatedSeq* _rs_length_diff_seq;
   TruncatedSeq* _concurrent_refine_rate_ms_seq;
   TruncatedSeq* _logged_cards_rate_ms_seq;
-  TruncatedSeq* _cost_per_logged_card_ms_seq;
-  TruncatedSeq* _cost_scan_hcc_seq;
-  TruncatedSeq* _young_cards_per_entry_ratio_seq;
-  TruncatedSeq* _mixed_cards_per_entry_ratio_seq;
-  TruncatedSeq* _young_only_cost_per_remset_card_ms_seq;
-  TruncatedSeq* _mixed_cost_per_remset_card_ms_seq;
-  TruncatedSeq* _cost_per_byte_ms_seq;
+  // The ratio between the number of merged cards and actually scanned cards, for
+  // young-only and mixed gcs.
+  TruncatedSeq* _young_card_merge_to_scan_ratio_seq;
+  TruncatedSeq* _mixed_card_merge_to_scan_ratio_seq;
+
+  // The cost to scan a card during young-only and mixed gcs in ms.
+  TruncatedSeq* _young_cost_per_card_scan_ms_seq;
+  TruncatedSeq* _mixed_cost_per_card_scan_ms_seq;
+
+  // The cost to merge a card during young-only and mixed gcs in ms.
+  TruncatedSeq* _young_cost_per_card_merge_ms_seq;
+  TruncatedSeq* _mixed_cost_per_card_merge_ms_seq;
+
+  // The cost to copy a byte in ms.
+  TruncatedSeq* _copy_cost_per_byte_ms_seq;
   TruncatedSeq* _constant_other_time_ms_seq;
   TruncatedSeq* _young_other_cost_per_region_ms_seq;
   TruncatedSeq* _non_young_other_cost_per_region_ms_seq;
@@ -72,8 +80,13 @@
   double _recent_avg_pause_time_ratio;
   double _last_pause_time_ratio;
 
-  double get_new_prediction(TruncatedSeq const* seq) const;
+  // Returns whether the sequence has enough samples to get a "good" prediction.
+  // The constant used is random but "small".
+  bool enough_samples_available(TruncatedSeq const* seq) const;
+
+  double get_new_unit_prediction(TruncatedSeq const* seq) const;
   size_t get_new_size_prediction(TruncatedSeq const* seq) const;
+  double get_new_lower_zero_bound_prediction(TruncatedSeq const* seq) const;
 
 public:
   G1Analytics(const G1Predictions* predictor);
@@ -103,10 +116,9 @@
   void report_alloc_rate_ms(double alloc_rate);
   void report_concurrent_refine_rate_ms(double cards_per_ms);
   void report_logged_cards_rate_ms(double cards_per_ms);
-  void report_cost_per_logged_card_ms(double cost_per_logged_card_ms);
-  void report_cost_scan_hcc(double cost_scan_hcc);
-  void report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc);
-  void report_cards_per_entry_ratio(double cards_per_entry_ratio, bool for_young_gc);
+  void report_cost_per_card_scan_ms(double cost_per_remset_card_ms, bool for_young_gc);
+  void report_cost_per_card_merge_ms(double cost_per_card_ms, bool for_young_gc);
+  void report_card_merge_to_scan_ratio(double cards_per_entry_ratio, bool for_young_gc);
   void report_rs_length_diff(double rs_length_diff);
   void report_cost_per_byte_ms(double cost_per_byte_ms, bool mark_or_rebuild_in_progress);
   void report_young_other_cost_per_region_ms(double other_cost_per_region_ms);
@@ -120,21 +132,14 @@
 
   double predict_concurrent_refine_rate_ms() const;
   double predict_logged_cards_rate_ms() const;
-  double predict_cost_per_logged_card_ms() const;
+  double predict_young_card_merge_to_scan_ratio() const;
 
-  double predict_scan_hcc_ms() const;
+  double predict_mixed_card_merge_to_scan_ratio() const;
 
-  double predict_rs_update_time_ms(size_t pending_cards) const;
+  size_t predict_scan_card_num(size_t rs_length, bool for_young_gc) const;
 
-  double predict_young_cards_per_entry_ratio() const;
-
-  double predict_mixed_cards_per_entry_ratio() const;
-
-  size_t predict_card_num(size_t rs_length, bool for_young_gc) const;
-
-  double predict_rs_scan_time_ms(size_t card_num, bool for_young_gc) const;
-
-  double predict_mixed_rs_scan_time_ms(size_t card_num) const;
+  double predict_card_merge_time_ms(size_t card_num, bool for_young_gc) const;
+  double predict_card_scan_time_ms(size_t card_num, bool for_young_gc) const;
 
   double predict_object_copy_time_ms_during_cm(size_t bytes_to_copy) const;
 
@@ -153,8 +158,6 @@
   size_t predict_rs_length() const;
   size_t predict_pending_cards() const;
 
-  double predict_cost_per_byte_ms() const;
-
   // Add a new GC of the given duration and end time to the record.
   void update_recent_gc_times(double end_time_sec, double elapsed_ms);
   void compute_pause_time_ratio(double interval_ms, double pause_time_ms);
--- a/src/hotspot/share/gc/g1/g1BarrierSet.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1BarrierSet.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -37,6 +37,7 @@
 #include "oops/oop.inline.hpp"
 #include "runtime/interfaceSupport.inline.hpp"
 #include "runtime/mutexLocker.hpp"
+#include "runtime/orderAccess.hpp"
 #include "runtime/thread.inline.hpp"
 #include "utilities/macros.hpp"
 #ifdef COMPILER1
--- a/src/hotspot/share/gc/g1/g1BlockOffsetTable.inline.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1BlockOffsetTable.inline.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -55,7 +55,7 @@
 }
 
 void G1BlockOffsetTable::set_offset_array_raw(size_t index, u_char offset) {
-  Atomic::store(offset, &_offset_array[index]);
+  Atomic::store(&_offset_array[index], offset);
 }
 
 void G1BlockOffsetTable::set_offset_array(size_t index, u_char offset) {
--- a/src/hotspot/share/gc/g1/g1CardTable.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1CardTable.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -27,8 +27,6 @@
 #include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/shared/memset_with_concurrent_readers.hpp"
 #include "logging/log.hpp"
-#include "runtime/atomic.hpp"
-#include "runtime/orderAccess.hpp"
 
 void G1CardTable::g1_mark_as_young(const MemRegion& mr) {
   CardValue *const first = byte_for(mr.start());
--- a/src/hotspot/share/gc/g1/g1CardTable.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1CardTable.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -92,12 +92,16 @@
     return pointer_delta(p, _byte_map, sizeof(CardValue));
   }
 
-  // Mark the given card as Dirty if it is Clean.
-  inline void mark_clean_as_dirty(size_t card_index);
+  // Mark the given card as Dirty if it is Clean. Returns the number of cards
+  // newly dirtied (i.e. cards that were not already dirty). This result may be
+  // inaccurate as it does not perform the dirtying atomically.
+  inline size_t mark_clean_as_dirty(size_t card_index);
 
   // Change Clean cards in a (large) area on the card table as Dirty, preserving
   // already scanned cards. Assumes that most cards in that area are Clean.
-  inline void mark_region_dirty(size_t start_card_index, size_t num_cards);
+  // Returns the number of cards newly dirtied (i.e. not already dirty). This
+  // result may be inaccurate as it does not perform the dirtying atomically.
+  inline size_t mark_region_dirty(size_t start_card_index, size_t num_cards);
 
   // Mark the given range of cards as Scanned. All of these cards must be Dirty.
   inline void mark_as_scanned(size_t start_card_index, size_t num_cards);
--- a/src/hotspot/share/gc/g1/g1CardTable.inline.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1CardTable.inline.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -33,17 +33,21 @@
   return (uint)(card_idx >> (HeapRegion::LogOfHRGrainBytes - card_shift));
 }
 
-inline void G1CardTable::mark_clean_as_dirty(size_t card_index) {
+inline size_t G1CardTable::mark_clean_as_dirty(size_t card_index) {
   CardValue value = _byte_map[card_index];
   if (value == clean_card_val()) {
     _byte_map[card_index] = dirty_card_val();
+    return 1;
   }
+  return 0;
 }
 
-inline void G1CardTable::mark_region_dirty(size_t start_card_index, size_t num_cards) {
+inline size_t G1CardTable::mark_region_dirty(size_t start_card_index, size_t num_cards) {
   assert(is_aligned(start_card_index, sizeof(size_t)), "Start card index must be aligned.");
   assert(is_aligned(num_cards, sizeof(size_t)), "Number of cards to change must be evenly divisible.");
 
+  size_t result = 0;
+
   size_t const num_chunks = num_cards / sizeof(size_t);
 
   size_t* cur_word = (size_t*)&_byte_map[start_card_index];
@@ -52,6 +56,7 @@
     size_t value = *cur_word;
     if (value == WordAllClean) {
       *cur_word = WordAllDirty;
+      result += sizeof(value);
     } else if (value == WordAllDirty) {
       // do nothing.
     } else {
@@ -61,12 +66,15 @@
         CardValue value = *cur;
         if (value == clean_card_val()) {
           *cur = dirty_card_val();
+          result++;
         }
         cur++;
       }
     }
     cur_word++;
   }
+
+  return result;
 }
 
 inline void G1CardTable::mark_as_scanned(size_t start_card_index, size_t num_cards) {
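
The mark_region_dirty change above dirties whole words of clean cards at a time and now counts how many cards were newly dirtied. A standalone sketch of that loop, assuming the usual card encoding of clean == 0xff and dirty == 0x00; everything outside the loop (names, parameters) is illustrative.

#include <cstddef>
#include <cstdint>

static const uint8_t clean_card = 0xff;  // assumed encoding
static const uint8_t dirty_card = 0x00;  // assumed encoding

// Assumes `cards` is word-aligned and num_cards is a multiple of
// sizeof(size_t), as the asserts in the real code require.
size_t mark_region_dirty(uint8_t* cards, size_t num_cards) {
  const size_t all_clean = ~(size_t)0;
  const size_t all_dirty = 0;
  size_t newly_dirtied = 0;

  size_t* cur_word = reinterpret_cast<size_t*>(cards);
  size_t* end_word = cur_word + num_cards / sizeof(size_t);
  while (cur_word < end_word) {
    size_t value = *cur_word;
    if (value == all_clean) {
      *cur_word = all_dirty;             // whole word of clean cards
      newly_dirtied += sizeof(value);
    } else if (value != all_dirty) {     // mixed word: go card by card
      uint8_t* cur = reinterpret_cast<uint8_t*>(cur_word);
      for (size_t i = 0; i < sizeof(value); i++, cur++) {
        if (*cur == clean_card) {
          *cur = dirty_card;
          newly_dirtied++;
        }
      }
    }
    cur_word++;
  }
  return newly_dirtied;
}
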
--- a/src/hotspot/share/gc/g1/g1CodeCacheRemSet.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1CodeCacheRemSet.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -32,6 +32,7 @@
 #include "memory/iterator.hpp"
 #include "oops/access.inline.hpp"
 #include "oops/oop.inline.hpp"
+#include "runtime/atomic.hpp"
 #include "utilities/hashtable.inline.hpp"
 #include "utilities/stack.inline.hpp"
 
@@ -158,19 +159,19 @@
 }
 
 G1CodeRootSetTable* G1CodeRootSet::load_acquire_table() {
-  return OrderAccess::load_acquire(&_table);
+  return Atomic::load_acquire(&_table);
 }
 
 void G1CodeRootSet::allocate_small_table() {
   G1CodeRootSetTable* temp = new G1CodeRootSetTable(SmallSize);
 
-  OrderAccess::release_store(&_table, temp);
+  Atomic::release_store(&_table, temp);
 }
 
 void G1CodeRootSetTable::purge_list_append(G1CodeRootSetTable* table) {
   for (;;) {
     table->_purge_next = _purge_list;
-    G1CodeRootSetTable* old = Atomic::cmpxchg(table, &_purge_list, table->_purge_next);
+    G1CodeRootSetTable* old = Atomic::cmpxchg(&_purge_list, table->_purge_next, table);
     if (old == table->_purge_next) {
       break;
     }
@@ -194,7 +195,7 @@
 
   G1CodeRootSetTable::purge_list_append(_table);
 
-  OrderAccess::release_store(&_table, temp);
+  Atomic::release_store(&_table, temp);
 }
 
 void G1CodeRootSet::purge() {
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -1,4 +1,4 @@
-/*
+ /*
  * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -1516,6 +1516,7 @@
   _allocator(NULL),
   _verifier(NULL),
   _summary_bytes_used(0),
+  _bytes_used_during_gc(0),
   _archive_allocator(NULL),
   _survivor_evac_stats("Young", YoungPLABSize, PLABWeight),
   _old_evac_stats("Old", OldPLABSize, PLABWeight),
@@ -2776,112 +2777,6 @@
          G1EagerReclaimHumongousObjects && rem_set->is_empty();
 }
 
-class RegisterRegionsWithRegionAttrTableClosure : public HeapRegionClosure {
- private:
-  size_t _total_humongous;
-  size_t _candidate_humongous;
-
-  bool humongous_region_is_candidate(G1CollectedHeap* g1h, HeapRegion* region) const {
-    assert(region->is_starts_humongous(), "Must start a humongous object");
-
-    oop obj = oop(region->bottom());
-
-    // Dead objects cannot be eager reclaim candidates. Due to class
-    // unloading it is unsafe to query their classes so we return early.
-    if (g1h->is_obj_dead(obj, region)) {
-      return false;
-    }
-
-    // If we do not have a complete remembered set for the region, then we can
-    // not be sure that we have all references to it.
-    if (!region->rem_set()->is_complete()) {
-      return false;
-    }
-    // Candidate selection must satisfy the following constraints
-    // while concurrent marking is in progress:
-    //
-    // * In order to maintain SATB invariants, an object must not be
-    // reclaimed if it was allocated before the start of marking and
-    // has not had its references scanned.  Such an object must have
-    // its references (including type metadata) scanned to ensure no
-    // live objects are missed by the marking process.  Objects
-    // allocated after the start of concurrent marking don't need to
-    // be scanned.
-    //
-    // * An object must not be reclaimed if it is on the concurrent
-    // mark stack.  Objects allocated after the start of concurrent
-    // marking are never pushed on the mark stack.
-    //
-    // Nominating only objects allocated after the start of concurrent
-    // marking is sufficient to meet both constraints.  This may miss
-    // some objects that satisfy the constraints, but the marking data
-    // structures don't support efficiently performing the needed
-    // additional tests or scrubbing of the mark stack.
-    //
-    // However, we presently only nominate is_typeArray() objects.
-    // A humongous object containing references induces remembered
-    // set entries on other regions.  In order to reclaim such an
-    // object, those remembered sets would need to be cleaned up.
-    //
-    // We also treat is_typeArray() objects specially, allowing them
-    // to be reclaimed even if allocated before the start of
-    // concurrent mark.  For this we rely on mark stack insertion to
-    // exclude is_typeArray() objects, preventing reclaiming an object
-    // that is in the mark stack.  We also rely on the metadata for
-    // such objects to be built-in and so ensured to be kept live.
-    // Frequent allocation and drop of large binary blobs is an
-    // important use case for eager reclaim, and this special handling
-    // may reduce needed headroom.
-
-    return obj->is_typeArray() &&
-           g1h->is_potential_eager_reclaim_candidate(region);
-  }
-
- public:
-  RegisterRegionsWithRegionAttrTableClosure()
-  : _total_humongous(0),
-    _candidate_humongous(0) {
-  }
-
-  virtual bool do_heap_region(HeapRegion* r) {
-    G1CollectedHeap* g1h = G1CollectedHeap::heap();
-
-    if (!r->is_starts_humongous()) {
-      g1h->register_region_with_region_attr(r);
-      return false;
-    }
-
-    bool is_candidate = humongous_region_is_candidate(g1h, r);
-    uint rindex = r->hrm_index();
-    g1h->set_humongous_reclaim_candidate(rindex, is_candidate);
-    if (is_candidate) {
-      g1h->register_humongous_region_with_region_attr(rindex);
-      _candidate_humongous++;
-      // We will later handle the remembered sets of these regions.
-    } else {
-      g1h->register_region_with_region_attr(r);
-    }
-    _total_humongous++;
-
-    return false;
-  }
-
-  size_t total_humongous() const { return _total_humongous; }
-  size_t candidate_humongous() const { return _candidate_humongous; }
-};
-
-void G1CollectedHeap::register_regions_with_region_attr() {
-  Ticks start = Ticks::now();
-
-  RegisterRegionsWithRegionAttrTableClosure cl;
-  heap_region_iterate(&cl);
-
-  phase_times()->record_register_regions((Ticks::now() - start).seconds() * 1000.0,
-                                         cl.total_humongous(),
-                                         cl.candidate_humongous());
-  _has_humongous_reclaim_candidates = cl.candidate_humongous() > 0;
-}
-
 #ifndef PRODUCT
 void G1CollectedHeap::verify_region_attr_remset_update() {
   class VerifyRegionAttrRemSet : public HeapRegionClosure {
@@ -3138,7 +3033,6 @@
                          collector_state()->yc_type() == Mixed /* all_memory_pools_affected */);
 
     G1HeapTransition heap_transition(this);
-    size_t heap_used_bytes_before_gc = used();
 
     {
       IsGCActiveMark x;
@@ -3211,7 +3105,7 @@
 
         double sample_end_time_sec = os::elapsedTime();
         double pause_time_ms = (sample_end_time_sec - sample_start_time_sec) * MILLIUNITS;
-        policy()->record_collection_pause_end(pause_time_ms, heap_used_bytes_before_gc);
+        policy()->record_collection_pause_end(pause_time_ms);
       }
 
       verify_after_young_collection(verify_type);
@@ -3377,7 +3271,7 @@
     BufferNode* next = Atomic::load(&_nodes);
     while (next != NULL) {
       BufferNode* node = next;
-      next = Atomic::cmpxchg(node->next(), &_nodes, node);
+      next = Atomic::cmpxchg(&_nodes, node, node->next());
       if (next == node) {
         cl->apply_to_buffer(node, buffer_size, worker_id);
         next = node->next();
@@ -3694,12 +3588,153 @@
 }
 
 void G1CollectedHeap::merge_per_thread_state_info(G1ParScanThreadStateSet* per_thread_states) {
-  double merge_pss_time_start = os::elapsedTime();
+  Ticks start = Ticks::now();
   per_thread_states->flush();
-  phase_times()->record_merge_pss_time_ms((os::elapsedTime() - merge_pss_time_start) * 1000.0);
+  phase_times()->record_or_add_time_secs(G1GCPhaseTimes::MergePSS, 0 /* worker_id */, (Ticks::now() - start).seconds());
 }
 
+class G1PrepareEvacuationTask : public AbstractGangTask {
+  class G1PrepareRegionsClosure : public HeapRegionClosure {
+    G1CollectedHeap* _g1h;
+    G1PrepareEvacuationTask* _parent_task;
+    size_t _worker_humongous_total;
+    size_t _worker_humongous_candidates;
+
+    bool humongous_region_is_candidate(HeapRegion* region) const {
+      assert(region->is_starts_humongous(), "Must start a humongous object");
+
+      oop obj = oop(region->bottom());
+
+      // Dead objects cannot be eager reclaim candidates. Due to class
+      // unloading it is unsafe to query their classes so we return early.
+      if (_g1h->is_obj_dead(obj, region)) {
+        return false;
+      }
+
+      // If we do not have a complete remembered set for the region, then we can
+      // not be sure that we have all references to it.
+      if (!region->rem_set()->is_complete()) {
+        return false;
+      }
+      // Candidate selection must satisfy the following constraints
+      // while concurrent marking is in progress:
+      //
+      // * In order to maintain SATB invariants, an object must not be
+      // reclaimed if it was allocated before the start of marking and
+      // has not had its references scanned.  Such an object must have
+      // its references (including type metadata) scanned to ensure no
+      // live objects are missed by the marking process.  Objects
+      // allocated after the start of concurrent marking don't need to
+      // be scanned.
+      //
+      // * An object must not be reclaimed if it is on the concurrent
+      // mark stack.  Objects allocated after the start of concurrent
+      // marking are never pushed on the mark stack.
+      //
+      // Nominating only objects allocated after the start of concurrent
+      // marking is sufficient to meet both constraints.  This may miss
+      // some objects that satisfy the constraints, but the marking data
+      // structures don't support efficiently performing the needed
+      // additional tests or scrubbing of the mark stack.
+      //
+      // However, we presently only nominate is_typeArray() objects.
+      // A humongous object containing references induces remembered
+      // set entries on other regions.  In order to reclaim such an
+      // object, those remembered sets would need to be cleaned up.
+      //
+      // We also treat is_typeArray() objects specially, allowing them
+      // to be reclaimed even if allocated before the start of
+      // concurrent mark.  For this we rely on mark stack insertion to
+      // exclude is_typeArray() objects, preventing reclaiming an object
+      // that is in the mark stack.  We also rely on the metadata for
+      // such objects to be built-in and so ensured to be kept live.
+      // Frequent allocation and drop of large binary blobs is an
+      // important use case for eager reclaim, and this special handling
+      // may reduce needed headroom.
+
+      return obj->is_typeArray() &&
+             _g1h->is_potential_eager_reclaim_candidate(region);
+    }
+
+  public:
+    G1PrepareRegionsClosure(G1CollectedHeap* g1h, G1PrepareEvacuationTask* parent_task) :
+      _g1h(g1h),
+      _parent_task(parent_task),
+      _worker_humongous_total(0),
+      _worker_humongous_candidates(0) { }
+
+    ~G1PrepareRegionsClosure() {
+      _parent_task->add_humongous_candidates(_worker_humongous_candidates);
+      _parent_task->add_humongous_total(_worker_humongous_total);
+    }
+
+    virtual bool do_heap_region(HeapRegion* hr) {
+      // First prepare the region for scanning
+      _g1h->rem_set()->prepare_region_for_scan(hr);
+
+      // Now check if region is a humongous candidate
+      if (!hr->is_starts_humongous()) {
+        _g1h->register_region_with_region_attr(hr);
+        return false;
+      }
+
+      uint index = hr->hrm_index();
+      if (humongous_region_is_candidate(hr)) {
+        _g1h->set_humongous_reclaim_candidate(index, true);
+        _g1h->register_humongous_region_with_region_attr(index);
+        _worker_humongous_candidates++;
+        // We will later handle the remembered sets of these regions.
+      } else {
+        _g1h->set_humongous_reclaim_candidate(index, false);
+        _g1h->register_region_with_region_attr(hr);
+      }
+      _worker_humongous_total++;
+
+      return false;
+    }
+  };
+
+  G1CollectedHeap* _g1h;
+  HeapRegionClaimer _claimer;
+  volatile size_t _humongous_total;
+  volatile size_t _humongous_candidates;
+public:
+  G1PrepareEvacuationTask(G1CollectedHeap* g1h) :
+    AbstractGangTask("Prepare Evacuation"),
+    _g1h(g1h),
+    _claimer(_g1h->workers()->active_workers()),
+    _humongous_total(0),
+    _humongous_candidates(0) { }
+
+  ~G1PrepareEvacuationTask() {
+    _g1h->set_has_humongous_reclaim_candidate(_humongous_candidates > 0);
+  }
+
+  void work(uint worker_id) {
+    G1PrepareRegionsClosure cl(_g1h, this);
+    _g1h->heap_region_par_iterate_from_worker_offset(&cl, &_claimer, worker_id);
+  }
+
+  void add_humongous_candidates(size_t candidates) {
+    Atomic::add(&_humongous_candidates, candidates);
+  }
+
+  void add_humongous_total(size_t total) {
+    Atomic::add(&_humongous_total, total);
+  }
+
+  size_t humongous_candidates() {
+    return _humongous_candidates;
+  }
+
+  size_t humongous_total() {
+    return _humongous_total;
+  }
+};
+
 void G1CollectedHeap::pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* per_thread_states) {
+  _bytes_used_during_gc = 0;
+
   _expand_heap_after_alloc_failure = true;
   _evacuation_failed = false;
 
@@ -3716,9 +3751,16 @@
     phase_times()->record_prepare_heap_roots_time_ms((Ticks::now() - start).seconds() * 1000.0);
   }
 
-  register_regions_with_region_attr();
+  {
+    G1PrepareEvacuationTask g1_prep_task(this);
+    Tickspan task_time = run_task(&g1_prep_task);
+
+    phase_times()->record_register_regions(task_time.seconds() * 1000.0,
+                                           g1_prep_task.humongous_total(),
+                                           g1_prep_task.humongous_candidates());
+  }
+
   assert(_verifier->check_region_attr_table(), "Inconsistency in the region attributes table.");
-
   _preserved_marks_set.assert_empty();
 
 #if COMPILER2_OR_JVMCI
@@ -3764,9 +3806,6 @@
     Tickspan evac_time = (Ticks::now() - start);
     p->record_or_add_time_secs(objcopy_phase, worker_id, evac_time.seconds() - cl.term_time());
 
-    p->record_or_add_thread_work_item(objcopy_phase, worker_id, pss->lab_waste_words() * HeapWordSize, G1GCPhaseTimes::ObjCopyLABWaste);
-    p->record_or_add_thread_work_item(objcopy_phase, worker_id, pss->lab_undo_waste_words() * HeapWordSize, G1GCPhaseTimes::ObjCopyLABUndoWaste);
-
     if (termination_phase == G1GCPhaseTimes::Termination) {
       p->record_time_secs(termination_phase, worker_id, cl.term_time());
       p->record_thread_work_item(termination_phase, worker_id, cl.term_attempts());
@@ -3943,6 +3982,8 @@
 void G1CollectedHeap::post_evacuate_collection_set(G1EvacuationInfo& evacuation_info,
                                                    G1RedirtyCardsQueueSet* rdcqs,
                                                    G1ParScanThreadStateSet* per_thread_states) {
+  G1GCPhaseTimes* p = phase_times();
+
   rem_set()->cleanup_after_scan_heap_roots();
 
   // Process any discovered reference objects - we have
@@ -3955,16 +3996,15 @@
   G1STWIsAliveClosure is_alive(this);
   G1KeepAliveClosure keep_alive(this);
 
-  WeakProcessor::weak_oops_do(workers(), &is_alive, &keep_alive,
-                              phase_times()->weak_phase_times());
+  WeakProcessor::weak_oops_do(workers(), &is_alive, &keep_alive, p->weak_phase_times());
 
   if (G1StringDedup::is_enabled()) {
     double string_dedup_time_ms = os::elapsedTime();
 
-    string_dedup_cleaning(&is_alive, &keep_alive, phase_times());
+    string_dedup_cleaning(&is_alive, &keep_alive, p);
 
     double string_cleanup_time_ms = (os::elapsedTime() - string_dedup_time_ms) * 1000.0;
-    phase_times()->record_string_deduplication_time(string_cleanup_time_ms);
+    p->record_string_deduplication_time(string_cleanup_time_ms);
   }
 
   _allocator->release_gc_alloc_regions(evacuation_info);
@@ -3977,7 +4017,7 @@
 
     double recalculate_used_start = os::elapsedTime();
     set_used(recalculate_used());
-    phase_times()->record_evac_fail_recalc_used_time((os::elapsedTime() - recalculate_used_start) * 1000.0);
+    p->record_evac_fail_recalc_used_time((os::elapsedTime() - recalculate_used_start) * 1000.0);
 
     if (_archive_allocator != NULL) {
       _archive_allocator->clear_used();
@@ -3989,8 +4029,8 @@
     }
   } else {
     // The "used" of the the collection set have already been subtracted
-    // when they were freed.  Add in the bytes evacuated.
-    increase_used(policy()->bytes_copied_during_gc());
+    // when they were freed.  Add in the bytes used.
+    increase_used(_bytes_used_during_gc);
   }
 
   _preserved_marks_set.assert_empty();
@@ -4014,7 +4054,7 @@
   record_obj_copy_mem_stats();
 
   evacuation_info.set_collectionset_used_before(collection_set()->bytes_used_before());
-  evacuation_info.set_bytes_copied(policy()->bytes_copied_during_gc());
+  evacuation_info.set_bytes_used(_bytes_used_during_gc);
 
 #if COMPILER2_OR_JVMCI
   double start = os::elapsedTime();
@@ -4226,7 +4266,7 @@
     HeapRegion* r = g1h->region_at(region_idx);
     assert(!g1h->is_on_master_free_list(r), "sanity");
 
-    Atomic::add(r->rem_set()->occupied_locked(), &_rs_length);
+    Atomic::add(&_rs_length, r->rem_set()->occupied_locked());
 
     if (!is_young) {
       g1h->hot_card_cache()->reset_card_counts(r);
@@ -4290,7 +4330,7 @@
 
     // Claim serial work.
     if (_serial_work_claim == 0) {
-      jint value = Atomic::add(1, &_serial_work_claim) - 1;
+      jint value = Atomic::add(&_serial_work_claim, 1) - 1;
       if (value == 0) {
         double serial_time = os::elapsedTime();
         do_serial_work();
@@ -4305,7 +4345,7 @@
     bool has_non_young_time = false;
 
     while (true) {
-      size_t end = Atomic::add(chunk_size(), &_parallel_work_claim);
+      size_t end = Atomic::add(&_parallel_work_claim, chunk_size());
       size_t cur = end - chunk_size();
 
       if (cur >= _num_work_items) {
@@ -4786,7 +4826,7 @@
 void G1CollectedHeap::retire_gc_alloc_region(HeapRegion* alloc_region,
                                              size_t allocated_bytes,
                                              G1HeapRegionAttr dest) {
-  policy()->record_bytes_copied_during_gc(allocated_bytes);
+  _bytes_used_during_gc += allocated_bytes;
   if (dest.is_old()) {
     old_set_add(alloc_region);
   } else {
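
G1PrepareEvacuationTask above has each worker accumulate its humongous-region tallies locally and publish them once via Atomic::add, with the owner reading the sums only after the gang finishes. A rough standalone sketch of that shape using std::atomic and std::thread; the worker arguments and names are made up.

#include <atomic>
#include <cstddef>
#include <thread>
#include <vector>

static std::atomic<size_t> humongous_total{0};
static std::atomic<size_t> humongous_candidates{0};

// Each worker flushes its local tallies exactly once to keep contention low.
void worker(size_t regions_seen, size_t candidates_found) {
  humongous_total.fetch_add(regions_seen);
  humongous_candidates.fetch_add(candidates_found);
}

int main() {
  std::vector<std::thread> workers;
  workers.emplace_back(worker, (size_t)100, (size_t)3);
  workers.emplace_back(worker, (size_t)120, (size_t)0);
  for (std::thread& t : workers) {
    t.join();
  }
  // The owner only inspects the totals after every worker has finished,
  // analogous to set_has_humongous_reclaim_candidate in the task destructor.
  bool has_candidates = humongous_candidates.load() > 0;
  (void)has_candidates;
  return 0;
}
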
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -214,6 +214,10 @@
 
   void set_used(size_t bytes);
 
+  // Number of bytes used in all regions during GC. Typically changed when
+  // retiring a GC alloc region.
+  size_t _bytes_used_during_gc;
+
   // Class that handles archive allocation ranges.
   G1ArchiveAllocator* _archive_allocator;
 
@@ -589,6 +593,7 @@
   // These are only valid for starts_humongous regions.
   inline void set_humongous_reclaim_candidate(uint region, bool value);
   inline bool is_humongous_reclaim_candidate(uint region);
+  inline void set_has_humongous_reclaim_candidate(bool value);
 
   // Remove from the reclaim candidate set.  Also remove from the
   // collection set so that later encounters avoid the slow path.
@@ -596,8 +601,7 @@
 
   // Register the given region to be part of the collection set.
   inline void register_humongous_region_with_region_attr(uint index);
-  // Update region attributes table with information about all regions.
-  void register_regions_with_region_attr();
+
   // We register a region with the fast "in collection set" test. We
   // simply set to true the array slot corresponding to this region.
   void register_young_region_with_region_attr(HeapRegion* r) {
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -34,7 +34,6 @@
 #include "gc/g1/heapRegionRemSet.hpp"
 #include "gc/g1/heapRegionSet.inline.hpp"
 #include "gc/shared/taskqueue.inline.hpp"
-#include "runtime/orderAccess.hpp"
 
 G1GCPhaseTimes* G1CollectedHeap::phase_times() const {
   return _policy->phase_times();
@@ -181,7 +180,7 @@
 
 void G1CollectedHeap::register_old_region_with_region_attr(HeapRegion* r) {
   _region_attr.set_in_old(r->hrm_index(), r->rem_set()->is_tracked());
-  _rem_set->prepare_for_scan_heap_roots(r->hrm_index());
+  _rem_set->exclude_region_from_scan(r->hrm_index());
 }
 
 void G1CollectedHeap::register_optional_region_with_region_attr(HeapRegion* r) {
@@ -299,6 +298,10 @@
   return _humongous_reclaim_candidates.is_candidate(region);
 }
 
+inline void G1CollectedHeap::set_has_humongous_reclaim_candidate(bool value) {
+  _has_humongous_reclaim_candidates = value;
+}
+
 inline void G1CollectedHeap::set_humongous_is_live(oop obj) {
   uint region = addr_to_region((HeapWord*)obj);
   // Clear the flag in the humongous_reclaim_candidates table.  Also
--- a/src/hotspot/share/gc/g1/g1CollectionSet.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1CollectionSet.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -27,12 +27,14 @@
 #include "gc/g1/g1CollectionSet.hpp"
 #include "gc/g1/g1CollectionSetCandidates.hpp"
 #include "gc/g1/g1CollectorState.hpp"
+#include "gc/g1/g1HotCardCache.hpp"
 #include "gc/g1/g1ParScanThreadState.hpp"
 #include "gc/g1/g1Policy.hpp"
 #include "gc/g1/heapRegion.inline.hpp"
 #include "gc/g1/heapRegionRemSet.hpp"
 #include "gc/g1/heapRegionSet.hpp"
 #include "logging/logStream.hpp"
+#include "runtime/orderAccess.hpp"
 #include "utilities/debug.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/quickSort.hpp"
@@ -276,19 +278,6 @@
   assert(hr->is_young(), "invariant");
   assert(_inc_build_state == Active, "Precondition");
 
-  size_t collection_set_length = _collection_set_cur_length;
-  // We use UINT_MAX as "invalid" marker in verification.
-  assert(collection_set_length < (UINT_MAX - 1),
-         "Collection set is too large with " SIZE_FORMAT " entries", collection_set_length);
-  hr->set_young_index_in_cset((uint)collection_set_length + 1);
-
-  _collection_set_regions[collection_set_length] = hr->hrm_index();
-  // Concurrent readers must observe the store of the value in the array before an
-  // update to the length field.
-  OrderAccess::storestore();
-  _collection_set_cur_length++;
-  assert(_collection_set_cur_length <= _collection_set_max_length, "Collection set larger than maximum allowed.");
-
   // This routine is used when:
   // * adding survivor regions to the incremental cset at the end of an
   //   evacuation pause or
@@ -323,6 +312,19 @@
 
   assert(!hr->in_collection_set(), "invariant");
   _g1h->register_young_region_with_region_attr(hr);
+
+  size_t collection_set_length = _collection_set_cur_length;
+  // We use UINT_MAX as "invalid" marker in verification.
+  assert(collection_set_length < (UINT_MAX - 1),
+         "Collection set is too large with " SIZE_FORMAT " entries", collection_set_length);
+  hr->set_young_index_in_cset((uint)collection_set_length + 1);
+
+  _collection_set_regions[collection_set_length] = hr->hrm_index();
+  // Concurrent readers must observe the store of the value in the array before an
+  // update to the length field.
+  OrderAccess::storestore();
+  _collection_set_cur_length++;
+  assert(_collection_set_cur_length <= _collection_set_max_length, "Collection set larger than maximum allowed.");
 }
 
 void G1CollectionSet::add_survivor_regions(HeapRegion* hr) {
@@ -409,7 +411,7 @@
   guarantee(target_pause_time_ms > 0.0,
             "target_pause_time_ms = %1.6lf should be positive", target_pause_time_ms);
 
-  size_t pending_cards = _policy->pending_cards_at_gc_start();
+  size_t pending_cards = _policy->pending_cards_at_gc_start() + _g1h->hot_card_cache()->num_entries();
   double base_time_ms = _policy->predict_base_elapsed_time_ms(pending_cards);
   double time_remaining_ms = MAX2(target_pause_time_ms - base_time_ms, 0.0);
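
The reordered hunk in g1CollectionSet.cpp above preserves the publication protocol spelled out in its comment: the region index is stored into the collection set array first, a StoreStore barrier follows, and only then is the length bumped, so concurrent readers that observe the new length also observe the stored element. A minimal single-writer sketch of that pattern, using a C++11 release store in place of OrderAccess::storestore() (array size and types are illustrative):

#include <atomic>
#include <cstddef>

struct PublishedRegionArray {
  unsigned regions[1024];           // written by one appender at a time
  std::atomic<size_t> length{0};    // read concurrently by other threads

  void append(unsigned region_index) {
    size_t cur = length.load(std::memory_order_relaxed);
    regions[cur] = region_index;    // store the element first...
    // ...then publish it: release ordering keeps the element store ahead of
    // the length store, the StoreStore part of the pattern above.
    length.store(cur + 1, std::memory_order_release);
  }

  size_t reader_snapshot() const {  // readers only iterate [0, snapshot)
    return length.load(std::memory_order_acquire);
  }
};
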
 
--- a/src/hotspot/share/gc/g1/g1CollectionSetChooser.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1CollectionSetChooser.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -112,7 +112,7 @@
 
     // Claim a new chunk, returning its bounds [from, to[.
     void claim_chunk(uint& from, uint& to) {
-      uint result = Atomic::add(_chunk_size, &_cur_claim_idx);
+      uint result = Atomic::add(&_cur_claim_idx, _chunk_size);
       assert(_max_size > result - 1,
              "Array too small, is %u should be %u with chunk size %u.",
              _max_size, result, _chunk_size);
@@ -214,8 +214,8 @@
   void update_totals(uint num_regions, size_t reclaimable_bytes) {
     if (num_regions > 0) {
       assert(reclaimable_bytes > 0, "invariant");
-      Atomic::add(num_regions, &_num_regions_added);
-      Atomic::add(reclaimable_bytes, &_reclaimable_bytes_added);
+      Atomic::add(&_num_regions_added, num_regions);
+      Atomic::add(&_reclaimable_bytes_added, reclaimable_bytes);
     } else {
       assert(reclaimable_bytes == 0, "invariant");
     }
--- a/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -62,6 +62,7 @@
 #include "runtime/atomic.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/java.hpp"
+#include "runtime/orderAccess.hpp"
 #include "runtime/prefetch.inline.hpp"
 #include "services/memTracker.hpp"
 #include "utilities/align.hpp"
@@ -207,7 +208,7 @@
     return NULL;
   }
 
-  size_t cur_idx = Atomic::add(1u, &_hwm) - 1;
+  size_t cur_idx = Atomic::add(&_hwm, 1u) - 1;
   if (cur_idx >= _chunk_capacity) {
     return NULL;
   }
@@ -280,7 +281,7 @@
 
 void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
   assert_at_safepoint();
-  size_t idx = Atomic::add((size_t)1, &_num_root_regions) - 1;
+  size_t idx = Atomic::add(&_num_root_regions, (size_t)1) - 1;
   assert(idx < _max_regions, "Trying to add more root MemRegions than there is space " SIZE_FORMAT, _max_regions);
   assert(start != NULL && end != NULL && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
          "end (" PTR_FORMAT ")", p2i(start), p2i(end));
@@ -308,7 +309,7 @@
     return NULL;
   }
 
-  size_t claimed_index = Atomic::add((size_t)1, &_claimed_root_regions) - 1;
+  size_t claimed_index = Atomic::add(&_claimed_root_regions, (size_t)1) - 1;
   if (claimed_index < _num_root_regions) {
     return &_root_regions[claimed_index];
   }
@@ -1121,7 +1122,7 @@
   virtual void work(uint worker_id) {
     G1UpdateRemSetTrackingBeforeRebuild update_cl(_g1h, _cm, &_cl);
     _g1h->heap_region_par_iterate_from_worker_offset(&update_cl, &_hrclaimer, worker_id);
-    Atomic::add(update_cl.num_selected_for_rebuild(), &_total_selected_for_rebuild);
+    Atomic::add(&_total_selected_for_rebuild, update_cl.num_selected_for_rebuild());
   }
 
   uint total_selected_for_rebuild() const { return _total_selected_for_rebuild; }
@@ -1611,7 +1612,7 @@
     // we utilize all the worker threads we can.
     bool processing_is_mt = rp->processing_is_mt();
     uint active_workers = (processing_is_mt ? _g1h->workers()->active_workers() : 1U);
-    active_workers = MAX2(MIN2(active_workers, _max_num_tasks), 1U);
+    active_workers = clamp(active_workers, 1u, _max_num_tasks);
 
     // Parallel processing task executor.
     G1CMRefProcTaskExecutor par_task_executor(_g1h, this,
@@ -1906,7 +1907,7 @@
     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
 
     // Is the gap between reading the finger and doing the CAS too long?
-    HeapWord* res = Atomic::cmpxchg(end, &_finger, finger);
+    HeapWord* res = Atomic::cmpxchg(&_finger, finger, end);
     if (res == finger && curr_region != NULL) {
       // we succeeded
       HeapWord*   bottom        = curr_region->bottom();
@@ -2587,7 +2588,8 @@
   // and do_marking_step() is not being called serially.
   bool do_stealing = do_termination && !is_serial;
 
-  double diff_prediction_ms = _g1h->policy()->predictor().get_new_prediction(&_marking_step_diff_ms);
+  G1Predictions const& predictor = _g1h->policy()->predictor();
+  double diff_prediction_ms = predictor.get_new_lower_zero_bound_prediction(&_marking_step_diff_ms);
   _time_target_ms = time_target_ms - diff_prediction_ms;
 
   // set up the variables that are used in the work-based scheme to
--- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -34,13 +34,14 @@
 #include "gc/g1/heapRegionRemSet.hpp"
 #include "gc/shared/suspendibleThreadSet.hpp"
 #include "gc/shared/workgroup.hpp"
-#include "runtime/atomic.hpp"
 #include "runtime/flags/flagSetting.hpp"
 #include "runtime/mutexLocker.hpp"
+#include "runtime/orderAccess.hpp"
 #include "runtime/os.hpp"
 #include "runtime/safepoint.hpp"
 #include "runtime/thread.inline.hpp"
 #include "runtime/threadSMR.hpp"
+#include "utilities/quickSort.hpp"
 
 G1DirtyCardQueue::G1DirtyCardQueue(G1DirtyCardQueueSet* qset) :
   // Dirty card queues are always active, so we create them with their
@@ -226,21 +227,127 @@
   return result;
 }
 
+class G1RefineBufferedCards : public StackObj {
+  BufferNode* const _node;
+  CardTable::CardValue** const _node_buffer;
+  const size_t _node_buffer_size;
+  const uint _worker_id;
+  size_t* _total_refined_cards;
+  G1RemSet* const _g1rs;
+
+  static inline int compare_card(const CardTable::CardValue* p1,
+                                 const CardTable::CardValue* p2) {
+    return p2 - p1;
+  }
+
+  // Sorts the cards from start_index to _node_buffer_size in *decreasing*
+  // address order. Tests showed that this order is preferable to not sorting
+  // or increasing address order.
+  void sort_cards(size_t start_index) {
+    QuickSort::sort(&_node_buffer[start_index],
+                    _node_buffer_size - start_index,
+                    compare_card,
+                    false);
+  }
+
+  // Returns the index to the first clean card in the buffer.
+  size_t clean_cards() {
+    const size_t start = _node->index();
+    assert(start <= _node_buffer_size, "invariant");
+
+    // Two-fingered compaction algorithm similar to the filtering mechanism in
+    // SATBMarkQueue. The main difference is that clean_card_before_refine()
+    // could change the buffer element in-place.
+    // We don't check for SuspendibleThreadSet::should_yield(), because
+    // cleaning and redirtying the cards is fast.
+    CardTable::CardValue** src = &_node_buffer[start];
+    CardTable::CardValue** dst = &_node_buffer[_node_buffer_size];
+    assert(src <= dst, "invariant");
+    for ( ; src < dst; ++src) {
+      // Search low to high for a card to keep.
+      if (_g1rs->clean_card_before_refine(src)) {
+        // Found keeper.  Search high to low for a card to discard.
+        while (src < --dst) {
+          if (!_g1rs->clean_card_before_refine(dst)) {
+            *dst = *src;         // Replace discard with keeper.
+            break;
+          }
+        }
+        // If discard search failed (src == dst), the outer loop will also end.
+      }
+    }
+
+    // dst points to the first retained clean card, or the end of the buffer
+    // if all the cards were discarded.
+    const size_t first_clean = dst - _node_buffer;
+    assert(first_clean >= start && first_clean <= _node_buffer_size, "invariant");
+    // Discarded cards are considered as refined.
+    *_total_refined_cards += first_clean - start;
+    return first_clean;
+  }
+
+  bool refine_cleaned_cards(size_t start_index) {
+    bool result = true;
+    size_t i = start_index;
+    for ( ; i < _node_buffer_size; ++i) {
+      if (SuspendibleThreadSet::should_yield()) {
+        redirty_unrefined_cards(i);
+        result = false;
+        break;
+      }
+      _g1rs->refine_card_concurrently(_node_buffer[i], _worker_id);
+    }
+    _node->set_index(i);
+    *_total_refined_cards += i - start_index;
+    return result;
+  }
+
+  void redirty_unrefined_cards(size_t start) {
+    for ( ; start < _node_buffer_size; ++start) {
+      *_node_buffer[start] = G1CardTable::dirty_card_val();
+    }
+  }
+
+public:
+  G1RefineBufferedCards(BufferNode* node,
+                        size_t node_buffer_size,
+                        uint worker_id,
+                        size_t* total_refined_cards) :
+    _node(node),
+    _node_buffer(reinterpret_cast<CardTable::CardValue**>(BufferNode::make_buffer_from_node(node))),
+    _node_buffer_size(node_buffer_size),
+    _worker_id(worker_id),
+    _total_refined_cards(total_refined_cards),
+    _g1rs(G1CollectedHeap::heap()->rem_set()) {}
+
+  bool refine() {
+    size_t first_clean_index = clean_cards();
+    if (first_clean_index == _node_buffer_size) {
+      _node->set_index(first_clean_index);
+      return true;
+    }
+    // This fence serves two purposes. First, the cards must be cleaned
+    // before processing the contents. Second, we can't proceed with
+    // processing a region until after the read of the region's top in
+    // collect_and_clean_cards(), for synchronization with possibly concurrent
+    // humongous object allocation (see comment at the StoreStore fence before
+    // setting the regions' tops in humongous allocation path).
+    // It's okay that reading the region's top and reading the region's type are
+    // racy with respect to each other. We need both set, in any order, to proceed.
+    OrderAccess::fence();
+    sort_cards(first_clean_index);
+    return refine_cleaned_cards(first_clean_index);
+  }
+};
+
 bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node,
                                         uint worker_id,
                                         size_t* total_refined_cards) {
-  G1RemSet* rem_set = G1CollectedHeap::heap()->rem_set();
-  size_t size = buffer_size();
-  void** buffer = BufferNode::make_buffer_from_node(node);
-  size_t i = node->index();
-  assert(i <= size, "invariant");
-  for ( ; (i < size) && !SuspendibleThreadSet::should_yield(); ++i) {
-    CardTable::CardValue* cp = static_cast<CardTable::CardValue*>(buffer[i]);
-    rem_set->refine_card_concurrently(cp, worker_id);
-  }
-  *total_refined_cards += (i - node->index());
-  node->set_index(i);
-  return i == size;
+  G1RefineBufferedCards buffered_cards(node,
+                                       buffer_size(),
+                                       worker_id,
+                                       total_refined_cards);
+  return buffered_cards.refine();
 }
 
 #ifndef ASSERT
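
The clean_cards() member of G1RefineBufferedCards above uses a two-fingered compaction: one finger scans upward for cards to keep, the other scans downward for cards to discard, and each keeper found at the low end overwrites a discard slot at the high end, so every retained card ends up in the tail of the buffer. A generic sketch of that loop, with a caller-supplied keep() predicate standing in for clean_card_before_refine():

#include <cstddef>

// Packs the elements of buf[start, size) for which keep() returns true into
// the tail of the buffer and returns the index of the first retained element
// (size if nothing is retained). Relative order of keepers is not preserved,
// which is why the caller above re-sorts the retained range afterwards.
template <typename T, typename Keep>
size_t two_finger_compact(T* buf, size_t start, size_t size, Keep keep) {
  T* src = buf + start;   // searches low -> high for an element to keep
  T* dst = buf + size;    // searches high -> low for an element to discard
  for (; src < dst; ++src) {
    if (keep(*src)) {
      while (src < --dst) {
        if (!keep(*dst)) {
          *dst = *src;    // replace the discard with the keeper
          break;
        }
      }
    }
  }
  return static_cast<size_t>(dst - buf);
}
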
--- a/src/hotspot/share/gc/g1/g1EvacStats.inline.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1EvacStats.inline.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -29,17 +29,17 @@
 #include "runtime/atomic.hpp"
 
 inline void G1EvacStats::add_direct_allocated(size_t value) {
-  Atomic::add(value, &_direct_allocated);
+  Atomic::add(&_direct_allocated, value);
 }
 
 inline void G1EvacStats::add_region_end_waste(size_t value) {
-  Atomic::add(value, &_region_end_waste);
+  Atomic::add(&_region_end_waste, value);
   Atomic::inc(&_regions_filled);
 }
 
 inline void G1EvacStats::add_failure_used_and_waste(size_t used, size_t waste) {
-  Atomic::add(used, &_failure_used);
-  Atomic::add(waste, &_failure_waste);
+  Atomic::add(&_failure_used, used);
+  Atomic::add(&_failure_waste, waste);
 }
 
 #endif // SHARE_GC_G1_G1EVACSTATS_INLINE_HPP
--- a/src/hotspot/share/gc/g1/g1EvacuationInfo.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1EvacuationInfo.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -33,13 +33,14 @@
   size_t _collectionset_used_before;
   size_t _collectionset_used_after;
   size_t _alloc_regions_used_before;
-  size_t _bytes_copied;
+  size_t _bytes_used;
   uint   _regions_freed;
 
 public:
-  G1EvacuationInfo() : _collectionset_regions(0), _allocation_regions(0), _collectionset_used_before(0),
-                     _collectionset_used_after(0), _alloc_regions_used_before(0),
-                     _bytes_copied(0), _regions_freed(0) { }
+  G1EvacuationInfo() :
+    _collectionset_regions(0), _allocation_regions(0), _collectionset_used_before(0),
+    _collectionset_used_after(0), _alloc_regions_used_before(0),
+    _bytes_used(0), _regions_freed(0) { }
 
   void set_collectionset_regions(uint collectionset_regions) {
     _collectionset_regions = collectionset_regions;
@@ -61,8 +62,8 @@
     _alloc_regions_used_before = used;
   }
 
-  void set_bytes_copied(size_t copied) {
-    _bytes_copied = copied;
+  void set_bytes_used(size_t used) {
+    _bytes_used = used;
   }
 
   void set_regions_freed(uint freed) {
@@ -74,7 +75,7 @@
   size_t collectionset_used_before() { return _collectionset_used_before; }
   size_t collectionset_used_after()  { return _collectionset_used_after; }
   size_t alloc_regions_used_before() { return _alloc_regions_used_before; }
-  size_t bytes_copied()              { return _bytes_copied; }
+  size_t bytes_used()                { return _bytes_used; }
   uint   regions_freed()             { return _regions_freed; }
 };
 
--- a/src/hotspot/share/gc/g1/g1FreeIdSet.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1FreeIdSet.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -79,7 +79,7 @@
     index = head_index(old_head);
     assert(index < _size, "invariant");
     uintx new_head = make_head(_next[index], old_head);
-    new_head = Atomic::cmpxchg(new_head, &_head, old_head);
+    new_head = Atomic::cmpxchg(&_head, old_head, new_head);
     if (new_head == old_head) break;
     old_head = new_head;
   }
@@ -95,7 +95,7 @@
   while (true) {
     _next[index] = head_index(old_head);
     uintx new_head = make_head(index, old_head);
-    new_head = Atomic::cmpxchg(new_head, &_head, old_head);
+    new_head = Atomic::cmpxchg(&_head, old_head, new_head);
     if (new_head == old_head) break;
     old_head = new_head;
   }
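
Both G1FreeIdSet hunks above keep the usual compare-and-swap retry loop, now with the destination as the first cmpxchg argument: the returned previous value tells the caller whether its exchange won and otherwise becomes the expected value for the next attempt. Ignoring the extra state the real code folds into the head via make_head(), the shape of the loop with std::atomic is roughly:

#include <atomic>

// make_new computes the desired new head from the currently observed one.
// Returns the head value that was successfully replaced.
template <typename T, typename MakeNew>
T cas_retry_update(std::atomic<T>& head, MakeNew make_new) {
  T old_head = head.load();
  while (true) {
    T new_head = make_new(old_head);
    // On failure, compare_exchange_strong reloads old_head with the current
    // value, mirroring "old_head = new_head" after a failed cmpxchg above.
    if (head.compare_exchange_strong(old_head, new_head)) {
      return old_head;
    }
  }
}
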
--- a/src/hotspot/share/gc/g1/g1FullGCAdjustTask.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1FullGCAdjustTask.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -101,7 +101,7 @@
 
   // Adjust the weak roots.
 
-  if (Atomic::add(1u, &_references_done) == 1u) { // First incr claims task.
+  if (Atomic::add(&_references_done, 1u) == 1u) { // First incr claims task.
     G1CollectedHeap::heap()->ref_processor_stw()->weak_oops_do(&_adjust);
   }
 
--- a/src/hotspot/share/gc/g1/g1FullGCCompactTask.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1FullGCCompactTask.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -53,7 +53,7 @@
           assert(current->is_empty(), "Should have been cleared in phase 2.");
         }
       }
-      current->reset_during_compaction();
+      current->reset_humongous_during_compaction();
     }
     return false;
   }
--- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -72,6 +72,8 @@
   _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_fine, MergeRSMergedFine);
   _merge_rs_merged_coarse = new WorkerDataArray<size_t>(max_gc_threads, "Merged Coarse:");
   _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_coarse, MergeRSMergedCoarse);
+  _merge_rs_dirty_cards = new WorkerDataArray<size_t>(max_gc_threads, "Dirty Cards:");
+  _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_dirty_cards, MergeRSDirtyCards);
 
   _gc_par_phases[OptMergeRS] = new WorkerDataArray<double>(max_gc_threads, "Optional Remembered Sets (ms):");
   _opt_merge_rs_merged_sparse = new WorkerDataArray<size_t>(max_gc_threads, "Merged Sparse:");
@@ -80,6 +82,8 @@
   _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_fine, MergeRSMergedFine);
   _opt_merge_rs_merged_coarse = new WorkerDataArray<size_t>(max_gc_threads, "Merged Coarse:");
   _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_coarse, MergeRSMergedCoarse);
+  _opt_merge_rs_dirty_cards = new WorkerDataArray<size_t>(max_gc_threads, "Dirty Cards:");
+  _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_dirty_cards, MergeRSDirtyCards);
 
   _gc_par_phases[MergeLB] = new WorkerDataArray<double>(max_gc_threads, "Log Buffers (ms):");
   if (G1HotCardCache::default_use_cache()) {
@@ -128,15 +132,14 @@
   _merge_lb_skipped_cards = new WorkerDataArray<size_t>(max_gc_threads, "Skipped Cards:");
   _gc_par_phases[MergeLB]->link_thread_work_items(_merge_lb_skipped_cards, MergeLBSkippedCards);
 
-  _obj_copy_lab_waste = new WorkerDataArray<size_t>(max_gc_threads, "LAB Waste");
-  _gc_par_phases[ObjCopy]->link_thread_work_items(_obj_copy_lab_waste, ObjCopyLABWaste);
-  _obj_copy_lab_undo_waste = new WorkerDataArray<size_t>(max_gc_threads, "LAB Undo Waste");
-  _gc_par_phases[ObjCopy]->link_thread_work_items(_obj_copy_lab_undo_waste, ObjCopyLABUndoWaste);
+  _gc_par_phases[MergePSS] = new WorkerDataArray<double>(1, "Merge Per-Thread State", true /* is_serial */);
 
-  _opt_obj_copy_lab_waste = new WorkerDataArray<size_t>(max_gc_threads, "LAB Waste");
-  _gc_par_phases[OptObjCopy]->link_thread_work_items(_obj_copy_lab_waste, ObjCopyLABWaste);
-  _opt_obj_copy_lab_undo_waste  = new WorkerDataArray<size_t>(max_gc_threads, "LAB Undo Waste");
-  _gc_par_phases[OptObjCopy]->link_thread_work_items(_obj_copy_lab_undo_waste, ObjCopyLABUndoWaste);
+  _merge_pss_copied_bytes = new WorkerDataArray<size_t>(max_gc_threads, "Copied Bytes");
+  _gc_par_phases[MergePSS]->link_thread_work_items(_merge_pss_copied_bytes, MergePSSCopiedBytes);
+  _merge_pss_lab_waste_bytes = new WorkerDataArray<size_t>(max_gc_threads, "LAB Waste");
+  _gc_par_phases[MergePSS]->link_thread_work_items(_merge_pss_lab_waste_bytes, MergePSSLABWasteBytes);
+  _merge_pss_lab_undo_waste_bytes = new WorkerDataArray<size_t>(max_gc_threads, "LAB Undo Waste");
+  _gc_par_phases[MergePSS]->link_thread_work_items(_merge_pss_lab_undo_waste_bytes, MergePSSLABUndoWasteBytes);
 
   _termination_attempts = new WorkerDataArray<size_t>(max_gc_threads, "Termination Attempts:");
   _gc_par_phases[Termination]->link_thread_work_items(_termination_attempts);
@@ -189,7 +192,6 @@
   _recorded_non_young_cset_choice_time_ms = 0.0;
   _recorded_redirty_logged_cards_time_ms = 0.0;
   _recorded_preserve_cm_referents_time_ms = 0.0;
-  _recorded_merge_pss_time_ms = 0.0;
   _recorded_start_new_cset_time_ms = 0.0;
   _recorded_total_free_cset_time_ms = 0.0;
   _recorded_serial_free_cset_time_ms = 0.0;
@@ -306,10 +308,16 @@
 
 // return the average time for a phase in milliseconds
 double G1GCPhaseTimes::average_time_ms(GCParPhases phase) {
+  if (_gc_par_phases[phase] == NULL) {
+    return 0.0;
+  }
   return _gc_par_phases[phase]->average() * 1000.0;
 }
 
 size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase, uint index) {
+  if (_gc_par_phases[phase] == NULL) {
+    return 0;
+  }
   assert(_gc_par_phases[phase]->thread_work_items(index) != NULL, "No sub count");
   return _gc_par_phases[phase]->thread_work_items(index)->sum();
 }
@@ -464,13 +472,15 @@
 double G1GCPhaseTimes::print_post_evacuate_collection_set() const {
   const double evac_fail_handling = _cur_evac_fail_recalc_used +
                                     _cur_evac_fail_remove_self_forwards;
+  assert(_gc_par_phases[MergePSS]->get(0) != WorkerDataArray<double>::uninitialized(), "must be set");
+  const double merge_pss = _gc_par_phases[MergePSS]->get(0) * MILLIUNITS;
   const double sum_ms = evac_fail_handling +
                         _cur_collection_code_root_fixup_time_ms +
                         _recorded_preserve_cm_referents_time_ms +
                         _cur_ref_proc_time_ms +
                         (_weak_phase_times.total_time_sec() * MILLIUNITS) +
                         _cur_clear_ct_time_ms +
-                        _recorded_merge_pss_time_ms +
+                        merge_pss +
                         _cur_strong_code_root_purge_time_ms +
                         _recorded_redirty_logged_cards_time_ms +
                         _recorded_total_free_cset_time_ms +
@@ -500,7 +510,7 @@
     trace_time("Remove Self Forwards",_cur_evac_fail_remove_self_forwards);
   }
 
-  debug_time("Merge Per-Thread State", _recorded_merge_pss_time_ms);
+  debug_phase(_gc_par_phases[MergePSS], 0);
   debug_time("Code Roots Purge", _cur_strong_code_root_purge_time_ms);
 
   debug_time("Redirty Cards", _recorded_redirty_logged_cards_time_ms);
@@ -585,7 +595,8 @@
       "StringDedupTableFixup",
       "RedirtyCards",
       "YoungFreeCSet",
-      "NonYoungFreeCSet"
+      "NonYoungFreeCSet",
+      "MergePSS"
       //GCParPhasesSentinel only used to tell end of enum
       };
 
--- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -78,6 +78,7 @@
     RedirtyCards,
     YoungFreeCSet,
     NonYoungFreeCSet,
+    MergePSS,
     GCParPhasesSentinel
   };
 
@@ -87,7 +88,8 @@
   enum GCMergeRSWorkTimes {
     MergeRSMergedSparse,
     MergeRSMergedFine,
-    MergeRSMergedCoarse
+    MergeRSMergedCoarse,
+    MergeRSDirtyCards
   };
 
   enum GCScanHRWorkItems {
@@ -108,9 +110,10 @@
     MergeLBSkippedCards
   };
 
-  enum GCObjCopyWorkItems {
-    ObjCopyLABWaste,
-    ObjCopyLABUndoWaste
+  enum GCMergePSSWorkItems {
+    MergePSSCopiedBytes,
+    MergePSSLABWasteBytes,
+    MergePSSLABUndoWasteBytes
   };
 
  private:
@@ -122,6 +125,7 @@
   WorkerDataArray<size_t>* _merge_rs_merged_sparse;
   WorkerDataArray<size_t>* _merge_rs_merged_fine;
   WorkerDataArray<size_t>* _merge_rs_merged_coarse;
+  WorkerDataArray<size_t>* _merge_rs_dirty_cards;
 
   WorkerDataArray<size_t>* _merge_hcc_dirty_cards;
   WorkerDataArray<size_t>* _merge_hcc_skipped_cards;
@@ -136,6 +140,7 @@
   WorkerDataArray<size_t>* _opt_merge_rs_merged_sparse;
   WorkerDataArray<size_t>* _opt_merge_rs_merged_fine;
   WorkerDataArray<size_t>* _opt_merge_rs_merged_coarse;
+  WorkerDataArray<size_t>* _opt_merge_rs_dirty_cards;
 
   WorkerDataArray<size_t>* _opt_scan_hr_scanned_cards;
   WorkerDataArray<size_t>* _opt_scan_hr_scanned_blocks;
@@ -143,11 +148,9 @@
   WorkerDataArray<size_t>* _opt_scan_hr_scanned_opt_refs;
   WorkerDataArray<size_t>* _opt_scan_hr_used_memory;
 
-  WorkerDataArray<size_t>* _obj_copy_lab_waste;
-  WorkerDataArray<size_t>* _obj_copy_lab_undo_waste;
-
-  WorkerDataArray<size_t>* _opt_obj_copy_lab_waste;
-  WorkerDataArray<size_t>* _opt_obj_copy_lab_undo_waste;
+  WorkerDataArray<size_t>* _merge_pss_copied_bytes;
+  WorkerDataArray<size_t>* _merge_pss_lab_waste_bytes;
+  WorkerDataArray<size_t>* _merge_pss_lab_undo_waste_bytes;
 
   WorkerDataArray<size_t>* _termination_attempts;
 
@@ -224,7 +227,9 @@
   template <class T>
   void details(T* phase, const char* indent) const;
 
+  void log_work_items(WorkerDataArray<double>* phase, uint indent, outputStream* out) const;
   void log_phase(WorkerDataArray<double>* phase, uint indent, outputStream* out, bool print_sum) const;
+  void debug_serial_phase(WorkerDataArray<double>* phase, uint extra_indent = 0) const;
   void debug_phase(WorkerDataArray<double>* phase, uint extra_indent = 0) const;
   void trace_phase(WorkerDataArray<double>* phase, bool print_sum = true) const;
 
@@ -269,8 +274,6 @@
 
   size_t sum_thread_work_items(GCParPhases phase, uint index = 0);
 
- public:
-
   void record_prepare_tlab_time_ms(double ms) {
     _cur_prepare_tlab_time_ms = ms;
   }
@@ -378,10 +381,6 @@
     _recorded_preserve_cm_referents_time_ms = time_ms;
   }
 
-  void record_merge_pss_time_ms(double time_ms) {
-    _recorded_merge_pss_time_ms = time_ms;
-  }
-
   void record_start_new_cset_time_ms(double time_ms) {
     _recorded_start_new_cset_time_ms = time_ms;
   }
--- a/src/hotspot/share/gc/g1/g1HeterogeneousHeapPolicy.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1HeterogeneousHeapPolicy.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -39,8 +39,8 @@
 }
 
 // After a collection pause, young list target length is updated. So we need to make sure we have enough regions in dram for young gen.
-void G1HeterogeneousHeapPolicy::record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc) {
-  G1Policy::record_collection_pause_end(pause_time_ms, heap_used_bytes_before_gc);
+void G1HeterogeneousHeapPolicy::record_collection_pause_end(double pause_time_ms) {
+  G1Policy::record_collection_pause_end(pause_time_ms);
   _manager->adjust_dram_regions((uint)young_list_target_length(), G1CollectedHeap::heap()->workers());
 }
 
--- a/src/hotspot/share/gc/g1/g1HeterogeneousHeapPolicy.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1HeterogeneousHeapPolicy.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -38,7 +38,7 @@
   // initialize policy
   virtual void init(G1CollectedHeap* g1h, G1CollectionSet* collection_set);
   // Record end of an evacuation pause.
-  virtual void record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc);
+  virtual void record_collection_pause_end(double pause_time_ms);
   // Record the end of full collection.
   virtual void record_full_collection_end();
 
--- a/src/hotspot/share/gc/g1/g1HotCardCache.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1HotCardCache.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -32,7 +32,7 @@
 G1HotCardCache::G1HotCardCache(G1CollectedHeap *g1h):
   _g1h(g1h), _use_cache(false), _card_counts(g1h),
   _hot_cache(NULL), _hot_cache_size(0), _hot_cache_par_chunk_size(0),
-  _hot_cache_idx(0), _hot_cache_par_claimed_idx(0)
+  _hot_cache_idx(0), _hot_cache_par_claimed_idx(0), _cache_wrapped_around(false)
 {}
 
 void G1HotCardCache::initialize(G1RegionToSpaceMapper* card_counts_storage) {
@@ -48,6 +48,8 @@
     _hot_cache_par_chunk_size = ClaimChunkSize;
     _hot_cache_par_claimed_idx = 0;
 
+    _cache_wrapped_around = false;
+
     _card_counts.initialize(card_counts_storage);
   }
 }
@@ -68,7 +70,12 @@
     return card_ptr;
   }
   // Otherwise, the card is hot.
-  size_t index = Atomic::add(1u, &_hot_cache_idx) - 1;
+  size_t index = Atomic::add(&_hot_cache_idx, 1u) - 1;
+  if (index == _hot_cache_size) {
+    // Can use relaxed store because all racing threads are writing the same
+    // value and there aren't any concurrent readers.
+    Atomic::store(&_cache_wrapped_around, true);
+  }
   size_t masked_index = index & (_hot_cache_size - 1);
   CardValue* current_ptr = _hot_cache[masked_index];
 
@@ -78,9 +85,9 @@
   // card_ptr in favor of the other option, which would be starting over. This
   // should be OK since card_ptr will likely be the older card already when/if
   // this ever happens.
-  CardValue* previous_ptr = Atomic::cmpxchg(card_ptr,
-                                            &_hot_cache[masked_index],
-                                            current_ptr);
+  CardValue* previous_ptr = Atomic::cmpxchg(&_hot_cache[masked_index],
+                                            current_ptr,
+                                            card_ptr);
   return (previous_ptr == current_ptr) ? previous_ptr : card_ptr;
 }
 
@@ -91,8 +98,8 @@
   assert(!use_cache(), "cache should be disabled");
 
   while (_hot_cache_par_claimed_idx < _hot_cache_size) {
-    size_t end_idx = Atomic::add(_hot_cache_par_chunk_size,
-                                 &_hot_cache_par_claimed_idx);
+    size_t end_idx = Atomic::add(&_hot_cache_par_claimed_idx,
+                                 _hot_cache_par_chunk_size);
     size_t start_idx = end_idx - _hot_cache_par_chunk_size;
     // The current worker has successfully claimed the chunk [start_idx..end_idx)
     end_idx = MIN2(end_idx, _hot_cache_size);
--- a/src/hotspot/share/gc/g1/g1HotCardCache.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1HotCardCache.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -81,6 +81,11 @@
 
   char _pad_after[DEFAULT_CACHE_LINE_SIZE];
 
+  // Records whether insertion overflowed the hot card cache at least once. This
+  // avoids the need for a separate atomic counter of how many valid entries are
+  // in the HCC.
+  volatile bool _cache_wrapped_around;
+
   // The number of cached cards a thread claims when flushing the cache
   static const int ClaimChunkSize = 32;
 
@@ -125,13 +130,17 @@
     assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
     assert(Thread::current()->is_VM_thread(), "Current thread should be the VMthread");
     if (default_use_cache()) {
-        reset_hot_cache_internal();
+      reset_hot_cache_internal();
     }
   }
 
   // Zeros the values in the card counts table for the given region
   void reset_card_counts(HeapRegion* hr);
 
+  // Number of entries in the HCC.
+  size_t num_entries() const {
+    return _cache_wrapped_around ? _hot_cache_size : _hot_cache_idx + 1;
+  }
  private:
   void reset_hot_cache_internal() {
     assert(_hot_cache != NULL, "Logic");
@@ -139,6 +148,7 @@
     for (size_t i = 0; i < _hot_cache_size; i++) {
       _hot_cache[i] = NULL;
     }
+    _cache_wrapped_around = false;
   }
 };
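
The new _cache_wrapped_around flag above stands in for a per-entry counter: the insertion index grows monotonically and is masked into the fixed-size array, so once it has run past the array size the cache is known to be full. A rough sketch of that bookkeeping (sizes and names are illustrative, and the relaxed store mirrors the comment about racing writers all storing the same value):

#include <atomic>
#include <cstddef>

struct WrapTracker {
  const size_t cache_size;            // a power of two, like the HCC size
  std::atomic<size_t> next_index{0};
  std::atomic<bool> wrapped{false};

  explicit WrapTracker(size_t size) : cache_size(size) {}

  // Claims the slot to overwrite, flagging the wrap on the first overwrite.
  size_t claim_slot() {
    size_t index = next_index.fetch_add(1);
    if (index == cache_size) {
      wrapped.store(true, std::memory_order_relaxed);
    }
    return index & (cache_size - 1);
  }

  // Approximate number of valid entries; assumes no insertions are racing
  // with this read, as when it is queried at the start of a pause.
  size_t num_entries() const {
    return wrapped.load() ? cache_size : next_index.load();
  }
};
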
 
--- a/src/hotspot/share/gc/g1/g1IHOPControl.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1IHOPControl.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -113,6 +113,10 @@
     );
 }
 
+double G1AdaptiveIHOPControl::get_new_prediction(TruncatedSeq const* seq) const {
+  return _predictor->get_new_lower_zero_bound_prediction(seq);
+}
+
 bool G1AdaptiveIHOPControl::have_enough_data_for_prediction() const {
   return ((size_t)_marking_times_s.num() >= G1AdaptiveIHOPNumInitialSamples) &&
          ((size_t)_allocation_rate_s.num() >= G1AdaptiveIHOPNumInitialSamples);
@@ -120,8 +124,8 @@
 
 size_t G1AdaptiveIHOPControl::get_conc_mark_start_threshold() {
   if (have_enough_data_for_prediction()) {
-    double pred_marking_time = _predictor->get_new_prediction(&_marking_times_s);
-    double pred_promotion_rate = _predictor->get_new_prediction(&_allocation_rate_s);
+    double pred_marking_time = get_new_prediction(&_marking_times_s);
+    double pred_promotion_rate = get_new_prediction(&_allocation_rate_s);
     size_t pred_promotion_size = (size_t)(pred_marking_time * pred_promotion_rate);
 
     size_t predicted_needed_bytes_during_marking =
@@ -168,8 +172,8 @@
                       actual_target,
                       G1CollectedHeap::heap()->used(),
                       _last_unrestrained_young_size,
-                      _predictor->get_new_prediction(&_allocation_rate_s),
-                      _predictor->get_new_prediction(&_marking_times_s) * 1000.0,
+                      get_new_prediction(&_allocation_rate_s),
+                      get_new_prediction(&_marking_times_s) * 1000.0,
                       have_enough_data_for_prediction() ? "true" : "false");
 }
 
@@ -179,7 +183,7 @@
                                           actual_target_threshold(),
                                           G1CollectedHeap::heap()->used(),
                                           _last_unrestrained_young_size,
-                                          _predictor->get_new_prediction(&_allocation_rate_s),
-                                          _predictor->get_new_prediction(&_marking_times_s),
+                                          get_new_prediction(&_allocation_rate_s),
+                                          get_new_prediction(&_marking_times_s),
                                           have_enough_data_for_prediction());
 }
--- a/src/hotspot/share/gc/g1/g1IHOPControl.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1IHOPControl.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -123,6 +123,9 @@
   // as there is no marking or mixed gc that could impact its size too much.
   size_t _last_unrestrained_young_size;
 
+  // Get a new prediction bounded below by zero from the given sequence.
+  double get_new_prediction(TruncatedSeq const* seq) const;
+
   bool have_enough_data_for_prediction() const;
 
   // The "actual" target threshold the algorithm wants to keep during and at the
--- a/src/hotspot/share/gc/g1/g1PageBasedVirtualSpace.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1PageBasedVirtualSpace.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -261,7 +261,7 @@
   virtual void work(uint worker_id) {
     size_t const actual_chunk_size = MAX2(chunk_size(), _page_size);
     while (true) {
-      char* touch_addr = Atomic::add(actual_chunk_size, &_cur_addr) - actual_chunk_size;
+      char* touch_addr = Atomic::add(&_cur_addr, actual_chunk_size) - actual_chunk_size;
       if (touch_addr < _start_addr || touch_addr >= _end_addr) {
         break;
       }
--- a/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -56,6 +56,9 @@
     _stack_trim_upper_threshold(GCDrainStackTargetSize * 2 + 1),
     _stack_trim_lower_threshold(GCDrainStackTargetSize),
     _trim_ticks(),
+    _surviving_young_words_base(NULL),
+    _surviving_young_words(NULL),
+    _surviving_words_length(young_cset_length + 1),
     _old_gen_is_full(false),
     _num_optional_regions(optional_cset_length),
     _numa(g1h->numa()),
@@ -65,11 +68,10 @@
   // entries, since entry 0 keeps track of surviving bytes for non-young regions.
   // We also add a few elements at the beginning and at the end in
   // an attempt to eliminate cache contention
-  size_t real_length = young_cset_length + 1;
-  size_t array_length = PADDING_ELEM_NUM + real_length + PADDING_ELEM_NUM;
+  size_t array_length = PADDING_ELEM_NUM + _surviving_words_length + PADDING_ELEM_NUM;
   _surviving_young_words_base = NEW_C_HEAP_ARRAY(size_t, array_length, mtGC);
   _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM;
-  memset(_surviving_young_words, 0, real_length * sizeof(size_t));
+  memset(_surviving_young_words, 0, _surviving_words_length * sizeof(size_t));
 
   _plab_allocator = new G1PLABAllocator(_g1h->allocator());
 
@@ -85,18 +87,19 @@
   initialize_numa_stats();
 }
 
-// Pass locally gathered statistics to global state.
-void G1ParScanThreadState::flush(size_t* surviving_young_words) {
+size_t G1ParScanThreadState::flush(size_t* surviving_young_words) {
   _rdcq.flush();
+  flush_numa_stats();
   // Update allocation statistics.
   _plab_allocator->flush_and_retire_stats();
   _g1h->policy()->record_age_table(&_age_table);
 
-  uint length = _g1h->collection_set()->young_region_length() + 1;
-  for (uint i = 0; i < length; i++) {
+  size_t sum = 0;
+  for (uint i = 0; i < _surviving_words_length; i++) {
     surviving_young_words[i] += _surviving_young_words[i];
+    sum += _surviving_young_words[i];
   }
-  flush_numa_stats();
+  return sum;
 }
 
 G1ParScanThreadState::~G1ParScanThreadState() {
@@ -357,16 +360,27 @@
 void G1ParScanThreadStateSet::flush() {
   assert(!_flushed, "thread local state from the per thread states should be flushed once");
 
-  for (uint worker_index = 0; worker_index < _n_workers; ++worker_index) {
-    G1ParScanThreadState* pss = _states[worker_index];
+  for (uint worker_id = 0; worker_id < _n_workers; ++worker_id) {
+    G1ParScanThreadState* pss = _states[worker_id];
 
     if (pss == NULL) {
       continue;
     }
 
-    pss->flush(_surviving_young_words_total);
+    G1GCPhaseTimes* p = _g1h->phase_times();
+
+    // Need to get the following two before the call to G1ParScanThreadState::flush()
+    // because it resets the PLAB allocator where we get this info from.
+    size_t lab_waste_bytes = pss->lab_waste_words() * HeapWordSize;
+    size_t lab_undo_waste_bytes = pss->lab_undo_waste_words() * HeapWordSize;
+    size_t copied_bytes = pss->flush(_surviving_young_words_total) * HeapWordSize;
+
+    p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, copied_bytes, G1GCPhaseTimes::MergePSSCopiedBytes);
+    p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, lab_waste_bytes, G1GCPhaseTimes::MergePSSLABWasteBytes);
+    p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, lab_undo_waste_bytes, G1GCPhaseTimes::MergePSSLABUndoWasteBytes);
+
     delete pss;
-    _states[worker_index] = NULL;
+    _states[worker_id] = NULL;
   }
   _flushed = true;
 }
--- a/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -74,7 +74,8 @@
   size_t* _surviving_young_words_base;
   // this points into the array, as we use the first few entries for padding
   size_t* _surviving_young_words;
-
+  // Number of elements in the array above.
+  size_t _surviving_words_length;
   // Indicates whether in the last generation (old) there is no more space
   // available for allocation.
   bool _old_gen_is_full;
@@ -152,7 +153,9 @@
   size_t lab_waste_words() const;
   size_t lab_undo_waste_words() const;
 
-  void flush(size_t* surviving_young_words);
+  // Pass locally gathered statistics to global state. Returns the total number of
+  // HeapWords copied.
+  size_t flush(size_t* surviving_young_words);
 
 private:
   #define G1_PARTIAL_ARRAY_MASK 0x2
--- a/src/hotspot/share/gc/g1/g1ParallelCleaning.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1ParallelCleaning.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 
 #include "gc/g1/g1ParallelCleaning.hpp"
+#include "runtime/atomic.hpp"
 #if INCLUDE_JVMCI
 #include "jvmci/jvmci.hpp"
 #endif
@@ -39,7 +40,7 @@
     return false;
   }
 
-  return Atomic::cmpxchg(1, &_cleaning_claimed, 0) == 0;
+  return Atomic::cmpxchg(&_cleaning_claimed, 0, 1) == 0;
 }
 
 void JVMCICleaningTask::work(bool unloading_occurred) {
--- a/src/hotspot/share/gc/g1/g1Policy.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1Policy.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -78,7 +78,6 @@
   _bytes_allocated_in_old_since_last_gc(0),
   _initial_mark_to_mixed(),
   _collection_set(NULL),
-  _bytes_copied_during_gc(0),
   _g1h(NULL),
   _phase_times(new G1GCPhaseTimes(gc_timer, ParallelGCThreads)),
   _mark_remark_start_sec(0),
@@ -330,9 +329,8 @@
   const double target_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0;
   const double survivor_regions_evac_time = predict_survivor_regions_evac_time();
   const size_t pending_cards = _analytics->predict_pending_cards();
-  const size_t scanned_cards = _analytics->predict_card_num(rs_length, true /* for_young_gc */);
   const double base_time_ms =
-    predict_base_elapsed_time_ms(pending_cards, scanned_cards) +
+    predict_base_elapsed_time_ms(pending_cards, rs_length) +
     survivor_regions_evac_time;
   const uint available_free_regions = _free_regions_at_end_of_collection;
   const uint base_free_regions =
@@ -553,7 +551,6 @@
   record_concurrent_refinement_data(false /* is_full_collection */);
 
   _collection_set->reset_bytes_used_before();
-  _bytes_copied_during_gc = 0;
 
   // do that for any other surv rate groups
   _short_lived_surv_rate_group->stop_adding_regions();
@@ -647,13 +644,11 @@
 // Anything below that is considered to be zero
 #define MIN_TIMER_GRANULARITY 0.0000001
 
-void G1Policy::record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc) {
+void G1Policy::record_collection_pause_end(double pause_time_ms) {
   G1GCPhaseTimes* p = phase_times();
 
   double end_time_sec = os::elapsedTime();
 
-  assert_used_and_recalculate_used_equal(_g1h);
-  size_t cur_used_bytes = _g1h->used();
   bool this_pause_included_initial_mark = false;
   bool this_pause_was_young_only = collector_state()->in_young_only_phase();
 
@@ -717,73 +712,62 @@
   }
 
   _short_lived_surv_rate_group->start_adding_regions();
-  // Do that for any other surv rate groups
 
-  double scan_hcc_time_ms = G1HotCardCache::default_use_cache() ? average_time_ms(G1GCPhaseTimes::MergeHCC) : 0.0;
+  double merge_hcc_time_ms = average_time_ms(G1GCPhaseTimes::MergeHCC);
+  if (update_stats) {
+    size_t const total_log_buffer_cards = p->sum_thread_work_items(G1GCPhaseTimes::MergeHCC, G1GCPhaseTimes::MergeHCCDirtyCards) +
+                                          p->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards);
+    // Update prediction for card merge; MergeRSDirtyCards includes the cards from the Eager Reclaim phase.
+    size_t const total_cards_merged = p->sum_thread_work_items(G1GCPhaseTimes::MergeRS, G1GCPhaseTimes::MergeRSDirtyCards) +
+                                      p->sum_thread_work_items(G1GCPhaseTimes::OptMergeRS, G1GCPhaseTimes::MergeRSDirtyCards) +
+                                      total_log_buffer_cards;
 
-  if (update_stats) {
-    double cost_per_logged_card = 0.0;
-    size_t const pending_logged_cards = p->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards);
-    if (pending_logged_cards > 0) {
-      cost_per_logged_card = logged_cards_processing_time() / pending_logged_cards;
-      _analytics->report_cost_per_logged_card_ms(cost_per_logged_card);
+    // The threshold for the number of cards in a given sample above which we
+    // consider the impact of setup and other fixed costs to be negligible.
+    size_t const CardsNumSamplingThreshold = 10;
+
+    if (total_cards_merged > CardsNumSamplingThreshold) {
+      double avg_time_merge_cards = average_time_ms(G1GCPhaseTimes::MergeER) +
+                                    average_time_ms(G1GCPhaseTimes::MergeRS) +
+                                    average_time_ms(G1GCPhaseTimes::MergeHCC) +
+                                    average_time_ms(G1GCPhaseTimes::MergeLB) +
+                                    average_time_ms(G1GCPhaseTimes::OptMergeRS);
+      _analytics->report_cost_per_card_merge_ms(avg_time_merge_cards / total_cards_merged, this_pause_was_young_only);
     }
-    _analytics->report_cost_scan_hcc(scan_hcc_time_ms);
 
+    // Update prediction for card scan
     size_t const total_cards_scanned = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) +
                                        p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards);
-    size_t remset_cards_scanned = 0;
-    // There might have been duplicate log buffer entries in the queues which could
-    // increase this value beyond the cards scanned. In this case attribute all cards
-    // to the log buffers.
-    if (pending_logged_cards <= total_cards_scanned) {
-      remset_cards_scanned = total_cards_scanned - pending_logged_cards;
+
+    if (total_cards_scanned > CardsNumSamplingThreshold) {
+      double avg_time_dirty_card_scan = average_time_ms(G1GCPhaseTimes::ScanHR) +
+                                        average_time_ms(G1GCPhaseTimes::OptScanHR);
+
+      _analytics->report_cost_per_card_scan_ms(avg_time_dirty_card_scan / total_cards_scanned, this_pause_was_young_only);
     }
 
-    double cost_per_remset_card_ms = 0.0;
-    if (remset_cards_scanned > 10) {
-      double avg_time_remset_scan = ((average_time_ms(G1GCPhaseTimes::ScanHR) + average_time_ms(G1GCPhaseTimes::OptScanHR)) *
-                                     remset_cards_scanned / total_cards_scanned) +
-                                     average_time_ms(G1GCPhaseTimes::MergeER) +
-                                     average_time_ms(G1GCPhaseTimes::MergeRS) +
-                                     average_time_ms(G1GCPhaseTimes::OptMergeRS);
+    // Update prediction for the ratio between cards from the remembered
+    // sets and actually scanned cards from the remembered sets.
+    // Cards from the remembered sets are all cards not duplicated by cards from
+    // the logs.
+    // Due to duplicates in the log buffers, the number of actually scanned cards
+    // can be smaller than the cards in the log buffers.
+    const size_t from_rs_length_cards = (total_cards_scanned > total_log_buffer_cards) ? total_cards_scanned - total_log_buffer_cards : 0;
+    double merge_to_scan_ratio = 0.0;
+    if (total_cards_scanned > 0) {
+      merge_to_scan_ratio = (double) from_rs_length_cards / total_cards_scanned;
+    }
+    _analytics->report_card_merge_to_scan_ratio(merge_to_scan_ratio, this_pause_was_young_only);
 
-      cost_per_remset_card_ms = avg_time_remset_scan / remset_cards_scanned;
-      _analytics->report_cost_per_remset_card_ms(cost_per_remset_card_ms, this_pause_was_young_only);
-    }
+    const size_t recorded_rs_length = _collection_set->recorded_rs_length();
+    const size_t rs_length_diff = _rs_length > recorded_rs_length ? _rs_length - recorded_rs_length : 0;
+    _analytics->report_rs_length_diff(rs_length_diff);
 
-    if (_rs_length > 0) {
-      double cards_per_entry_ratio =
-        (double) remset_cards_scanned / (double) _rs_length;
-      _analytics->report_cards_per_entry_ratio(cards_per_entry_ratio, this_pause_was_young_only);
-    }
-
-    // This is defensive. For a while _rs_length could get
-    // smaller than _recorded_rs_length which was causing
-    // rs_length_diff to get very large and mess up the RSet length
-    // predictions. The reason was unsafe concurrent updates to the
-    // _inc_cset_recorded_rs_length field which the code below guards
-    // against (see CR 7118202). This bug has now been fixed (see CR
-    // 7119027). However, I'm still worried that
-    // _inc_cset_recorded_rs_length might still end up somewhat
-    // inaccurate. The concurrent refinement thread calculates an
-    // RSet's length concurrently with other CR threads updating it
-    // which might cause it to calculate the length incorrectly (if,
-    // say, it's in mid-coarsening). So I'll leave in the defensive
-    // conditional below just in case.
-    size_t rs_length_diff = 0;
-    size_t recorded_rs_length = _collection_set->recorded_rs_length();
-    if (_rs_length > recorded_rs_length) {
-      rs_length_diff = _rs_length - recorded_rs_length;
-    }
-    _analytics->report_rs_length_diff((double) rs_length_diff);
-
-    size_t freed_bytes = heap_used_bytes_before_gc - cur_used_bytes;
-    size_t copied_bytes = _collection_set->bytes_used_before() - freed_bytes;
-    double cost_per_byte_ms = 0.0;
+    // Update prediction for copy cost per byte
+    size_t copied_bytes = p->sum_thread_work_items(G1GCPhaseTimes::MergePSS, G1GCPhaseTimes::MergePSSCopiedBytes);
 
     if (copied_bytes > 0) {
-      cost_per_byte_ms = (average_time_ms(G1GCPhaseTimes::ObjCopy) + average_time_ms(G1GCPhaseTimes::OptObjCopy)) / (double) copied_bytes;
+      double cost_per_byte_ms = (average_time_ms(G1GCPhaseTimes::ObjCopy) + average_time_ms(G1GCPhaseTimes::OptObjCopy)) / copied_bytes;
       _analytics->report_cost_per_byte_ms(cost_per_byte_ms, collector_state()->mark_or_rebuild_in_progress());
     }
 
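
The replacement accounting above feeds several predictors from totals already gathered per pause, among them the merge-to-scan ratio. Restating that ratio computation as a small self-contained function (the example numbers in the trailing comment are made up purely for illustration):

#include <cstddef>

// Cards that originate from remembered sets are the scanned cards not already
// covered by log buffer cards; their share of all scanned cards feeds the
// next pause's scan-card prediction.
double merge_to_scan_ratio(size_t total_cards_scanned,
                           size_t total_log_buffer_cards) {
  size_t from_rs_length_cards =
      (total_cards_scanned > total_log_buffer_cards)
          ? total_cards_scanned - total_log_buffer_cards
          : 0;
  return (total_cards_scanned > 0)
             ? (double)from_rs_length_cards / total_cards_scanned
             : 0.0;
}
// Example: 10000 scanned cards, 3000 of them covered by log buffer cards,
// gives from_rs_length_cards = 7000 and a ratio of 0.7.
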
@@ -848,21 +832,21 @@
   // Note that _mmu_tracker->max_gc_time() returns the time in seconds.
   double scan_logged_cards_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
 
-  if (scan_logged_cards_time_goal_ms < scan_hcc_time_ms) {
+  if (scan_logged_cards_time_goal_ms < merge_hcc_time_ms) {
     log_debug(gc, ergo, refine)("Adjust concurrent refinement thresholds (scanning the HCC expected to take longer than Update RS time goal)."
                                 "Logged Cards Scan time goal: %1.2fms Scan HCC time: %1.2fms",
-                                scan_logged_cards_time_goal_ms, scan_hcc_time_ms);
+                                scan_logged_cards_time_goal_ms, merge_hcc_time_ms);
 
     scan_logged_cards_time_goal_ms = 0;
   } else {
-    scan_logged_cards_time_goal_ms -= scan_hcc_time_ms;
+    scan_logged_cards_time_goal_ms -= merge_hcc_time_ms;
   }
 
   _pending_cards_at_prev_gc_end = _g1h->pending_card_num();
   double const logged_cards_time = logged_cards_processing_time();
 
   log_debug(gc, ergo, refine)("Concurrent refinement times: Logged Cards Scan time goal: %1.2fms Logged Cards Scan time: %1.2fms HCC time: %1.2fms",
-                              scan_logged_cards_time_goal_ms, logged_cards_time, scan_hcc_time_ms);
+                              scan_logged_cards_time_goal_ms, logged_cards_time, merge_hcc_time_ms);
 
   _g1h->concurrent_refine()->adjust(logged_cards_time,
                                     phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards),
@@ -930,11 +914,7 @@
 double G1Policy::predict_yg_surv_rate(int age, SurvRateGroup* surv_rate_group) const {
   TruncatedSeq* seq = surv_rate_group->get_seq(age);
   guarantee(seq->num() > 0, "There should be some young gen survivor samples available. Tried to access with age %d", age);
-  double pred = _predictor.get_new_prediction(seq);
-  if (pred > 1.0) {
-    pred = 1.0;
-  }
-  return pred;
+  return _predictor.get_new_unit_prediction(seq);
 }
 
 double G1Policy::accum_yg_surv_rate_pred(int age) const {
@@ -942,17 +922,17 @@
 }
 
 double G1Policy::predict_base_elapsed_time_ms(size_t pending_cards,
-                                              size_t scanned_cards) const {
+                                              size_t rs_length) const {
+  size_t effective_scanned_cards = _analytics->predict_scan_card_num(rs_length, collector_state()->in_young_only_phase());
   return
-    _analytics->predict_rs_update_time_ms(pending_cards) +
-    _analytics->predict_rs_scan_time_ms(scanned_cards, collector_state()->in_young_only_phase()) +
+    _analytics->predict_card_merge_time_ms(pending_cards + rs_length, collector_state()->in_young_only_phase()) +
+    _analytics->predict_card_scan_time_ms(effective_scanned_cards, collector_state()->in_young_only_phase()) +
     _analytics->predict_constant_other_time_ms();
 }
 
 double G1Policy::predict_base_elapsed_time_ms(size_t pending_cards) const {
   size_t rs_length = _analytics->predict_rs_length();
-  size_t card_num = _analytics->predict_card_num(rs_length, collector_state()->in_young_only_phase());
-  return predict_base_elapsed_time_ms(pending_cards, card_num);
+  return predict_base_elapsed_time_ms(pending_cards, rs_length);
 }
 
 size_t G1Policy::predict_bytes_to_copy(HeapRegion* hr) const {
@@ -971,13 +951,13 @@
 double G1Policy::predict_region_elapsed_time_ms(HeapRegion* hr,
                                                 bool for_young_gc) const {
   size_t rs_length = hr->rem_set()->occupied();
-  // Predicting the number of cards is based on which type of GC
-  // we're predicting for.
-  size_t card_num = _analytics->predict_card_num(rs_length, for_young_gc);
+  size_t scan_card_num = _analytics->predict_scan_card_num(rs_length, for_young_gc);
+
   size_t bytes_to_copy = predict_bytes_to_copy(hr);
 
   double region_elapsed_time_ms =
-    _analytics->predict_rs_scan_time_ms(card_num, collector_state()->in_young_only_phase()) +
+    _analytics->predict_card_merge_time_ms(rs_length, collector_state()->in_young_only_phase()) +
+    _analytics->predict_card_scan_time_ms(scan_card_num, collector_state()->in_young_only_phase()) +
     _analytics->predict_object_copy_time_ms(bytes_to_copy, collector_state()->mark_or_rebuild_in_progress());
 
   // The prediction of the "other" time for this region is based
--- a/src/hotspot/share/gc/g1/g1Policy.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1Policy.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -140,9 +140,9 @@
     _rs_length = rs_length;
   }
 
-  double predict_base_elapsed_time_ms(size_t pending_cards) const;
-  double predict_base_elapsed_time_ms(size_t pending_cards,
-                                      size_t scanned_cards) const;
+  double predict_base_elapsed_time_ms(size_t num_pending_cards) const;
+  double predict_base_elapsed_time_ms(size_t num_pending_cards,
+                                      size_t rs_length) const;
   size_t predict_bytes_to_copy(HeapRegion* hr) const;
   double predict_region_elapsed_time_ms(HeapRegion* hr, bool for_young_gc) const;
 
@@ -184,9 +184,6 @@
 
   G1CollectionSetChooser* cset_chooser() const;
 
-  // The number of bytes copied during the GC.
-  size_t _bytes_copied_during_gc;
-
   // Stash a pointer to the g1 heap.
   G1CollectedHeap* _g1h;
 
@@ -320,7 +317,7 @@
 
   // Record the start and end of an evacuation pause.
   void record_collection_pause_start(double start_time_sec);
-  virtual void record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc);
+  virtual void record_collection_pause_end(double pause_time_ms);
 
   // Record the start and end of a full collection.
   void record_full_collection_start();
@@ -339,17 +336,6 @@
 
   void print_phases();
 
-  // Record how much space we copied during a GC. This is typically
-  // called when a GC alloc region is being retired.
-  void record_bytes_copied_during_gc(size_t bytes) {
-    _bytes_copied_during_gc += bytes;
-  }
-
-  // The amount of space we copied during a GC.
-  size_t bytes_copied_during_gc() const {
-    return _bytes_copied_during_gc;
-  }
-
   bool next_gc_should_be_mixed(const char* true_action_str,
                                const char* false_action_str) const;
 
--- a/src/hotspot/share/gc/g1/g1Predictions.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1Predictions.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -57,6 +57,14 @@
   double get_new_prediction(TruncatedSeq const* seq) const {
     return seq->davg() + _sigma * stddev_estimate(seq);
   }
+
+  double get_new_unit_prediction(TruncatedSeq const* seq) const {
+    return clamp(get_new_prediction(seq), 0.0, 1.0);
+  }
+
+  double get_new_lower_zero_bound_prediction(TruncatedSeq const* seq) const {
+    return MAX2(get_new_prediction(seq), 0.0);
+  }
 };
 
 #endif // SHARE_GC_G1_G1PREDICTIONS_HPP
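
The two helpers added above bound the raw prediction from get_new_prediction(): get_new_unit_prediction() clamps it to [0.0, 1.0], which suits predicted ratios, while get_new_lower_zero_bound_prediction() only keeps it from going negative, which suits predicted times or counts. A brief usage sketch; `predictor` (a G1Predictions) and `seq` (a TruncatedSeq of past samples) are illustrative names:

  // Sketch only, not part of the changeset.
  double ratio   = predictor.get_new_unit_prediction(seq);              // always within [0.0, 1.0]
  double time_ms = predictor.get_new_lower_zero_bound_prediction(seq);  // never below 0.0
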
--- a/src/hotspot/share/gc/g1/g1RedirtyCardsQueue.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1RedirtyCardsQueue.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -129,7 +129,7 @@
 
 void G1RedirtyCardsQueueSet::enqueue_completed_buffer(BufferNode* node) {
   assert(_collecting, "precondition");
-  Atomic::add(buffer_size() - node->index(), &_entry_count);
+  Atomic::add(&_entry_count, buffer_size() - node->index());
   _list.push(*node);
   update_tail(node);
 }
@@ -139,7 +139,7 @@
   const G1BufferNodeList from = src->take_all_completed_buffers();
   if (from._head != NULL) {
     assert(from._tail != NULL, "invariant");
-    Atomic::add(from._entry_count, &_entry_count);
+    Atomic::add(&_entry_count, from._entry_count);
     _list.prepend(*from._head, *from._tail);
     update_tail(from._tail);
   }
--- a/src/hotspot/share/gc/g1/g1RegionMarkStatsCache.inline.hpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1RegionMarkStatsCache.inline.hpp	Mon Dec 02 12:01:40 2019 +0530
@@ -46,7 +46,7 @@
 inline void G1RegionMarkStatsCache::evict(uint idx) {
   G1RegionMarkStatsCacheEntry* cur = &_cache[idx];
   if (cur->_stats._live_words != 0) {
-    Atomic::add(cur->_stats._live_words, &_target[cur->_region_idx]._live_words);
+    Atomic::add(&_target[cur->_region_idx]._live_words, cur->_stats._live_words);
   }
   cur->clear();
 }
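
This hunk, like several others in the changeset, moves to the Atomic calling convention with the destination pointer first. Judging from the call sites in this changeset, Atomic::add(dest, value) returns the updated value and Atomic::cmpxchg(dest, compare_value, exchange_value) returns the previous value; the sketch below shows both calls in the new order (hypothetical variables, exact template parameters assumed):

  // Sketch only, not part of the changeset; assumes runtime/atomic.hpp is in scope.
  static void new_atomic_call_order(volatile size_t* counter, volatile bool* flag) {
    // old: Atomic::add(1u, counter)            new: destination first, returns the updated value
    size_t updated = Atomic::add(counter, (size_t)1);
    // old: Atomic::cmpxchg(true, flag, false)  new: Atomic::cmpxchg(dest, compare, exchange), returns the previous value
    bool was_unset = Atomic::cmpxchg(flag, false, true) == false;
    (void)updated; (void)was_unset;  // only here to keep this sketch warning-free
  }
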
--- a/src/hotspot/share/gc/g1/g1RemSet.cpp	Mon Dec 02 11:58:14 2019 +0530
+++ b/src/hotspot/share/gc/g1/g1RemSet.cpp	Mon Dec 02 12:01:40 2019 +0530
@@ -49,6 +49,7 @@
 #include "memory/resourceArea.hpp"
 #include "oops/access.inline.hpp"
 #include "oops/oop.inline.hpp"
+#include "runtime/atomic.hpp"
 #include "runtime/os.hpp"
 #include "utilities/align.hpp"
 #include "utilities/globalDefinitions.hpp"
@@ -177,9 +178,9 @@
         return;
       }
 
-      bool marked_as_dirty = Atomic::cmpxchg(true, &_contains[region], false) == false;
+      bool marked_as_dirty = Atomic::cmpxchg(&_contains[region], false, true) == false;
       if (marked_as_dirty) {
-        uint allocated = Atomic::add(1u, &_cur_idx) - 1;
+        uint allocated = Atomic::add(&_cur_idx, 1u) - 1;
         _buffer[allocated] = region;
       }
     }
@@ -196,30 +197,6 @@
     }
   };
 
-  // Creates a snapshot of the current _top values at the start of collection to
-  // filter out card marks that we do not want to scan.
-  class G1ResetScanTopClosure : public HeapRegionClosure {
-    G1RemSetScanState* _scan_state;
-
-  public:
-    G1ResetScanTopClosure(G1RemSetScanState* scan_state) : _scan_state(scan_state) { }
-
-    virtual bool do_heap_region(HeapRegion* r) {
-      uint hrm_index = r->hrm_index();
-      if (r->in_collection_set()) {
-        // Young regions had their card table marked as young at their allocation;
-        // we need to make sure that these marks are cleared at the end of GC, *but*
-        // they should not be scanned for cards.
-        // So directly add them to the "all_dirty_regions".
-        // Same for regions in the (initial) collection set: they may contain cards from
-        // the log buffers, make sure they are cleaned.
-        _scan_state->add_all_dirty_region(hrm_index);
-       } else if (r->is_old_or_humongous_or_archive()) {
-        _scan_state->set_scan_top(hrm_index, r->top());
-       }
-       return false;
-     }
-  };
   // For each region, contains the maximum top() value to be used during this garbage
   // collection. Subsumes common checks like filtering out everything but old and
   // humongous regions outside the collection set.
@@ -255,7 +232,7 @@
 
     void work(uint worker_id) {
       while (_cur_dirty_regions < _regions->size()) {
-        uint next = Atomic::add(_chunk_length, &_cur_dirty_regions) - _chunk_length;
+        uint next = Atomic::add(&_cur_dirty_regions, _chunk_length) - _chunk_length;
         uint max = MIN2(next + _chunk_length, _regions->size());
 
         for (uint i = next; i < max; i++) {
@@ -328,16 +305,8 @@
   }
 
   void prepare() {
-    for (size_t i = 0; i < _max_regions; i++) {
-      _collection_set_iter_state[i] = false;
-      clear_scan_top((uint)i);
-    }
-
     _all_dirty_regions = new G1DirtyRegions(_max_regions);
     _next_dirty_regions = new G1DirtyRegions(_max_regions);
-
-    G1ResetScanTopClosure cl(this);
-    G1CollectedHeap::heap()->heap_region_iterate(&cl);
   }
 
   void prepare_for_merge_heap_roots() {
@@ -430,6 +399,10 @@
     } while (cur != start_pos);
   }
 
+  void reset_region_claim(uint region_idx) {
+    _collection_set_iter_state[region_idx] = false;
+  }
+
   // Attempt to claim the given region in the collection set for iteration. Returns true
   // if this call caused the transition from Unclaimed to Claimed.
   inline bool claim_collection_set_region(uint region) {
@@ -437,7 +410,7 @@
     if (_collection_set_iter_state[region]) {
       return false;
     }
-    return !Atomic::cmpxchg(true, &_collection_set_iter_state[region], false);
+    return !Atomic::cmpxchg(&_collection_set_iter_state[region], false, true);
   }
 
   bool has_cards_to_scan(uint region) {
@@ -447,7 +420,7 @@
 
   uint claim_cards_to_scan(uint region, uint increment) {
     assert(region < _max_regions, "Tried to access invalid region %u", region);
-    return Atomic::add(increment, &_card_table_scan_state[region]) - increment