changeset 51294:9d7f647a2b6d

Merge
author prr
date Tue, 19 Jun 2018 09:22:38 -0700
parents 9ff8428f3e51 f2d94a0619a2
children 681b118332d7
files src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/algorithms/implementations/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/algorithms/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/c14n/helper/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/c14n/implementations/Canonicalizer11.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/c14n/implementations/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/c14n/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/AbstractSerializer.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/AgreementMethod.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/CipherData.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/CipherReference.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/CipherValue.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/DocumentSerializer.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/EncryptedData.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/EncryptedKey.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/EncryptedType.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/EncryptionMethod.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/EncryptionProperties.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/EncryptionProperty.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/Reference.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/ReferenceList.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/Serializer.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/Transforms.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/XMLCipher.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/XMLCipherInput.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/XMLCipherParameters.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/XMLEncryptionException.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/encryption/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/exceptions/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/keys/content/keyvalues/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/keys/content/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/keys/content/x509/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/keys/keyresolver/implementations/EncryptedKeyResolver.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/keys/keyresolver/implementations/package.html 
src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/keys/keyresolver/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/keys/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/keys/storage/implementations/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/keys/storage/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/resource/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/signature/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/transforms/implementations/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/transforms/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/utils/ElementChecker.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/utils/ElementCheckerImpl.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/utils/EncryptionElementProxy.java src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/utils/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/utils/resolver/implementations/package.html src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/utils/resolver/package.html src/java.xml.crypto/share/classes/org/jcp/xml/dsig/internal/dom/DOMCryptoBinary.java test/hotspot/jtreg/runtime/exceptionMsgs/IncompatibleClassChangeError/ICC_B.jasm test/jdk/ProblemList.txt
diffstat 557 files changed, 20529 insertions(+), 16745 deletions(-)

--- a/make/autoconf/hotspot.m4	Fri Jun 15 13:07:46 2018 -0700
+++ b/make/autoconf/hotspot.m4	Tue Jun 19 09:22:38 2018 -0700
@@ -241,10 +241,12 @@
 #
 AC_DEFUN_ONCE([HOTSPOT_ENABLE_DISABLE_CDS],
 [
-  AC_ARG_ENABLE([cds], [AS_HELP_STRING([--enable-cds@<:@=yes/no@:>@],
-      [enable class data sharing feature in non-minimal VM. Default is yes.])])
+  AC_ARG_ENABLE([cds], [AS_HELP_STRING([--enable-cds@<:@=yes/no/auto@:>@],
+      [enable class data sharing feature in non-minimal VM. Default is auto, where cds is enabled if supported on the platform.])])
 
-  if test "x$enable_cds" = "x" || test "x$enable_cds" = "xyes"; then
+  if test "x$enable_cds" = "x" || test "x$enable_cds" = "xauto"; then
+    ENABLE_CDS="true"
+  elif test "x$enable_cds" = "xyes"; then
     ENABLE_CDS="true"
   elif test "x$enable_cds" = "xno"; then
     ENABLE_CDS="false"
@@ -252,6 +254,14 @@
     AC_MSG_ERROR([Invalid value for --enable-cds: $enable_cds])
   fi
 
+  # Disable CDS on AIX.
+  if test "x$OPENJDK_TARGET_OS" = "xaix"; then
+    ENABLE_CDS="false"
+    if test "x$enable_cds" = "xyes"; then
+      AC_MSG_ERROR([CDS is currently not supported on AIX. Remove --enable-cds.])
+    fi
+  fi
+
   AC_SUBST(ENABLE_CDS)
 ])
 
@@ -424,8 +434,21 @@
 
   # All variants but minimal (and custom) get these features
   NON_MINIMAL_FEATURES="$NON_MINIMAL_FEATURES cmsgc g1gc parallelgc serialgc epsilongc jni-check jvmti management nmt services vm-structs"
+
+  AC_MSG_CHECKING([if cds should be enabled])
   if test "x$ENABLE_CDS" = "xtrue"; then
+    if test "x$enable_cds" = "xyes"; then
+      AC_MSG_RESULT([yes, forced])
+    else
+      AC_MSG_RESULT([yes])
+    fi
     NON_MINIMAL_FEATURES="$NON_MINIMAL_FEATURES cds"
+  else
+    if test "x$enable_cds" = "xno"; then
+      AC_MSG_RESULT([no, forced])
+    else
+      AC_MSG_RESULT([no])
+    fi
   fi
 
   # Enable features depending on variant.
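Note: the hunk above turns --enable-cds into a tri-state option. A minimal sketch of the resulting resolution logic, written in plain C++ rather than m4 and not part of the changeset: unset and "auto" enable CDS where the platform supports it, "yes" forces it and becomes a configure error on an unsupported platform such as AIX, and "no" disables it.

#include <cstdio>
#include <cstring>

// Sketch of the tri-state --enable-cds resolution added above.
static const char* resolve_cds(const char* enable_cds, bool platform_supports_cds) {
  bool enabled;
  if (strcmp(enable_cds, "") == 0 || strcmp(enable_cds, "auto") == 0 ||
      strcmp(enable_cds, "yes") == 0) {
    enabled = true;
  } else if (strcmp(enable_cds, "no") == 0) {
    enabled = false;
  } else {
    return "error: invalid value for --enable-cds";
  }
  if (!platform_supports_cds) {
    if (strcmp(enable_cds, "yes") == 0) {
      return "error: CDS is not supported on this platform, remove --enable-cds";
    }
    enabled = false;  // "auto" quietly falls back to disabled
  }
  return enabled ? "cds enabled" : "cds disabled";
}

int main() {
  printf("auto, supported:   %s\n", resolve_cds("auto", true));
  printf("auto, unsupported: %s\n", resolve_cds("auto", false));
  printf("yes,  unsupported: %s\n", resolve_cds("yes", false));
  return 0;
}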
--- a/make/autoconf/toolchain_windows.m4	Fri Jun 15 13:07:46 2018 -0700
+++ b/make/autoconf/toolchain_windows.m4	Tue Jun 19 09:22:38 2018 -0700
@@ -37,6 +37,7 @@
 VS_SDK_INSTALLDIR_2010="Microsoft SDKs/Windows/v7.1"
 VS_VS_PLATFORM_NAME_2010="v100"
 VS_SDK_PLATFORM_NAME_2010="Windows7.1SDK"
+VS_SUPPORTED_2010=false
 
 VS_DESCRIPTION_2012="Microsoft Visual Studio 2012"
 VS_VERSION_INTERNAL_2012=110
@@ -47,6 +48,7 @@
 VS_SDK_INSTALLDIR_2012=
 VS_VS_PLATFORM_NAME_2012="v110"
 VS_SDK_PLATFORM_NAME_2012=
+VS_SUPPORTED_2012=false
 
 VS_DESCRIPTION_2013="Microsoft Visual Studio 2013"
 VS_VERSION_INTERNAL_2013=120
@@ -57,6 +59,7 @@
 VS_SDK_INSTALLDIR_2013=
 VS_VS_PLATFORM_NAME_2013="v120"
 VS_SDK_PLATFORM_NAME_2013=
+VS_SUPPORTED_2013=false
 
 VS_DESCRIPTION_2015="Microsoft Visual Studio 2015"
 VS_VERSION_INTERNAL_2015=140
@@ -70,6 +73,7 @@
 # The vcvars of 2015 breaks if 2017 is also installed. Work around this by
 # explicitly specifying Windows Kit 8.1 to be used.
 VS_ENV_ARGS_2015="8.1"
+VS_SUPPORTED_2015=false
 
 VS_DESCRIPTION_2017="Microsoft Visual Studio 2017"
 VS_VERSION_INTERNAL_2017=141
@@ -82,6 +86,7 @@
 VS_SDK_INSTALLDIR_2017=
 VS_VS_PLATFORM_NAME_2017="v141"
 VS_SDK_PLATFORM_NAME_2017=
+VS_SUPPORTED_2017=true
 
 ################################################################################
 
@@ -266,6 +271,7 @@
     eval MSVCR_NAME="\${VS_MSVCR_${VS_VERSION}}"
     eval MSVCP_NAME="\${VS_MSVCP_${VS_VERSION}}"
     eval USE_UCRT="\${VS_USE_UCRT_${VS_VERSION}}"
+    eval VS_SUPPORTED="\${VS_SUPPORTED_${VS_VERSION}}"
     eval PLATFORM_TOOLSET="\${VS_VS_PLATFORM_NAME_${VS_VERSION}}"
     VS_PATH="$TOOLCHAIN_PATH:$PATH"
 
@@ -312,6 +318,7 @@
       eval MSVCR_NAME="\${VS_MSVCR_${VS_VERSION}}"
       eval MSVCP_NAME="\${VS_MSVCP_${VS_VERSION}}"
       eval USE_UCRT="\${VS_USE_UCRT_${VS_VERSION}}"
+      eval VS_SUPPORTED="\${VS_SUPPORTED_${VS_VERSION}}"
       # The rest of the variables are already evaled while probing
       AC_MSG_NOTICE([Found $VS_DESCRIPTION])
       break
@@ -319,7 +326,7 @@
   done
 
   TOOLCHAIN_DESCRIPTION="$VS_DESCRIPTION"
-  if test "$TOOLCHAIN_VERSION" -gt 2013; then
+  if test "x$VS_SUPPORTED" = "xfalse"; then
     UNSUPPORTED_TOOLCHAIN_VERSION=yes
   fi
 ])
--- a/make/lib/CoreLibraries.gmk	Fri Jun 15 13:07:46 2018 -0700
+++ b/make/lib/CoreLibraries.gmk	Tue Jun 19 09:22:38 2018 -0700
@@ -195,7 +195,6 @@
     OPTIMIZATION := LOW, \
     CFLAGS := $(CFLAGS_JDKLIB), \
     CXXFLAGS := $(CXXFLAGS_JDKLIB), \
-    DISABLED_WARNINGS_gcc := implicit-fallthrough, \
     CFLAGS_unix := -UDEBUG, \
     LDFLAGS := $(LDFLAGS_JDKLIB) $(LDFLAGS_CXX_JDK) \
         $(call SET_SHARED_LIBRARY_ORIGIN), \
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -58,6 +58,28 @@
 
 static float unpack(unsigned value);
 
+short Assembler::SIMD_Size_in_bytes[] = {
+  // T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q
+       8,   16,   8,  16,   8,  16,   8,  16,  16
+};
+
+#ifdef ASSERT
+static void asm_check(const unsigned int *insns, const unsigned int *insns1, size_t len) {
+    bool ok = true;
+    for (unsigned int i = 0; i < len; i++) {
+      if (insns[i] != insns1[i]) {
+        ok = false;
+        printf("Ours:\n");
+        Disassembler::decode((address)&insns1[i], (address)&insns1[i+1]);
+        printf("Theirs:\n");
+        Disassembler::decode((address)&insns[i], (address)&insns[i+1]);
+        printf("\n");
+      }
+    }
+    assert(ok, "Assembler smoke test failed");
+  }
+#endif // ASSERT
+
 void entry(CodeBuffer *cb) {
 
   // {
@@ -1155,31 +1177,24 @@
   };
 // END  Generated code -- do not edit
 
+  asm_check((unsigned int *)entry, insns, sizeof insns / sizeof insns[0]);
+
   {
-    bool ok = true;
-    unsigned int *insns1 = (unsigned int *)entry;
-    for (unsigned int i = 0; i < sizeof insns / sizeof insns[0]; i++) {
-      if (insns[i] != insns1[i]) {
-        ok = false;
-        printf("Ours:\n");
-        Disassembler::decode((address)&insns1[i], (address)&insns1[i+1]);
-        printf("Theirs:\n");
-        Disassembler::decode((address)&insns[i], (address)&insns[i+1]);
-        printf("\n");
-      }
-    }
-    assert(ok, "Assembler smoke test failed");
+    address PC = __ pc();
+    __ ld1(v0, __ T16B, Address(r16));      // No offset
+    __ ld1(v0, __ T8H, __ post(r16, 16));   // Post-index
+    __ ld2(v0, v1, __ T8H, __ post(r24, 16 * 2));   // Post-index
+    __ ld1(v0, __ T16B, __ post(r16, r17)); // Register post-index
+    static const unsigned int vector_insns[] = {
+       0x4c407200, // ld1   {v0.16b}, [x16]
+       0x4cdf7600, // ld1   {v0.8h}, [x16], #16
+       0x4cdf8700, // ld2   {v0.8h, v1.8h}, [x24], #32
+       0x4cd17200, // ld1   {v0.16b}, [x16], x17
+      };
+    asm_check((unsigned int *)PC, vector_insns,
+              sizeof vector_insns / sizeof vector_insns[0]);
   }
 
-#ifndef PRODUCT
-
-  address PC = __ pc();
-  __ ld1(v0, __ T16B, Address(r16)); // No offset
-  __ ld1(v0, __ T16B, __ post(r16, 0)); // Post-index
-  __ ld1(v0, __ T16B, Address(r16, r17)); //
-
-
-#endif // PRODUCT
 #endif // ASSERT
 }
 
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -305,8 +305,11 @@
   Pre(Register reg, int o) : PrePost(reg, o) { }
 };
 class Post : public PrePost {
+  Register _idx;
 public:
-  Post(Register reg, int o) : PrePost(reg, o) { }
+  Post(Register reg, int o) : PrePost(reg, o) { _idx = NULL; }
+  Post(Register reg, Register idx) : PrePost(reg, 0) { _idx = idx; }
+  Register idx_reg() { return _idx; }
 };
 
 namespace ext
@@ -341,7 +344,7 @@
 class Address {
  public:
 
-  enum mode { no_mode, base_plus_offset, pre, post, pcrel,
+  enum mode { no_mode, base_plus_offset, pre, post, post_reg, pcrel,
               base_plus_offset_reg, literal };
 
   // Shift and extend for base reg + reg offset addressing
@@ -413,7 +416,8 @@
   Address(Pre p)
     : _mode(pre), _base(p.reg()), _offset(p.offset()) { }
   Address(Post p)
-    : _mode(post), _base(p.reg()), _offset(p.offset()), _target(0) { }
+    : _mode(p.idx_reg() == NULL ? post : post_reg), _base(p.reg()),
+      _offset(p.offset()), _target(0), _index(p.idx_reg()) { }
   Address(address target, RelocationHolder const& rspec)
     : _mode(literal),
       _rspec(rspec),
@@ -436,7 +440,7 @@
 
   Register base() const {
     guarantee((_mode == base_plus_offset | _mode == base_plus_offset_reg
-               | _mode == post),
+               | _mode == post | _mode == post_reg),
               "wrong mode");
     return _base;
   }
@@ -660,10 +664,14 @@
     return adjust(base, offset, true);
   }
 
-  Address post (Register base, int offset) {
+  Address post(Register base, int offset) {
     return adjust(base, offset, false);
   }
 
+  Address post(Register base, Register idx) {
+    return Address(Post(base, idx));
+  }
+
   Instruction_aarch64* current;
 
   void set_current(Instruction_aarch64* i) { current = i; }
@@ -2032,6 +2040,10 @@
        B, H, S, D, Q
   };
 
+private:
+  static short SIMD_Size_in_bytes[];
+
+public:
 #define INSN(NAME, op)                                            \
   void NAME(FloatRegister Rt, SIMD_RegVariant T, const Address &adr) {   \
     ld_st2((Register)Rt, adr, (int)T & 3, op + ((T==Q) ? 0b10:0b00), 1); \
@@ -2051,7 +2063,8 @@
     f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
   }
   void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
-             int imm, int op1, int op2) {
+             int imm, int op1, int op2, int regs) {
+    guarantee(T <= T1Q && imm == SIMD_Size_in_bytes[T] * regs, "bad offset");
     starti;
     f(0,31), f((int)T & 1, 30);
     f(op1 | 0b100, 29, 21), f(0b11111, 20, 16), f(op2, 15, 12);
@@ -2065,34 +2078,34 @@
     f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
   }
 
- void ld_st(FloatRegister Vt, SIMD_Arrangement T, Address a, int op1, int op2) {
-   switch (a.getMode()) {
-   case Address::base_plus_offset:
-     guarantee(a.offset() == 0, "no offset allowed here");
-     ld_st(Vt, T, a.base(), op1, op2);
-     break;
-   case Address::post:
-     ld_st(Vt, T, a.base(), a.offset(), op1, op2);
-     break;
-   case Address::base_plus_offset_reg:
-     ld_st(Vt, T, a.base(), a.index(), op1, op2);
-     break;
-   default:
-     ShouldNotReachHere();
-   }
- }
+  void ld_st(FloatRegister Vt, SIMD_Arrangement T, Address a, int op1, int op2, int regs) {
+    switch (a.getMode()) {
+    case Address::base_plus_offset:
+      guarantee(a.offset() == 0, "no offset allowed here");
+      ld_st(Vt, T, a.base(), op1, op2);
+      break;
+    case Address::post:
+      ld_st(Vt, T, a.base(), a.offset(), op1, op2, regs);
+      break;
+    case Address::post_reg:
+      ld_st(Vt, T, a.base(), a.index(), op1, op2);
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+  }
 
  public:
 
-#define INSN1(NAME, op1, op2)                                   \
+#define INSN1(NAME, op1, op2)                                           \
   void NAME(FloatRegister Vt, SIMD_Arrangement T, const Address &a) {   \
-   ld_st(Vt, T, a, op1, op2);                                           \
+    ld_st(Vt, T, a, op1, op2, 1);                                       \
  }
 
 #define INSN2(NAME, op1, op2)                                           \
   void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, const Address &a) { \
     assert(Vt->successor() == Vt2, "Registers must be ordered");        \
-    ld_st(Vt, T, a, op1, op2);                                          \
+    ld_st(Vt, T, a, op1, op2, 2);                                       \
   }
 
 #define INSN3(NAME, op1, op2)                                           \
@@ -2100,7 +2113,7 @@
             SIMD_Arrangement T, const Address &a) {                     \
     assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3,           \
            "Registers must be ordered");                                \
-    ld_st(Vt, T, a, op1, op2);                                          \
+    ld_st(Vt, T, a, op1, op2, 3);                                       \
   }
 
 #define INSN4(NAME, op1, op2)                                           \
@@ -2108,7 +2121,7 @@
             FloatRegister Vt4, SIMD_Arrangement T, const Address &a) {  \
     assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 &&         \
            Vt3->successor() == Vt4, "Registers must be ordered");       \
-    ld_st(Vt, T, a, op1, op2);                                          \
+    ld_st(Vt, T, a, op1, op2, 4);                                       \
   }
 
   INSN1(ld1,  0b001100010, 0b0111);
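Note: with the new regs parameter, ld_st can now guarantee that a post-index immediate equals the arrangement size times the number of registers transferred, using the SIMD_Size_in_bytes table introduced in assembler_aarch64.cpp. A standalone sketch of that check, with the size values copied from the table (not HotSpot code):

#include <cassert>
#include <cstdio>

// Values copied from the SIMD_Size_in_bytes table in this changeset:
// T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q
enum SIMD_Arrangement { T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q };
static const short simd_size_in_bytes[] = { 8, 16, 8, 16, 8, 16, 8, 16, 16 };

// The guarantee added to ld_st: an immediate post-index offset must
// equal the bytes actually transferred (arrangement size * registers).
static bool valid_post_offset(SIMD_Arrangement t, int regs, int imm) {
  return t <= T1Q && imm == simd_size_in_bytes[t] * regs;
}

int main() {
  assert(valid_post_offset(T8H, 1, 16));       // ld1 {v0.8h}, [x16], #16
  assert(valid_post_offset(T8H, 2, 16 * 2));   // ld2 {v0.8h, v1.8h}, [x24], #32
  assert(!valid_post_offset(T16B, 1, 8));      // wrong offset is rejected
  puts("post-index offsets check out");
  return 0;
}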
--- a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -33,13 +33,13 @@
 
   // LR is live.  It must be saved around calls.
 
-  bool on_heap = (decorators & IN_HEAP) != 0;
-  bool on_root = (decorators & IN_ROOT) != 0;
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool in_native = (decorators & IN_NATIVE) != 0;
   bool oop_not_null = (decorators & OOP_NOT_NULL) != 0;
   switch (type) {
   case T_OBJECT:
   case T_ARRAY: {
-    if (on_heap) {
+    if (in_heap) {
       if (UseCompressedOops) {
         __ ldrw(dst, src);
         if (oop_not_null) {
@@ -51,7 +51,7 @@
         __ ldr(dst, src);
       }
     } else {
-      assert(on_root, "why else?");
+      assert(in_native, "why else?");
       __ ldr(dst, src);
     }
     break;
@@ -71,13 +71,13 @@
 
 void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                    Address dst, Register val, Register tmp1, Register tmp2) {
-  bool on_heap = (decorators & IN_HEAP) != 0;
-  bool on_root = (decorators & IN_ROOT) != 0;
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool in_native = (decorators & IN_NATIVE) != 0;
   switch (type) {
   case T_OBJECT:
   case T_ARRAY: {
     val = val == noreg ? zr : val;
-    if (on_heap) {
+    if (in_heap) {
       if (UseCompressedOops) {
         assert(!dst.uses(val), "not enough registers");
         if (val != zr) {
@@ -88,7 +88,7 @@
         __ str(val, dst);
       }
     } else {
-      assert(on_root, "why else?");
+      assert(in_native, "why else?");
       __ str(val, dst);
     }
     break;
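Note: this rename (IN_ROOT becomes IN_NATIVE) repeats across every platform barrier-set assembler below; only the decorator name changes, not the logic. A minimal sketch of the bit-test pattern involved, with made-up bit positions (the real DecoratorSet values live in HotSpot's access decorator headers):

#include <cassert>
#include <cstdint>

typedef uint64_t DecoratorSet;
// Bit positions here are illustrative only.
const DecoratorSet IN_HEAP   = 1ull << 0;
const DecoratorSet IN_NATIVE = 1ull << 1;   // formerly IN_ROOT

// Every oop access is decorated as either a heap access (which may
// need GC barriers) or a native/root access (a plain load or store).
static bool needs_heap_barriers(DecoratorSet decorators) {
  bool in_heap   = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  assert(in_heap || in_native);  // "where?"
  return in_heap;
}

int main() {
  assert(needs_heap_barriers(IN_HEAP));
  assert(!needs_heap_barriers(IN_NATIVE));
  return 0;
}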
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -2120,7 +2120,7 @@
   tbz(r0, 0, not_weak);    // Test for jweak tag.
 
   // Resolve jweak.
-  access_load_at(T_OBJECT, IN_ROOT | ON_PHANTOM_OOP_REF, value,
+  access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value,
                  Address(value, -JNIHandles::weak_tag_value), tmp, thread);
   verify_oop(value);
   b(done);
@@ -2570,54 +2570,47 @@
 #endif
 
 void MacroAssembler::push_call_clobbered_registers() {
+  int step = 4 * wordSize;
   push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
-
+  sub(sp, sp, step);
+  mov(rscratch1, -step);
   // Push v0-v7, v16-v31.
-  for (int i = 30; i >= 0; i -= 2) {
-    if (i <= v7->encoding() || i >= v16->encoding()) {
-        stpd(as_FloatRegister(i), as_FloatRegister(i+1),
-             Address(pre(sp, -2 * wordSize)));
-    }
+  for (int i = 31; i>= 4; i -= 4) {
+    if (i <= v7->encoding() || i >= v16->encoding())
+      st1(as_FloatRegister(i-3), as_FloatRegister(i-2), as_FloatRegister(i-1),
+          as_FloatRegister(i), T1D, Address(post(sp, rscratch1)));
   }
+  st1(as_FloatRegister(0), as_FloatRegister(1), as_FloatRegister(2),
+      as_FloatRegister(3), T1D, Address(sp));
 }
 
 void MacroAssembler::pop_call_clobbered_registers() {
-
-  for (int i = 0; i < 32; i += 2) {
-    if (i <= v7->encoding() || i >= v16->encoding()) {
-      ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
-           Address(post(sp, 2 * wordSize)));
-    }
+  for (int i = 0; i < 32; i += 4) {
+    if (i <= v7->encoding() || i >= v16->encoding())
+      ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
+          as_FloatRegister(i+3), T1D, Address(post(sp, 4 * wordSize)));
   }
 
   pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
 }
 
 void MacroAssembler::push_CPU_state(bool save_vectors) {
+  int step = (save_vectors ? 8 : 4) * wordSize;
   push(0x3fffffff, sp);         // integer registers except lr & sp
-
-  if (!save_vectors) {
-    for (int i = 30; i >= 0; i -= 2)
-      stpd(as_FloatRegister(i), as_FloatRegister(i+1),
-           Address(pre(sp, -2 * wordSize)));
-  } else {
-    for (int i = 30; i >= 0; i -= 2)
-      stpq(as_FloatRegister(i), as_FloatRegister(i+1),
-           Address(pre(sp, -4 * wordSize)));
+  mov(rscratch1, -step);
+  sub(sp, sp, step);
+  for (int i = 28; i >= 4; i -= 4) {
+    st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
+        as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1)));
   }
+  st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp);
 }
 
 void MacroAssembler::pop_CPU_state(bool restore_vectors) {
-  if (!restore_vectors) {
-    for (int i = 0; i < 32; i += 2)
-      ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
-           Address(post(sp, 2 * wordSize)));
-  } else {
-    for (int i = 0; i < 32; i += 2)
-      ldpq(as_FloatRegister(i), as_FloatRegister(i+1),
-           Address(post(sp, 4 * wordSize)));
-  }
-
+  int step = (restore_vectors ? 8 : 4) * wordSize;
+  for (int i = 0; i <= 28; i += 4)
+    ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
+        as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step)));
   pop(0x3fffffff, sp);         // integer registers except lr & sp
 }
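Note: the rewritten save/restore loops trade stp/ldp pairs for st1/ld1 instructions that move four SIMD registers at a time; the save loop walks sp downward via a register post-index holding -step, and the restore loop walks back up by +step. A small simulation checking that the two loops are symmetric (a sketch of the addressing arithmetic only, not VM code):

#include <cassert>
#include <map>

int main() {
  const long word = 8, step = 4 * word;
  long sp = 0;
  std::map<long, int> mem;              // address -> first register of group

  // push_CPU_state: sub(sp, sp, step), then st1 v[i..i+3], [sp], -step
  sp -= step;
  for (int i = 28; i >= 4; i -= 4) {
    mem[sp] = i;
    sp -= step;                         // post-index by rscratch1 == -step
  }
  mem[sp] = 0;                          // final st1 v0..v3, [sp], no writeback

  // pop_CPU_state: ld1 v[i..i+3], [sp], +step reloads each group
  for (int i = 0; i <= 28; i += 4) {
    assert(mem.at(sp) == i);            // same address the group was stored at
    sp += step;
  }
  assert(sp == 0);                      // stack pointer is fully unwound
  return 0;
}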
 
--- a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -29,12 +29,12 @@
 
 void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Register dst, Address src, Register tmp1, Register tmp2, Register tmp3) {
-  bool on_heap = (decorators & IN_HEAP) != 0;
-  bool on_root = (decorators & IN_ROOT) != 0;
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool in_native = (decorators & IN_NATIVE) != 0;
   switch (type) {
   case T_OBJECT:
   case T_ARRAY: {
-    if (on_heap) {
+    if (in_heap) {
 #ifdef AARCH64
       if (UseCompressedOops) {
         __ ldr_w(dst, src);
@@ -45,7 +45,7 @@
         __ ldr(dst, src);
       }
     } else {
-      assert(on_root, "why else?");
+      assert(in_native, "why else?");
       __ ldr(dst, src);
     }
     break;
@@ -57,12 +57,12 @@
 
 void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                    Address obj, Register val, Register tmp1, Register tmp2, Register tmp3, bool is_null) {
-  bool on_heap = (decorators & IN_HEAP) != 0;
-  bool on_root = (decorators & IN_ROOT) != 0;
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool in_native = (decorators & IN_NATIVE) != 0;
   switch (type) {
   case T_OBJECT:
   case T_ARRAY: {
-    if (on_heap) {
+    if (in_heap) {
 #ifdef AARCH64
       if (UseCompressedOops) {
         assert(!dst.uses(src), "not enough registers");
@@ -76,7 +76,7 @@
         __ str(val, obj);
       }
     } else {
-      assert(on_root, "why else?");
+      assert(in_native, "why else?");
       __ str(val, obj);
     }
     break;
--- a/src/hotspot/cpu/arm/macroAssembler_arm.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -2135,7 +2135,7 @@
   tbz(value, 0, not_weak);      // Test for jweak tag.
 
   // Resolve jweak.
-  access_load_at(T_OBJECT, IN_ROOT | ON_PHANTOM_OOP_REF,
+  access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
                  Address(value, -JNIHandles::weak_tag_value), value, tmp1, tmp2, noreg);
   b(done);
   bind(not_weak);
--- a/src/hotspot/cpu/arm/vm_version_arm_32.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/cpu/arm/vm_version_arm_32.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -104,6 +104,18 @@
 extern "C" address check_vfp_fault_instr;
 extern "C" address check_simd_fault_instr;
 
+void VM_Version::early_initialize() {
+
+  // Make sure that _arm_arch is initialized so that any calls to OrderAccess will
+  // use proper dmb instruction
+  get_os_cpu_info();
+
+  _kuser_helper_version = *(int*)KUSER_HELPER_VERSION_ADDR;
+  // armv7 has the ldrexd instruction that can be used to implement cx8
+  // armv5 with linux >= 3.1 can use kernel helper routine
+  _supports_cx8 = (supports_ldrexd() || supports_kuser_cmpxchg64());
+}
+
 void VM_Version::initialize() {
   ResourceMark rm;
 
@@ -216,10 +228,6 @@
     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
   }
 
-  get_os_cpu_info();
-
-  _kuser_helper_version = *(int*)KUSER_HELPER_VERSION_ADDR;
-
 #ifdef COMPILER2
   // C2 is only supported on v7+ VFP at this time
   if (_arm_arch < 7 || !has_vfp()) {
@@ -227,9 +235,6 @@
   }
 #endif
 
-  // armv7 has the ldrexd instruction that can be used to implement cx8
-  // armv5 with linux >= 3.1 can use kernel helper routine
-  _supports_cx8 = (supports_ldrexd() || supports_kuser_cmpxchg64());
   // ARM doesn't have special instructions for these but ldrex/ldrexd
   // enable shorter instruction sequences that the ones based on cas.
   _supports_atomic_getset4 = supports_ldrex();
--- a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -335,7 +335,7 @@
 
   __ beq(CCR0, not_weak);     // Test for jweak tag.
   __ verify_oop(value);
-  g1_write_barrier_pre(masm, IN_ROOT | ON_PHANTOM_OOP_REF,
+  g1_write_barrier_pre(masm, IN_NATIVE | ON_PHANTOM_OOP_REF,
                        noreg, noreg, value,
                        tmp1, tmp2, needs_frame);
   __ bind(not_weak);
--- a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -33,16 +33,16 @@
 void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                    Register base, RegisterOrConstant ind_or_offs, Register val,
                                    Register tmp1, Register tmp2, Register tmp3, bool needs_frame) {
-  bool on_heap  = (decorators & IN_HEAP) != 0;
-  bool on_root  = (decorators & IN_ROOT) != 0;
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool in_native = (decorators & IN_NATIVE) != 0;
   bool not_null = (decorators & OOP_NOT_NULL) != 0;
-  assert(on_heap || on_root, "where?");
+  assert(in_heap || in_native, "where?");
   assert_different_registers(base, val, tmp1, tmp2, R0);
 
   switch (type) {
   case T_ARRAY:
   case T_OBJECT: {
-    if (UseCompressedOops && on_heap) {
+    if (UseCompressedOops && in_heap) {
       Register co = tmp1;
       if (val == noreg) {
         __ li(co, 0);
@@ -66,16 +66,16 @@
 void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Register base, RegisterOrConstant ind_or_offs, Register dst,
                                   Register tmp1, Register tmp2, bool needs_frame, Label *L_handle_null) {
-  bool on_heap  = (decorators & IN_HEAP) != 0;
-  bool on_root  = (decorators & IN_ROOT) != 0;
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool in_native = (decorators & IN_NATIVE) != 0;
   bool not_null = (decorators & OOP_NOT_NULL) != 0;
-  assert(on_heap || on_root, "where?");
+  assert(in_heap || in_native, "where?");
   assert_different_registers(ind_or_offs.register_or_noreg(), dst, R0);
 
   switch (type) {
   case T_ARRAY:
   case T_OBJECT: {
-    if (UseCompressedOops && on_heap) {
+    if (UseCompressedOops && in_heap) {
       if (L_handle_null != NULL) { // Label provided.
         __ lwz(dst, ind_or_offs, base);
         __ cmpwi(CCR0, dst, 0);
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.inline.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.inline.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -329,7 +329,7 @@
 inline void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
                                             Register base, RegisterOrConstant ind_or_offs, Register val,
                                             Register tmp1, Register tmp2, Register tmp3, bool needs_frame) {
-  assert((decorators & ~(AS_RAW | IN_HEAP | IN_HEAP_ARRAY | IN_ROOT | OOP_NOT_NULL |
+  assert((decorators & ~(AS_RAW | IN_HEAP | IN_HEAP_ARRAY | IN_NATIVE | OOP_NOT_NULL |
                          ON_UNKNOWN_OOP_REF)) == 0, "unsupported decorator");
   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
   bool as_raw = (decorators & AS_RAW) != 0;
@@ -348,7 +348,7 @@
 inline void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
                                            Register base, RegisterOrConstant ind_or_offs, Register dst,
                                            Register tmp1, Register tmp2, bool needs_frame, Label *L_handle_null) {
-  assert((decorators & ~(AS_RAW | IN_HEAP | IN_HEAP_ARRAY | IN_ROOT | OOP_NOT_NULL |
+  assert((decorators & ~(AS_RAW | IN_HEAP | IN_HEAP_ARRAY | IN_NATIVE | OOP_NOT_NULL |
                          ON_PHANTOM_OOP_REF | ON_WEAK_OOP_REF)) == 0, "unsupported decorator");
   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
   decorators = AccessInternal::decorator_fixup(decorators);
--- a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -403,7 +403,7 @@
   __ z_tmll(tmp1, JNIHandles::weak_tag_mask); // Test for jweak tag.
   __ z_braz(Lnot_weak);
   __ verify_oop(value);
-  DecoratorSet decorators = IN_ROOT | ON_PHANTOM_OOP_REF;
+  DecoratorSet decorators = IN_NATIVE | ON_PHANTOM_OOP_REF;
   g1_write_barrier_pre(masm, decorators, (const Address*)NULL, value, noreg, tmp1, tmp2, true);
   __ bind(Lnot_weak);
   __ verify_oop(value);
--- a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -37,15 +37,15 @@
 
 void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   const Address& addr, Register dst, Register tmp1, Register tmp2, Label *L_handle_null) {
-  bool on_heap  = (decorators & IN_HEAP) != 0;
-  bool on_root  = (decorators & IN_ROOT) != 0;
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool in_native = (decorators & IN_NATIVE) != 0;
   bool not_null = (decorators & OOP_NOT_NULL) != 0;
-  assert(on_heap || on_root, "where?");
+  assert(in_heap || in_native, "where?");
 
   switch (type) {
   case T_ARRAY:
   case T_OBJECT: {
-    if (UseCompressedOops && on_heap) {
+    if (UseCompressedOops && in_heap) {
       __ z_llgf(dst, addr);
       if (L_handle_null != NULL) { // Label provided.
         __ compareU32_and_branch(dst, (intptr_t)0, Assembler::bcondEqual, *L_handle_null);
@@ -67,16 +67,16 @@
 
 void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                    const Address& addr, Register val, Register tmp1, Register tmp2, Register tmp3) {
-  bool on_heap  = (decorators & IN_HEAP) != 0;
-  bool on_root  = (decorators & IN_ROOT) != 0;
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool in_native = (decorators & IN_NATIVE) != 0;
   bool not_null = (decorators & OOP_NOT_NULL) != 0;
-  assert(on_heap || on_root, "where?");
+  assert(in_heap || in_native, "where?");
   assert_different_registers(val, tmp1, tmp2);
 
   switch (type) {
   case T_ARRAY:
   case T_OBJECT: {
-    if (UseCompressedOops && on_heap) {
+    if (UseCompressedOops && in_heap) {
       if (val == noreg) {
         __ clear_mem(addr, 4);
       } else if (Universe::narrow_oop_mode() == Universe::UnscaledNarrowOop) {
--- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -4051,7 +4051,7 @@
 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
                                      const Address& addr, Register val,
                                      Register tmp1, Register tmp2, Register tmp3) {
-  assert((decorators & ~(AS_RAW | IN_HEAP | IN_HEAP_ARRAY | IN_ROOT | OOP_NOT_NULL |
+  assert((decorators & ~(AS_RAW | IN_HEAP | IN_HEAP_ARRAY | IN_NATIVE | OOP_NOT_NULL |
                          ON_UNKNOWN_OOP_REF)) == 0, "unsupported decorator");
   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
   decorators = AccessInternal::decorator_fixup(decorators);
@@ -4070,7 +4070,7 @@
 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
                                     const Address& addr, Register dst,
                                     Register tmp1, Register tmp2, Label *is_null) {
-  assert((decorators & ~(AS_RAW | IN_HEAP | IN_HEAP_ARRAY | IN_ROOT | OOP_NOT_NULL |
+  assert((decorators & ~(AS_RAW | IN_HEAP | IN_HEAP_ARRAY | IN_NATIVE | OOP_NOT_NULL |
                          ON_PHANTOM_OOP_REF | ON_WEAK_OOP_REF)) == 0, "unsupported decorator");
   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
   decorators = AccessInternal::decorator_fixup(decorators);
--- a/src/hotspot/cpu/sparc/gc/shared/barrierSetAssembler_sparc.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/cpu/sparc/gc/shared/barrierSetAssembler_sparc.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -32,14 +32,14 @@
 
 void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                    Register val, Address dst, Register tmp) {
-  bool on_heap = (decorators & IN_HEAP) != 0;
-  bool on_root = (decorators & IN_ROOT) != 0;
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool in_native = (decorators & IN_NATIVE) != 0;
   bool oop_not_null = (decorators & OOP_NOT_NULL) != 0;
 
   switch (type) {
   case T_ARRAY:
   case T_OBJECT: {
-    if (on_heap) {
+    if (in_heap) {
       if (dst.has_disp() && !Assembler::is_simm13(dst.disp())) {
         assert(!dst.has_index(), "not supported yet");
         __ set(dst.disp(), tmp);
@@ -57,7 +57,7 @@
         __ st_ptr(val, dst);
       }
     } else {
-      assert(on_root, "why else?");
+      assert(in_native, "why else?");
       __ st_ptr(val, dst);
     }
     break;
@@ -68,14 +68,14 @@
 
 void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Address src, Register dst, Register tmp) {
-  bool on_heap = (decorators & IN_HEAP) != 0;
-  bool on_root = (decorators & IN_ROOT) != 0;
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool in_native = (decorators & IN_NATIVE) != 0;
   bool oop_not_null = (decorators & OOP_NOT_NULL) != 0;
 
   switch (type) {
   case T_ARRAY:
   case T_OBJECT: {
-    if (on_heap) {
+    if (in_heap) {
       if (src.has_disp() && !Assembler::is_simm13(src.disp())) {
         assert(!src.has_index(), "not supported yet");
         __ set(src.disp(), tmp);
@@ -92,7 +92,7 @@
         __ ld_ptr(src, dst);
       }
     } else {
-      assert(on_root, "why else?");
+      assert(in_native, "why else?");
       __ ld_ptr(src, dst);
     }
     break;
--- a/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -176,7 +176,7 @@
   delayed()->andcc(value, JNIHandles::weak_tag_mask, G0); // Test for jweak
   brx(Assembler::zero, true, Assembler::pt, not_weak);
   delayed()->nop();
-  access_load_at(T_OBJECT, IN_ROOT | ON_PHANTOM_OOP_REF,
+  access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
                  Address(value, -JNIHandles::weak_tag_value), value, tmp);
   verify_oop(value);
   br (Assembler::always, true, Assembler::pt, done);
--- a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -31,15 +31,15 @@
 
 void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Register dst, Address src, Register tmp1, Register tmp_thread) {
-  bool on_heap = (decorators & IN_HEAP) != 0;
-  bool on_root = (decorators & IN_ROOT) != 0;
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool in_native = (decorators & IN_NATIVE) != 0;
   bool oop_not_null = (decorators & OOP_NOT_NULL) != 0;
   bool atomic = (decorators & MO_RELAXED) != 0;
 
   switch (type) {
   case T_OBJECT:
   case T_ARRAY: {
-    if (on_heap) {
+    if (in_heap) {
 #ifdef _LP64
       if (UseCompressedOops) {
         __ movl(dst, src);
@@ -54,7 +54,7 @@
         __ movptr(dst, src);
       }
     } else {
-      assert(on_root, "why else?");
+      assert(in_native, "why else?");
       __ movptr(dst, src);
     }
     break;
@@ -96,15 +96,15 @@
 
 void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                    Address dst, Register val, Register tmp1, Register tmp2) {
-  bool on_heap = (decorators & IN_HEAP) != 0;
-  bool on_root = (decorators & IN_ROOT) != 0;
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool in_native = (decorators & IN_NATIVE) != 0;
   bool oop_not_null = (decorators & OOP_NOT_NULL) != 0;
   bool atomic = (decorators & MO_RELAXED) != 0;
 
   switch (type) {
   case T_OBJECT:
   case T_ARRAY: {
-    if (on_heap) {
+    if (in_heap) {
       if (val == noreg) {
         assert(!oop_not_null, "inconsistent access");
 #ifdef _LP64
@@ -133,7 +133,7 @@
         }
       }
     } else {
-      assert(on_root, "why else?");
+      assert(in_native, "why else?");
       assert(val != noreg, "not supported");
       __ movptr(dst, val);
     }
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -5266,7 +5266,7 @@
   testptr(value, JNIHandles::weak_tag_mask); // Test for jweak tag.
   jcc(Assembler::zero, not_weak);
   // Resolve jweak.
-  access_load_at(T_OBJECT, IN_ROOT | ON_PHANTOM_OOP_REF,
+  access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
                  value, Address(value, -JNIHandles::weak_tag_value), tmp, thread);
   verify_oop(value);
   jmp(done);
--- a/src/hotspot/cpu/x86/templateTable_x86.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/cpu/x86/templateTable_x86.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -4013,11 +4013,7 @@
 #endif // _LP64
 
   if (UseTLAB) {
-    __ movptr(rax, Address(thread, in_bytes(JavaThread::tlab_top_offset())));
-    __ lea(rbx, Address(rax, rdx, Address::times_1));
-    __ cmpptr(rbx, Address(thread, in_bytes(JavaThread::tlab_end_offset())));
-    __ jcc(Assembler::above, slow_case);
-    __ movptr(Address(thread, in_bytes(JavaThread::tlab_top_offset())), rbx);
+    __ tlab_allocate(rax, rdx, 0, rcx, rbx, slow_case);
     if (ZeroTLAB) {
       // the fields have been already cleared
       __ jmp(initialize_header);
@@ -4030,28 +4026,7 @@
     //
     // rdx: instance size in bytes
     if (allow_shared_alloc) {
-      ExternalAddress heap_top((address)Universe::heap()->top_addr());
-      ExternalAddress heap_end((address)Universe::heap()->end_addr());
-
-      Label retry;
-      __ bind(retry);
-      __ movptr(rax, heap_top);
-      __ lea(rbx, Address(rax, rdx, Address::times_1));
-      __ cmpptr(rbx, heap_end);
-      __ jcc(Assembler::above, slow_case);
-
-      // Compare rax, with the top addr, and if still equal, store the new
-      // top addr in rbx, at the address of the top addr pointer. Sets ZF if was
-      // equal, and clears it otherwise. Use lock prefix for atomicity on MPs.
-      //
-      // rax,: object begin
-      // rbx,: object end
-      // rdx: instance size in bytes
-      __ locked_cmpxchgptr(rbx, heap_top);
-
-      // if someone beat us on the allocation, try again, otherwise continue
-      __ jcc(Assembler::notEqual, retry);
-
+      __ eden_allocate(rax, rdx, 0, rbx, slow_case);
       __ incr_allocated_bytes(thread, rdx, 0);
     }
   }
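Note: here the open-coded TLAB and eden fast paths are folded into the tlab_allocate and eden_allocate helpers. The removed assembly implemented classic bump-pointer allocation; a minimal C++ sketch of that fast path (illustrative types, not HotSpot's):

#include <cstddef>

// Bump-pointer allocation: hand out memory by advancing `top`,
// taking the slow path when the object would cross `end`.
struct Tlab { char* top; char* end; };

static void* tlab_allocate(Tlab& t, size_t size_in_bytes) {
  char* obj     = t.top;               // movptr rax, tlab_top
  char* new_top = obj + size_in_bytes; // lea rbx, [rax + rdx]
  if (new_top > t.end) {               // cmpptr rbx, tlab_end; ja slow_case
    return 0;                          // caller falls back to the slow path
  }
  t.top = new_top;                     // movptr tlab_top, rbx
  return obj;
}

int main() {
  char buf[64];
  Tlab t = { buf, buf + sizeof buf };
  void* a = tlab_allocate(t, 48);      // fits in the TLAB
  void* b = tlab_allocate(t, 48);      // would overflow: slow path taken
  return (a != 0 && b == 0) ? 0 : 1;
}

The eden variant performs the same bump but, as the removed code shows, publishes the new top with a locked cmpxchg and retries when another thread wins the race.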
--- a/src/hotspot/os_cpu/linux_arm/vm_version_linux_arm_32.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/os_cpu/linux_arm/vm_version_linux_arm_32.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -46,8 +46,3 @@
   }
 }
 
-// Make sure that _arm_arch is initialized so that any calls to OrderAccess will
-// use proper dmb instruction
-void VM_Version::early_initialize() {
-  get_os_cpu_info();
-}
--- a/src/hotspot/share/classfile/classFileParser.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/classfile/classFileParser.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -955,7 +955,9 @@
 
       if (!interf->is_interface()) {
         THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(),
-                   "Implementing class");
+                  err_msg("Class %s can not implement %s, because it is not an interface",
+                          _class_name->as_klass_external_name(),
+                          interf->class_loader_and_module_name()));
       }
 
       if (InstanceKlass::cast(interf)->has_nonstatic_concrete_methods()) {
@@ -4509,7 +4511,7 @@
           vmSymbols::java_lang_IllegalAccessError(),
           "class %s loaded by %s cannot access jdk/internal/reflect superclass %s",
           this_klass->external_name(),
-          this_klass->class_loader_data()->loader_name(),
+          this_klass->class_loader_data()->loader_name_and_id(),
           super->external_name());
         return;
       }
--- a/src/hotspot/share/classfile/classListParser.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/classfile/classListParser.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -274,8 +274,8 @@
 // This function is used for loading classes for customized class loaders
 // during archive dumping.
 InstanceKlass* ClassListParser::load_class_from_source(Symbol* class_name, TRAPS) {
-#if !(defined(_LP64) && (defined(LINUX)|| defined(SOLARIS) || defined(AIX)))
-  // The only supported platforms are: (1) Linux/64-bit; (2) Solaris/64-bit; (3) AIX/64-bit
+#if !(defined(_LP64) && (defined(LINUX)|| defined(SOLARIS)))
+  // The only supported platforms are: (1) Linux/64-bit and (2) Solaris/64-bit
   //
   // This #if condition should be in sync with the areCustomLoadersSupportedForCDS
   // method in test/lib/jdk/test/lib/Platform.java.
--- a/src/hotspot/share/classfile/classLoaderData.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/classfile/classLoaderData.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -105,22 +105,41 @@
   }
 }
 
-// JFR and logging support so that the name and klass are available after the
-// class_loader oop is no longer alive, during unloading.
+// Obtain and set the class loader's name within the ClassLoaderData so
+// it will be available for error messages, logging, JFR, etc.  The name
+// and klass are available after the class_loader oop is no longer alive,
+// during unloading.
 void ClassLoaderData::initialize_name_and_klass(Handle class_loader) {
+  Thread* THREAD = Thread::current();
+  ResourceMark rm(THREAD);
   _class_loader_klass = class_loader->klass();
-  oop class_loader_name = java_lang_ClassLoader::name(class_loader());
-  if (class_loader_name != NULL) {
-    Thread* THREAD = Thread::current();
-    ResourceMark rm(THREAD);
-    const char* class_loader_instance_name =
-      java_lang_String::as_utf8_string(class_loader_name);
 
-    if (class_loader_instance_name != NULL && class_loader_instance_name[0] != '\0') {
+  // Obtain the class loader's name.  If the class loader's name was not
+  // explicitly set during construction, the CLD's _name field will be null.
+  oop cl_name = java_lang_ClassLoader::name(class_loader());
+  if (cl_name != NULL) {
+    const char* cl_instance_name = java_lang_String::as_utf8_string(cl_name);
+
+    if (cl_instance_name != NULL && cl_instance_name[0] != '\0') {
       // Can't throw InternalError and SymbolTable doesn't throw OOM anymore.
-      _class_loader_name = SymbolTable::new_symbol(class_loader_instance_name, CATCH);
+      _name = SymbolTable::new_symbol(cl_instance_name, CATCH);
     }
   }
+
+  // Obtain the class loader's name and identity hash.  If the class loader's
+  // name was not explicitly set during construction, the class loader's name and id
+  // will be set to the qualified class name of the class loader along with its
+  // identity hash.
+  // If for some reason the ClassLoader's constructor has not been run, instead of
+  // leaving the _name_and_id field null, fall back to the external qualified class
+  // name.  Thus CLD's _name_and_id field should never have a null value.
+  oop cl_name_and_id = java_lang_ClassLoader::nameAndId(class_loader());
+  const char* cl_instance_name_and_id =
+                  (cl_name_and_id == NULL) ? _class_loader_klass->external_name() :
+                                             java_lang_String::as_utf8_string(cl_name_and_id);
+  assert(cl_instance_name_and_id != NULL && cl_instance_name_and_id[0] != '\0', "class loader has no name and id");
+  // Can't throw InternalError and SymbolTable doesn't throw OOM anymore.
+  _name_and_id = SymbolTable::new_symbol(cl_instance_name_and_id, CATCH);
 }
 
 ClassLoaderData::ClassLoaderData(Handle h_class_loader, bool is_anonymous) :
@@ -134,7 +153,7 @@
   _claimed(0), _modified_oops(true), _accumulated_modified_oops(false),
   _jmethod_ids(NULL), _handles(), _deallocate_list(NULL),
   _next(NULL),
-  _class_loader_klass(NULL), _class_loader_name(NULL),
+  _class_loader_klass(NULL), _name(NULL), _name_and_id(NULL),
   _metaspace_lock(new Mutex(Monitor::leaf+1, "Metaspace allocation lock", true,
                             Monitor::_safepoint_check_never)) {
 
@@ -815,7 +834,7 @@
     assert(_handles.owner_of(ptr), "Got unexpected handle " PTR_FORMAT, p2i(ptr));
     // This root is not walked in safepoints, and hence requires an appropriate
     // decorator that e.g. maintains the SATB invariant in SATB collectors.
-    RootAccess<IN_CONCURRENT_ROOT>::oop_store(ptr, oop(NULL));
+    NativeAccess<IN_CONCURRENT_ROOT>::oop_store(ptr, oop(NULL));
   }
 }
 
@@ -911,29 +930,40 @@
   return ClassLoaderDataGraph::add(loader, true);
 }
 
+// Caller needs ResourceMark
+// If the class loader's _name has not been explicitly set, the class loader's
+// qualified class name is returned.
 const char* ClassLoaderData::loader_name() const {
-  if (is_unloading()) {
-    if (_class_loader_klass == NULL) {
-      return "<bootloader>";
-    } else if (_class_loader_name != NULL) {
-      return _class_loader_name->as_C_string();
-    } else {
-      return _class_loader_klass->name()->as_C_string();
-    }
+   if (_class_loader_klass == NULL) {
+     return BOOTSTRAP_LOADER_NAME;
+   } else if (_name != NULL) {
+     return _name->as_C_string();
+   } else {
+     return _class_loader_klass->external_name();
+   }
+}
+
+// Caller needs ResourceMark
+// Format of the _name_and_id is as follows:
+//   If the defining loader has a name explicitly set then '<loader-name>' @<id>
+//   If the defining loader has no name then <qualified-class-name> @<id>
+//   If built-in loader, then omit '@<id>' as there is only one instance.
+const char* ClassLoaderData::loader_name_and_id() const {
+  if (_class_loader_klass == NULL) {
+    return "'" BOOTSTRAP_LOADER_NAME "'";
   } else {
-    // Handles null class loader
-    return SystemDictionary::loader_name(class_loader());
+    assert(_name_and_id != NULL, "encountered a class loader null name and id");
+    return _name_and_id->as_C_string();
   }
 }
 
-
 void ClassLoaderData::print_value_on(outputStream* out) const {
   if (!is_unloading() && class_loader() != NULL) {
     out->print("loader data: " INTPTR_FORMAT " for instance ", p2i(this));
-    class_loader()->print_value_on(out);  // includes loader_name() and address of class loader instance
+    class_loader()->print_value_on(out);  // includes loader_name_and_id() and address of class loader instance
   } else {
-    // loader data: 0xsomeaddr of <bootloader>
-    out->print("loader data: " INTPTR_FORMAT " of %s", p2i(this), loader_name());
+    // loader data: 0xsomeaddr of 'bootstrap'
+    out->print("loader data: " INTPTR_FORMAT " of %s", p2i(this), loader_name_and_id());
   }
   if (is_anonymous()) {
     out->print(" anonymous");
@@ -943,7 +973,7 @@
 #ifndef PRODUCT
 void ClassLoaderData::print_on(outputStream* out) const {
   out->print("ClassLoaderData CLD: " PTR_FORMAT ", loader: " PTR_FORMAT ", loader_klass: %s {",
-              p2i(this), p2i(_class_loader.ptr_raw()), loader_name());
+              p2i(this), p2i(_class_loader.ptr_raw()), loader_name_and_id());
   if (is_anonymous()) out->print(" anonymous");
   if (claimed()) out->print(" claimed");
   if (is_unloading()) out->print(" unloading");
@@ -1237,7 +1267,7 @@
   FOR_ALL_DICTIONARY(cld) {
     ResourceMark rm;
     stringStream tempst;
-    tempst.print("System Dictionary for %s", cld->loader_name());
+    tempst.print("System Dictionary for %s class loader", cld->loader_name_and_id());
     cld->dictionary()->print_table_statistics(st, tempst.as_string());
   }
 }
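Note: the new _name_and_id field memoizes the string format documented in the comments above. A small sketch of how the three cases render (illustrative code, not the VM's; identity hashes shown in hex, matching HotSpot's output style):

#include <cstdio>
#include <string>

// Renders the documented nameAndId format:
//   explicitly named loader:  '<loader-name>' @<id>
//   unnamed loader:           <qualified-class-name> @<id>
//   built-in loader:          no @<id>, as there is only one instance
static std::string name_and_id(const char* explicit_name,
                               const char* qualified_class_name,
                               unsigned identity_hash, bool builtin) {
  std::string s = explicit_name
      ? "'" + std::string(explicit_name) + "'"
      : std::string(qualified_class_name);
  if (!builtin) {
    char buf[16];
    snprintf(buf, sizeof buf, " @%x", identity_hash);
    s += buf;
  }
  return s;
}

int main() {
  // 'myloader' @4711
  printf("%s\n", name_and_id("myloader", "p.MyLoader", 0x4711, false).c_str());
  // java.net.URLClassLoader @4711
  printf("%s\n", name_and_id(0, "java.net.URLClassLoader", 0x4711, false).c_str());
  // 'app'
  printf("%s\n", name_and_id("app", "jdk.internal.loader.ClassLoaders$AppClassLoader", 0, true).c_str());
  return 0;
}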
--- a/src/hotspot/share/classfile/classLoaderData.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/classfile/classLoaderData.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -37,6 +37,9 @@
 #include "jfr/support/jfrTraceIdExtension.hpp"
 #endif
 
+// external name (synthetic) for the primordial "bootstrap" class loader instance
+#define BOOTSTRAP_LOADER_NAME "bootstrap"
+#define BOOTSTRAP_LOADER_NAME_LEN 9
 
 //
 // A class loader represents a linkset. Conceptually, a linkset identifies
@@ -258,9 +261,9 @@
   // Support for walking class loader data objects
   ClassLoaderData* _next; /// Next loader_datas created
 
-  // JFR support
   Klass*  _class_loader_klass;
-  Symbol* _class_loader_name;
+  Symbol* _name;
+  Symbol* _name_and_id;
   JFR_ONLY(DEFINE_TRACE_ID_FIELD;)
 
   void set_next(ClassLoaderData* next) { _next = next; }
@@ -362,8 +365,6 @@
 
   void initialize_holder(Handle holder);
 
-  inline unsigned int identity_hash() const { return (unsigned int)(((intptr_t)this) >> 3); }
-
   void oops_do(OopClosure* f, bool must_claim, bool clear_modified_oops = false);
 
   void classes_do(KlassClosure* klass_closure);
@@ -377,7 +378,6 @@
   void print_value()                               { print_value_on(tty); }
   void print_value_on(outputStream* out) const;
   void verify();
-  const char* loader_name() const;
 
   OopHandle add_handle(Handle h);
   void remove_handle(OopHandle h);
@@ -400,15 +400,20 @@
   static ClassLoaderData* class_loader_data_or_null(oop loader);
   static ClassLoaderData* anonymous_class_loader_data(Handle loader);
 
-  // Returns Klass* of associated class loader, or NULL if associated loader is <bootstrap>.
+  // Returns Klass* of associated class loader, or NULL if associated loader is 'bootstrap'.
   // Also works if unloading.
   Klass* class_loader_klass() const { return _class_loader_klass; }
 
-  // Returns Name of associated class loader.
-  // Returns NULL if associated class loader is <bootstrap> or if no name has been set for
-  //   this loader.
-  // Also works if unloading.
-  Symbol* class_loader_name() const { return _class_loader_name; }
+  // Returns the class loader's explicit name as specified during
+  // construction or the class loader's qualified class name.
+  // Works during unloading.
+  const char* loader_name() const;
+  // Returns the explicitly specified class loader name or NULL.
+  Symbol* name() const { return _name; }
+
+  // Obtain the class loader's _name_and_id, works during unloading.
+  const char* loader_name_and_id() const;
+  Symbol* name_and_id() const { return _name_and_id; }
 
   JFR_ONLY(DEFINE_TRACE_ID_METHODS;)
 };
--- a/src/hotspot/share/classfile/classLoaderHierarchyDCmd.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/classfile/classLoaderHierarchyDCmd.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -157,7 +157,7 @@
 
     // Retrieve information.
     const Klass* const loader_klass = _cld->class_loader_klass();
-    const Symbol* const loader_name = _cld->class_loader_name();
+    const Symbol* const loader_name = _cld->name();
 
     branchtracker.print(st);
 
--- a/src/hotspot/share/classfile/dictionary.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/classfile/dictionary.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -641,6 +641,6 @@
 
   ResourceMark rm;
   stringStream tempst;
-  tempst.print("System Dictionary for %s", cld->loader_name());
+  tempst.print("System Dictionary for %s class loader", cld->loader_name_and_id());
   verify_table<DictionaryEntry>(tempst.as_string());
 }
--- a/src/hotspot/share/classfile/javaClasses.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/classfile/javaClasses.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -3993,6 +3993,7 @@
 int  java_lang_ClassLoader::_loader_data_offset = -1;
 int  java_lang_ClassLoader::parallelCapable_offset = -1;
 int  java_lang_ClassLoader::name_offset = -1;
+int  java_lang_ClassLoader::nameAndId_offset = -1;
 int  java_lang_ClassLoader::unnamedModule_offset = -1;
 
 ClassLoaderData* java_lang_ClassLoader::loader_data(oop loader) {
@@ -4008,6 +4009,7 @@
 #define CLASSLOADER_FIELDS_DO(macro) \
   macro(parallelCapable_offset, k1, "parallelLockMap",      concurrenthashmap_signature, false); \
   macro(name_offset,            k1, vmSymbols::name_name(), string_signature, false); \
+  macro(nameAndId_offset,       k1, "nameAndId",            string_signature, false); \
   macro(unnamedModule_offset,   k1, "unnamedModule",        module_signature, false); \
   macro(parent_offset,          k1, "parent",               classloader_signature, false)
 
@@ -4033,11 +4035,24 @@
   return loader->obj_field(parent_offset);
 }
 
+// Returns the name field of this class loader.  If the name field has not
+// been set, null will be returned.
 oop java_lang_ClassLoader::name(oop loader) {
   assert(is_instance(loader), "loader must be oop");
   return loader->obj_field(name_offset);
 }
 
+// Returns the nameAndId field of this class loader. The format is
+// as follows:
+//   If the defining loader has a name explicitly set then '<loader-name>' @<id>
+//   If the defining loader has no name then <qualified-class-name> @<id>
+//   If built-in loader, then omit '@<id>' as there is only one instance.
+// Use ClassLoaderData::loader_name_and_id() to obtain this String as a char*.
+oop java_lang_ClassLoader::nameAndId(oop loader) {
+  assert(is_instance(loader), "loader must be oop");
+  return loader->obj_field(nameAndId_offset);
+}
+
 bool java_lang_ClassLoader::isAncestor(oop loader, oop cl) {
   assert(is_instance(loader), "loader must be oop");
   assert(cl == NULL || is_instance(cl), "cl argument must be oop");
@@ -4111,39 +4126,28 @@
 
 // Caller needs ResourceMark.
 const char* java_lang_ClassLoader::describe_external(const oop loader) {
+  ClassLoaderData* cld = ClassLoaderData::class_loader_data(loader);
+  const char* name = cld->loader_name_and_id();
+
+  // bootstrap loader
   if (loader == NULL) {
-    return "<bootstrap>";
+    return name;
   }
 
   bool well_known_loader = SystemDictionary::is_system_class_loader(loader) ||
                            SystemDictionary::is_platform_class_loader(loader);
 
-  const char* name = NULL;
-  oop nameOop = java_lang_ClassLoader::name(loader);
-  if (nameOop != NULL) {
-    name = java_lang_String::as_utf8_string(nameOop);
-  }
-  if (name == NULL) {
-    // Use placeholder for missing name to have fixed message format.
-    name = "<unnamed>";
-  }
-
   stringStream ss;
-  ss.print("\"%s\" (instance of %s", name, loader->klass()->external_name());
+  ss.print("%s (instance of %s", name, loader->klass()->external_name());
   if (!well_known_loader) {
-    const char* parentName = NULL;
     oop pl = java_lang_ClassLoader::parent(loader);
+    ClassLoaderData* pl_cld = ClassLoaderData::class_loader_data(pl);
+    const char* parentName = pl_cld->loader_name_and_id();
     if (pl != NULL) {
-      oop parentNameOop = java_lang_ClassLoader::name(pl);
-      if (parentNameOop != NULL) {
-        parentName = java_lang_String::as_utf8_string(parentNameOop);
-      }
-      if (parentName == NULL) {
-        parentName = "<unnamed>";
-      }
-      ss.print(", child of \"%s\" %s", parentName, pl->klass()->external_name());
+      ss.print(", child of %s %s", parentName, pl->klass()->external_name());
     } else {
-      ss.print(", child of <bootstrap>");
+      // bootstrap loader
+      ss.print(", child of %s", parentName);
     }
   }
   ss.print(")");
--- a/src/hotspot/share/classfile/javaClasses.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/classfile/javaClasses.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -1272,6 +1272,7 @@
   static int parent_offset;
   static int parallelCapable_offset;
   static int name_offset;
+  static int nameAndId_offset;
   static int unnamedModule_offset;
 
  public:
@@ -1283,6 +1284,7 @@
 
   static oop parent(oop loader);
   static oop name(oop loader);
+  static oop nameAndId(oop loader);
   static bool isAncestor(oop loader, oop cl);
 
   // Support for parallelCapable field
--- a/src/hotspot/share/classfile/loaderConstraints.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/classfile/loaderConstraints.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -108,7 +108,7 @@
                      probe->name()->as_C_string());
           for (int i = 0; i < probe->num_loaders(); i++) {
             lt.print("    [%d]: %s", i,
-                          probe->loader_data(i)->loader_name());
+                          probe->loader_data(i)->loader_name_and_id());
           }
         }
       }
@@ -119,7 +119,7 @@
           if (lt.is_enabled()) {
             ResourceMark rm;
             lt.print("purging loader %s from constraint for name %s",
-                     probe->loader_data(n)->loader_name(),
+                     probe->loader_data(n)->loader_name_and_id(),
                      probe->name()->as_C_string()
                      );
           }
@@ -135,7 +135,7 @@
             lt.print("new loader list:");
             for (int i = 0; i < probe->num_loaders(); i++) {
               lt.print("    [%d]: %s", i,
-                            probe->loader_data(i)->loader_name());
+                            probe->loader_data(i)->loader_name_and_id());
             }
           }
 
@@ -177,8 +177,8 @@
     lt.print("Failed to add constraint for name: %s, loader[0]: %s,"
                 " loader[1]: %s, Reason: %s",
                   class_name->as_C_string(),
-                  SystemDictionary::loader_name(class_loader1()),
-                  SystemDictionary::loader_name(class_loader2()),
+                  ClassLoaderData::class_loader_data(class_loader1())->loader_name_and_id(),
+                  ClassLoaderData::class_loader_data(class_loader2())->loader_name_and_id(),
                   reason);
   }
 }
@@ -247,8 +247,8 @@
       lt.print("adding new constraint for name: %s, loader[0]: %s,"
                     " loader[1]: %s",
                     class_name->as_C_string(),
-                    SystemDictionary::loader_name(class_loader1()),
-                    SystemDictionary::loader_name(class_loader2())
+                    ClassLoaderData::class_loader_data(class_loader1())->loader_name_and_id(),
+                    ClassLoaderData::class_loader_data(class_loader2())->loader_name_and_id()
                     );
     }
   } else if (*pp1 == *pp2) {
@@ -260,7 +260,7 @@
         lt.print("setting class object in existing constraint for"
                       " name: %s and loader %s",
                       class_name->as_C_string(),
-                      SystemDictionary::loader_name(class_loader1())
+                      ClassLoaderData::class_loader_data(class_loader1())->loader_name_and_id()
                       );
       }
     } else {
@@ -291,7 +291,7 @@
       lt.print("constraint check failed for name %s, loader %s: "
                  "the presented class object differs from that stored",
                  name->as_C_string(),
-                 SystemDictionary::loader_name(loader()));
+                 ClassLoaderData::class_loader_data(loader())->loader_name_and_id());
     }
     return false;
   } else {
@@ -302,7 +302,7 @@
         lt.print("updating constraint for name %s, loader %s, "
                    "by setting class object",
                    name->as_C_string(),
-                   SystemDictionary::loader_name(loader()));
+                   ClassLoaderData::class_loader_data(loader())->loader_name_and_id());
       }
     }
     return true;
@@ -353,7 +353,7 @@
     lt.print("extending constraint for name %s by adding loader[%d]: %s %s",
                p->name()->as_C_string(),
                num,
-               SystemDictionary::loader_name(loader()),
+               ClassLoaderData::class_loader_data(loader())->loader_name_and_id(),
                (p->klass() == NULL ? " and setting class object" : "")
                );
   }
@@ -396,7 +396,7 @@
 
     for (int i = 0; i < p1->num_loaders(); i++) {
       lt.print("    [%d]: %s", i,
-                    p1->loader_data(i)->loader_name());
+                    p1->loader_data(i)->loader_name_and_id());
     }
     if (p1->klass() == NULL) {
       lt.print("... and setting class object");
--- a/src/hotspot/share/classfile/moduleEntry.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/classfile/moduleEntry.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -260,7 +260,7 @@
   ResourceMark rm;
   guarantee(java_lang_Module::is_instance(module),
             "The unnamed module for ClassLoader %s, is null or not an instance of java.lang.Module. The class loader has not been initialized correctly.",
-            cld->loader_name());
+            cld->loader_name_and_id());
 
   ModuleEntry* unnamed_module = new_unnamed_module_entry(Handle(Thread::current(), module), cld);
 
@@ -522,7 +522,7 @@
                p2i(this),
                name() == NULL ? UNNAMED_MODULE : name()->as_C_string(),
                p2i(module()),
-               loader_data()->loader_name(),
+               loader_data()->loader_name_and_id(),
                version() != NULL ? version()->as_C_string() : "NULL",
                location() != NULL ? location()->as_C_string() : "NULL",
                BOOL_TO_STR(!can_read_all_unnamed()), p2i(next()));
--- a/src/hotspot/share/classfile/modules.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/classfile/modules.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -312,6 +312,10 @@
               "Class loader is an invalid delegating class loader");
   }
   Handle h_loader = Handle(THREAD, loader);
+  // define_module can be called during start-up, before the class loader's ClassLoaderData
+  // has been created.  SystemDictionary::register_loader ensures creation, if needed.
+  ClassLoaderData* loader_data = SystemDictionary::register_loader(h_loader);
+  assert(loader_data != NULL, "class loader data shouldn't be null");
 
   // Check that the list of packages has no duplicates and that the
   // packages are syntactically ok.
@@ -329,7 +333,7 @@
         !SystemDictionary::is_platform_class_loader(h_loader()) &&
         (strncmp(package_name, JAVAPKG, JAVAPKG_LEN) == 0 &&
           (package_name[JAVAPKG_LEN] == '/' || package_name[JAVAPKG_LEN] == '\0'))) {
-      const char* class_loader_name = SystemDictionary::loader_name(h_loader());
+      const char* class_loader_name = loader_data->loader_name_and_id();
       size_t pkg_len = strlen(package_name);
       char* pkg_name = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, pkg_len);
       strncpy(pkg_name, package_name, pkg_len);
@@ -373,9 +377,6 @@
     }
   }
 
-  ClassLoaderData* loader_data = ClassLoaderData::class_loader_data_or_null(h_loader());
-  assert(loader_data != NULL, "class loader data shouldn't be null");
-
   PackageEntryTable* package_table = NULL;
   PackageEntry* existing_pkg = NULL;
   {
--- a/src/hotspot/share/classfile/stringTable.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/classfile/stringTable.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -456,7 +456,7 @@
   log_trace(stringtable)("Started to grow");
   {
     TraceTime timer("Grow", TRACETIME_LOG(Debug, stringtable, perf));
-    while (gt.doTask(jt)) {
+    while (gt.do_task(jt)) {
       gt.pause(jt);
       {
         ThreadBlockInVM tbivm(jt);
@@ -502,7 +502,7 @@
   bool interrupted = false;
   {
     TraceTime timer("Clean", TRACETIME_LOG(Debug, stringtable, perf));
-    while(bdt.doTask(jt, stdc, stdd)) {
+    while(bdt.do_task(jt, stdc, stdd)) {
       bdt.pause(jt);
       {
         ThreadBlockInVM tbivm(jt);
--- a/src/hotspot/share/classfile/systemDictionary.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/classfile/systemDictionary.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -3012,18 +3012,6 @@
   NOT_PRODUCT(SystemDictionary::verify());
 }
 
-// caller needs ResourceMark
-const char* SystemDictionary::loader_name(const oop loader) {
-  return ((loader) == NULL ? "<bootloader>" :
-          InstanceKlass::cast((loader)->klass())->name()->as_C_string());
-}
-
-// caller needs ResourceMark
-const char* SystemDictionary::loader_name(const ClassLoaderData* loader_data) {
-  return (loader_data->class_loader() == NULL ? "<bootloader>" :
-          SystemDictionary::loader_name(loader_data->class_loader()));
-}
-
 void SystemDictionary::initialize_oop_storage() {
   _vm_weak_oop_storage =
     new OopStorage("VM Weak Oop Handles",
--- a/src/hotspot/share/classfile/systemDictionary.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/classfile/systemDictionary.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -570,10 +570,6 @@
                                                      Handle *method_type_result,
                                                      TRAPS);
 
-  // Utility for printing loader "name" as part of tracing constraints
-  static const char* loader_name(const oop loader);
-  static const char* loader_name(const ClassLoaderData* loader_data);
-
   // Record the error when the first attempt to resolve a reference from a constant
   // pool entry to a class fails.
   static void add_resolution_error(const constantPoolHandle& pool, int which, Symbol* error,
--- a/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -56,6 +56,7 @@
 #include "gc/shared/isGCActiveMark.hpp"
 #include "gc/shared/oopStorageParState.hpp"
 #include "gc/shared/referencePolicy.hpp"
+#include "gc/shared/referenceProcessorPhaseTimes.hpp"
 #include "gc/shared/space.inline.hpp"
 #include "gc/shared/strongRootsScope.hpp"
 #include "gc/shared/taskqueue.inline.hpp"
@@ -299,7 +300,8 @@
                              _cmsGen->refs_discovery_is_mt(),        // mt discovery
                              MAX2(ConcGCThreads, ParallelGCThreads), // mt discovery degree
                              _cmsGen->refs_discovery_is_atomic(),    // discovery is not atomic
-                             &_is_alive_closure);                    // closure for liveness info
+                             &_is_alive_closure,                     // closure for liveness info
+                             false);                                 // disable adjusting number of processing threads
     // Initialize the _ref_processor field of CMSGen
     _cmsGen->set_ref_processor(_ref_processor);
 
@@ -5125,16 +5127,18 @@
   log_develop_trace(gc, task)("\t(%d: stole %d oops)", i, num_steals);
 }
 
-void CMSRefProcTaskExecutor::execute(ProcessTask& task)
-{
+void CMSRefProcTaskExecutor::execute(ProcessTask& task, uint ergo_workers) {
   CMSHeap* heap = CMSHeap::heap();
   WorkGang* workers = heap->workers();
   assert(workers != NULL, "Need parallel worker threads.");
+  assert(workers->active_workers() == ergo_workers,
+         "Ergonomically chosen workers (%u) must be equal to active workers (%u)",
+         ergo_workers, workers->active_workers());
   CMSRefProcTaskProxy rp_task(task, &_collector,
                               _collector.ref_processor_span(),
                               _collector.markBitMap(),
                               workers, _collector.task_queues());
-  workers->run_task(&rp_task);
+  workers->run_task(&rp_task, workers->active_workers());
 }
 
 void CMSCollector::refProcessingWork() {
@@ -8084,6 +8088,7 @@
     case CMSCollector::InitialMarking:
       initialize(manager /* GC manager */ ,
                  cause   /* cause of the GC */,
+                 true    /* allMemoryPoolsAffected */,
                  true    /* recordGCBeginTime */,
                  true    /* recordPreGCUsage */,
                  false   /* recordPeakUsage */,
@@ -8096,6 +8101,7 @@
     case CMSCollector::FinalMarking:
       initialize(manager /* GC manager */ ,
                  cause   /* cause of the GC */,
+                 true    /* allMemoryPoolsAffected */,
                  false   /* recordGCBeginTime */,
                  false   /* recordPreGCUsage */,
                  false   /* recordPeakUsage */,
@@ -8108,6 +8114,7 @@
     case CMSCollector::Sweeping:
       initialize(manager /* GC manager */ ,
                  cause   /* cause of the GC */,
+                 true    /* allMemoryPoolsAffected */,
                  false   /* recordGCBeginTime */,
                  false   /* recordPreGCUsage */,
                  true    /* recordPeakUsage */,
--- a/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -486,7 +486,7 @@
   { }
 
   // Executes a task using worker threads.
-  virtual void execute(ProcessTask& task);
+  virtual void execute(ProcessTask& task, uint ergo_workers);
 private:
   CMSCollector& _collector;
 };
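Every AbstractRefProcTaskExecutor subclass now receives the ergonomically chosen worker count. A sketch of the contract, using a hypothetical executor (the proxy-task construction is elided):

    void SketchRefProcTaskExecutor::execute(ProcessTask& task, uint ergo_workers) {
      WorkGang* workers = _heap->workers();
      // Executors that cannot resize (CMS, ParNew, Parallel) assert strict
      // equality; G1 only requires ergo_workers <= active_workers and sizes
      // its ParallelTaskTerminator with ergo_workers.
      assert(workers->active_workers() >= ergo_workers,
             "Ergonomically chosen workers (%u) must not exceed active workers (%u)",
             ergo_workers, workers->active_workers());
      workers->run_task(&proxy, ergo_workers);  // proxy wraps 'task'; elided here
    }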
--- a/src/hotspot/share/gc/cms/parNewGeneration.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/cms/parNewGeneration.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -42,6 +42,7 @@
 #include "gc/shared/plab.inline.hpp"
 #include "gc/shared/preservedMarks.inline.hpp"
 #include "gc/shared/referencePolicy.hpp"
+#include "gc/shared/referenceProcessorPhaseTimes.hpp"
 #include "gc/shared/space.hpp"
 #include "gc/shared/spaceDecorator.hpp"
 #include "gc/shared/strongRootsScope.hpp"
@@ -792,14 +793,17 @@
              par_scan_state.evacuate_followers_closure());
 }
 
-void ParNewRefProcTaskExecutor::execute(ProcessTask& task) {
+void ParNewRefProcTaskExecutor::execute(ProcessTask& task, uint ergo_workers) {
   CMSHeap* gch = CMSHeap::heap();
   WorkGang* workers = gch->workers();
   assert(workers != NULL, "Need parallel worker threads.");
+  assert(workers->active_workers() == ergo_workers,
+         "Ergonomically chosen workers (%u) must be equal to active workers (%u)",
+         ergo_workers, workers->active_workers());
   _state_set.reset(workers->active_workers(), _young_gen.promotion_failed());
   ParNewRefProcTaskProxy rp_task(task, _young_gen, _old_gen,
                                  _young_gen.reserved().end(), _state_set);
-  workers->run_task(&rp_task);
+  workers->run_task(&rp_task, workers->active_workers());
   _state_set.reset(0 /* bad value in debug if not reset */,
                    _young_gen.promotion_failed());
 }
@@ -812,7 +816,7 @@
 
 ScanClosureWithParBarrier::
 ScanClosureWithParBarrier(ParNewGeneration* g, bool gc_barrier) :
-  ScanClosure(g, gc_barrier)
+  OopsInClassLoaderDataOrGenClosure(g), _g(g), _boundary(g->reserved().end()), _gc_barrier(gc_barrier)
 { }
 
 template <typename OopClosureType1, typename OopClosureType2>
@@ -1449,7 +1453,8 @@
                              refs_discovery_is_mt(),     // mt discovery
                              ParallelGCThreads,          // mt discovery degree
                              refs_discovery_is_atomic(), // atomic_discovery
-                             NULL);                      // is_alive_non_header
+                             NULL,                       // is_alive_non_header
+                             false);                     // disable adjusting number of processing threads
   }
 }
 
--- a/src/hotspot/share/gc/cms/parNewGeneration.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/cms/parNewGeneration.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -275,9 +275,14 @@
 
 // Closure for scanning ParNewGeneration.
 // Same as ScanClosure, except does parallel GC barrier.
-class ScanClosureWithParBarrier: public ScanClosure {
- protected:
+class ScanClosureWithParBarrier: public OopsInClassLoaderDataOrGenClosure {
+ private:
+  ParNewGeneration* _g;
+  HeapWord*         _boundary;
+  bool              _gc_barrier;
+
   template <class T> void do_oop_work(T* p);
+
  public:
   ScanClosureWithParBarrier(ParNewGeneration* g, bool gc_barrier);
   virtual void do_oop(oop* p);
@@ -298,7 +303,7 @@
   { }
 
   // Executes a task using worker threads.
-  virtual void execute(ProcessTask& task);
+  virtual void execute(ProcessTask& task, uint ergo_workers);
   // Switch to single threaded mode.
   virtual void set_single_threaded_mode();
 };
--- a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -601,10 +601,10 @@
 
   bool mismatched = (decorators & C2_MISMATCHED) != 0;
   bool unknown = (decorators & ON_UNKNOWN_OOP_REF) != 0;
-  bool on_heap = (decorators & IN_HEAP) != 0;
+  bool in_heap = (decorators & IN_HEAP) != 0;
   bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
   bool is_unordered = (decorators & MO_UNORDERED) != 0;
-  bool need_cpu_mem_bar = !is_unordered || mismatched || !on_heap;
+  bool need_cpu_mem_bar = !is_unordered || mismatched || !in_heap;
 
   Node* offset = adr->is_AddP() ? adr->in(AddPNode::Offset) : kit->top();
   Node* load = CardTableBarrierSetC2::load_at_resolved(access, val_type);
@@ -615,7 +615,7 @@
   // SATB log buffer using the pre-barrier mechanism.
   // Also we need to add memory barrier to prevent commoning reads
   // from this field across safepoint since GC can change its value.
-  bool need_read_barrier = on_heap && (on_weak ||
+  bool need_read_barrier = in_heap && (on_weak ||
                                        (unknown && offset != kit->top() && obj != kit->top()));
 
   if (!access.is_oop() || !need_read_barrier) {
--- a/src/hotspot/share/gc/g1/g1Arguments.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/g1/g1Arguments.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -122,6 +122,10 @@
     FLAG_SET_DEFAULT(GCPauseIntervalMillis, MaxGCPauseMillis + 1);
   }
 
+  if (FLAG_IS_DEFAULT(ParallelRefProcEnabled) && ParallelGCThreads > 1) {
+    FLAG_SET_DEFAULT(ParallelRefProcEnabled, true);
+  }
+
   log_trace(gc)("MarkStackSize: %uk  MarkStackSizeMax: %uk", (unsigned int) (MarkStackSize / K), (uint) (MarkStackSizeMax / K));
 
  // By default do not let the target stack size be more than 1/4 of the entries
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -1737,7 +1737,7 @@
 
   _memory_manager.add_pool(_eden_pool);
   _memory_manager.add_pool(_survivor_pool);
-
+  _memory_manager.add_pool(_old_pool, false /* always_affected_by_gc */);
 }
 
 void G1CollectedHeap::stop() {
@@ -1815,7 +1815,8 @@
                            (ParallelGCThreads > 1) || (ConcGCThreads > 1), // mt discovery
                            MAX2(ParallelGCThreads, ConcGCThreads),         // degree of mt discovery
                            false,                                          // Reference discovery is not atomic
-                           &_is_alive_closure_cm);                         // is alive closure
+                           &_is_alive_closure_cm,                          // is alive closure
+                           true);                                          // allow changes to number of processing threads
 
   // STW ref processor
   _ref_processor_stw =
@@ -1825,7 +1826,8 @@
                            (ParallelGCThreads > 1),              // mt discovery
                            ParallelGCThreads,                    // degree of mt discovery
                            true,                                 // Reference discovery is atomic
-                           &_is_alive_closure_stw);              // is alive closure
+                           &_is_alive_closure_stw,               // is alive closure
+                           true);                                // allow changes to number of processing threads
 }
 
 CollectorPolicy* G1CollectedHeap::collector_policy() const {
@@ -2831,7 +2833,8 @@
     log_info(gc,task)("Using %u workers of %u for evacuation", active_workers, workers()->total_workers());
 
     TraceCollectorStats tcs(g1mm()->incremental_collection_counters());
-    TraceMemoryManagerStats tms(&_memory_manager, gc_cause());
+    TraceMemoryManagerStats tms(&_memory_manager, gc_cause(),
+                                collector_state()->yc_type() == Mixed /* allMemoryPoolsAffected */);
 
     G1HeapTransition heap_transition(this);
     size_t heap_used_bytes_before_gc = used();
@@ -3791,25 +3794,22 @@
   G1ParScanThreadStateSet*  _pss;
   RefToScanQueueSet*        _queues;
   WorkGang*                 _workers;
-  uint                      _active_workers;
 
 public:
   G1STWRefProcTaskExecutor(G1CollectedHeap* g1h,
                            G1ParScanThreadStateSet* per_thread_states,
                            WorkGang* workers,
-                           RefToScanQueueSet *task_queues,
-                           uint n_workers) :
+                           RefToScanQueueSet *task_queues) :
     _g1h(g1h),
     _pss(per_thread_states),
     _queues(task_queues),
-    _workers(workers),
-    _active_workers(n_workers)
+    _workers(workers)
   {
-    g1h->ref_processor_stw()->set_active_mt_degree(n_workers);
+    g1h->ref_processor_stw()->set_active_mt_degree(workers->active_workers());
   }
 
  // Executes the given task using the STW worker threads.
-  virtual void execute(ProcessTask& task);
+  virtual void execute(ProcessTask& task, uint ergo_workers);
 };
 
 // Gang task for possibly parallel reference processing
@@ -3843,7 +3843,7 @@
 
     G1STWIsAliveClosure is_alive(_g1h);
 
-    G1ParScanThreadState*          pss = _pss->state_for_worker(worker_id);
+    G1ParScanThreadState* pss = _pss->state_for_worker(worker_id);
     pss->set_ref_discoverer(NULL);
 
     // Keep alive closure.
@@ -3865,13 +3865,16 @@
 // Driver routine for parallel reference processing.
 // Creates an instance of the ref processing gang
 // task and has the worker threads execute it.
-void G1STWRefProcTaskExecutor::execute(ProcessTask& proc_task) {
+void G1STWRefProcTaskExecutor::execute(ProcessTask& proc_task, uint ergo_workers) {
   assert(_workers != NULL, "Need parallel worker threads.");
 
-  ParallelTaskTerminator terminator(_active_workers, _queues);
+  assert(_workers->active_workers() >= ergo_workers,
+         "Ergonomically chosen workers (%u) should be less than or equal to active workers (%u)",
+         ergo_workers, _workers->active_workers());
+  ParallelTaskTerminator terminator(ergo_workers, _queues);
   G1STWRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _pss, _queues, &terminator);
 
-  _workers->run_task(&proc_task_proxy);
+  _workers->run_task(&proc_task_proxy, ergo_workers);
 }
 
 // End of weak reference support closures
@@ -3922,7 +3925,7 @@
            "Mismatch between the number of GC workers %u and the maximum number of Reference process queues %u",
            no_of_gc_workers,  rp->max_num_queues());
 
-    G1STWRefProcTaskExecutor par_task_executor(this, per_thread_states, workers(), _task_queues, no_of_gc_workers);
+    G1STWRefProcTaskExecutor par_task_executor(this, per_thread_states, workers(), _task_queues);
     stats = rp->process_discovered_references(&is_alive,
                                               &keep_alive,
                                               &drain_queue,
--- a/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -1518,8 +1518,7 @@
     _g1h(g1h), _cm(cm),
     _workers(workers), _active_workers(n_workers) { }
 
-  // Executes the given task using concurrent marking worker threads.
-  virtual void execute(ProcessTask& task);
+  virtual void execute(ProcessTask& task, uint ergo_workers);
 };
 
 class G1CMRefProcTaskProxy : public AbstractGangTask {
@@ -1550,9 +1549,12 @@
   }
 };
 
-void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
+void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task, uint ergo_workers) {
   assert(_workers != NULL, "Need parallel worker threads.");
   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
+  assert(_workers->active_workers() >= ergo_workers,
+         "Ergonomically chosen workers(%u) should be less than or equal to active workers(%u)",
+         ergo_workers, _workers->active_workers());
 
   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
 
@@ -1560,8 +1562,8 @@
   // proxy task execution, so that the termination protocol
   // and overflow handling in G1CMTask::do_marking_step() knows
   // how many workers to wait for.
-  _cm->set_concurrency(_active_workers);
-  _workers->run_task(&proc_task_proxy);
+  _cm->set_concurrency(ergo_workers);
+  _workers->run_task(&proc_task_proxy, ergo_workers);
 }
 
 void G1ConcurrentMark::weak_refs_work(bool clear_all_soft_refs) {
--- a/src/hotspot/share/gc/g1/g1FullGCReferenceProcessorExecutor.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/g1/g1FullGCReferenceProcessorExecutor.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -67,9 +67,13 @@
   G1CollectedHeap::heap()->workers()->run_task(task, _collector->workers());
 }
 
-void G1FullGCReferenceProcessingExecutor::execute(ProcessTask& proc_task) {
+void G1FullGCReferenceProcessingExecutor::run_task(AbstractGangTask* task, uint workers) {
+  G1CollectedHeap::heap()->workers()->run_task(task, workers);
+}
+
+void G1FullGCReferenceProcessingExecutor::execute(ProcessTask& proc_task, uint ergo_workers) {
   G1RefProcTaskProxy proc_task_proxy(proc_task, _collector);
-  run_task(&proc_task_proxy);
+  run_task(&proc_task_proxy, ergo_workers);
 }
 
 void G1FullGCReferenceProcessingExecutor::execute(STWGCTimer* timer, G1FullGCTracer* tracer) {
--- a/src/hotspot/share/gc/g1/g1FullGCReferenceProcessorExecutor.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/g1/g1FullGCReferenceProcessorExecutor.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -50,10 +50,11 @@
   void execute(STWGCTimer* timer, G1FullGCTracer* tracer);
 
  // Executes the given task using the full GC's worker threads.
-  virtual void execute(ProcessTask& task);
+  virtual void execute(ProcessTask& task, uint ergo_workers);
 
 private:
   void run_task(AbstractGangTask* task);
+  void run_task(AbstractGangTask* task, uint workers);
 
   class G1RefProcTaskProxy : public AbstractGangTask {
     typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
--- a/src/hotspot/share/gc/parallel/pcTasks.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/parallel/pcTasks.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -146,10 +146,13 @@
 // RefProcTaskExecutor
 //
 
-void RefProcTaskExecutor::execute(ProcessTask& task)
+void RefProcTaskExecutor::execute(ProcessTask& task, uint ergo_workers)
 {
   ParallelScavengeHeap* heap = ParallelScavengeHeap::heap();
   uint active_gc_threads = heap->gc_task_manager()->active_workers();
+  assert(active_gc_threads == ergo_workers,
+         "Ergonomically chosen workers (%u) must be equal to active workers (%u)",
+         ergo_workers, active_gc_threads);
   OopTaskQueueSet* qset = ParCompactionManager::stack_array();
   ParallelTaskTerminator terminator(active_gc_threads, qset);
   GCTaskQueue* q = GCTaskQueue::create();
--- a/src/hotspot/share/gc/parallel/pcTasks.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/parallel/pcTasks.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -140,7 +140,7 @@
 //
 
 class RefProcTaskExecutor: public AbstractRefProcTaskExecutor {
-  virtual void execute(ProcessTask& task);
+  virtual void execute(ProcessTask& task, uint ergo_workers);
 };
 
 
--- a/src/hotspot/share/gc/parallel/psMarkSweep.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/parallel/psMarkSweep.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -46,6 +46,7 @@
 #include "gc/shared/isGCActiveMark.hpp"
 #include "gc/shared/referencePolicy.hpp"
 #include "gc/shared/referenceProcessor.hpp"
+#include "gc/shared/referenceProcessorPhaseTimes.hpp"
 #include "gc/shared/spaceDecorator.hpp"
 #include "gc/shared/weakProcessor.hpp"
 #include "logging/log.hpp"
--- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -49,6 +49,7 @@
 #include "gc/shared/isGCActiveMark.hpp"
 #include "gc/shared/referencePolicy.hpp"
 #include "gc/shared/referenceProcessor.hpp"
+#include "gc/shared/referenceProcessorPhaseTimes.hpp"
 #include "gc/shared/spaceDecorator.hpp"
 #include "gc/shared/weakProcessor.hpp"
 #include "logging/log.hpp"
@@ -851,7 +852,8 @@
                            true,                // mt discovery
                            ParallelGCThreads,   // mt discovery degree
                            true,                // atomic_discovery
-                           &_is_alive_closure); // non-header is alive closure
+                           &_is_alive_closure,  // non-header is alive closure
+                           false);              // disable adjusting number of processing threads
   _counters = new CollectorCounters("PSParallelCompact", 1);
 
   // Initialize static fields in ParCompactionManager.
--- a/src/hotspot/share/gc/parallel/psScavenge.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/parallel/psScavenge.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -43,6 +43,7 @@
 #include "gc/shared/isGCActiveMark.hpp"
 #include "gc/shared/referencePolicy.hpp"
 #include "gc/shared/referenceProcessor.hpp"
+#include "gc/shared/referenceProcessorPhaseTimes.hpp"
 #include "gc/shared/spaceDecorator.hpp"
 #include "gc/shared/weakProcessor.hpp"
 #include "memory/resourceArea.hpp"
@@ -149,20 +150,26 @@
 }
 
 class PSRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
-  virtual void execute(ProcessTask& task);
+  virtual void execute(ProcessTask& task, uint ergo_workers);
 };
 
-void PSRefProcTaskExecutor::execute(ProcessTask& task)
+void PSRefProcTaskExecutor::execute(ProcessTask& task, uint ergo_workers)
 {
   GCTaskQueue* q = GCTaskQueue::create();
   GCTaskManager* manager = ParallelScavengeHeap::gc_task_manager();
-  for(uint i=0; i < manager->active_workers(); i++) {
+  uint active_workers = manager->active_workers();
+
+  assert(active_workers == ergo_workers,
+         "Ergonomically chosen workers (%u) must be equal to active workers (%u)",
+         ergo_workers, active_workers);
+
+  for (uint i = 0; i < active_workers; i++) {
     q->enqueue(new PSRefProcTaskProxy(task, i));
   }
-  ParallelTaskTerminator terminator(manager->active_workers(),
-                 (TaskQueueSetSuper*) PSPromotionManager::stack_array_depth());
-  if (task.marks_oops_alive() && manager->active_workers() > 1) {
-    for (uint j = 0; j < manager->active_workers(); j++) {
+  ParallelTaskTerminator terminator(active_workers,
+                                    (TaskQueueSetSuper*) PSPromotionManager::stack_array_depth());
+  if (task.marks_oops_alive() && active_workers > 1) {
+    for (uint j = 0; j < active_workers; j++) {
       q->enqueue(new StealTask(&terminator));
     }
   }
@@ -747,7 +754,8 @@
                            true,                       // mt discovery
                            ParallelGCThreads,          // mt discovery degree
                            true,                       // atomic_discovery
-                           NULL);                      // header provides liveness info
+                           NULL,                       // header provides liveness info
+                           false);                     // disable adjusting number of processing threads
 
   // Cache the cardtable
   _card_table = heap->card_table();
--- a/src/hotspot/share/gc/serial/defNewGeneration.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/serial/defNewGeneration.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -40,6 +40,7 @@
 #include "gc/shared/generationSpec.hpp"
 #include "gc/shared/preservedMarks.inline.hpp"
 #include "gc/shared/referencePolicy.hpp"
+#include "gc/shared/referenceProcessorPhaseTimes.hpp"
 #include "gc/shared/space.inline.hpp"
 #include "gc/shared/spaceDecorator.hpp"
 #include "gc/shared/strongRootsScope.hpp"
@@ -127,7 +128,7 @@
   NOT_PRODUCT(ResourceMark rm);
   log_develop_trace(gc, scavenge)("CLDScanClosure::do_cld " PTR_FORMAT ", %s, dirty: %s",
                                   p2i(cld),
-                                  cld->loader_name(),
+                                  cld->loader_name_and_id(),
                                   cld->has_modified_oops() ? "true" : "false");
 
   // If the cld has not been dirtied we know that there's
--- a/src/hotspot/share/gc/serial/genMarkSweep.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/serial/genMarkSweep.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -41,6 +41,7 @@
 #include "gc/shared/genOopClosures.inline.hpp"
 #include "gc/shared/modRefBarrierSet.hpp"
 #include "gc/shared/referencePolicy.hpp"
+#include "gc/shared/referenceProcessorPhaseTimes.hpp"
 #include "gc/shared/space.hpp"
 #include "gc/shared/strongRootsScope.hpp"
 #include "gc/shared/weakProcessor.hpp"
--- a/src/hotspot/share/gc/shared/barrierSet.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/shared/barrierSet.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -270,7 +270,7 @@
     }
 
     // Off-heap oop accesses. These accessors get resolved when
-    // IN_HEAP is not set (e.g. when using the RootAccess API), it is
+    // IN_HEAP is not set (e.g. when using the NativeAccess API), it is
     // an oop* overload, and the barrier strength is AS_NORMAL.
     template <typename T>
     static oop oop_load_not_in_heap(T* addr) {
--- a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -41,7 +41,7 @@
   bool mismatched = (_decorators & C2_MISMATCHED) != 0;
   bool is_unordered = (_decorators & MO_UNORDERED) != 0;
   bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;
-  bool on_heap = (_decorators & IN_HEAP) != 0;
+  bool in_heap = (_decorators & IN_HEAP) != 0;
 
   bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
   bool is_read = (_decorators & C2_READ_ACCESS) != 0;
@@ -58,7 +58,7 @@
     // the barriers get omitted and the unsafe reference begins to "pollute"
     // the alias analysis of the rest of the graph, either Compile::can_alias
     // or Compile::must_alias will throw a diagnostic assert.)
-    if (!on_heap || !is_unordered || (mismatched && !_addr.type()->isa_aryptr())) {
+    if (!in_heap || !is_unordered || (mismatched && !_addr.type()->isa_aryptr())) {
       return true;
     }
   }
@@ -74,8 +74,8 @@
   bool unaligned = (decorators & C2_UNALIGNED) != 0;
   bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;
 
-  bool in_root = (decorators & IN_ROOT) != 0;
-  assert(!in_root, "not supported yet");
+  bool in_native = (decorators & IN_NATIVE) != 0;
+  assert(!in_native, "not supported yet");
 
   if (access.type() == T_DOUBLE) {
     Node* new_val = kit->dstore_rounding(val.node());
@@ -103,8 +103,8 @@
   bool control_dependent = (decorators & C2_CONTROL_DEPENDENT_LOAD) != 0;
   bool pinned = (decorators & C2_PINNED_LOAD) != 0;
 
-  bool in_root = (decorators & IN_ROOT) != 0;
-  assert(!in_root, "not supported yet");
+  bool in_native = (decorators & IN_NATIVE) != 0;
+  assert(!in_native, "not supported yet");
 
   MemNode::MemOrd mo = access.mem_node_mo();
   LoadNode::ControlDependency dep = pinned ? LoadNode::Pinned : LoadNode::DependsOnlyOnTest;
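These hunks are part of the IN_ROOT -> IN_NATIVE (and RootAccess -> NativeAccess) renaming; the decorator tests themselves are unchanged. For clarity, decorators form a bitmask, so a minimal membership test looks like:

    const DecoratorSet decorators = IN_NATIVE | MO_UNORDERED;
    const bool in_native = (decorators & IN_NATIVE) != 0;  // true: off-heap access
    const bool in_heap   = (decorators & IN_HEAP)   != 0;  // false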
--- a/src/hotspot/share/gc/shared/c2/modRefBarrierSetC2.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/shared/c2/modRefBarrierSetC2.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -39,10 +39,10 @@
 
   bool on_array = (decorators & IN_HEAP_ARRAY) != 0;
   bool anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
-  bool on_heap = (decorators & IN_HEAP) != 0;
+  bool in_heap = (decorators & IN_HEAP) != 0;
   bool use_precise = on_array || anonymous;
 
-  if (!access.is_oop() || (!on_heap && !anonymous)) {
+  if (!access.is_oop() || (!in_heap && !anonymous)) {
     return BarrierSetC2::store_at_resolved(access, val);
   }
 
--- a/src/hotspot/share/gc/shared/gcConfig.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/shared/gcConfig.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -76,12 +76,30 @@
          ZGC_ONLY_ARG(SupportedGC(UseZGC,             CollectedHeap::Z,        zArguments,        "z gc"))
 };
 
-#define FOR_EACH_SUPPORTED_GC(var) \
+#define FOR_EACH_SUPPORTED_GC(var)                                          \
   for (const SupportedGC* var = &SupportedGCs[0]; var < &SupportedGCs[ARRAY_SIZE(SupportedGCs)]; var++)
 
+#define FAIL_IF_SELECTED(option, enabled)                                   \
+  if (option == enabled && FLAG_IS_CMDLINE(option)) {                       \
+    vm_exit_during_initialization(enabled ?                                 \
+                                  "Option -XX:+" #option " not supported" : \
+                                  "Option -XX:-" #option " not supported"); \
+  }
+
 GCArguments* GCConfig::_arguments = NULL;
 bool GCConfig::_gc_selected_ergonomically = false;
 
+void GCConfig::fail_if_unsupported_gc_is_selected() {
+  NOT_CMSGC(     FAIL_IF_SELECTED(UseConcMarkSweepGC, true));
+  NOT_EPSILONGC( FAIL_IF_SELECTED(UseEpsilonGC,       true));
+  NOT_G1GC(      FAIL_IF_SELECTED(UseG1GC,            true));
+  NOT_PARALLELGC(FAIL_IF_SELECTED(UseParallelGC,      true));
+  NOT_PARALLELGC(FAIL_IF_SELECTED(UseParallelOldGC,   true));
+  NOT_SERIALGC(  FAIL_IF_SELECTED(UseSerialGC,        true));
+  NOT_SERIALGC(  FAIL_IF_SELECTED(UseParallelOldGC,   false));
+  NOT_ZGC(       FAIL_IF_SELECTED(UseZGC,             true));
+}
+
 void GCConfig::select_gc_ergonomically() {
   if (os::is_server_class_machine()) {
 #if INCLUDE_G1GC
@@ -96,14 +114,6 @@
     FLAG_SET_ERGO_IF_DEFAULT(bool, UseSerialGC, true);
 #endif
   }
-
-  NOT_CMSGC(     UNSUPPORTED_OPTION(UseConcMarkSweepGC));
-  NOT_EPSILONGC( UNSUPPORTED_OPTION(UseEpsilonGC);)
-  NOT_G1GC(      UNSUPPORTED_OPTION(UseG1GC);)
-  NOT_PARALLELGC(UNSUPPORTED_OPTION(UseParallelGC);)
-  NOT_PARALLELGC(UNSUPPORTED_OPTION(UseParallelOldGC));
-  NOT_SERIALGC(  UNSUPPORTED_OPTION(UseSerialGC);)
-  NOT_ZGC(       UNSUPPORTED_OPTION(UseZGC);)
 }
 
 bool GCConfig::is_no_gc_selected() {
@@ -135,6 +145,9 @@
 }
 
 GCArguments* GCConfig::select_gc() {
+  // Fail immediately if an unsupported GC is selected
+  fail_if_unsupported_gc_is_selected();
+
   if (is_no_gc_selected()) {
     // Try select GC ergonomically
     select_gc_ergonomically();
@@ -154,12 +167,6 @@
     vm_exit_during_initialization("Multiple garbage collectors selected", NULL);
   }
 
-#if INCLUDE_PARALLELGC && !INCLUDE_SERIALGC
-  if (FLAG_IS_CMDLINE(UseParallelOldGC) && !UseParallelOldGC) {
-    vm_exit_during_initialization("This JVM build only supports UseParallelOldGC as the full GC");
-  }
-#endif
-
   // Exactly one GC selected
   FOR_EACH_SUPPORTED_GC(gc) {
     if (gc->_flag) {
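As an illustration, on a build compiled without ZGC the NOT_ZGC macro keeps its argument, so FAIL_IF_SELECTED(UseZGC, true) expands to roughly:

    if (UseZGC == true && FLAG_IS_CMDLINE(UseZGC)) {
      vm_exit_during_initialization(true ? "Option -XX:+UseZGC not supported"
                                         : "Option -XX:-UseZGC not supported");
    }

so an explicit -XX:+UseZGC now fails fast in select_gc(), before any ergonomic selection runs, rather than being caught inside select_gc_ergonomically().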
--- a/src/hotspot/share/gc/shared/gcConfig.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/shared/gcConfig.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -35,6 +35,7 @@
   static GCArguments* _arguments;
   static bool         _gc_selected_ergonomically;
 
+  static void fail_if_unsupported_gc_is_selected();
   static bool is_no_gc_selected();
   static bool is_exactly_one_gc_selected();
 
--- a/src/hotspot/share/gc/shared/gc_globals.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/shared/gc_globals.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -307,6 +307,12 @@
   product(bool, ParallelRefProcBalancingEnabled, true,                      \
           "Enable balancing of reference processing queues")                \
                                                                             \
+  experimental(size_t, ReferencesPerThread, 1000,                           \
+               "Ergonomically start one thread for this amount of "         \
+               "references for reference processing if "                    \
+               "ParallelRefProcEnabled is true. Specify 0 to disable and "  \
+               "use all threads.")                                          \
+                                                                            \
   product(uintx, InitiatingHeapOccupancyPercent, 45,                        \
           "The percent occupancy (IHOP) of the current old generation "     \
           "capacity above which a concurrent mark cycle will be initiated " \
--- a/src/hotspot/share/gc/shared/genOopClosures.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/shared/genOopClosures.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -101,7 +101,7 @@
 // This closure will perform barrier store calls for ALL
 // pointers in scanned oops.
 class ScanClosure: public OopsInClassLoaderDataOrGenClosure {
- protected:
+ private:
   DefNewGeneration* _g;
   HeapWord*         _boundary;
   bool              _gc_barrier;
--- a/src/hotspot/share/gc/shared/referenceProcessor.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/shared/referenceProcessor.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -31,6 +31,7 @@
 #include "gc/shared/gcTraceTime.inline.hpp"
 #include "gc/shared/referencePolicy.hpp"
 #include "gc/shared/referenceProcessor.inline.hpp"
+#include "gc/shared/referenceProcessorPhaseTimes.hpp"
 #include "logging/log.hpp"
 #include "memory/allocation.inline.hpp"
 #include "memory/resourceArea.hpp"
@@ -98,13 +99,15 @@
                                        bool      mt_discovery,
                                        uint      mt_discovery_degree,
                                        bool      atomic_discovery,
-                                       BoolObjectClosure* is_alive_non_header)  :
+                                       BoolObjectClosure* is_alive_non_header,
+                                       bool      adjust_no_of_processing_threads)  :
   _is_subject_to_discovery(is_subject_to_discovery),
   _discovering_refs(false),
   _enqueuing_is_done(false),
   _is_alive_non_header(is_alive_non_header),
   _processing_is_mt(mt_processing),
-  _next_id(0)
+  _next_id(0),
+  _adjust_no_of_processing_threads(adjust_no_of_processing_threads)
 {
   assert(is_subject_to_discovery != NULL, "must be set");
 
@@ -125,8 +128,7 @@
 
   // Initialize all entries to NULL
   for (uint i = 0; i < _max_num_queues * number_of_subclasses_of_ref(); i++) {
-    _discovered_refs[i].set_head(NULL);
-    _discovered_refs[i].set_length(0);
+    _discovered_refs[i].clear();
   }
 
   setup_policy(false /* default soft ref policy */);
@@ -189,6 +191,13 @@
   return total;
 }
 
+#ifdef ASSERT
+void ReferenceProcessor::verify_total_count_zero(DiscoveredList lists[], const char* type) {
+  size_t count = total_count(lists);
+  assert(count == 0, "%ss must be empty but has " SIZE_FORMAT " elements", type, count);
+}
+#endif
+
 ReferenceProcessorStats ReferenceProcessor::process_discovered_references(
   BoolObjectClosure*            is_alive,
   OopClosure*                   keep_alive,
@@ -217,34 +226,27 @@
                                 total_count(_discoveredFinalRefs),
                                 total_count(_discoveredPhantomRefs));
 
-  // Soft references
   {
-    RefProcPhaseTimesTracker tt(REF_SOFT, phase_times, this);
-    process_discovered_reflist(_discoveredSoftRefs, _current_soft_ref_policy, true,
-                               is_alive, keep_alive, complete_gc, task_executor, phase_times);
+    RefProcTotalPhaseTimesTracker tt(RefPhase1, phase_times, this);
+    process_soft_ref_reconsider(is_alive, keep_alive, complete_gc,
+                                task_executor, phase_times);
   }
 
   update_soft_ref_master_clock();
 
-  // Weak references
   {
-    RefProcPhaseTimesTracker tt(REF_WEAK, phase_times, this);
-    process_discovered_reflist(_discoveredWeakRefs, NULL, true,
-                               is_alive, keep_alive, complete_gc, task_executor, phase_times);
+    RefProcTotalPhaseTimesTracker tt(RefPhase2, phase_times, this);
+    process_soft_weak_final_refs(is_alive, keep_alive, complete_gc, task_executor, phase_times);
   }
 
-  // Final references
   {
-    RefProcPhaseTimesTracker tt(REF_FINAL, phase_times, this);
-    process_discovered_reflist(_discoveredFinalRefs, NULL, false,
-                               is_alive, keep_alive, complete_gc, task_executor, phase_times);
+    RefProcTotalPhaseTimesTracker tt(RefPhase3, phase_times, this);
+    process_final_keep_alive(keep_alive, complete_gc, task_executor, phase_times);
   }
 
-  // Phantom references
   {
-    RefProcPhaseTimesTracker tt(REF_PHANTOM, phase_times, this);
-    process_discovered_reflist(_discoveredPhantomRefs, NULL, true,
-                               is_alive, keep_alive, complete_gc, task_executor, phase_times);
+    RefProcTotalPhaseTimesTracker tt(RefPhase4, phase_times, this);
+    process_phantom_refs(is_alive, keep_alive, complete_gc, task_executor, phase_times);
   }
 
   if (task_executor != NULL) {
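The four blocks above replace the old per-ReferenceType passes with functional phases:

    Phase 1  SoftReferences only: apply the soft-ref policy; referents the
             policy retains are kept alive for this cycle.
    Phase 2  Soft/Weak/Final: drop references whose referents are still
             alive; for Soft and Weak, dead referents are also cleared and
             the references enqueued here.
    Phase 3  FinalReferences: keep otherwise-dead referents alive for
             finalization, self-loop next to deactivate, and enqueue.
    Phase 4  PhantomReferences: clear dead referents and enqueue.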
@@ -294,7 +296,7 @@
   // pre-barrier here because we know the Reference has already been found/marked,
   // that's how it ended up in the discovered list in the first place.
   RawAccess<>::oop_store(_prev_discovered_addr, new_next);
-  NOT_PRODUCT(_removed++);
+  _removed++;
   _refs_list.dec_length(1);
 }
 
@@ -318,24 +320,29 @@
   }
 }
 
-// NOTE: process_phase*() are largely similar, and at a high level
-// merely iterate over the extant list applying a predicate to
-// each of its elements and possibly removing that element from the
-// list and applying some further closures to that element.
-// We should consider the possibility of replacing these
-// process_phase*() methods by abstracting them into
-// a single general iterator invocation that receives appropriate
-// closures that accomplish this work.
+inline void log_dropped_ref(const DiscoveredListIterator& iter, const char* reason) {
+  if (log_develop_is_enabled(Trace, gc, ref)) {
+    ResourceMark rm;
+    log_develop_trace(gc, ref)("Dropping %s reference " PTR_FORMAT ": %s",
+                               reason, p2i(iter.obj()),
+                               iter.obj()->klass()->internal_name());
+  }
+}
 
-// (SoftReferences only) Traverse the list and remove any SoftReferences whose
-// referents are not alive, but that should be kept alive for policy reasons.
-// Keep alive the transitive closure of all such referents.
-void
-ReferenceProcessor::process_phase1(DiscoveredList&    refs_list,
-                                   ReferencePolicy*   policy,
-                                   BoolObjectClosure* is_alive,
-                                   OopClosure*        keep_alive,
-                                   VoidClosure*       complete_gc) {
+inline void log_enqueued_ref(const DiscoveredListIterator& iter, const char* reason) {
+  if (log_develop_is_enabled(Trace, gc, ref)) {
+    ResourceMark rm;
+    log_develop_trace(gc, ref)("Enqueue %s reference (" INTPTR_FORMAT ": %s)",
+                               reason, p2i(iter.obj()), iter.obj()->klass()->internal_name());
+  }
+  assert(oopDesc::is_oop(iter.obj(), UseConcMarkSweepGC), "Adding a bad reference");
+}
+
+size_t ReferenceProcessor::process_soft_ref_reconsider_work(DiscoveredList&    refs_list,
+                                                            ReferencePolicy*   policy,
+                                                            BoolObjectClosure* is_alive,
+                                                            OopClosure*        keep_alive,
+                                                            VoidClosure*       complete_gc) {
   assert(policy != NULL, "Must have a non-NULL policy");
   DiscoveredListIterator iter(refs_list, keep_alive, is_alive);
   // Decide which softly reachable refs should be kept alive.
@@ -344,8 +351,7 @@
     bool referent_is_dead = (iter.referent() != NULL) && !iter.is_referent_alive();
     if (referent_is_dead &&
         !policy->should_clear_reference(iter.obj(), _soft_ref_timestamp_clock)) {
-      log_develop_trace(gc, ref)("Dropping reference (" INTPTR_FORMAT ": %s"  ") by policy",
-                                 p2i(iter.obj()), iter.obj()->klass()->internal_name());
+      log_dropped_ref(iter, "by policy");
       // Remove Reference object from list
       iter.remove();
       // keep the referent around
@@ -357,23 +363,16 @@
   }
   // Close the reachable set
   complete_gc->do_void();
+
   log_develop_trace(gc, ref)(" Dropped " SIZE_FORMAT " dead Refs out of " SIZE_FORMAT " discovered Refs by policy, from list " INTPTR_FORMAT,
                              iter.removed(), iter.processed(), p2i(&refs_list));
+  return iter.removed();
 }
 
-inline void log_dropped_ref(const DiscoveredListIterator& iter, const char* reason) {
-  log_develop_trace(gc, ref)("Dropping %s reference " PTR_FORMAT ": %s",
-                             reason, p2i(iter.obj()),
-                             iter.obj()->klass()->internal_name());
-}
-
-// Traverse the list and remove any Refs whose referents are alive,
-// or NULL if discovery is not atomic.
-void ReferenceProcessor::process_phase2(DiscoveredList&    refs_list,
-                                        BoolObjectClosure* is_alive,
-                                        OopClosure*        keep_alive,
-                                        VoidClosure*       complete_gc) {
-  // complete_gc is unused.
+size_t ReferenceProcessor::process_soft_weak_final_refs_work(DiscoveredList&    refs_list,
+                                                             BoolObjectClosure* is_alive,
+                                                             OopClosure*        keep_alive,
+                                                             bool               do_enqueue_and_clear) {
   DiscoveredListIterator iter(refs_list, keep_alive, is_alive);
   while (iter.has_next()) {
     iter.load_ptrs(DEBUG_ONLY(!discovery_is_atomic() /* allow_null_referent */));
@@ -395,50 +394,80 @@
       iter.make_referent_alive();
       iter.move_to_next();
     } else {
+      if (do_enqueue_and_clear) {
+        iter.clear_referent();
+        iter.enqueue();
+        log_enqueued_ref(iter, "cleared");
+      }
+      // Keep in discovered list
       iter.next();
     }
   }
-  NOT_PRODUCT(
-    if (iter.processed() > 0) {
-      log_develop_trace(gc, ref)(" Dropped " SIZE_FORMAT " active Refs out of " SIZE_FORMAT
-        " Refs in discovered list " INTPTR_FORMAT,
-        iter.removed(), iter.processed(), p2i(&refs_list));
-    }
-  )
+  if (do_enqueue_and_clear) {
+    iter.complete_enqueue();
+    refs_list.clear();
+  }
+
+  log_develop_trace(gc, ref)(" Dropped " SIZE_FORMAT " active Refs out of " SIZE_FORMAT
+                             " Refs in discovered list " INTPTR_FORMAT,
+                             iter.removed(), iter.processed(), p2i(&refs_list));
+  return iter.removed();
 }
 
-void ReferenceProcessor::process_phase3(DiscoveredList&    refs_list,
-                                        bool               clear_referent,
-                                        BoolObjectClosure* is_alive,
-                                        OopClosure*        keep_alive,
-                                        VoidClosure*       complete_gc) {
-  ResourceMark rm;
-  DiscoveredListIterator iter(refs_list, keep_alive, is_alive);
+size_t ReferenceProcessor::process_final_keep_alive_work(DiscoveredList& refs_list,
+                                                         OopClosure*     keep_alive,
+                                                         VoidClosure*    complete_gc) {
+  DiscoveredListIterator iter(refs_list, keep_alive, NULL);
   while (iter.has_next()) {
     iter.load_ptrs(DEBUG_ONLY(false /* allow_null_referent */));
-    if (clear_referent) {
-      // NULL out referent pointer
-      iter.clear_referent();
-    } else {
-      // Current reference is a FinalReference; that's the only kind we
-      // don't clear the referent, instead keeping it for calling finalize.
-      iter.make_referent_alive();
-      // Self-loop next, to mark it not active.
-      assert(java_lang_ref_Reference::next(iter.obj()) == NULL, "enqueued FinalReference");
-      java_lang_ref_Reference::set_next_raw(iter.obj(), iter.obj());
-    }
+    // keep the referent and followers around
+    iter.make_referent_alive();
+
+    // Self-loop next, to mark the FinalReference not active.
+    assert(java_lang_ref_Reference::next(iter.obj()) == NULL, "enqueued FinalReference");
+    java_lang_ref_Reference::set_next_raw(iter.obj(), iter.obj());
+
     iter.enqueue();
-    log_develop_trace(gc, ref)("Adding %sreference (" INTPTR_FORMAT ": %s) as pending",
-                               clear_referent ? "cleared " : "", p2i(iter.obj()), iter.obj()->klass()->internal_name());
-    assert(oopDesc::is_oop(iter.obj(), UseConcMarkSweepGC), "Adding a bad reference");
+    log_enqueued_ref(iter, "Final");
     iter.next();
   }
   iter.complete_enqueue();
   // Close the reachable set
   complete_gc->do_void();
-  // Clear the list.
-  refs_list.set_head(NULL);
-  refs_list.set_length(0);
+  refs_list.clear();
+
+  assert(iter.removed() == 0, "This phase does not remove anything.");
+  return iter.removed();
+}
+
+size_t ReferenceProcessor::process_phantom_refs_work(DiscoveredList&    refs_list,
+                                                     BoolObjectClosure* is_alive,
+                                                     OopClosure*        keep_alive,
+                                                     VoidClosure*       complete_gc) {
+  DiscoveredListIterator iter(refs_list, keep_alive, is_alive);
+  while (iter.has_next()) {
+    iter.load_ptrs(DEBUG_ONLY(!discovery_is_atomic() /* allow_null_referent */));
+
+    oop const referent = iter.referent();
+
+    if (referent == NULL || iter.is_referent_alive()) {
+      iter.make_referent_alive();
+      iter.remove();
+      iter.move_to_next();
+    } else {
+      iter.clear_referent();
+      iter.enqueue();
+      log_enqueued_ref(iter, "cleared Phantom");
+      iter.next();
+    }
+  }
+  iter.complete_enqueue();
+  // Close the reachable set; needed for collectors whose keep_alive closure
+  // does not immediately complete its work.
+  complete_gc->do_void();
+  refs_list.clear();
+
+  return iter.removed();
 }
 
 void
@@ -450,8 +479,7 @@
     next = java_lang_ref_Reference::discovered(obj);
     java_lang_ref_Reference::set_discovered_raw(obj, NULL);
   }
-  refs_list.set_head(NULL);
-  refs_list.set_length(0);
+  refs_list.clear();
 }
 
 void ReferenceProcessor::abandon_partial_discovery() {
@@ -488,69 +516,107 @@
   return total_count(list);
 }
 
-class RefProcPhase1Task: public AbstractRefProcTaskExecutor::ProcessTask {
+class RefProcPhase1Task : public AbstractRefProcTaskExecutor::ProcessTask {
 public:
   RefProcPhase1Task(ReferenceProcessor&           ref_processor,
-                    DiscoveredList                refs_lists[],
-                    ReferencePolicy*              policy,
-                    bool                          marks_oops_alive,
-                    ReferenceProcessorPhaseTimes* phase_times)
-    : ProcessTask(ref_processor, refs_lists, marks_oops_alive, phase_times),
-      _policy(policy)
-  { }
-  virtual void work(unsigned int i, BoolObjectClosure& is_alive,
+                    ReferenceProcessorPhaseTimes* phase_times,
+                    ReferencePolicy*              policy)
+    : ProcessTask(ref_processor, true /* marks_oops_alive */, phase_times),
+      _policy(policy) { }
+
+  virtual void work(uint worker_id,
+                    BoolObjectClosure& is_alive,
                     OopClosure& keep_alive,
                     VoidClosure& complete_gc)
   {
-    RefProcWorkerTimeTracker tt(ReferenceProcessorPhaseTimes::RefPhase1, _phase_times, i);
-
-    _ref_processor.process_phase1(_refs_lists[i], _policy,
-                                  &is_alive, &keep_alive, &complete_gc);
+    RefProcSubPhasesWorkerTimeTracker tt(ReferenceProcessor::SoftRefSubPhase1, _phase_times, worker_id);
+    size_t const removed = _ref_processor.process_soft_ref_reconsider_work(_ref_processor._discoveredSoftRefs[worker_id],
+                                                                           _policy,
+                                                                           &is_alive,
+                                                                           &keep_alive,
+                                                                           &complete_gc);
+    _phase_times->add_ref_cleared(REF_SOFT, removed);
   }
 private:
   ReferencePolicy* _policy;
 };
 
 class RefProcPhase2Task: public AbstractRefProcTaskExecutor::ProcessTask {
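+  // Runs the common Soft/Weak/Final sub-phase work on this worker's queue and
+  // records the number of references cleared for the given reference type.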
+  void run_phase2(uint worker_id,
+                  DiscoveredList list[],
+                  BoolObjectClosure& is_alive,
+                  OopClosure& keep_alive,
+                  bool do_enqueue_and_clear,
+                  ReferenceType ref_type) {
+    size_t const removed = _ref_processor.process_soft_weak_final_refs_work(list[worker_id],
+                                                                            &is_alive,
+                                                                            &keep_alive,
+                                                                            do_enqueue_and_clear);
+    _phase_times->add_ref_cleared(ref_type, removed);
+  }
+
 public:
-  RefProcPhase2Task(ReferenceProcessor&           ref_processor,
-                    DiscoveredList                refs_lists[],
-                    bool                          marks_oops_alive,
+  RefProcPhase2Task(ReferenceProcessor& ref_processor,
                     ReferenceProcessorPhaseTimes* phase_times)
-    : ProcessTask(ref_processor, refs_lists, marks_oops_alive, phase_times)
-  { }
-  virtual void work(unsigned int i, BoolObjectClosure& is_alive,
+    : ProcessTask(ref_processor, false /* marks_oops_alive */, phase_times) { }
+
+  virtual void work(uint worker_id,
+                    BoolObjectClosure& is_alive,
                     OopClosure& keep_alive,
-                    VoidClosure& complete_gc)
-  {
-    RefProcWorkerTimeTracker tt(ReferenceProcessorPhaseTimes::RefPhase2, _phase_times, i);
-
-    _ref_processor.process_phase2(_refs_lists[i],
-                                  &is_alive, &keep_alive, &complete_gc);
+                    VoidClosure& complete_gc) {
+    RefProcWorkerTimeTracker t(_phase_times->phase2_worker_time_sec(), worker_id);
+    {
+      RefProcSubPhasesWorkerTimeTracker tt(ReferenceProcessor::SoftRefSubPhase2, _phase_times, worker_id);
+      run_phase2(worker_id, _ref_processor._discoveredSoftRefs, is_alive, keep_alive, true /* do_enqueue_and_clear */, REF_SOFT);
+    }
+    {
+      RefProcSubPhasesWorkerTimeTracker tt(ReferenceProcessor::WeakRefSubPhase2, _phase_times, worker_id);
+      run_phase2(worker_id, _ref_processor._discoveredWeakRefs, is_alive, keep_alive, true /* do_enqueue_and_clear */, REF_WEAK);
+    }
+    {
+      RefProcSubPhasesWorkerTimeTracker tt(ReferenceProcessor::FinalRefSubPhase2, _phase_times, worker_id);
+      run_phase2(worker_id, _ref_processor._discoveredFinalRefs, is_alive, keep_alive, false /* do_enqueue_and_clear */, REF_FINAL);
+    }
+    // Close the reachable set; needed for collectors whose keep_alive closure
+    // does not immediately complete its work.
+    complete_gc.do_void();
   }
 };
 
 class RefProcPhase3Task: public AbstractRefProcTaskExecutor::ProcessTask {
 public:
   RefProcPhase3Task(ReferenceProcessor&           ref_processor,
-                    DiscoveredList                refs_lists[],
-                    bool                         clear_referent,
-                    bool                          marks_oops_alive,
                     ReferenceProcessorPhaseTimes* phase_times)
-    : ProcessTask(ref_processor, refs_lists, marks_oops_alive, phase_times),
-      _clear_referent(clear_referent)
-  { }
-  virtual void work(unsigned int i, BoolObjectClosure& is_alive,
+    : ProcessTask(ref_processor, true /* marks_oops_alive */, phase_times) { }
+
+  virtual void work(uint worker_id,
+                    BoolObjectClosure& is_alive,
                     OopClosure& keep_alive,
                     VoidClosure& complete_gc)
   {
-    RefProcWorkerTimeTracker tt(ReferenceProcessorPhaseTimes::RefPhase3, _phase_times, i);
+    RefProcSubPhasesWorkerTimeTracker tt(ReferenceProcessor::FinalRefSubPhase3, _phase_times, worker_id);
+    _ref_processor.process_final_keep_alive_work(_ref_processor._discoveredFinalRefs[worker_id], &keep_alive, &complete_gc);
+  }
+};
 
-    _ref_processor.process_phase3(_refs_lists[i], _clear_referent,
-                                  &is_alive, &keep_alive, &complete_gc);
+class RefProcPhase4Task: public AbstractRefProcTaskExecutor::ProcessTask {
+public:
+  RefProcPhase4Task(ReferenceProcessor&           ref_processor,
+                    ReferenceProcessorPhaseTimes* phase_times)
+    : ProcessTask(ref_processor, false /* marks_oops_alive */, phase_times) { }
+
+  virtual void work(uint worker_id,
+                    BoolObjectClosure& is_alive,
+                    OopClosure& keep_alive,
+                    VoidClosure& complete_gc)
+  {
+    RefProcSubPhasesWorkerTimeTracker tt(ReferenceProcessor::PhantomRefSubPhase4, _phase_times, worker_id);
+    size_t const removed = _ref_processor.process_phantom_refs_work(_ref_processor._discoveredPhantomRefs[worker_id],
+                                                                    &is_alive,
+                                                                    &keep_alive,
+                                                                    &complete_gc);
+    _phase_times->add_ref_cleared(REF_PHANTOM, removed);
   }
-private:
-  bool _clear_referent;
 };
 
 void ReferenceProcessor::log_reflist(const char* prefix, DiscoveredList list[], uint num_active_queues) {
@@ -614,6 +680,13 @@
   }
 }
 
+void ReferenceProcessor::maybe_balance_queues(DiscoveredList refs_lists[]) {
+  assert(_processing_is_mt, "Should not call this otherwise");
+  if (need_balance_queues(refs_lists)) {
+    balance_queues(refs_lists);
+  }
+}
+
 // Balances reference queues.
 // Move entries from all queues[0, 1, ..., _max_num_q-1] to
 // queues[0, 1, ..., _num_q-1] because only the first _num_q
@@ -698,77 +771,211 @@
 #endif
 }
 
-void ReferenceProcessor::process_discovered_reflist(
-  DiscoveredList                refs_lists[],
-  ReferencePolicy*              policy,
-  bool                          clear_referent,
-  BoolObjectClosure*            is_alive,
-  OopClosure*                   keep_alive,
-  VoidClosure*                  complete_gc,
-  AbstractRefProcTaskExecutor*  task_executor,
-  ReferenceProcessorPhaseTimes* phase_times)
-{
-  bool mt_processing = task_executor != NULL && _processing_is_mt;
+bool ReferenceProcessor::is_mt_processing_set_up(AbstractRefProcTaskExecutor* task_executor) const {
+  return task_executor != NULL && _processing_is_mt;
+}
 
-  phase_times->set_processing_is_mt(mt_processing);
+void ReferenceProcessor::process_soft_ref_reconsider(BoolObjectClosure* is_alive,
+                                                     OopClosure* keep_alive,
+                                                     VoidClosure* complete_gc,
+                                                     AbstractRefProcTaskExecutor* task_executor,
+                                                     ReferenceProcessorPhaseTimes* phase_times) {
+  assert(!_processing_is_mt || task_executor != NULL, "Task executor must not be NULL when mt processing is set.");
 
-  if (mt_processing && need_balance_queues(refs_lists)) {
-    RefProcBalanceQueuesTimeTracker tt(phase_times);
-    balance_queues(refs_lists);
+  size_t const num_soft_refs = total_count(_discoveredSoftRefs);
+  phase_times->set_ref_discovered(REF_SOFT, num_soft_refs);
+
+  phase_times->set_processing_is_mt(_processing_is_mt);
+
+  if (num_soft_refs == 0 || _current_soft_ref_policy == NULL) {
+    log_debug(gc, ref)("Skipped phase1 of Reference Processing due to unavailable references");
+    return;
   }
 
-  // Phase 1 (soft refs only):
-  // . Traverse the list and remove any SoftReferences whose
-  //   referents are not alive, but that should be kept alive for
-  //   policy reasons. Keep alive the transitive closure of all
-  //   such referents.
-  if (policy != NULL) {
-    RefProcParPhaseTimeTracker tt(ReferenceProcessorPhaseTimes::RefPhase1, phase_times);
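+  // Scale the number of processing threads to the number of discovered
+  // references for this phase; reverted when the adjuster goes out of scope.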
+  RefProcMTDegreeAdjuster a(this, RefPhase1, num_soft_refs);
 
-    if (mt_processing) {
-      RefProcPhase1Task phase1(*this, refs_lists, policy, true /*marks_oops_alive*/, phase_times);
-      task_executor->execute(phase1);
-    } else {
-      for (uint i = 0; i < _max_num_queues; i++) {
-        process_phase1(refs_lists[i], policy,
-                       is_alive, keep_alive, complete_gc);
-      }
-    }
-  } else { // policy == NULL
-    assert(refs_lists != _discoveredSoftRefs,
-           "Policy must be specified for soft references.");
+  if (_processing_is_mt) {
+    RefProcBalanceQueuesTimeTracker tt(RefPhase1, phase_times);
+    maybe_balance_queues(_discoveredSoftRefs);
   }
 
-  // Phase 2:
-  // . Traverse the list and remove any refs whose referents are alive.
-  {
-    RefProcParPhaseTimeTracker tt(ReferenceProcessorPhaseTimes::RefPhase2, phase_times);
+  RefProcPhaseTimeTracker tt(RefPhase1, phase_times);
 
-    if (mt_processing) {
-      RefProcPhase2Task phase2(*this, refs_lists, !discovery_is_atomic() /*marks_oops_alive*/, phase_times);
-      task_executor->execute(phase2);
-    } else {
+  log_reflist("Phase1 Soft before", _discoveredSoftRefs, _max_num_queues);
+  if (_processing_is_mt) {
+    RefProcPhase1Task phase1(*this, phase_times, _current_soft_ref_policy);
+    task_executor->execute(phase1, num_queues());
+  } else {
+    size_t removed = 0;
+
+    RefProcSubPhasesWorkerTimeTracker tt2(SoftRefSubPhase1, phase_times, 0);
+    for (uint i = 0; i < _max_num_queues; i++) {
+      removed += process_soft_ref_reconsider_work(_discoveredSoftRefs[i], _current_soft_ref_policy,
+                                                  is_alive, keep_alive, complete_gc);
+    }
+
+    phase_times->add_ref_cleared(REF_SOFT, removed);
+  }
+  log_reflist("Phase1 Soft after", _discoveredSoftRefs, _max_num_queues);
+}
+
+void ReferenceProcessor::process_soft_weak_final_refs(BoolObjectClosure* is_alive,
+                                                      OopClosure* keep_alive,
+                                                      VoidClosure* complete_gc,
+                                                      AbstractRefProcTaskExecutor*  task_executor,
+                                                      ReferenceProcessorPhaseTimes* phase_times) {
+  assert(!_processing_is_mt || task_executor != NULL, "Task executor must not be NULL when mt processing is set.");
+
+  size_t const num_soft_refs = total_count(_discoveredSoftRefs);
+  size_t const num_weak_refs = total_count(_discoveredWeakRefs);
+  size_t const num_final_refs = total_count(_discoveredFinalRefs);
+  size_t const num_total_refs = num_soft_refs + num_weak_refs + num_final_refs;
+  phase_times->set_ref_discovered(REF_WEAK, num_weak_refs);
+  phase_times->set_ref_discovered(REF_FINAL, num_final_refs);
+
+  phase_times->set_processing_is_mt(_processing_is_mt);
+
+  if (num_total_refs == 0) {
+    log_debug(gc, ref)("Skipped phase2 of Reference Processing due to unavailable references");
+    return;
+  }
+
+  RefProcMTDegreeAdjuster a(this, RefPhase2, num_total_refs);
+
+  if (_processing_is_mt) {
+    RefProcBalanceQueuesTimeTracker tt(RefPhase2, phase_times);
+    maybe_balance_queues(_discoveredSoftRefs);
+    maybe_balance_queues(_discoveredWeakRefs);
+    maybe_balance_queues(_discoveredFinalRefs);
+  }
+
+  RefProcPhaseTimeTracker tt(RefPhase2, phase_times);
+
+  log_reflist("Phase2 Soft before", _discoveredSoftRefs, _max_num_queues);
+  log_reflist("Phase2 Weak before", _discoveredWeakRefs, _max_num_queues);
+  log_reflist("Phase2 Final before", _discoveredFinalRefs, _max_num_queues);
+  if (_processing_is_mt) {
+    RefProcPhase2Task phase2(*this, phase_times);
+    task_executor->execute(phase2, num_queues());
+  } else {
+    RefProcWorkerTimeTracker t(phase_times->phase2_worker_time_sec(), 0);
+    {
+      size_t removed = 0;
+
+      RefProcSubPhasesWorkerTimeTracker tt2(SoftRefSubPhase2, phase_times, 0);
       for (uint i = 0; i < _max_num_queues; i++) {
-        process_phase2(refs_lists[i], is_alive, keep_alive, complete_gc);
+        removed += process_soft_weak_final_refs_work(_discoveredSoftRefs[i], is_alive, keep_alive, true /* do_enqueue_and_clear */);
       }
+
+      phase_times->add_ref_cleared(REF_SOFT, removed);
     }
+    {
+      size_t removed = 0;
+
+      RefProcSubPhasesWorkerTimeTracker tt2(WeakRefSubPhase2, phase_times, 0);
+      for (uint i = 0; i < _max_num_queues; i++) {
+        removed += process_soft_weak_final_refs_work(_discoveredWeakRefs[i], is_alive, keep_alive, true /* do_enqueue_and_clear */);
+      }
+
+      phase_times->add_ref_cleared(REF_WEAK, removed);
+    }
+    {
+      size_t removed = 0;
+
+      RefProcSubPhasesWorkerTimeTracker tt2(FinalRefSubPhase2, phase_times, 0);
+      for (uint i = 0; i < _max_num_queues; i++) {
+        removed += process_soft_weak_final_refs_work(_discoveredFinalRefs[i], is_alive, keep_alive, false /* do_enqueue_and_clear */);
+      }
+
+      phase_times->add_ref_cleared(REF_FINAL, removed);
+    }
+    complete_gc->do_void();
+  }
+  verify_total_count_zero(_discoveredSoftRefs, "SoftReference");
+  verify_total_count_zero(_discoveredWeakRefs, "WeakReference");
+  log_reflist("Phase2 Final after", _discoveredFinalRefs, _max_num_queues);
+}
+
+void ReferenceProcessor::process_final_keep_alive(OopClosure* keep_alive,
+                                                  VoidClosure* complete_gc,
+                                                  AbstractRefProcTaskExecutor*  task_executor,
+                                                  ReferenceProcessorPhaseTimes* phase_times) {
+  assert(!_processing_is_mt || task_executor != NULL, "Task executor must not be NULL when mt processing is set.");
+
+  size_t const num_final_refs = total_count(_discoveredFinalRefs);
+
+  phase_times->set_processing_is_mt(_processing_is_mt);
+
+  if (num_final_refs == 0) {
+    log_debug(gc, ref)("Skipped phase3 of Reference Processing due to unavailable references");
+    return;
+  }
+
+  RefProcMTDegreeAdjuster a(this, RefPhase3, num_final_refs);
+
+  if (_processing_is_mt) {
+    RefProcBalanceQueuesTimeTracker tt(RefPhase3, phase_times);
+    maybe_balance_queues(_discoveredFinalRefs);
   }
 
   // Phase 3:
-  // . Traverse the list and process referents as appropriate.
-  {
-    RefProcParPhaseTimeTracker tt(ReferenceProcessorPhaseTimes::RefPhase3, phase_times);
+  // . Traverse referents of final references and keep them and their followers alive.
+  RefProcPhaseTimeTracker tt(RefPhase3, phase_times);
 
-    if (mt_processing) {
-      RefProcPhase3Task phase3(*this, refs_lists, clear_referent, true /*marks_oops_alive*/, phase_times);
-      task_executor->execute(phase3);
-    } else {
-      for (uint i = 0; i < _max_num_queues; i++) {
-        process_phase3(refs_lists[i], clear_referent,
-                       is_alive, keep_alive, complete_gc);
-      }
+  if (_processing_is_mt) {
+    RefProcPhase3Task phase3(*this, phase_times);
+    task_executor->execute(phase3, num_queues());
+  } else {
+    RefProcSubPhasesWorkerTimeTracker tt2(FinalRefSubPhase3, phase_times, 0);
+    for (uint i = 0; i < _max_num_queues; i++) {
+      process_final_keep_alive_work(_discoveredFinalRefs[i], keep_alive, complete_gc);
     }
   }
+  verify_total_count_zero(_discoveredFinalRefs, "FinalReference");
+}
+
+void ReferenceProcessor::process_phantom_refs(BoolObjectClosure* is_alive,
+                                              OopClosure* keep_alive,
+                                              VoidClosure* complete_gc,
+                                              AbstractRefProcTaskExecutor* task_executor,
+                                              ReferenceProcessorPhaseTimes* phase_times) {
+  assert(!_processing_is_mt || task_executor != NULL, "Task executor must not be NULL when mt processing is set.");
+
+  size_t const num_phantom_refs = total_count(_discoveredPhantomRefs);
+  phase_times->set_ref_discovered(REF_PHANTOM, num_phantom_refs);
+
+  phase_times->set_processing_is_mt(_processing_is_mt);
+
+  if (num_phantom_refs == 0) {
+    log_debug(gc, ref)("Skipped phase4 of Reference Processing due to unavailable references");
+    return;
+  }
+
+  RefProcMTDegreeAdjuster a(this, RefPhase4, num_phantom_refs);
+
+  if (_processing_is_mt) {
+    RefProcBalanceQueuesTimeTracker tt(RefPhase4, phase_times);
+    maybe_balance_queues(_discoveredPhantomRefs);
+  }
+
+  // Phase 4: Drop PhantomReferences with live referents; clear and enqueue the rest.
+  RefProcPhaseTimeTracker tt(RefPhase4, phase_times);
+
+  log_reflist("Phase4 Phantom before", _discoveredPhantomRefs, _max_num_queues);
+  if (_processing_is_mt) {
+    RefProcPhase4Task phase4(*this, phase_times);
+    task_executor->execute(phase4, num_queues());
+  } else {
+    size_t removed = 0;
+
+    RefProcSubPhasesWorkerTimeTracker tt2(PhantomRefSubPhase4, phase_times, 0);
+    for (uint i = 0; i < _max_num_queues; i++) {
+      removed += process_phantom_refs_work(_discoveredPhantomRefs[i], is_alive, keep_alive, complete_gc);
+    }
+
+    phase_times->add_ref_cleared(REF_PHANTOM, removed);
+  }
+  verify_total_count_zero(_discoveredPhantomRefs, "PhantomReference");
 }
 
 inline DiscoveredList* ReferenceProcessor::get_discovered_list(ReferenceType rt) {
@@ -1119,12 +1326,10 @@
   // Close the reachable set
   complete_gc->do_void();
 
-  NOT_PRODUCT(
-    if (iter.processed() > 0) {
-      log_develop_trace(gc, ref)(" Dropped " SIZE_FORMAT " Refs out of " SIZE_FORMAT " Refs in discovered list " INTPTR_FORMAT,
-        iter.removed(), iter.processed(), p2i(&refs_list));
-    }
-  )
+  if (iter.processed() > 0) {
+    log_develop_trace(gc, ref)(" Dropped " SIZE_FORMAT " Refs out of " SIZE_FORMAT " Refs in discovered list " INTPTR_FORMAT,
+                               iter.removed(), iter.processed(), p2i(&refs_list));
+  }
   return false;
 }
 
@@ -1142,3 +1347,45 @@
    ShouldNotReachHere();
    return NULL;
 }
+
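+// Ergonomically determine the number of processing threads: one thread per
+// ReferencesPerThread references, capped by max_threads and by the number of
+// active processors. Phases 1 and 3 always use the maximum (see use_max_threads).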
+uint RefProcMTDegreeAdjuster::ergo_proc_thread_count(size_t ref_count,
+                                                     uint max_threads,
+                                                     RefProcPhases phase) const {
+  assert(0 < max_threads, "must allow at least one thread");
+
+  if (use_max_threads(phase) || (ReferencesPerThread == 0)) {
+    return max_threads;
+  }
+
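+  // One thread, plus an additional thread for every ReferencesPerThread references.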
+  size_t thread_count = 1 + (ref_count / ReferencesPerThread);
+  return (uint)MIN3(thread_count,
+                    static_cast<size_t>(max_threads),
+                    static_cast<size_t>(os::active_processor_count()));
+}
+
+bool RefProcMTDegreeAdjuster::use_max_threads(RefProcPhases phase) const {
+  // Even a small number of references in these phases could produce large amounts of work.
+  return (phase == ReferenceProcessor::RefPhase1 || phase == ReferenceProcessor::RefPhase3);
+}
+
+RefProcMTDegreeAdjuster::RefProcMTDegreeAdjuster(ReferenceProcessor* rp,
+                                                 RefProcPhases phase,
+                                                 size_t ref_count):
+    _rp(rp),
+    _saved_mt_processing(_rp->processing_is_mt()),
+    _saved_num_queues(_rp->num_queues()) {
+  if (!_rp->processing_is_mt() || !_rp->adjust_no_of_processing_threads() || (ReferencesPerThread == 0)) {
+    return;
+  }
+
+  uint workers = ergo_proc_thread_count(ref_count, _rp->num_queues(), phase);
+
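+  // A single worker means single-threaded processing; otherwise run
+  // multi-threaded with the ergonomically chosen degree.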
+  _rp->set_mt_processing(workers > 1);
+  _rp->set_active_mt_degree(workers);
+}
+
+RefProcMTDegreeAdjuster::~RefProcMTDegreeAdjuster() {
+  // Revert to previous status.
+  _rp->set_mt_processing(_saved_mt_processing);
+  _rp->set_active_mt_degree(_saved_num_queues);
+}
--- a/src/hotspot/share/gc/shared/referenceProcessor.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/shared/referenceProcessor.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -27,28 +27,14 @@
 
 #include "gc/shared/referenceDiscoverer.hpp"
 #include "gc/shared/referencePolicy.hpp"
-#include "gc/shared/referenceProcessorPhaseTimes.hpp"
 #include "gc/shared/referenceProcessorStats.hpp"
 #include "memory/referenceType.hpp"
 #include "oops/instanceRefKlass.hpp"
 
+class AbstractRefProcTaskExecutor;
 class GCTimer;
-
-// ReferenceProcessor class encapsulates the per-"collector" processing
-// of java.lang.Reference objects for GC. The interface is useful for supporting
-// a generational abstraction, in particular when there are multiple
-// generations that are being independently collected -- possibly
-// concurrently and/or incrementally.
-// ReferenceProcessor class abstracts away from a generational setting
-// by using a closure that determines whether a given reference or referent are
-// subject to this ReferenceProcessor's discovery, thus allowing its use in a
-// straightforward manner in a general, non-generational, non-contiguous generation
-// (or heap) setting.
-//
-
-// forward references
 class ReferencePolicy;
-class AbstractRefProcTaskExecutor;
+class ReferenceProcessorPhaseTimes;
 
 // List of discovered references.
 class DiscoveredList {
@@ -65,6 +51,8 @@
   void   set_length(size_t len) { _len = len;  }
   void   inc_length(size_t inc) { _len += inc; assert(_len > 0, "Error"); }
   void   dec_length(size_t dec) { _len -= dec; }
+
+  inline void clear();
 private:
   // Set value depending on UseCompressedOops. This could be a template class
   // but then we have to fix all the instantiations and declarations that use this class.
@@ -93,10 +81,8 @@
   oop                _first_seen; // cyclic linked list check
   )
 
-  NOT_PRODUCT(
   size_t             _processed;
   size_t             _removed;
-  )
 
 public:
   inline DiscoveredListIterator(DiscoveredList&    refs_list,
@@ -153,10 +139,8 @@
   void clear_referent();
 
   // Statistics
-  NOT_PRODUCT(
   inline size_t processed() const { return _processed; }
-  inline size_t removed() const   { return _removed; }
-  )
+  inline size_t removed() const { return _removed; }
 
   inline void move_to_next() {
     if (_current_discovered == _next_discovered) {
@@ -166,12 +150,50 @@
       _current_discovered = _next_discovered;
     }
     assert(_current_discovered != _first_seen, "cyclic ref_list found");
-    NOT_PRODUCT(_processed++);
+    _processed++;
   }
 };
 
+// The ReferenceProcessor class encapsulates the per-"collector" processing
+// of java.lang.Reference objects for GC. The interface is useful for supporting
+// a generational abstraction, in particular when there are multiple
+// generations that are being independently collected -- possibly
+// concurrently and/or incrementally.
+// ReferenceProcessor class abstracts away from a generational setting
+// by using a closure that determines whether a given reference or referent are
+// subject to this ReferenceProcessor's discovery, thus allowing its use in a
+// straightforward manner in a general, non-generational, non-contiguous generation
+// (or heap) setting.
 class ReferenceProcessor : public ReferenceDiscoverer {
+  friend class RefProcPhase1Task;
+  friend class RefProcPhase2Task;
+  friend class RefProcPhase3Task;
+  friend class RefProcPhase4Task;
+public:
+  // Names of sub-phases of reference processing. Each name indicates the type
+  // of reference processed and ends with the number of the phase it belongs to.
+  enum RefProcSubPhases {
+    SoftRefSubPhase1,
+    SoftRefSubPhase2,
+    WeakRefSubPhase2,
+    FinalRefSubPhase2,
+    FinalRefSubPhase3,
+    PhantomRefSubPhase4,
+    RefSubPhaseMax
+  };
+
+  // Main phases of reference processing.
+  enum RefProcPhases {
+    RefPhase1,
+    RefPhase2,
+    RefPhase3,
+    RefPhase4,
+    RefPhaseMax
+  };
+
+private:
   size_t total_count(DiscoveredList lists[]) const;
+  void verify_total_count_zero(DiscoveredList lists[], const char* type) NOT_DEBUG_RETURN;
 
   // The SoftReference master timestamp clock
   static jlong _soft_ref_timestamp_clock;
@@ -191,6 +213,7 @@
   uint        _next_id;                 // round-robin mod _num_queues counter in
                                         // support of work distribution
 
+  bool        _adjust_no_of_processing_threads; // allow dynamic adjustment of processing threads
   // For collectors that do not keep GC liveness information
   // in the object header, this field holds a closure that
   // helps the reference processor determine the reachability
@@ -222,15 +245,72 @@
   DiscoveredList* _discoveredFinalRefs;
   DiscoveredList* _discoveredPhantomRefs;
 
- public:
+  // Phase 1: Re-evaluate soft ref policy.
+  void process_soft_ref_reconsider(BoolObjectClosure* is_alive,
+                                   OopClosure* keep_alive,
+                                   VoidClosure* complete_gc,
+                                   AbstractRefProcTaskExecutor*  task_executor,
+                                   ReferenceProcessorPhaseTimes* phase_times);
+
+  // Phase 2: Drop Soft/Weak/Final references with a NULL or live referent, and clear
+  // and enqueue non-Final references.
+  void process_soft_weak_final_refs(BoolObjectClosure* is_alive,
+                                    OopClosure* keep_alive,
+                                    VoidClosure* complete_gc,
+                                    AbstractRefProcTaskExecutor*  task_executor,
+                                    ReferenceProcessorPhaseTimes* phase_times);
+
+  // Phase 3: Keep alive followers of Final references, and enqueue.
+  void process_final_keep_alive(OopClosure* keep_alive,
+                                VoidClosure* complete_gc,
+                                AbstractRefProcTaskExecutor*  task_executor,
+                                ReferenceProcessorPhaseTimes* phase_times);
+
+  // Phase 4: Drop Phantom references with a NULL or live referent (keeping live
+  // referents alive), and clear and enqueue the rest.
+  void process_phantom_refs(BoolObjectClosure* is_alive,
+                            OopClosure* keep_alive,
+                            VoidClosure* complete_gc,
+                            AbstractRefProcTaskExecutor*  task_executor,
+                            ReferenceProcessorPhaseTimes* phase_times);
+
+  // Work methods used by the process_* methods. All methods return the number of
+  // removed elements.
+
+  // (SoftReferences only) Traverse the list and remove any SoftReferences whose
+  // referents are not alive, but that should be kept alive for policy reasons.
+  // Keep alive the transitive closure of all such referents.
+  size_t process_soft_ref_reconsider_work(DiscoveredList&     refs_list,
+                                          ReferencePolicy*    policy,
+                                          BoolObjectClosure*  is_alive,
+                                          OopClosure*         keep_alive,
+                                          VoidClosure*        complete_gc);
+
+  // Traverse the list and remove any Refs whose referents are alive, or whose
+  // referent is NULL (only possible if discovery is not atomic). Clear and
+  // enqueue the remaining references if do_enqueue_and_clear is set.
+  size_t process_soft_weak_final_refs_work(DiscoveredList&    refs_list,
+                                           BoolObjectClosure* is_alive,
+                                           OopClosure*        keep_alive,
+                                           bool               do_enqueue_and_clear);
+
+  // Keep alive followers of referents for FinalReferences. Must only be called for
+  // those.
+  size_t process_final_keep_alive_work(DiscoveredList&    refs_list,
+                                       OopClosure*        keep_alive,
+                                       VoidClosure*       complete_gc);
+
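+  // Traverse the list and drop Phantom references with live referents from it;
+  // clear and enqueue the remaining ones.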
+  size_t process_phantom_refs_work(DiscoveredList&    refs_list,
+                                   BoolObjectClosure* is_alive,
+                                   OopClosure*        keep_alive,
+                                   VoidClosure*       complete_gc);
+
+public:
   static int number_of_subclasses_of_ref() { return (REF_PHANTOM - REF_OTHER); }
 
   uint num_queues() const                  { return _num_queues; }
   uint max_num_queues() const              { return _max_num_queues; }
   void set_active_mt_degree(uint v);
 
-  DiscoveredList* discovered_refs()        { return _discovered_refs; }
-
   ReferencePolicy* setup_policy(bool always_clear) {
     _current_soft_ref_policy = always_clear ?
       _always_clear_soft_ref_policy : _default_soft_ref_policy;
@@ -238,38 +318,6 @@
     return _current_soft_ref_policy;
   }
 
-  // Process references with a certain reachability level.
-  void process_discovered_reflist(DiscoveredList                refs_lists[],
-                                  ReferencePolicy*              policy,
-                                  bool                          clear_referent,
-                                  BoolObjectClosure*            is_alive,
-                                  OopClosure*                   keep_alive,
-                                  VoidClosure*                  complete_gc,
-                                  AbstractRefProcTaskExecutor*  task_executor,
-                                  ReferenceProcessorPhaseTimes* phase_times);
-
-  // Work methods used by the method process_discovered_reflist
-  // Phase1: keep alive all those referents that are otherwise
-  // dead but which must be kept alive by policy (and their closure).
-  void process_phase1(DiscoveredList&     refs_list,
-                      ReferencePolicy*    policy,
-                      BoolObjectClosure*  is_alive,
-                      OopClosure*         keep_alive,
-                      VoidClosure*        complete_gc);
-  // Phase2: remove all those references whose referents are
-  // reachable.
-  void process_phase2(DiscoveredList&    refs_list,
-                      BoolObjectClosure* is_alive,
-                      OopClosure*        keep_alive,
-                      VoidClosure*       complete_gc);
-  // Phase3: process the referents by either clearing them
-  // or keeping them alive (and their closure), and enqueuing them.
-  void process_phase3(DiscoveredList&    refs_list,
-                      bool               clear_referent,
-                      BoolObjectClosure* is_alive,
-                      OopClosure*        keep_alive,
-                      VoidClosure*       complete_gc);
-
   // "Preclean" all the discovered reference lists by removing references that
   // are active (e.g. due to the mutator calling enqueue()) or with NULL or
   // strongly reachable referents.
@@ -285,11 +333,11 @@
                                       YieldClosure*      yield,
                                       GCTimer*           gc_timer);
 
+private:
   // Returns the name of the discovered reference list
   // occupying the i / _num_queues slot.
   const char* list_name(uint i);
 
-private:
   // "Preclean" the given discovered reference list by removing references with
   // the attributes mentioned in preclean_discovered_references().
   // Supports both normal and fine grain yielding.
@@ -323,18 +371,24 @@
   void balance_queues(DiscoveredList refs_lists[]);
   bool need_balance_queues(DiscoveredList refs_lists[]);
 
+  // If there is need to balance the given queue, do it.
+  void maybe_balance_queues(DiscoveredList refs_lists[]);
+
   // Update (advance) the soft ref master clock field.
   void update_soft_ref_master_clock();
 
   bool is_subject_to_discovery(oop const obj) const;
 
+  bool is_mt_processing_set_up(AbstractRefProcTaskExecutor* task_executor) const;
+
 public:
   // Default parameters give you a vanilla reference processor.
   ReferenceProcessor(BoolObjectClosure* is_subject_to_discovery,
                      bool mt_processing = false, uint mt_processing_degree = 1,
                      bool mt_discovery  = false, uint mt_discovery_degree  = 1,
                      bool atomic_discovery = true,
-                     BoolObjectClosure* is_alive_non_header = NULL);
+                     BoolObjectClosure* is_alive_non_header = NULL,
+                     bool adjust_no_of_processing_threads = false);
 
   // RefDiscoveryPolicy values
   enum DiscoveryPolicy {
@@ -346,7 +400,6 @@
 
   static void init_statics();
 
- public:
   // get and set "is_alive_non_header" field
   BoolObjectClosure* is_alive_non_header() {
     return _is_alive_non_header;
@@ -408,6 +461,8 @@
   // debugging
   void verify_no_references_recorded() PRODUCT_RETURN;
   void verify_referent(oop obj)        PRODUCT_RETURN;
+
+  bool adjust_no_of_processing_threads() const { return _adjust_no_of_processing_threads; }
 };
 
 // A subject-to-discovery closure that uses a single memory span to determine the area that
@@ -576,7 +631,6 @@
   }
 };
 
-
 // This class is an interface used to implement task execution for the
 // reference processing.
 class AbstractRefProcTaskExecutor {
@@ -586,7 +640,7 @@
   class ProcessTask;
 
   // Executes a task using worker threads.
-  virtual void execute(ProcessTask& task) = 0;
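+  // ergo_workers is the ergonomically chosen number of workers for the current
+  // phase (see RefProcMTDegreeAdjuster).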
+  virtual void execute(ProcessTask& task, uint ergo_workers) = 0;
 
   // Switch to single threaded mode.
   virtual void set_single_threaded_mode() { };
@@ -595,30 +649,50 @@
 // Abstract reference processing task to execute.
 class AbstractRefProcTaskExecutor::ProcessTask {
 protected:
-  ProcessTask(ReferenceProcessor&           ref_processor,
-              DiscoveredList                refs_lists[],
-              bool                          marks_oops_alive,
+  ReferenceProcessor&           _ref_processor;
+  // Indicates whether the phase could generate work that should be balanced across
+  // threads after execution.
+  bool                          _marks_oops_alive;
+  ReferenceProcessorPhaseTimes* _phase_times;
+
+  ProcessTask(ReferenceProcessor& ref_processor,
+              bool marks_oops_alive,
               ReferenceProcessorPhaseTimes* phase_times)
     : _ref_processor(ref_processor),
-      _refs_lists(refs_lists),
-      _phase_times(phase_times),
-      _marks_oops_alive(marks_oops_alive)
+      _marks_oops_alive(marks_oops_alive),
+      _phase_times(phase_times)
   { }
 
 public:
-  virtual void work(unsigned int work_id, BoolObjectClosure& is_alive,
+  virtual void work(uint worker_id,
+                    BoolObjectClosure& is_alive,
                     OopClosure& keep_alive,
                     VoidClosure& complete_gc) = 0;
 
-  // Returns true if a task marks some oops as alive.
-  bool marks_oops_alive() const
-  { return _marks_oops_alive; }
+  bool marks_oops_alive() const { return _marks_oops_alive; }
+};
 
-protected:
-  ReferenceProcessor&           _ref_processor;
-  DiscoveredList*               _refs_lists;
-  ReferenceProcessorPhaseTimes* _phase_times;
-  const bool                    _marks_oops_alive;
+// Temporarily change the number of workers based on the given reference count.
+// This ergonomically decided worker count will be used to activate worker threads.
+class RefProcMTDegreeAdjuster : public StackObj {
+  typedef ReferenceProcessor::RefProcPhases RefProcPhases;
+
+  ReferenceProcessor* _rp;
+  bool                _saved_mt_processing;
+  uint                _saved_num_queues;
+
+  // Calculate the thread count based on the total number of references.
+  uint ergo_proc_thread_count(size_t ref_count,
+                              uint max_threads,
+                              RefProcPhases phase) const;
+
+  bool use_max_threads(RefProcPhases phase) const;
+
+public:
+  RefProcMTDegreeAdjuster(ReferenceProcessor* rp,
+                          RefProcPhases phase,
+                          size_t ref_count);
+  ~RefProcMTDegreeAdjuster();
 };
 
 #endif // SHARE_VM_GC_SHARED_REFERENCEPROCESSOR_HPP
--- a/src/hotspot/share/gc/shared/referenceProcessor.inline.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/shared/referenceProcessor.inline.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -47,6 +47,11 @@
  return head() == NULL;
 }
 
+void DiscoveredList::clear() {
+  set_head(NULL);
+  set_length(0);
+}
+
 DiscoveredListIterator::DiscoveredListIterator(DiscoveredList&    refs_list,
                                                OopClosure*        keep_alive,
                                                BoolObjectClosure* is_alive):
@@ -57,10 +62,8 @@
 #ifdef ASSERT
   _first_seen(refs_list.head()),
 #endif
-#ifndef PRODUCT
   _processed(0),
   _removed(0),
-#endif
   _next_discovered(NULL),
   _keep_alive(keep_alive),
   _is_alive(is_alive) {
--- a/src/hotspot/share/gc/shared/referenceProcessorPhaseTimes.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/shared/referenceProcessorPhaseTimes.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -31,61 +31,96 @@
 #include "logging/logStream.hpp"
 #include "memory/allocation.inline.hpp"
 
-RefProcWorkerTimeTracker::RefProcWorkerTimeTracker(ReferenceProcessorPhaseTimes::RefProcPhaseNumbers number,
-                                                   ReferenceProcessorPhaseTimes* phase_times,
-                                                   uint worker_id) :
-  _worker_time(NULL), _start_time(os::elapsedTime()), _worker_id(worker_id) {
-  assert (phase_times != NULL, "Invariant");
+#define ASSERT_REF_TYPE(ref_type) assert((ref_type) >= REF_SOFT && (ref_type) <= REF_PHANTOM, \
+                                         "Invariant (%d)", (int)ref_type)
 
-  _worker_time = phase_times->worker_time_sec(phase_times->par_phase(number));
+#define ASSERT_PHASE(phase) assert((phase) >= ReferenceProcessor::RefPhase1 && \
+                                   (phase) < ReferenceProcessor::RefPhaseMax,  \
+                                   "Invariant (%d)", (int)phase);
+
+#define ASSERT_SUB_PHASE(phase) assert((phase) >= ReferenceProcessor::SoftRefSubPhase1 && \
+                                       (phase) < ReferenceProcessor::RefSubPhaseMax, \
+                                       "Invariant (%d)", (int)phase);
+
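+// Per-sub-phase work titles; Soft and Final references appear twice because each
+// is processed in two sub-phases (see ReferenceProcessor::RefProcSubPhases).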
+static const char* SubPhasesParWorkTitle[ReferenceProcessor::RefSubPhaseMax] = {
+       "SoftRef (ms):",
+       "SoftRef (ms):",
+       "WeakRef (ms):",
+       "FinalRef (ms):",
+       "FinalRef (ms):",
+       "PhantomRef (ms):"
+       };
+
+static const char* Phase2ParWorkTitle = "Total (ms):";
+
+static const char* SubPhasesSerWorkTitle[ReferenceProcessor::RefSubPhaseMax] = {
+       "SoftRef:",
+       "SoftRef:",
+       "WeakRef:",
+       "FinalRef:",
+       "FinalRef:",
+       "PhantomRef:"
+       };
+
+static const char* Phase2SerWorkTitle = "Total:";
+
+static const char* Indents[6] = {"", "  ", "    ", "      ", "        ", "          "};
+
+static const char* PhaseNames[ReferenceProcessor::RefPhaseMax] = {
+       "Reconsider SoftReferences",
+       "Notify Soft/WeakReferences",
+       "Notify and keep alive finalizable",
+       "Notify PhantomReferences"
+       };
+
+static const char* ReferenceTypeNames[REF_PHANTOM + 1] = {
+       "None", "Other", "SoftReference", "WeakReference", "FinalReference", "PhantomReference"
+       };
+
+STATIC_ASSERT((REF_PHANTOM + 1) == ARRAY_SIZE(ReferenceTypeNames));
+
+static const char* phase_enum_2_phase_string(ReferenceProcessor::RefProcPhases phase) {
+  ASSERT_PHASE(phase);
+  return PhaseNames[phase];
 }
 
-RefProcWorkerTimeTracker::RefProcWorkerTimeTracker(ReferenceProcessorPhaseTimes::RefProcParPhases phase,
-                                                   ReferenceProcessorPhaseTimes* phase_times,
-                                                   uint worker_id) :
-  _worker_time(NULL), _start_time(os::elapsedTime()), _worker_id(worker_id) {
-  assert (phase_times != NULL, "Invariant");
+static const char* ref_type_2_string(ReferenceType ref_type) {
+  ASSERT_REF_TYPE(ref_type);
+  return ReferenceTypeNames[ref_type];
+}
 
-  _worker_time = phase_times->worker_time_sec(phase);
+RefProcWorkerTimeTracker::RefProcWorkerTimeTracker(WorkerDataArray<double>* worker_time, uint worker_id) :
+  _worker_time(worker_time), _start_time(os::elapsedTime()), _worker_id(worker_id) {
+  assert(worker_time != NULL, "Invariant");
 }
 
 RefProcWorkerTimeTracker::~RefProcWorkerTimeTracker() {
-  _worker_time->set(_worker_id, os::elapsedTime() - _start_time);
+  double result = os::elapsedTime() - _start_time;
+  _worker_time->set(_worker_id, result);
+}
+
+RefProcSubPhasesWorkerTimeTracker::RefProcSubPhasesWorkerTimeTracker(ReferenceProcessor::RefProcSubPhases phase,
+                                                                     ReferenceProcessorPhaseTimes* phase_times,
+                                                                     uint worker_id) :
+  RefProcWorkerTimeTracker(phase_times->sub_phase_worker_time_sec(phase), worker_id) {
+}
+
+RefProcSubPhasesWorkerTimeTracker::~RefProcSubPhasesWorkerTimeTracker() {
 }
 
 RefProcPhaseTimeBaseTracker::RefProcPhaseTimeBaseTracker(const char* title,
+                                                         ReferenceProcessor::RefProcPhases phase_number,
                                                          ReferenceProcessorPhaseTimes* phase_times) :
-  _title(title), _phase_times(phase_times), _start_ticks(), _end_ticks() {
+  _phase_times(phase_times), _start_ticks(), _end_ticks(), _phase_number(phase_number) {
   assert(_phase_times != NULL, "Invariant");
 
   _start_ticks.stamp();
   if (_phase_times->gc_timer() != NULL) {
-    _phase_times->gc_timer()->register_gc_phase_start(_title, _start_ticks);
+    _phase_times->gc_timer()->register_gc_phase_start(title, _start_ticks);
   }
 }
 
-static const char* phase_enum_2_phase_string(ReferenceProcessorPhaseTimes::RefProcParPhases phase) {
-  switch(phase) {
-    case ReferenceProcessorPhaseTimes::SoftRefPhase1:
-      return "Phase1";
-    case ReferenceProcessorPhaseTimes::SoftRefPhase2:
-    case ReferenceProcessorPhaseTimes::WeakRefPhase2:
-    case ReferenceProcessorPhaseTimes::FinalRefPhase2:
-    case ReferenceProcessorPhaseTimes::PhantomRefPhase2:
-      return "Phase2";
-    case ReferenceProcessorPhaseTimes::SoftRefPhase3:
-    case ReferenceProcessorPhaseTimes::WeakRefPhase3:
-    case ReferenceProcessorPhaseTimes::FinalRefPhase3:
-    case ReferenceProcessorPhaseTimes::PhantomRefPhase3:
-      return "Phase3";
-    default:
-      ShouldNotReachHere();
-      return NULL;
-  }
-}
-
-static const char* Indents[6] = {"", "  ", "    ", "      ", "        ", "          "};
-
 Ticks RefProcPhaseTimeBaseTracker::end_ticks() {
   // If ASSERT is defined, the default value of Ticks will be -2.
   if (_end_ticks.value() <= 0) {
@@ -108,140 +143,83 @@
   }
 }
 
-RefProcBalanceQueuesTimeTracker::RefProcBalanceQueuesTimeTracker(ReferenceProcessorPhaseTimes* phase_times) :
-  RefProcPhaseTimeBaseTracker("Balance queues", phase_times) {}
+RefProcBalanceQueuesTimeTracker::RefProcBalanceQueuesTimeTracker(ReferenceProcessor::RefProcPhases phase_number,
+                                                                 ReferenceProcessorPhaseTimes* phase_times) :
+  RefProcPhaseTimeBaseTracker("Balance queues", phase_number, phase_times) {}
 
 RefProcBalanceQueuesTimeTracker::~RefProcBalanceQueuesTimeTracker() {
   double elapsed = elapsed_time();
-  phase_times()->set_balance_queues_time_ms(phase_times()->processing_ref_type(), elapsed);
+  phase_times()->set_balance_queues_time_ms(_phase_number, elapsed);
 }
 
-#define ASSERT_REF_TYPE(ref_type) assert(ref_type >= REF_SOFT && ref_type <= REF_PHANTOM, \
-                                         "Invariant (%d)", (int)ref_type)
-
-#define ASSERT_PHASE_NUMBER(phase_number) assert(phase_number >= ReferenceProcessorPhaseTimes::RefPhase1 && \
-                                                 phase_number <= ReferenceProcessorPhaseTimes::RefPhaseMax, \
-                                                 "Invariant (%d)", phase_number);
-
-static const char* phase_number_2_string(ReferenceProcessorPhaseTimes::RefProcPhaseNumbers phase_number) {
-  ASSERT_PHASE_NUMBER(phase_number);
-
-  switch(phase_number) {
-    case ReferenceProcessorPhaseTimes::RefPhase1:
-      return "Phase1";
-    case ReferenceProcessorPhaseTimes::RefPhase2:
-      return "Phase2";
-    case ReferenceProcessorPhaseTimes::RefPhase3:
-      return "Phase3";
-    default:
-      ShouldNotReachHere();
-      return NULL;
-  }
+RefProcPhaseTimeTracker::RefProcPhaseTimeTracker(ReferenceProcessor::RefProcPhases phase_number,
+                                                 ReferenceProcessorPhaseTimes* phase_times) :
+  RefProcPhaseTimeBaseTracker(phase_enum_2_phase_string(phase_number), phase_number, phase_times) {
 }
 
-RefProcParPhaseTimeTracker::RefProcParPhaseTimeTracker(ReferenceProcessorPhaseTimes::RefProcPhaseNumbers phase_number,
-                                                       ReferenceProcessorPhaseTimes* phase_times) :
-  _phase_number(phase_number),
-  RefProcPhaseTimeBaseTracker(phase_number_2_string(phase_number), phase_times) {}
-
-RefProcParPhaseTimeTracker::~RefProcParPhaseTimeTracker() {
+RefProcPhaseTimeTracker::~RefProcPhaseTimeTracker() {
   double elapsed = elapsed_time();
-  ReferenceProcessorPhaseTimes::RefProcParPhases phase = phase_times()->par_phase(_phase_number);
-  phase_times()->set_par_phase_time_ms(phase, elapsed);
+  phase_times()->set_phase_time_ms(_phase_number, elapsed);
 }
 
-static const char* ref_type_2_string(ReferenceType ref_type) {
-  ASSERT_REF_TYPE(ref_type);
-
-  switch(ref_type) {
-    case REF_SOFT:
-      return "SoftReference";
-    case REF_WEAK:
-      return "WeakReference";
-    case REF_FINAL:
-      return "FinalReference";
-    case REF_PHANTOM:
-      return "PhantomReference";
-    default:
-      ShouldNotReachHere();
-      return NULL;
-  }
+RefProcTotalPhaseTimesTracker::RefProcTotalPhaseTimesTracker(ReferenceProcessor::RefProcPhases phase_number,
+                                                             ReferenceProcessorPhaseTimes* phase_times,
+                                                             ReferenceProcessor* rp) :
+  RefProcPhaseTimeBaseTracker(phase_enum_2_phase_string(phase_number), phase_number, phase_times), _rp(rp) {
 }
 
-RefProcPhaseTimesTracker::RefProcPhaseTimesTracker(ReferenceType ref_type,
-                                                   ReferenceProcessorPhaseTimes* phase_times,
-                                                   ReferenceProcessor* rp) :
-  _rp(rp), RefProcPhaseTimeBaseTracker(ref_type_2_string(ref_type), phase_times) {
-  phase_times->set_processing_ref_type(ref_type);
-
-  size_t discovered = rp->total_reference_count(ref_type);
-  phase_times->set_ref_discovered(ref_type, discovered);
-}
-
-RefProcPhaseTimesTracker::~RefProcPhaseTimesTracker() {
+RefProcTotalPhaseTimesTracker::~RefProcTotalPhaseTimesTracker() {
   double elapsed = elapsed_time();
-  ReferenceProcessorPhaseTimes* times = phase_times();
-  ReferenceType ref_type = times->processing_ref_type();
-  times->set_ref_proc_time_ms(ref_type, elapsed);
-
-  size_t after_count = _rp->total_reference_count(ref_type);
-  size_t discovered = times->ref_discovered(ref_type);
-  times->set_ref_cleared(ref_type, discovered - after_count);
+  phase_times()->set_phase_time_ms(_phase_number, elapsed);
 }
 
 ReferenceProcessorPhaseTimes::ReferenceProcessorPhaseTimes(GCTimer* gc_timer, uint max_gc_threads) :
   _gc_timer(gc_timer), _processing_is_mt(false) {
 
-  for (int i = 0; i < RefParPhaseMax; i++) {
-    _worker_time_sec[i] = new WorkerDataArray<double>(max_gc_threads, "Process lists (ms)");
-    _par_phase_time_ms[i] = uninitialized();
+  for (uint i = 0; i < ReferenceProcessor::RefSubPhaseMax; i++) {
+    _sub_phases_worker_time_sec[i] = new WorkerDataArray<double>(max_gc_threads, SubPhasesParWorkTitle[i]);
   }
+  _phase2_worker_time_sec = new WorkerDataArray<double>(max_gc_threads, Phase2ParWorkTitle);
 
-  for (int i = 0; i < number_of_subclasses_of_ref; i++) {
-    _ref_proc_time_ms[i] = uninitialized();
-    _balance_queues_time_ms[i] = uninitialized();
-    _ref_cleared[i] = 0;
-    _ref_discovered[i] = 0;
-    _ref_enqueued[i] = 0;
-  }
+  reset();
 }
 
 inline int ref_type_2_index(ReferenceType ref_type) {
   return ref_type - REF_SOFT;
 }
 
-#define ASSERT_PAR_PHASE(phase) assert(phase >= ReferenceProcessorPhaseTimes::SoftRefPhase1 && \
-                                       phase < ReferenceProcessorPhaseTimes::RefParPhaseMax, \
-                                       "Invariant (%d)", (int)phase);
-
-WorkerDataArray<double>* ReferenceProcessorPhaseTimes::worker_time_sec(RefProcParPhases par_phase) const {
-  ASSERT_PAR_PHASE(par_phase);
-  return _worker_time_sec[par_phase];
+WorkerDataArray<double>* ReferenceProcessorPhaseTimes::sub_phase_worker_time_sec(ReferenceProcessor::RefProcSubPhases sub_phase) const {
+  ASSERT_SUB_PHASE(sub_phase);
+  return _sub_phases_worker_time_sec[sub_phase];
 }
 
-double ReferenceProcessorPhaseTimes::par_phase_time_ms(RefProcParPhases par_phase) const {
-  ASSERT_PAR_PHASE(par_phase);
-  return _par_phase_time_ms[par_phase];
+double ReferenceProcessorPhaseTimes::phase_time_ms(ReferenceProcessor::RefProcPhases phase) const {
+  ASSERT_PHASE(phase);
+  return _phases_time_ms[phase];
 }
 
-void ReferenceProcessorPhaseTimes::set_par_phase_time_ms(RefProcParPhases par_phase,
-                                                         double par_phase_time_ms) {
-  ASSERT_PAR_PHASE(par_phase);
-  _par_phase_time_ms[par_phase] = par_phase_time_ms;
+void ReferenceProcessorPhaseTimes::set_phase_time_ms(ReferenceProcessor::RefProcPhases phase,
+                                                     double phase_time_ms) {
+  ASSERT_PHASE(phase);
+  _phases_time_ms[phase] = phase_time_ms;
 }
 
 void ReferenceProcessorPhaseTimes::reset() {
-  for (int i = 0; i < RefParPhaseMax; i++) {
-    _worker_time_sec[i]->reset();
-    _par_phase_time_ms[i] = uninitialized();
+  for (int i = 0; i < ReferenceProcessor::RefSubPhaseMax; i++) {
+    _sub_phases_worker_time_sec[i]->reset();
+    _sub_phases_total_time_ms[i] = uninitialized();
   }
 
+  for (int i = 0; i < ReferenceProcessor::RefPhaseMax; i++) {
+    _phases_time_ms[i] = uninitialized();
+    _balance_queues_time_ms[i] = uninitialized();
+  }
+
+  _phase2_worker_time_sec->reset();
+
   for (int i = 0; i < number_of_subclasses_of_ref; i++) {
-    _ref_proc_time_ms[i] = uninitialized();
-    _balance_queues_time_ms[i] = uninitialized();
     _ref_cleared[i] = 0;
     _ref_discovered[i] = 0;
-    _ref_enqueued[i] = 0;
   }
 
   _total_time_ms = uninitialized();
@@ -250,35 +228,26 @@
 }
 
 ReferenceProcessorPhaseTimes::~ReferenceProcessorPhaseTimes() {
-  for (int i = 0; i < RefParPhaseMax; i++) {
-    delete _worker_time_sec[i];
+  for (int i = 0; i < ReferenceProcessor::RefSubPhaseMax; i++) {
+    delete _sub_phases_worker_time_sec[i];
   }
+  delete _phase2_worker_time_sec;
 }
 
-double ReferenceProcessorPhaseTimes::ref_proc_time_ms(ReferenceType ref_type) const {
-  ASSERT_REF_TYPE(ref_type);
-  return _ref_proc_time_ms[ref_type_2_index(ref_type)];
+double ReferenceProcessorPhaseTimes::sub_phase_total_time_ms(ReferenceProcessor::RefProcSubPhases sub_phase) const {
+  ASSERT_SUB_PHASE(sub_phase);
+  return _sub_phases_total_time_ms[sub_phase];
 }
 
-void ReferenceProcessorPhaseTimes::set_ref_proc_time_ms(ReferenceType ref_type,
-                                                        double ref_proc_time_ms) {
-  ASSERT_REF_TYPE(ref_type);
-  _ref_proc_time_ms[ref_type_2_index(ref_type)] = ref_proc_time_ms;
+void ReferenceProcessorPhaseTimes::set_sub_phase_total_phase_time_ms(ReferenceProcessor::RefProcSubPhases sub_phase,
+                                                                     double time_ms) {
+  ASSERT_SUB_PHASE(sub_phase);
+  _sub_phases_total_time_ms[sub_phase] = time_ms;
 }
 
-size_t ReferenceProcessorPhaseTimes::ref_cleared(ReferenceType ref_type) const {
+void ReferenceProcessorPhaseTimes::add_ref_cleared(ReferenceType ref_type, size_t count) {
   ASSERT_REF_TYPE(ref_type);
-  return _ref_cleared[ref_type_2_index(ref_type)];
-}
-
-void ReferenceProcessorPhaseTimes::set_ref_cleared(ReferenceType ref_type, size_t count) {
-  ASSERT_REF_TYPE(ref_type);
-  _ref_cleared[ref_type_2_index(ref_type)] = count;
-}
-
-size_t ReferenceProcessorPhaseTimes::ref_discovered(ReferenceType ref_type) const {
-  ASSERT_REF_TYPE(ref_type);
-  return _ref_discovered[ref_type_2_index(ref_type)];
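+  // Workers from several sub-phases may report cleared references for the same
+  // type concurrently, so update the counter atomically.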
+  Atomic::add(count, &_ref_cleared[ref_type_2_index(ref_type)]);
 }
 
 void ReferenceProcessorPhaseTimes::set_ref_discovered(ReferenceType ref_type, size_t count) {
@@ -286,70 +255,14 @@
   _ref_discovered[ref_type_2_index(ref_type)] = count;
 }
 
-size_t ReferenceProcessorPhaseTimes::ref_enqueued(ReferenceType ref_type) const {
-  ASSERT_REF_TYPE(ref_type);
-  return _ref_enqueued[ref_type_2_index(ref_type)];
+double ReferenceProcessorPhaseTimes::balance_queues_time_ms(ReferenceProcessor::RefProcPhases phase) const {
+  ASSERT_PHASE(phase);
+  return _balance_queues_time_ms[phase];
 }
 
-void ReferenceProcessorPhaseTimes::set_ref_enqueued(ReferenceType ref_type, size_t count) {
-  ASSERT_REF_TYPE(ref_type);
-  _ref_enqueued[ref_type_2_index(ref_type)] = count;
-}
-
-double ReferenceProcessorPhaseTimes::balance_queues_time_ms(ReferenceType ref_type) const {
-  ASSERT_REF_TYPE(ref_type);
-  return _balance_queues_time_ms[ref_type_2_index(ref_type)];
-}
-
-void ReferenceProcessorPhaseTimes::set_balance_queues_time_ms(ReferenceType ref_type, double time_ms) {
-  ASSERT_REF_TYPE(ref_type);
-  _balance_queues_time_ms[ref_type_2_index(ref_type)] = time_ms;
-}
-
-ReferenceProcessorPhaseTimes::RefProcParPhases
-ReferenceProcessorPhaseTimes::par_phase(RefProcPhaseNumbers phase_number) const {
-  ASSERT_PHASE_NUMBER(phase_number);
-  ASSERT_REF_TYPE(_processing_ref_type);
-
-  int result = SoftRefPhase1;
-
-  switch(_processing_ref_type) {
-    case REF_SOFT:
-      result = (int)SoftRefPhase1;
-      result += phase_number;
-
-      assert((RefProcParPhases)result >= SoftRefPhase1 &&
-             (RefProcParPhases)result <= SoftRefPhase3,
-             "Invariant (%d)", result);
-      break;
-    case REF_WEAK:
-      result = (int)WeakRefPhase2;
-      result += (phase_number - 1);
-      assert((RefProcParPhases)result >= WeakRefPhase2 &&
-             (RefProcParPhases)result <= WeakRefPhase3,
-             "Invariant (%d)", result);
-      break;
-    case REF_FINAL:
-      result = (int)FinalRefPhase2;
-      result += (phase_number - 1);
-      assert((RefProcParPhases)result >= FinalRefPhase2 &&
-             (RefProcParPhases)result <= FinalRefPhase3,
-             "Invariant (%d)", result);
-      break;
-    case REF_PHANTOM:
-      result = (int)PhantomRefPhase2;
-      result += (phase_number - 1);
-      assert((RefProcParPhases)result >= PhantomRefPhase2 &&
-             (RefProcParPhases)result <= PhantomRefPhase3,
-             "Invariant (%d)", result);
-      break;
-    default:
-      ShouldNotReachHere();
-  }
-
-  ASSERT_PAR_PHASE(result);
-
-  return (RefProcParPhases)result;
+void ReferenceProcessorPhaseTimes::set_balance_queues_time_ms(ReferenceProcessor::RefProcPhases phase, double time_ms) {
+  ASSERT_PHASE(phase);
+  _balance_queues_time_ms[phase] = time_ms;
 }
 
 #define TIME_FORMAT "%.1lfms"
@@ -366,10 +279,16 @@
   }
 
   uint next_indent = base_indent + 1;
+  print_phase(ReferenceProcessor::RefPhase1, next_indent);
+  print_phase(ReferenceProcessor::RefPhase2, next_indent);
+  print_phase(ReferenceProcessor::RefPhase3, next_indent);
+  print_phase(ReferenceProcessor::RefPhase4, next_indent);
+
   print_reference(REF_SOFT, next_indent);
   print_reference(REF_WEAK, next_indent);
   print_reference(REF_FINAL, next_indent);
   print_reference(REF_PHANTOM, next_indent);
+
 }
 
 void ReferenceProcessorPhaseTimes::print_reference(ReferenceType ref_type, uint base_indent) const {
@@ -377,73 +296,99 @@
 
   if (lt.is_enabled()) {
     LogStream ls(lt);
-    uint next_indent = base_indent + 1;
     ResourceMark rm;
 
-    ls.print_cr("%s%s: " TIME_FORMAT,
-                Indents[base_indent], ref_type_2_string(ref_type), ref_proc_time_ms(ref_type));
+    ls.print_cr("%s%s:", Indents[base_indent], ref_type_2_string(ref_type));
 
-    double balance_time = balance_queues_time_ms(ref_type);
-    if (balance_time != uninitialized()) {
-      ls.print_cr("%s%s " TIME_FORMAT, Indents[next_indent], "Balance queues:", balance_time);
+    uint const next_indent = base_indent + 1;
+    int const ref_type_index = ref_type_2_index(ref_type);
+
+    ls.print_cr("%sDiscovered: " SIZE_FORMAT, Indents[next_indent], _ref_discovered[ref_type_index]);
+    ls.print_cr("%sCleared: " SIZE_FORMAT, Indents[next_indent], _ref_cleared[ref_type_index]);
+  }
+}
+
+void ReferenceProcessorPhaseTimes::print_phase(ReferenceProcessor::RefProcPhases phase, uint indent) const {
+  double phase_time = phase_time_ms(phase);
+
+  if (phase_time == uninitialized()) {
+    return;
+  }
+
+  LogTarget(Debug, gc, phases, ref) lt;
+  LogStream ls(lt);
+
+  ls.print_cr("%s%s%s " TIME_FORMAT,
+              Indents[indent],
+              phase_enum_2_phase_string(phase),
+              indent == 0 ? "" : ":", /* 0 indent logs don't need colon. */
+              phase_time);
+
+  LogTarget(Debug, gc, phases, ref) lt2;
+  if (lt2.is_enabled()) {
+    LogStream ls(lt2);
+
+    if (_processing_is_mt) {
+      print_balance_time(&ls, phase, indent + 1);
     }
 
-    switch(ref_type) {
-      case REF_SOFT:
-        print_phase(SoftRefPhase1, next_indent);
-        print_phase(SoftRefPhase2, next_indent);
-        print_phase(SoftRefPhase3, next_indent);
+    switch (phase) {
+      case ReferenceProcessor::RefPhase1:
+        print_sub_phase(&ls, ReferenceProcessor::SoftRefSubPhase1, indent + 1);
         break;
-
-      case REF_WEAK:
-        print_phase(WeakRefPhase2, next_indent);
-        print_phase(WeakRefPhase3, next_indent);
+      case ReferenceProcessor::RefPhase2:
+        print_sub_phase(&ls, ReferenceProcessor::SoftRefSubPhase2, indent + 1);
+        print_sub_phase(&ls, ReferenceProcessor::WeakRefSubPhase2, indent + 1);
+        print_sub_phase(&ls, ReferenceProcessor::FinalRefSubPhase2, indent + 1);
         break;
-
-      case REF_FINAL:
-        print_phase(FinalRefPhase2, next_indent);
-        print_phase(FinalRefPhase3, next_indent);
+      case ReferenceProcessor::RefPhase3:
+        print_sub_phase(&ls, ReferenceProcessor::FinalRefSubPhase3, indent + 1);
         break;
-
-      case REF_PHANTOM:
-        print_phase(PhantomRefPhase2, next_indent);
-        print_phase(PhantomRefPhase3, next_indent);
+      case ReferenceProcessor::RefPhase4:
+        print_sub_phase(&ls, ReferenceProcessor::PhantomRefSubPhase4, indent + 1);
         break;
-
       default:
         ShouldNotReachHere();
     }
-
-    ls.print_cr("%s%s " SIZE_FORMAT, Indents[next_indent], "Discovered:", ref_discovered(ref_type));
-    ls.print_cr("%s%s " SIZE_FORMAT, Indents[next_indent], "Cleared:", ref_cleared(ref_type));
+    if (phase == ReferenceProcessor::RefPhase2) {
+      print_worker_time(&ls, _phase2_worker_time_sec, Phase2SerWorkTitle, indent + 1);
+    }
   }
 }
 
-void ReferenceProcessorPhaseTimes::print_phase(RefProcParPhases phase, uint indent) const {
-  double phase_time = par_phase_time_ms(phase);
-  if (phase_time != uninitialized()) {
-    LogTarget(Debug, gc, phases, ref) lt;
+void ReferenceProcessorPhaseTimes::print_balance_time(LogStream* ls, ReferenceProcessor::RefProcPhases phase, uint indent) const {
+  double balance_time = balance_queues_time_ms(phase);
+  if (balance_time != uninitialized()) {
+    ls->print_cr("%s%s " TIME_FORMAT, Indents[indent], "Balance queues:", balance_time);
+  }
+}
 
-    LogStream ls(lt);
+void ReferenceProcessorPhaseTimes::print_sub_phase(LogStream* ls, ReferenceProcessor::RefProcSubPhases sub_phase, uint indent) const {
+  print_worker_time(ls, _sub_phases_worker_time_sec[sub_phase], SubPhasesSerWorkTitle[sub_phase], indent);
+}
 
-    ls.print_cr("%s%s%s " TIME_FORMAT,
-                Indents[indent],
-                phase_enum_2_phase_string(phase),
-                indent == 0 ? "" : ":", /* 0 indent logs don't need colon. */
-                phase_time);
-
-    LogTarget(Trace, gc, phases, ref) lt2;
-    if (_processing_is_mt && lt2.is_enabled()) {
-      LogStream ls(lt2);
-
-      ls.print("%s", Indents[indent + 1]);
-      // worker_time_sec is recorded in seconds but it will be printed in milliseconds.
-      worker_time_sec(phase)->print_summary_on(&ls, true);
+void ReferenceProcessorPhaseTimes::print_worker_time(LogStream* ls, WorkerDataArray<double>* worker_time, const char* ser_title, uint indent) const {
+  ls->print("%s", Indents[indent]);
+  if (_processing_is_mt) {
+    worker_time->print_summary_on(ls, true);
+    LogTarget(Trace, gc, phases, task) lt;
+    if (lt.is_enabled()) {
+      LogStream ls2(lt);
+      ls2.print("%s", Indents[indent]);
+      worker_time->print_details_on(&ls2);
+    }
+  } else {
+    if (worker_time->get(0) != uninitialized()) {
+      ls->print_cr("%s " TIME_FORMAT,
+                   ser_title,
+                   worker_time->get(0) * MILLIUNITS);
+    } else {
+      ls->print_cr("%s skipped", ser_title);
     }
   }
 }
 
 #undef ASSERT_REF_TYPE
-#undef ASSERT_PHASE_NUMBER
-#undef ASSERT_PAR_PHASE
+#undef ASSERT_SUB_PHASE
+#undef ASSERT_PHASE
 #undef TIME_FORMAT
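
The switch above from set_ref_cleared() to add_ref_cleared() with Atomic::add suggests that cleared-reference counts are now accumulated by multiple worker threads concurrently rather than set once per phase. A minimal standalone sketch of that accumulation pattern, with std::atomic standing in for HotSpot's Atomic::add (thread and iteration counts are arbitrary):

    #include <atomic>
    #include <cstdio>
    #include <thread>
    #include <vector>

    int main() {
      std::atomic<size_t> ref_cleared{0}; // plays the role of one _ref_cleared slot

      std::vector<std::thread> workers;
      for (int w = 0; w < 4; w++) {
        workers.emplace_back([&ref_cleared]() {
          for (int i = 0; i < 1000; i++) {
            // Same idea as Atomic::add(count, &_ref_cleared[index]).
            ref_cleared.fetch_add(1, std::memory_order_relaxed);
          }
        });
      }
      for (std::thread& t : workers) {
        t.join();
      }
      std::printf("cleared: %zu\n", ref_cleared.load()); // always 4000
      return 0;
    }
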
--- a/src/hotspot/share/gc/shared/referenceProcessorPhaseTimes.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/shared/referenceProcessorPhaseTimes.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -25,108 +25,76 @@
 #ifndef SHARE_VM_GC_SHARED_REFERENCEPROCESSORPHASETIMES_HPP
 #define SHARE_VM_GC_SHARED_REFERENCEPROCESSORPHASETIMES_HPP
 
+#include "gc/shared/referenceProcessor.hpp"
 #include "gc/shared/referenceProcessorStats.hpp"
 #include "gc/shared/workerDataArray.hpp"
+#include "memory/allocation.hpp"
 #include "memory/referenceType.hpp"
 #include "utilities/ticks.hpp"
 
 class DiscoveredList;
 class GCTimer;
+class LogStream;
 
 class ReferenceProcessorPhaseTimes : public CHeapObj<mtGC> {
-public:
-  // Detailed phases that has parallel work.
-  enum RefProcParPhases {
-    SoftRefPhase1,
-    SoftRefPhase2,
-    SoftRefPhase3,
-    WeakRefPhase2,
-    WeakRefPhase3,
-    FinalRefPhase2,
-    FinalRefPhase3,
-    PhantomRefPhase2,
-    PhantomRefPhase3,
-    RefParPhaseMax
-  };
-
-  // Sub-phases that are used when processing each j.l.Reference types.
-  // Only SoftReference has RefPhase1.
-  enum RefProcPhaseNumbers {
-    RefPhase1,
-    RefPhase2,
-    RefPhase3,
-    RefPhaseMax
-  };
-
-private:
   static const int number_of_subclasses_of_ref = REF_PHANTOM - REF_OTHER; // 5 - 1 = 4
 
-  // Records per thread information of each phase.
-  WorkerDataArray<double>* _worker_time_sec[RefParPhaseMax];
-  // Records elapsed time of each phase.
-  double                   _par_phase_time_ms[RefParPhaseMax];
+  // Records per-thread time information for each sub phase.
+  WorkerDataArray<double>* _sub_phases_worker_time_sec[ReferenceProcessor::RefSubPhaseMax];
+  // Total time of each sub phase.
+  double                   _sub_phases_total_time_ms[ReferenceProcessor::RefSubPhaseMax];
 
-  // Total spent time for references.
-  // e.g. _ref_proc_time_ms[0] = _par_phase_time_ms[SoftRefPhase1] +
-  //                             _par_phase_time_ms[SoftRefPhase2] +
-  //                             _par_phase_time_ms[SoftRefPhase3] + extra time.
-  double                   _ref_proc_time_ms[number_of_subclasses_of_ref];
+  // Records total elapsed time for each phase.
+  double                   _phases_time_ms[ReferenceProcessor::RefPhaseMax];
+  // Records total queue balancing time for each phase.
+  double                   _balance_queues_time_ms[ReferenceProcessor::RefPhaseMax];
 
+  WorkerDataArray<double>* _phase2_worker_time_sec;
+
+  // Total time spent on reference processing.
   double                   _total_time_ms;
 
   size_t                   _ref_cleared[number_of_subclasses_of_ref];
   size_t                   _ref_discovered[number_of_subclasses_of_ref];
-  size_t                   _ref_enqueued[number_of_subclasses_of_ref];
-  double                   _balance_queues_time_ms[number_of_subclasses_of_ref];
 
   bool                     _processing_is_mt;
 
-  // Currently processing reference type.
-  ReferenceType            _processing_ref_type;
-
   GCTimer*                 _gc_timer;
 
-  double par_phase_time_ms(RefProcParPhases phase) const;
-  double ref_proc_time_ms(ReferenceType ref_type) const;
+  double phase_time_ms(ReferenceProcessor::RefProcPhases phase) const;
+  double sub_phase_total_time_ms(ReferenceProcessor::RefProcSubPhases sub_phase) const;
 
   double total_time_ms() const { return _total_time_ms; }
 
-  size_t ref_cleared(ReferenceType ref_type) const;
-  size_t ref_enqueued(ReferenceType ref_type) const;
-
-  double balance_queues_time_ms(ReferenceType ref_type) const;
+  double balance_queues_time_ms(ReferenceProcessor::RefProcPhases phase) const;
 
   void print_reference(ReferenceType ref_type, uint base_indent) const;
-  void print_phase(RefProcParPhases phase, uint indent) const;
 
+  void print_phase(ReferenceProcessor::RefProcPhases phase, uint indent) const;
+  void print_balance_time(LogStream* ls, ReferenceProcessor::RefProcPhases phase, uint indent) const;
+  void print_sub_phase(LogStream* ls, ReferenceProcessor::RefProcSubPhases sub_phase, uint indent) const;
+  void print_worker_time(LogStream* ls, WorkerDataArray<double>* worker_time, const char* ser_title, uint indent) const;
+
+  static double uninitialized() { return -1.0; }
 public:
   ReferenceProcessorPhaseTimes(GCTimer* gc_timer, uint max_gc_threads);
   ~ReferenceProcessorPhaseTimes();
 
-  static double uninitialized() { return -1.0; }
+  WorkerDataArray<double>* phase2_worker_time_sec() const { return _phase2_worker_time_sec; }
+  WorkerDataArray<double>* sub_phase_worker_time_sec(ReferenceProcessor::RefProcSubPhases phase) const;
+  void set_phase_time_ms(ReferenceProcessor::RefProcPhases phase, double par_phase_time_ms);
 
-  WorkerDataArray<double>* worker_time_sec(RefProcParPhases phase) const;
-  void set_par_phase_time_ms(RefProcParPhases phase, double par_phase_time_ms);
-
-  void set_ref_proc_time_ms(ReferenceType ref_type, double ref_proc_time_ms);
+  void set_sub_phase_total_phase_time_ms(ReferenceProcessor::RefProcSubPhases sub_phase, double ref_proc_time_ms);
 
   void set_total_time_ms(double total_time_ms) { _total_time_ms = total_time_ms; }
 
-  void set_ref_cleared(ReferenceType ref_type, size_t count);
-  size_t ref_discovered(ReferenceType ref_type) const;
+  void add_ref_cleared(ReferenceType ref_type, size_t count);
   void set_ref_discovered(ReferenceType ref_type, size_t count);
-  void set_ref_enqueued(ReferenceType ref_type, size_t count);
 
-  void set_balance_queues_time_ms(ReferenceType ref_type, double time_ms);
+  void set_balance_queues_time_ms(ReferenceProcessor::RefProcPhases phase, double time_ms);
 
   void set_processing_is_mt(bool processing_is_mt) { _processing_is_mt = processing_is_mt; }
 
-  ReferenceType processing_ref_type() const { return _processing_ref_type; }
-  void set_processing_ref_type(ReferenceType processing_ref_type) { _processing_ref_type = processing_ref_type; }
-
-  // Returns RefProcParPhases calculated from phase_number and _processing_ref_type.
-  RefProcParPhases par_phase(RefProcPhaseNumbers phase_number) const;
-
   GCTimer* gc_timer() const { return _gc_timer; }
 
   // Reset all fields. If not reset at next cycle, an assertion will fail.
@@ -135,38 +103,40 @@
   void print_all_references(uint base_indent = 0, bool print_total = true) const;
 };
 
-// Updates working time of each worker thread.
-class RefProcWorkerTimeTracker : public StackObj {
+class RefProcWorkerTimeTracker : public CHeapObj<mtGC> {
 protected:
   WorkerDataArray<double>* _worker_time;
   double                   _start_time;
   uint                     _worker_id;
+public:
+  RefProcWorkerTimeTracker(WorkerDataArray<double>* worker_time, uint worker_id);
+  virtual ~RefProcWorkerTimeTracker();
+};
 
+// Updates working time of each worker thread for a given sub phase.
+class RefProcSubPhasesWorkerTimeTracker : public RefProcWorkerTimeTracker {
 public:
-  RefProcWorkerTimeTracker(ReferenceProcessorPhaseTimes::RefProcPhaseNumbers number,
-                           ReferenceProcessorPhaseTimes* phase_times,
-                           uint worker_id);
-  RefProcWorkerTimeTracker(ReferenceProcessorPhaseTimes::RefProcParPhases phase,
-                           ReferenceProcessorPhaseTimes* phase_times,
-                           uint worker_id);
-  ~RefProcWorkerTimeTracker();
+  RefProcSubPhasesWorkerTimeTracker(ReferenceProcessor::RefProcSubPhases phase,
+                                    ReferenceProcessorPhaseTimes* phase_times,
+                                    uint worker_id);
+  ~RefProcSubPhasesWorkerTimeTracker();
 };
 
 class RefProcPhaseTimeBaseTracker : public StackObj {
 protected:
-  const char*                   _title;
   ReferenceProcessorPhaseTimes* _phase_times;
   Ticks                         _start_ticks;
   Ticks                         _end_ticks;
 
+  ReferenceProcessor::RefProcPhases _phase_number;
+
   Ticks end_ticks();
   double elapsed_time();
   ReferenceProcessorPhaseTimes* phase_times() const { return _phase_times; }
-  // Print phase elapsed time with each worker information if MT processed.
-  void print_phase(ReferenceProcessorPhaseTimes::RefProcParPhases phase, uint indent);
 
 public:
   RefProcPhaseTimeBaseTracker(const char* title,
+                              ReferenceProcessor::RefProcPhases phase_number,
                               ReferenceProcessorPhaseTimes* phase_times);
   ~RefProcPhaseTimeBaseTracker();
 };
@@ -175,30 +145,27 @@
 // save it into GCTimer.
 class RefProcBalanceQueuesTimeTracker : public RefProcPhaseTimeBaseTracker {
 public:
-  RefProcBalanceQueuesTimeTracker(ReferenceProcessorPhaseTimes* phase_times);
+  RefProcBalanceQueuesTimeTracker(ReferenceProcessor::RefProcPhases phase_number,
+                                  ReferenceProcessorPhaseTimes* phase_times);
   ~RefProcBalanceQueuesTimeTracker();
 };
 
 // Updates phase time at ReferenceProcessorPhaseTimes and save it into GCTimer.
-class RefProcParPhaseTimeTracker : public RefProcPhaseTimeBaseTracker {
-  ReferenceProcessorPhaseTimes::RefProcPhaseNumbers _phase_number;
-
+class RefProcPhaseTimeTracker : public RefProcPhaseTimeBaseTracker {
 public:
-  RefProcParPhaseTimeTracker(ReferenceProcessorPhaseTimes::RefProcPhaseNumbers phase_number,
-                             ReferenceProcessorPhaseTimes* phase_times);
-  ~RefProcParPhaseTimeTracker();
+  RefProcPhaseTimeTracker(ReferenceProcessor::RefProcPhases phase_number,
+                          ReferenceProcessorPhaseTimes* phase_times);
+  ~RefProcPhaseTimeTracker();
 };
 
-// Updates phase time related information.
-// - Each phase processing time, cleared/discovered reference counts and stats for each working threads if MT processed.
-class RefProcPhaseTimesTracker : public RefProcPhaseTimeBaseTracker {
+// Highest level time tracker.
+class RefProcTotalPhaseTimesTracker : public RefProcPhaseTimeBaseTracker {
   ReferenceProcessor* _rp;
-
 public:
-  RefProcPhaseTimesTracker(ReferenceType ref_type,
-                           ReferenceProcessorPhaseTimes* phase_times,
-                           ReferenceProcessor* rp);
-  ~RefProcPhaseTimesTracker();
+  RefProcTotalPhaseTimesTracker(ReferenceProcessor::RefProcPhases phase_number,
+                                ReferenceProcessorPhaseTimes* phase_times,
+                                ReferenceProcessor* rp);
+  ~RefProcTotalPhaseTimesTracker();
 };
 
 #endif // SHARE_VM_GC_SHARED_REFERENCEPROCESSORPHASETIMES_HPP
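
The tracker classes above follow HotSpot's scoped-timer idiom: the constructor samples a start time and the destructor writes the elapsed time into the phase-times object, so every exit path from a phase is still timed. A minimal standalone analogue of that idiom (PhaseTimes and the integer phase id below are simplified stand-ins for the real types):

    #include <chrono>
    #include <cstdio>

    struct PhaseTimes {
      double phase_ms[4] = { -1.0, -1.0, -1.0, -1.0 }; // -1.0 ~ uninitialized()
    };

    class PhaseTimeTracker {
      PhaseTimes* _phase_times;
      int _phase;
      std::chrono::steady_clock::time_point _start;
    public:
      PhaseTimeTracker(int phase, PhaseTimes* phase_times)
        : _phase_times(phase_times), _phase(phase),
          _start(std::chrono::steady_clock::now()) {}
      ~PhaseTimeTracker() { // runs however the scope is left
        std::chrono::duration<double, std::milli> d =
            std::chrono::steady_clock::now() - _start;
        _phase_times->phase_ms[_phase] = d.count();
      }
    };

    int main() {
      PhaseTimes pt;
      {
        PhaseTimeTracker t(0, &pt); // e.g. RefPhase1
        // ... phase work would run here ...
      }
      std::printf("phase 0: %.3f ms\n", pt.phase_ms[0]);
      return 0;
    }
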
--- a/src/hotspot/share/gc/shared/stringdedup/stringDedup.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/shared/stringdedup/stringDedup.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -102,9 +102,9 @@
 
 protected:
   // Initialize string deduplication.
-  // QUEUE: String Dedup Queue implementation
-  // STAT:  String Dedup Stat implementation
-  template <typename QUEUE, typename STAT>
+  // Q: String Dedup Queue implementation
+  // S: String Dedup Stat implementation
+  template <typename Q, typename S>
   static void initialize_impl();
 };
 
--- a/src/hotspot/share/gc/shared/stringdedup/stringDedupTable.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/shared/stringdedup/stringDedupTable.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -286,7 +286,7 @@
         // Apply proper barrier to make sure it is kept alive. Concurrent mark might
         // otherwise declare it dead if there are no other strong references to this object.
         oop* obj_addr = (oop*)entry->obj_addr();
-        oop obj = RootAccess<IN_CONCURRENT_ROOT | ON_WEAK_OOP_REF>::oop_load(obj_addr);
+        oop obj = NativeAccess<IN_CONCURRENT_ROOT | ON_WEAK_OOP_REF>::oop_load(obj_addr);
         return typeArrayOop(obj);
       }
     }
--- a/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -1181,8 +1181,14 @@
           if (is_strip_mined && (i == LoopNode::EntryControl)) {
             assert(region->in(i)->is_OuterStripMinedLoop(), "");
             igvn.replace_input_of(region->in(i), i, out_ctrl);
+            phase->set_idom(region->in(i), out_ctrl, phase->dom_depth(out_ctrl));
           } else if (ctrl == region->in(i)) {
             igvn.replace_input_of(region, i, out_ctrl);
+            // Only update the idom if it is the loop entry we are updating
+            // - A loop backedge doesn't change the idom
+            if (region->is_Loop() && i == LoopNode::EntryControl) {
+              phase->set_idom(region, out_ctrl, phase->dom_depth(out_ctrl));
+            }
           } else {
             Node* iff = region->in(i)->in(0);
             igvn.replace_input_of(iff, 0, out_ctrl);
--- a/src/hotspot/share/gc/z/zBarrier.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/z/zBarrier.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -83,6 +83,7 @@
   static  oop load_barrier_on_phantom_oop_field_preloaded(volatile oop* p, oop o);
 
   // Weak load barrier
+  static oop weak_load_barrier_on_oop_field(volatile oop* p);
   static oop weak_load_barrier_on_oop_field_preloaded(volatile oop* p, oop o);
   static oop weak_load_barrier_on_weak_oop(oop o);
   static oop weak_load_barrier_on_weak_oop_field(volatile oop* p);
--- a/src/hotspot/share/gc/z/zBarrier.inline.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/z/zBarrier.inline.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -191,6 +191,12 @@
 //
 // Weak load barrier
 //
+inline oop ZBarrier::weak_load_barrier_on_oop_field(volatile oop* p) {
+  assert(!ZResurrection::is_blocked(), "Should not be called during resurrection blocked phase");
+  const oop o = *p;
+  return weak_load_barrier_on_oop_field_preloaded(p, o);
+}
+
 inline oop ZBarrier::weak_load_barrier_on_oop_field_preloaded(volatile oop* p, oop o) {
   return weak_barrier<is_weak_good_or_null_fast_path, weak_load_barrier_on_oop_slow_path>(p, o);
 }
--- a/src/hotspot/share/gc/z/zOopClosures.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/z/zOopClosures.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -57,7 +57,7 @@
 void ZVerifyRootOopClosure::do_oop(oop* p) {
   guarantee(!ZHeap::heap()->is_in((uintptr_t)p), "oop* " PTR_FORMAT " in heap", p2i(p));
 
-  const oop obj = RootAccess<>::oop_load(p);
+  const oop obj = NativeAccess<>::oop_load(p);
   z_verify_loaded_object(p, obj);
 }
 
--- a/src/hotspot/share/gc/z/zReferenceProcessor.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/z/zReferenceProcessor.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -105,19 +105,21 @@
   return *reference_referent_addr(obj);
 }
 
-bool ZReferenceProcessor::is_referent_alive_or_null(oop obj, ReferenceType type) const {
+bool ZReferenceProcessor::is_referent_strongly_alive_or_null(oop obj, ReferenceType type) const {
+  // Check if the referent is strongly alive or null, in which case we don't want to
+  // discover the reference. It can only be null if the application called
+  // Reference.enqueue() or Reference.clear().
+  //
+  // PhantomReferences with finalizable marked referents should technically not have
+  // to be discovered. However, InstanceRefKlass::oop_oop_iterate_ref_processing()
+  // does not know about the finalizable mark concept, and will therefore mark
+  // referents in non-discovered PhantomReferences as strongly live. To prevent
+  // this, we always discover PhantomReferences with finalizable marked referents.
+  // They will automatically be dropped during the reference processing phase.
+
   volatile oop* const p = reference_referent_addr(obj);
-
-  // Check if the referent is alive or null, in which case we don't want to discover
-  // the reference. It can only be null if the application called Reference.enqueue()
-  // or Reference.clear().
-  if (type == REF_PHANTOM) {
-    const oop o = ZBarrier::weak_load_barrier_on_phantom_oop_field(p);
-    return o == NULL || ZHeap::heap()->is_object_live(ZOop::to_address(o));
-  } else {
-    const oop o = ZBarrier::weak_load_barrier_on_weak_oop_field(p);
-    return o == NULL || ZHeap::heap()->is_object_strongly_live(ZOop::to_address(o));
-  }
+  const oop o = ZBarrier::weak_load_barrier_on_oop_field(p);
+  return o == NULL || ZHeap::heap()->is_object_strongly_live(ZOop::to_address(o));
 }
 
 bool ZReferenceProcessor::is_referent_softly_alive(oop obj, ReferenceType type) const {
@@ -191,7 +193,7 @@
   _encountered_count.get()[type]++;
 
   if (is_reference_inactive(obj) ||
-      is_referent_alive_or_null(obj, type) ||
+      is_referent_strongly_alive_or_null(obj, type) ||
       is_referent_softly_alive(obj, type)) {
     // Not discovered
     return false;
--- a/src/hotspot/share/gc/z/zReferenceProcessor.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/gc/z/zReferenceProcessor.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -53,7 +53,7 @@
   volatile oop* reference_referent_addr(oop obj) const;
   oop reference_referent(oop obj) const;
   bool is_reference_inactive(oop obj) const;
-  bool is_referent_alive_or_null(oop obj, ReferenceType type) const;
+  bool is_referent_strongly_alive_or_null(oop obj, ReferenceType type) const;
   bool is_referent_softly_alive(oop obj, ReferenceType type) const;
   bool should_drop_reference(oop obj, ReferenceType type) const;
   bool should_mark_referent(ReferenceType type) const;
--- a/src/hotspot/share/jfr/recorder/checkpoint/types/jfrTypeSet.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/jfr/recorder/checkpoint/types/jfrTypeSet.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -238,9 +238,9 @@
     // (primordial) boot class loader
     writer->write(cld_id); // class loader instance id
     writer->write((traceid)0);  // class loader type id (absence of)
-    writer->write((traceid)CREATE_SYMBOL_ID(1)); // 1 maps to synthetic name -> "boot"
+    writer->write((traceid)CREATE_SYMBOL_ID(1)); // 1 maps to synthetic name -> "bootstrap"
   } else {
-    Symbol* symbol_name = cld->class_loader_name();
+    Symbol* symbol_name = cld->name();
     const traceid symbol_name_id = symbol_name != NULL ? artifacts->mark(symbol_name) : 0;
     writer->write(cld_id); // class loader instance id
     writer->write(TRACE_ID(class_loader_klass)); // class loader type id
@@ -441,13 +441,13 @@
     CStringEntryPtr entry = this->_artifacts->map_cstring(0);
     assert(entry != NULL, "invariant");
     assert(strncmp(entry->literal(),
-      boot_class_loader_name,
-      strlen(boot_class_loader_name)) == 0, "invariant");
+      BOOTSTRAP_LOADER_NAME,
+      BOOTSTRAP_LOADER_NAME_LEN) == 0, "invariant");
     if (_unique_predicate(entry->id())) {
       count += write__artifact__cstring__entry__(this->_writer, entry);
     }
   } else {
-    const Symbol* class_loader_name = cld->class_loader_name();
+    const Symbol* class_loader_name = cld->name();
     if (class_loader_name != NULL) {
       SymbolEntryPtr entry = this->_artifacts->map_symbol(class_loader_name);
       assert(entry != NULL, "invariant");
--- a/src/hotspot/share/jfr/recorder/checkpoint/types/jfrTypeSetUtils.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/jfr/recorder/checkpoint/types/jfrTypeSetUtils.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -208,7 +208,7 @@
   assert(_symbol_id != NULL, "invariant");
   _symbol_id->initialize();
   assert(!_symbol_id->has_entries(), "invariant");
-  _symbol_id->mark(boot_class_loader_name, 0); // pre-load "boot"
+  _symbol_id->mark(BOOTSTRAP_LOADER_NAME, 0); // pre-load "bootstrap"
   _class_unload = class_unload;
   // resource allocation
   _klass_list = new GrowableArray<const Klass*>(initial_class_list_size, false, mtTracing);
--- a/src/hotspot/share/jfr/recorder/checkpoint/types/jfrTypeSetUtils.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/jfr/recorder/checkpoint/types/jfrTypeSetUtils.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -295,9 +295,6 @@
   bool has_cstring_entries() const { return _cstring_table->has_entries(); }
 };
 
-// external name (synthetic) for the primordial "boot" class loader instance
-const char* const boot_class_loader_name = "boot";
-
 /**
  * When processing a set of artifacts, there will be a need
  * to track transitive dependencies originating with each artifact.
--- a/src/hotspot/share/memory/metaspace/printCLDMetaspaceInfoClosure.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/memory/metaspace/printCLDMetaspaceInfoClosure.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -88,7 +88,7 @@
     Klass* k = cld->class_loader_klass();
     if (k != NULL) {
       class_name = k->external_name();
-      Symbol* s = cld->class_loader_name();
+      Symbol* s = cld->name();
       if (s != NULL) {
         name = s->as_C_string();
       }
--- a/src/hotspot/share/oops/access.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/oops/access.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -121,7 +121,7 @@
   static void verify_heap_oop_decorators() {
     const DecoratorSet heap_oop_decorators = AS_DECORATOR_MASK | ON_DECORATOR_MASK |
                                              OOP_DECORATOR_MASK | (IN_DECORATOR_MASK ^
-                                                                   (IN_ROOT | IN_CONCURRENT_ROOT)); // no root accesses in the heap
+                                                                   (IN_NATIVE | IN_CONCURRENT_ROOT)); // no root accesses in the heap
     verify_decorators<expected_mo_decorators | heap_oop_decorators>();
   }
 
@@ -296,7 +296,7 @@
 // Helper for performing normal accesses in roots. These accesses
 // may resolve an accessor on a GC barrier set
 template <DecoratorSet decorators = INTERNAL_EMPTY>
-class RootAccess: public Access<IN_ROOT | decorators> {};
+class NativeAccess: public Access<IN_NATIVE | decorators> {};
 
 // Helper for array access.
 template <DecoratorSet decorators = INTERNAL_EMPTY>
@@ -376,10 +376,10 @@
   ));
   const DecoratorSet location_decorators = decorators & IN_DECORATOR_MASK;
   STATIC_ASSERT(location_decorators == 0 || ( // make sure location decorators are disjoint if set
-    (location_decorators ^ IN_ROOT) == 0 ||
+    (location_decorators ^ IN_NATIVE) == 0 ||
     (location_decorators ^ IN_HEAP) == 0 ||
     (location_decorators ^ (IN_HEAP | IN_HEAP_ARRAY)) == 0 ||
-    (location_decorators ^ (IN_ROOT | IN_CONCURRENT_ROOT)) == 0
+    (location_decorators ^ (IN_NATIVE | IN_CONCURRENT_ROOT)) == 0
   ));
 }
 
--- a/src/hotspot/share/oops/accessDecorators.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/oops/accessDecorators.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -184,16 +184,16 @@
 //   be omitted if this decorator is not set.
 // * IN_HEAP_ARRAY: The access is performed on a heap allocated array. This is sometimes a special case
 //   for some GCs, and implies that it is an IN_HEAP.
-// * IN_ROOT: The access is performed in an off-heap data structure pointing into the Java heap.
+// * IN_NATIVE: The access is performed in an off-heap data structure pointing into the Java heap.
 // * IN_CONCURRENT_ROOT: The access is performed in an off-heap data structure pointing into the Java heap,
 //   but is notably not scanned during safepoints. This is sometimes a special case for some GCs and
-//   implies that it is also an IN_ROOT.
+//   implies that it is also an IN_NATIVE.
 const DecoratorSet IN_HEAP            = UCONST64(1) << 20;
 const DecoratorSet IN_HEAP_ARRAY      = UCONST64(1) << 21;
-const DecoratorSet IN_ROOT            = UCONST64(1) << 22;
+const DecoratorSet IN_NATIVE          = UCONST64(1) << 22;
 const DecoratorSet IN_CONCURRENT_ROOT = UCONST64(1) << 23;
 const DecoratorSet IN_DECORATOR_MASK  = IN_HEAP | IN_HEAP_ARRAY |
-                                        IN_ROOT | IN_CONCURRENT_ROOT;
+                                        IN_NATIVE | IN_CONCURRENT_ROOT;
 
 // == Value Decorators ==
 // * OOP_NOT_NULL: This property can make certain barriers faster such as compressing oops.
@@ -242,7 +242,7 @@
     static const DecoratorSet heap_array_is_in_heap = barrier_strength_default |
       ((IN_HEAP_ARRAY & barrier_strength_default) != 0 ? IN_HEAP : INTERNAL_EMPTY);
     static const DecoratorSet conc_root_is_root = heap_array_is_in_heap |
-      ((IN_CONCURRENT_ROOT & heap_array_is_in_heap) != 0 ? IN_ROOT : INTERNAL_EMPTY);
+      ((IN_CONCURRENT_ROOT & heap_array_is_in_heap) != 0 ? IN_NATIVE : INTERNAL_EMPTY);
     static const DecoratorSet value = conc_root_is_root | BT_BUILDTIME_DECORATORS;
   };
 
@@ -263,7 +263,7 @@
     DecoratorSet heap_array_is_in_heap = barrier_strength_default |
       ((IN_HEAP_ARRAY & barrier_strength_default) != 0 ? IN_HEAP : INTERNAL_EMPTY);
     DecoratorSet conc_root_is_root = heap_array_is_in_heap |
-      ((IN_CONCURRENT_ROOT & heap_array_is_in_heap) != 0 ? IN_ROOT : INTERNAL_EMPTY);
+      ((IN_CONCURRENT_ROOT & heap_array_is_in_heap) != 0 ? IN_NATIVE : INTERNAL_EMPTY);
     DecoratorSet value = conc_root_is_root | BT_BUILDTIME_DECORATORS;
     return value;
   }
--- a/src/hotspot/share/oops/instanceKlass.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/oops/instanceKlass.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -2328,8 +2328,7 @@
 void InstanceKlass::set_package(ClassLoaderData* loader_data, TRAPS) {
 
   // ensure java/ packages only loaded by boot or platform builtin loaders
-  Handle class_loader(THREAD, loader_data->class_loader());
-  check_prohibited_package(name(), class_loader, CHECK);
+  check_prohibited_package(name(), loader_data, CHECK);
 
   TempNewSymbol pkg_name = package_from_name(name(), CHECK);
 
@@ -2359,7 +2358,7 @@
 
       // A package should have been successfully created
       assert(_package_entry != NULL, "Package entry for class %s not found, loader %s",
-             name()->as_C_string(), loader_data->loader_name());
+             name()->as_C_string(), loader_data->loader_name_and_id());
     }
 
     if (log_is_enabled(Debug, module)) {
@@ -2368,14 +2367,14 @@
       log_trace(module)("Setting package: class: %s, package: %s, loader: %s, module: %s",
                         external_name(),
                         pkg_name->as_C_string(),
-                        loader_data->loader_name(),
+                        loader_data->loader_name_and_id(),
                         (m->is_named() ? m->name()->as_C_string() : UNNAMED_MODULE));
     }
   } else {
     ResourceMark rm;
     log_trace(module)("Setting package: class: %s, package: unnamed, loader: %s, module: %s",
                       external_name(),
-                      (loader_data != NULL) ? loader_data->loader_name() : "NULL",
+                      (loader_data != NULL) ? loader_data->loader_name_and_id() : "NULL",
                       UNNAMED_MODULE);
   }
 }
@@ -2471,10 +2470,10 @@
 
 // Only boot and platform class loaders can define classes in "java/" packages.
 void InstanceKlass::check_prohibited_package(Symbol* class_name,
-                                             Handle class_loader,
+                                             ClassLoaderData* loader_data,
                                              TRAPS) {
-  if (!class_loader.is_null() &&
-      !SystemDictionary::is_platform_class_loader(class_loader()) &&
+  if (!loader_data->is_boot_class_loader_data() &&
+      !loader_data->is_platform_class_loader_data() &&
       class_name != NULL) {
     ResourceMark rm(THREAD);
     char* name = class_name->as_C_string();
@@ -2482,7 +2481,7 @@
       TempNewSymbol pkg_name = InstanceKlass::package_from_name(class_name, CHECK);
       assert(pkg_name != NULL, "Error in parsing package name starting with 'java/'");
       name = pkg_name->as_C_string();
-      const char* class_loader_name = SystemDictionary::loader_name(class_loader());
+      const char* class_loader_name = loader_data->loader_name_and_id();
       StringUtils::replace_no_expand(name, "/", ".");
       const char* msg_text1 = "Class loader (instance of): ";
       const char* msg_text2 = " tried to load prohibited package name: ";
@@ -2641,7 +2640,12 @@
     // If the interface isn't implemented by the receiver class,
     // the VM should throw IncompatibleClassChangeError.
     if (cnt >= nof_interfaces) {
-      THROW_NULL(vmSymbols::java_lang_IncompatibleClassChangeError());
+      ResourceMark rm(THREAD);
+      stringStream ss;
+      ss.print("Receiver class %s does not implement "
+               "the interface %s defining the method to be called",
+               class_loader_and_module_name(), holder->class_loader_and_module_name());
+      THROW_MSG_NULL(vmSymbols::java_lang_IncompatibleClassChangeError(), ss.as_string());
     }
 
     Klass* ik = ioe->interface_klass();
--- a/src/hotspot/share/oops/instanceKlass.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/oops/instanceKlass.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -471,7 +471,7 @@
  private:
   // Check prohibited package ("java/" only loadable by boot or platform loaders)
   static void check_prohibited_package(Symbol* class_name,
-                                       Handle class_loader,
+                                       ClassLoaderData* loader_data,
                                        TRAPS);
  public:
   // tell if two classes have the same enclosing class (at package level)
--- a/src/hotspot/share/oops/klass.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/oops/klass.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -142,7 +142,10 @@
 
 
 void Klass::copy_array(arrayOop s, int src_pos, arrayOop d, int dst_pos, int length, TRAPS) {
-  THROW(vmSymbols::java_lang_ArrayStoreException());
+  ResourceMark rm(THREAD);
+  assert(s != NULL, "Throw NPE!");
+  THROW_MSG(vmSymbols::java_lang_ArrayStoreException(),
+            err_msg("arraycopy: source type %s is not an array", s->klass()->external_name()));
 }
 
 
--- a/src/hotspot/share/oops/methodData.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/oops/methodData.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -139,7 +139,7 @@
     //
     // The trap_state is collected only if ProfileTraps is true.
     trap_bits = 1+31,  // 31: enough to distinguish [0..Reason_RECORDED_LIMIT].
-    trap_mask = right_n_bits(trap_bits),
+    trap_mask = -1,
     first_flag = 0
   };
 
@@ -1976,7 +1976,7 @@
 
   // Whole-method sticky bits and flags
   enum {
-    _trap_hist_limit    = 23 JVMCI_ONLY(+5),   // decoupled from Deoptimization::Reason_LIMIT
+    _trap_hist_limit    = 24 JVMCI_ONLY(+5),   // decoupled from Deoptimization::Reason_LIMIT
     _trap_hist_mask     = max_jubyte,
     _extra_data_count   = 4     // extra DataLayout headers, for trap history
   }; // Public flag values
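
The trap_mask change above is presumably tied to trap_bits spanning all 32 bits: a shift-built mask like right_n_bits(32) relies on a full-width shift that is platform-sensitive, while -1 converts to the all-ones pattern directly (this reading is inferred from the surrounding enum, not stated in the change). A standalone illustration:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const int trap_bits = 1 + 31; // 32, as in the enum above
      // (1 << trap_bits) - 1 would be undefined behavior for a 32-bit int,
      // so the all-ones mask is spelled as -1 instead.
      const uint32_t trap_mask = static_cast<uint32_t>(-1);
      std::printf("trap_bits=%d trap_mask=0x%08x\n", trap_bits, trap_mask);
      return 0;
    }
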
--- a/src/hotspot/share/oops/objArrayKlass.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/oops/objArrayKlass.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -235,7 +235,19 @@
       // slow case: need individual subtype checks
       // note: don't use obj_at_put below because it includes a redundant store check
       if (!ArrayAccess<ARRAYCOPY_DISJOINT | ARRAYCOPY_CHECKCAST>::oop_arraycopy(s, src_offset, d, dst_offset, length)) {
-        THROW(vmSymbols::java_lang_ArrayStoreException());
+        ResourceMark rm(THREAD);
+        stringStream ss;
+        if (!bound->is_subtype_of(stype)) {
+          ss.print("arraycopy: type mismatch: can not copy %s[] into %s[]",
+                   stype->external_name(), bound->external_name());
+        } else {
+          // oop_arraycopy should return the index in the source array that
+          // contains the problematic oop.
+          ss.print("arraycopy: element type mismatch: can not cast one of the elements"
+                   " of %s[] to the type of the destination array, %s",
+                   stype->external_name(), bound->external_name());
+        }
+        THROW_MSG(vmSymbols::java_lang_ArrayStoreException(), ss.as_string());
       }
     }
   }
@@ -246,13 +258,21 @@
   assert(s->is_objArray(), "must be obj array");
 
   if (!d->is_objArray()) {
-    THROW(vmSymbols::java_lang_ArrayStoreException());
+    ResourceMark rm(THREAD);
+    stringStream ss;
+    if (d->is_typeArray()) {
+      ss.print("arraycopy: type mismatch: can not copy object array[] into %s[]",
+               type2name_tab[ArrayKlass::cast(d->klass())->element_type()]);
+    } else {
+      ss.print("arraycopy: destination type %s is not an array", d->klass()->external_name());
+    }
+    THROW_MSG(vmSymbols::java_lang_ArrayStoreException(), ss.as_string());
   }
 
   // Check if all offsets and lengths are non negative
   if (src_pos < 0 || dst_pos < 0 || length < 0) {
     // Pass specific exception reason.
-    ResourceMark rm;
+    ResourceMark rm(THREAD);
     stringStream ss;
     if (src_pos < 0) {
       ss.print("arraycopy: source index %d out of bounds for object array[%d]",
@@ -269,7 +289,7 @@
   if ((((unsigned int) length + (unsigned int) src_pos) > (unsigned int) s->length()) ||
       (((unsigned int) length + (unsigned int) dst_pos) > (unsigned int) d->length())) {
     // Pass specific exception reason.
-    ResourceMark rm;
+    ResourceMark rm(THREAD);
     stringStream ss;
     if (((unsigned int) length + (unsigned int) src_pos) > (unsigned int) s->length()) {
       ss.print("arraycopy: last source index %u out of bounds for object array[%d]",
--- a/src/hotspot/share/oops/oopHandle.inline.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/oops/oopHandle.inline.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -29,7 +29,7 @@
 #include "oops/oopHandle.hpp"
 
 inline oop OopHandle::resolve() const {
-  return (_obj == NULL) ? (oop)NULL : RootAccess<IN_CONCURRENT_ROOT>::oop_load(_obj);
+  return (_obj == NULL) ? (oop)NULL : NativeAccess<IN_CONCURRENT_ROOT>::oop_load(_obj);
 }
 
 #endif //  SHARE_VM_OOPS_OOPHANDLE_INLINE_HPP
--- a/src/hotspot/share/oops/typeArrayKlass.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/oops/typeArrayKlass.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -131,15 +131,31 @@
 void TypeArrayKlass::copy_array(arrayOop s, int src_pos, arrayOop d, int dst_pos, int length, TRAPS) {
   assert(s->is_typeArray(), "must be type array");
 
-  // Check destination
-  if (!d->is_typeArray() || element_type() != TypeArrayKlass::cast(d->klass())->element_type()) {
-    THROW(vmSymbols::java_lang_ArrayStoreException());
+  // Check destination type.
+  if (!d->is_typeArray()) {
+    ResourceMark rm(THREAD);
+    stringStream ss;
+    if (d->is_objArray()) {
+      ss.print("arraycopy: type mismatch: can not copy %s[] into object array[]",
+               type2name_tab[ArrayKlass::cast(s->klass())->element_type()]);
+    } else {
+      ss.print("arraycopy: destination type %s is not an array", d->klass()->external_name());
+    }
+    THROW_MSG(vmSymbols::java_lang_ArrayStoreException(), ss.as_string());
+  }
+  if (element_type() != TypeArrayKlass::cast(d->klass())->element_type()) {
+    ResourceMark rm(THREAD);
+    stringStream ss;
+    ss.print("arraycopy: type mismatch: can not copy %s[] into %s[]",
+             type2name_tab[ArrayKlass::cast(s->klass())->element_type()],
+             type2name_tab[ArrayKlass::cast(d->klass())->element_type()]);
+    THROW_MSG(vmSymbols::java_lang_ArrayStoreException(), ss.as_string());
   }
 
-  // Check is all offsets and lengths are non negative
+  // Check if all offsets and lengths are non negative.
   if (src_pos < 0 || dst_pos < 0 || length < 0) {
     // Pass specific exception reason.
-    ResourceMark rm;
+    ResourceMark rm(THREAD);
     stringStream ss;
     if (src_pos < 0) {
       ss.print("arraycopy: source index %d out of bounds for %s[%d]",
@@ -156,7 +172,7 @@
   if ((((unsigned int) length + (unsigned int) src_pos) > (unsigned int) s->length()) ||
       (((unsigned int) length + (unsigned int) dst_pos) > (unsigned int) d->length())) {
     // Pass specific exception reason.
-    ResourceMark rm;
+    ResourceMark rm(THREAD);
     stringStream ss;
     if (((unsigned int) length + (unsigned int) src_pos) > (unsigned int) s->length()) {
       ss.print("arraycopy: last source index %u out of bounds for %s[%d]",
--- a/src/hotspot/share/oops/weakHandle.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/oops/weakHandle.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -48,7 +48,7 @@
     vm_exit_out_of_memory(sizeof(oop*), OOM_MALLOC_ERROR, "Unable to create new weak oop handle in OopStorage");
   }
   // Create WeakHandle with address returned and store oop into it.
-  RootAccess<ON_PHANTOM_OOP_REF>::oop_store(oop_addr, obj());
+  NativeAccess<ON_PHANTOM_OOP_REF>::oop_store(oop_addr, obj());
   return WeakHandle(oop_addr);
 }
 
@@ -58,7 +58,7 @@
   if (_obj != NULL) {
     // Clear the WeakHandle.  For race in creating ClassLoaderData, we can release this
     // WeakHandle before it is cleared by GC.
-    RootAccess<ON_PHANTOM_OOP_REF>::oop_store(_obj, (oop)NULL);
+    NativeAccess<ON_PHANTOM_OOP_REF>::oop_store(_obj, (oop)NULL);
     get_storage()->release(_obj);
   }
 }
--- a/src/hotspot/share/oops/weakHandle.inline.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/oops/weakHandle.inline.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -31,18 +31,18 @@
 template <WeakHandleType T>
 oop WeakHandle<T>::resolve() const {
   assert(!is_null(), "Must be created");
-  return RootAccess<ON_PHANTOM_OOP_REF>::oop_load(_obj);
+  return NativeAccess<ON_PHANTOM_OOP_REF>::oop_load(_obj);
 }
 
 template <WeakHandleType T>
 oop WeakHandle<T>::peek() const {
   assert(!is_null(), "Must be created");
-  return RootAccess<ON_PHANTOM_OOP_REF | AS_NO_KEEPALIVE>::oop_load(_obj);
+  return NativeAccess<ON_PHANTOM_OOP_REF | AS_NO_KEEPALIVE>::oop_load(_obj);
 }
 
 template <WeakHandleType T>
 void WeakHandle<T>::replace(oop with_obj) {
-  RootAccess<ON_PHANTOM_OOP_REF>::oop_store(_obj, with_obj);
+  NativeAccess<ON_PHANTOM_OOP_REF>::oop_store(_obj, with_obj);
 }
 
 #endif // SHARE_VM_OOPS_WEAKHANDLE_INLINE_HPP
--- a/src/hotspot/share/opto/c2_globals.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/opto/c2_globals.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -754,6 +754,9 @@
   product(uintx, LoopStripMiningIterShortLoop, 0,                           \
           "Loop with fewer iterations are not strip mined")                 \
           range(0, max_juint)                                               \
+                                                                            \
+  product(bool, UseProfiledLoopPredicate, true,                             \
+          "move predicates out of loops based on profiling data")           \
 
 C2_FLAGS(DECLARE_DEVELOPER_FLAG, \
          DECLARE_PD_DEVELOPER_FLAG, \
--- a/src/hotspot/share/opto/graphKit.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/opto/graphKit.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -3804,6 +3804,9 @@
   if (UseLoopPredicate) {
     add_predicate_impl(Deoptimization::Reason_predicate, nargs);
   }
+  if (UseProfiledLoopPredicate) {
+    add_predicate_impl(Deoptimization::Reason_profile_predicate, nargs);
+  }
   // loop's limit check predicate should be near the loop.
   add_predicate_impl(Deoptimization::Reason_loop_limit_check, nargs);
 }
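
With both flags enabled, add_predicate now reserves three predicate slots above the loop entry, and the limit check is emitted last so it stays nearest the loop, as the comment above notes. A standalone sketch of the resulting emission order (flag values hard-coded for illustration):

    #include <cstdio>

    int main() {
      const bool UseLoopPredicate         = true; // existing flag
      const bool UseProfiledLoopPredicate = true; // flag added by this change

      if (UseLoopPredicate)         std::puts("Reason_predicate");
      if (UseProfiledLoopPredicate) std::puts("Reason_profile_predicate");
      std::puts("Reason_loop_limit_check"); // always, and nearest the loop
      return 0;
    }
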
--- a/src/hotspot/share/opto/loopPredicate.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/opto/loopPredicate.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -34,6 +34,8 @@
 #include "opto/opaquenode.hpp"
 #include "opto/rootnode.hpp"
 #include "opto/subnode.hpp"
+#include <fenv.h>
+#include <math.h>
 
 /*
  * The general idea of Loop Predication is to insert a predicate on the entry
@@ -89,7 +91,7 @@
 //
 //
 // We will create a region to guard the uct call if there is no one there.
-// The true projecttion (if_cont) of the new_iff is returned.
+// The true projection (if_cont) of the new_iff is returned.
 // This code is also used to clone predicates to cloned loops.
 ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry,
                                                       Deoptimization::DeoptReason reason,
@@ -318,18 +320,37 @@
   if (limit_check_proj != NULL) {
     entry = entry->in(0)->in(0);
   }
+  ProjNode* profile_predicate_proj = NULL;
+  ProjNode* predicate_proj = NULL;
+  if (UseProfiledLoopPredicate) {
+    profile_predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
+    if (profile_predicate_proj != NULL) {
+      entry = skip_loop_predicates(entry);
+    }
+  }
   if (UseLoopPredicate) {
-    ProjNode* predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
-    if (predicate_proj != NULL) { // right pattern that can be used by loop predication
-      // clone predicate
-      new_entry = clone_predicate(predicate_proj, new_entry,
-                                  Deoptimization::Reason_predicate,
-                                  loop_phase, igvn);
-      assert(new_entry != NULL && new_entry->is_Proj(), "IfTrue or IfFalse after clone predicate");
-      if (TraceLoopPredicate) {
-        tty->print("Loop Predicate cloned: ");
-        debug_only( new_entry->in(0)->dump(); )
-      }
+    predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
+  }
+  if (predicate_proj != NULL) { // right pattern that can be used by loop predication
+    // clone predicate
+    new_entry = clone_predicate(predicate_proj, new_entry,
+                                Deoptimization::Reason_predicate,
+                                loop_phase, igvn);
+    assert(new_entry != NULL && new_entry->is_Proj(), "IfTrue or IfFalse after clone predicate");
+    if (TraceLoopPredicate) {
+      tty->print("Loop Predicate cloned: ");
+      debug_only( new_entry->in(0)->dump(); );
+    }
+  }
+  if (profile_predicate_proj != NULL) { // right pattern that can be used by loop predication
+    // clone predicate
+    new_entry = clone_predicate(profile_predicate_proj, new_entry,
+                                Deoptimization::Reason_profile_predicate,
+                                loop_phase, igvn);
+    assert(new_entry != NULL && new_entry->is_Proj(), "IfTrue or IfFalse after clone predicate");
+    if (TraceLoopPredicate) {
+      tty->print("Loop Predicate cloned: ");
+      debug_only( new_entry->in(0)->dump(); );
     }
   }
   if (limit_check_proj != NULL && clone_limit_check) {
@@ -351,25 +372,36 @@
 //--------------------------skip_loop_predicates------------------------------
 // Skip related predicates.
 Node* PhaseIdealLoop::skip_loop_predicates(Node* entry) {
+  IfNode* iff = entry->in(0)->as_If();
+  ProjNode* uncommon_proj = iff->proj_out(1 - entry->as_Proj()->_con);
+  Node* rgn = uncommon_proj->unique_ctrl_out();
+  assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
+  entry = entry->in(0)->in(0);
+  while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) {
+    uncommon_proj = entry->in(0)->as_If()->proj_out(1 - entry->as_Proj()->_con);
+    if (uncommon_proj->unique_ctrl_out() != rgn)
+      break;
+    entry = entry->in(0)->in(0);
+  }
+  return entry;
+}
+
+Node* PhaseIdealLoop::skip_all_loop_predicates(Node* entry) {
   Node* predicate = NULL;
   predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
   if (predicate != NULL) {
     entry = entry->in(0)->in(0);
   }
+  if (UseProfiledLoopPredicate) {
+    predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
+    if (predicate != NULL) { // right pattern that can be used by loop predication
+      entry = skip_loop_predicates(entry);
+    }
+  }
   if (UseLoopPredicate) {
     predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
     if (predicate != NULL) { // right pattern that can be used by loop predication
-      IfNode* iff = entry->in(0)->as_If();
-      ProjNode* uncommon_proj = iff->proj_out(1 - entry->as_Proj()->_con);
-      Node* rgn = uncommon_proj->unique_ctrl_out();
-      assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
-      entry = entry->in(0)->in(0);
-      while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) {
-        uncommon_proj = entry->in(0)->as_If()->proj_out(1 - entry->as_Proj()->_con);
-        if (uncommon_proj->unique_ctrl_out() != rgn)
-          break;
-        entry = entry->in(0)->in(0);
-      }
+      entry = skip_loop_predicates(entry);
     }
   }
   return entry;
@@ -400,6 +432,12 @@
       return entry;
     }
   }
+  if (UseProfiledLoopPredicate) {
+    predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
+    if (predicate != NULL) { // right pattern that can be used by loop predication
+      return entry;
+    }
+  }
   return NULL;
 }
 
@@ -766,24 +804,432 @@
   return bol;
 }
 
+// Should loop predication look not only in the path from tail to head
+// but also in branches of the loop body?
+bool PhaseIdealLoop::loop_predication_should_follow_branches(IdealLoopTree *loop, ProjNode *predicate_proj, float& loop_trip_cnt) {
+  if (!UseProfiledLoopPredicate) {
+    return false;
+  }
+
+  if (predicate_proj == NULL) {
+    return false;
+  }
+
+  LoopNode* head = loop->_head->as_Loop();
+  bool follow_branches = true;
+  IdealLoopTree* l = loop->_child;
+  // For leaf loops and loops with a single inner loop
+  while (l != NULL && follow_branches) {
+    IdealLoopTree* child = l;
+    if (child->_child != NULL &&
+        child->_head->is_OuterStripMinedLoop()) {
+      assert(child->_child->_next == NULL, "only one inner loop for strip mined loop");
+      assert(child->_child->_head->is_CountedLoop() && child->_child->_head->as_CountedLoop()->is_strip_mined(), "inner loop should be strip mined");
+      child = child->_child;
+    }
+    if (child->_child != NULL || child->_irreducible) {
+      follow_branches = false;
+    }
+    l = l->_next;
+  }
+  if (follow_branches) {
+    loop->compute_profile_trip_cnt(this);
+    if (head->is_profile_trip_failed()) {
+      follow_branches = false;
+    } else {
+      loop_trip_cnt = head->profile_trip_cnt();
+      if (head->is_CountedLoop()) {
+        CountedLoopNode* cl = head->as_CountedLoop();
+        if (cl->phi() != NULL) {
+          const TypeInt* t = _igvn.type(cl->phi())->is_int();
+          float worst_case_trip_cnt = ((float)t->_hi - t->_lo) / ABS(cl->stride_con());
+          if (worst_case_trip_cnt < loop_trip_cnt) {
+            loop_trip_cnt = worst_case_trip_cnt;
+          }
+        }
+      }
+    }
+  }
+  return follow_branches;
+}
+
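    // Standalone sketch (not part of this change) of why PathFrequency::to below
    // toggles fesetround(): under the default FE_TONEAREST mode a sum of float
    // probabilities can round to just above 1.0, while FE_TOWARDZERO keeps each
    // partial result from creeping past the true value. ('volatile' only defeats
    // constant folding so the rounding mode is actually exercised.)
    #include <fenv.h>
    #include <stdio.h>

    int main() {
      volatile float sum;

      fesetround(FE_TONEAREST);           // default rounding
      sum = 0.0f;
      for (int i = 0; i < 10; i++) sum = sum + 0.1f;
      printf("to-nearest:  %.9f\n", sum); // 1.000000119, just above 1

      fesetround(FE_TOWARDZERO);          // as used when accumulating frequencies
      sum = 0.0f;
      for (int i = 0; i < 10; i++) sum = sum + 0.1f;
      printf("toward-zero: %.9f\n", sum); // stays at or below 1.0

      fesetround(FE_TONEAREST);           // restore, as the code below does
      return 0;
    }
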
+// Compute probability of reaching some CFG node from a fixed
+// dominating CFG node
+class PathFrequency {
+private:
+  Node* _dom; // frequencies are computed relative to this node
+  Node_Stack _stack;
+  GrowableArray<float> _freqs_stack; // keep track of intermediate result at regions
+  GrowableArray<float> _freqs; // cache frequencies
+  PhaseIdealLoop* _phase;
+
+public:
+  PathFrequency(Node* dom, PhaseIdealLoop* phase)
+    : _dom(dom), _stack(0), _phase(phase) {
+  }
+
+  float to(Node* n) {
+    // post order walk on the CFG graph from n to _dom
+    fesetround(FE_TOWARDZERO); // make sure rounding doesn't push frequency above 1
+    IdealLoopTree* loop = _phase->get_loop(_dom);
+    Node* c = n;
+    for (;;) {
+      assert(_phase->get_loop(c) == loop, "have to be in the same loop");
+      if (c == _dom || _freqs.at_grow(c->_idx, -1) >= 0) {
+        float f = c == _dom ? 1 : _freqs.at(c->_idx);
+        Node* prev = c;
+        while (_stack.size() > 0 && prev == c) {
+          Node* n = _stack.node();
+          if (!n->is_Region()) {
+            if (_phase->get_loop(n) != _phase->get_loop(n->in(0))) {
+              // Found an inner loop: compute frequency of reaching this
+              // exit from the loop head by looking at the number of
+              // times each loop exit was taken
+              IdealLoopTree* inner_loop = _phase->get_loop(n->in(0));
+              LoopNode* inner_head = inner_loop->_head->as_Loop();
+              assert(_phase->get_loop(n) == loop, "only 1 inner loop");
+              if (inner_head->is_OuterStripMinedLoop()) {
+                inner_head->verify_strip_mined(1);
+                if (n->in(0) == inner_head->in(LoopNode::LoopBackControl)->in(0)) {
+                  n = n->in(0)->in(0)->in(0);
+                }
+                inner_loop = inner_loop->_child;
+                inner_head = inner_loop->_head->as_Loop();
+                inner_head->verify_strip_mined(1);
+              }
+              fesetround(FE_UPWARD);  // make sure rounding doesn't push frequency above 1
+              float loop_exit_cnt = 0.0f;
+              for (uint i = 0; i < inner_loop->_body.size(); i++) {
+                Node *n = inner_loop->_body[i];
+                float c = inner_loop->compute_profile_trip_cnt_helper(n);
+                loop_exit_cnt += c;
+              }
+              fesetround(FE_TOWARDZERO);
+              float cnt = -1;
+              if (n->in(0)->is_If()) {
+                IfNode* iff = n->in(0)->as_If();
+                float p = n->in(0)->as_If()->_prob;
+                if (n->Opcode() == Op_IfFalse) {
+                  p = 1 - p;
+                }
+                if (p > PROB_MIN) {
+                  cnt = p * iff->_fcnt;
+                } else {
+                  cnt = 0;
+                }
+              } else {
+                assert(n->in(0)->is_Jump(), "unsupported node kind");
+                JumpNode* jmp = n->in(0)->as_Jump();
+                float p = n->in(0)->as_Jump()->_probs[n->as_JumpProj()->_con];
+                cnt = p * jmp->_fcnt;
+              }
+              float this_exit_f = cnt > 0 ? cnt / loop_exit_cnt : 0;
+              assert(this_exit_f <= 1 && this_exit_f >= 0, "Incorrect frequency");
+              f = f * this_exit_f;
+              assert(f <= 1 && f >= 0, "Incorrect frequency");
+            } else {
+              float p = -1;
+              if (n->in(0)->is_If()) {
+                p = n->in(0)->as_If()->_prob;
+                if (n->Opcode() == Op_IfFalse) {
+                  p = 1 - p;
+                }
+              } else {
+                assert(n->in(0)->is_Jump(), "unsupported node kind");
+                p = n->in(0)->as_Jump()->_probs[n->as_JumpProj()->_con];
+              }
+              f = f * p;
+              assert(f <= 1 && f >= 0, "Incorrect frequency");
+            }
+            _freqs.at_put_grow(n->_idx, (float)f, -1);
+            _stack.pop();
+          } else {
+            float prev_f = _freqs_stack.pop();
+            float new_f = f;
+            f = new_f + prev_f;
+            assert(f <= 1 && f >= 0, "Incorrect frequency");
+            uint i = _stack.index();
+            if (i < n->req()) {
+              c = n->in(i);
+              _stack.set_index(i+1);
+              _freqs_stack.push(f);
+            } else {
+              _freqs.at_put_grow(n->_idx, f, -1);
+              _stack.pop();
+            }
+          }
+        }
+        if (_stack.size() == 0) {
+          fesetround(FE_TONEAREST);
+          assert(f >= 0 && f <= 1, "should have been computed");
+          return f;
+        }
+      } else if (c->is_Loop()) {
+        ShouldNotReachHere();
+        c = c->in(LoopNode::EntryControl);
+      } else if (c->is_Region()) {
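+        // Merge point: visit each region input in turn, summing the
+        // path frequencies accumulated on _freqs_stack (input 1 is
+        // descended into first, so index 2 is pushed as the next one
+        // to visit).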
+        _freqs_stack.push(0);
+        _stack.push(c, 2);
+        c = c->in(1);
+      } else {
+        if (c->is_IfProj()) {
+          IfNode* iff = c->in(0)->as_If();
+          if (iff->_prob == PROB_UNKNOWN) {
+            // assume never taken
+            _freqs.at_put_grow(c->_idx, 0, -1);
+          } else if (_phase->get_loop(c) != _phase->get_loop(iff)) {
+            if (iff->_fcnt == COUNT_UNKNOWN) {
+              // assume never taken
+              _freqs.at_put_grow(c->_idx, 0, -1);
+            } else {
+              // skip over loop
+              _stack.push(c, 1);
+              c = _phase->get_loop(c->in(0))->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
+            }
+          } else {
+            _stack.push(c, 1);
+            c = iff;
+          }
+        } else if (c->is_JumpProj()) {
+          JumpNode* jmp = c->in(0)->as_Jump();
+          if (_phase->get_loop(c) != _phase->get_loop(jmp)) {
+            if (jmp->_fcnt == COUNT_UNKNOWN) {
+              // assume never taken
+              _freqs.at_put_grow(c->_idx, 0, -1);
+            } else {
+              // skip over loop
+              _stack.push(c, 1);
+              c = _phase->get_loop(c->in(0))->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
+            }
+          } else {
+            _stack.push(c, 1);
+            c = jmp;
+          }
+        } else if (c->Opcode() == Op_CatchProj &&
+                   c->in(0)->Opcode() == Op_Catch &&
+                   c->in(0)->in(0)->is_Proj() &&
+                   c->in(0)->in(0)->in(0)->is_Call()) {
+          // assume exceptions are never thrown
+          uint con = c->as_Proj()->_con;
+          if (con == CatchProjNode::fall_through_index) {
+            Node* call = c->in(0)->in(0)->in(0)->in(0);
+            if (_phase->get_loop(call) != _phase->get_loop(c)) {
+              _freqs.at_put_grow(c->_idx, 0, -1);
+            } else {
+              c = call;
+            }
+          } else {
+            assert(con >= CatchProjNode::catch_all_index, "what else?");
+            _freqs.at_put_grow(c->_idx, 0, -1);
+          }
+        } else if (c->unique_ctrl_out() == NULL && !c->is_If() && !c->is_Jump()) {
+          ShouldNotReachHere();
+        } else {
+          c = c->in(0);
+        }
+      }
+    }
+    ShouldNotReachHere();
+    return -1;
+  }
+};
+
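+// A minimal usage sketch (mirroring the use in loop_predication_impl
+// below):
+//
+//   PathFrequency pf(loop->_head, this);
+//   float f = pf.to(proj);                  // 0 <= f <= 1
+//   bool worth_hoisting = f * loop_trip_cnt >= 1;
+//
+// Walk the branches inside the loop body starting from region n,
+// collecting the uncommon trap if projections that profile data says
+// are reached at least once per loop invocation.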
+void PhaseIdealLoop::loop_predication_follow_branches(Node *n, IdealLoopTree *loop, float loop_trip_cnt,
+                                                      PathFrequency& pf, Node_Stack& stack, VectorSet& seen,
+                                                      Node_List& if_proj_list) {
+  assert(n->is_Region(), "start from a region");
+  Node* tail = loop->tail();
+  stack.push(n, 1);
+  do {
+    Node* c = stack.node();
+    assert(c->is_Region() || c->is_IfProj(), "only region here");
+    uint i = stack.index();
+
+    if (i < c->req()) {
+      stack.set_index(i+1);
+      Node* in = c->in(i);
+      while (!is_dominator(in, tail) && !seen.test_set(in->_idx)) {
+        IdealLoopTree* in_loop = get_loop(in);
+        if (in_loop != loop) {
+          in = in_loop->_head->in(LoopNode::EntryControl);
+        } else if (in->is_Region()) {
+          stack.push(in, 1);
+          break;
+        } else if (in->is_IfProj() &&
+                   in->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none)) {
+          if (pf.to(in) * loop_trip_cnt >= 1) {
+            stack.push(in, 1);
+          }
+          in = in->in(0);
+        } else {
+          in = in->in(0);
+        }
+      }
+    } else {
+      if (c->is_IfProj()) {
+        if_proj_list.push(c);
+      }
+      stack.pop();
+    }
+
+  } while (stack.size() > 0);
+}
+
+
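+// Try to hoist the test guarding one if projection out of the loop
+// as a predicate. Returns true on success; 'reason' selects whether
+// the test lands in the regular or the profile-based predicate block.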
+bool PhaseIdealLoop::loop_predication_impl_helper(IdealLoopTree *loop, ProjNode* proj, ProjNode *predicate_proj,
+                                                  CountedLoopNode *cl, ConNode* zero, Invariance& invar,
+                                                  Deoptimization::DeoptReason reason) {
+  // Set to a non-NULL value when a predicate can be hoisted
+  ProjNode* new_predicate_proj = NULL;
+  IfNode*   iff  = proj->in(0)->as_If();
+  Node*     test = iff->in(1);
+  if (!test->is_Bool()) { // Conv2B, ...
+    return false;
+  }
+  BoolNode* bol = test->as_Bool();
+  if (invar.is_invariant(bol)) {
+    // Invariant test
+    new_predicate_proj = create_new_if_for_predicate(predicate_proj, NULL,
+                                                     reason,
+                                                     iff->Opcode());
+    Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0);
+    BoolNode* new_predicate_bol = invar.clone(bol, ctrl)->as_Bool();
+
+    // Negate test if necessary
+    bool negated = false;
+    if (proj->_con != predicate_proj->_con) {
+      new_predicate_bol = new BoolNode(new_predicate_bol->in(1), new_predicate_bol->_test.negate());
+      register_new_node(new_predicate_bol, ctrl);
+      negated = true;
+    }
+    IfNode* new_predicate_iff = new_predicate_proj->in(0)->as_If();
+    _igvn.hash_delete(new_predicate_iff);
+    new_predicate_iff->set_req(1, new_predicate_bol);
+#ifndef PRODUCT
+    if (TraceLoopPredicate) {
+      tty->print("Predicate invariant if%s: %d ", negated ? " negated" : "", new_predicate_iff->_idx);
+      loop->dump_head();
+    } else if (TraceLoopOpts) {
+      tty->print("Predicate IC ");
+      loop->dump_head();
+    }
+#endif
+  } else if (cl != NULL && loop->is_range_check_if(iff, this, invar)) {
+    // Range check for counted loops
+    const Node*    cmp    = bol->in(1)->as_Cmp();
+    Node*          idx    = cmp->in(1);
+    assert(!invar.is_invariant(idx), "index is variant");
+    Node* rng = cmp->in(2);
+    assert(rng->Opcode() == Op_LoadRange || iff->is_RangeCheck() || _igvn.type(rng)->is_int()->_lo >= 0, "must be");
+    assert(invar.is_invariant(rng), "range must be invariant");
+    int scale    = 1;
+    Node* offset = zero;
+    bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset);
+    assert(ok, "must be index expression");
+
+    Node* init    = cl->init_trip();
+    // Limit is not exact.
+    // Calculate exact limit here.
+    // Note, counted loop's test is '<' or '>'.
+    Node* limit   = exact_limit(loop);
+    int  stride   = cl->stride()->get_int();
+
+    // Build if's for the upper and lower bound tests.  The
+    // lower_bound test will dominate the upper bound test and all
+    // cloned or created nodes will use the lower bound test as
+    // their declared control.
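+    // (Roughly: scale*iv + offset is monotonic in iv, so the unsigned
+    // check against rng only has to hold at the two ends of the
+    // induction variable's range; one test is built against init, the
+    // other against the exact limit.)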
+
+    // Perform cloning to keep Invariance state correct since the
+    // late schedule will place invariant things in the loop.
+    Node *ctrl = predicate_proj->in(0)->as_If()->in(0);
+    rng = invar.clone(rng, ctrl);
+    if (offset && offset != zero) {
+      assert(invar.is_invariant(offset), "offset must be loop invariant");
+      offset = invar.clone(offset, ctrl);
+    }
+    // If predicate expressions may overflow in the integer range, longs are used.
+    bool overflow = false;
+
+    // Test the lower bound
+    BoolNode* lower_bound_bol = rc_predicate(loop, ctrl, scale, offset, init, limit, stride, rng, false, overflow);
+    // Negate test if necessary
+    bool negated = false;
+    if (proj->_con != predicate_proj->_con) {
+      lower_bound_bol = new BoolNode(lower_bound_bol->in(1), lower_bound_bol->_test.negate());
+      register_new_node(lower_bound_bol, ctrl);
+      negated = true;
+    }
+    ProjNode* lower_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, reason, overflow ? Op_If : iff->Opcode());
+    IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If();
+    _igvn.hash_delete(lower_bound_iff);
+    lower_bound_iff->set_req(1, lower_bound_bol);
+    if (TraceLoopPredicate) tty->print_cr("lower bound check if: %s %d ", negated ? " negated" : "", lower_bound_iff->_idx);
+
+    // Test the upper bound
+    BoolNode* upper_bound_bol = rc_predicate(loop, lower_bound_proj, scale, offset, init, limit, stride, rng, true, overflow);
+    negated = false;
+    if (proj->_con != predicate_proj->_con) {
+      upper_bound_bol = new BoolNode(upper_bound_bol->in(1), upper_bound_bol->_test.negate());
+      register_new_node(upper_bound_bol, ctrl);
+      negated = true;
+    }
+    ProjNode* upper_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, reason, overflow ? Op_If : iff->Opcode());
+    assert(upper_bound_proj->in(0)->as_If()->in(0) == lower_bound_proj, "should dominate");
+    IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If();
+    _igvn.hash_delete(upper_bound_iff);
+    upper_bound_iff->set_req(1, upper_bound_bol);
+    if (TraceLoopPredicate) tty->print_cr("upper bound check if: %s %d ", negated ? " negated" : "", lower_bound_iff->_idx);
+
+    // Fall through into rest of the clean up code which will move
+    // any dependent nodes onto the upper bound test.
+    new_predicate_proj = upper_bound_proj;
+
+    if (iff->is_RangeCheck()) {
+      new_predicate_proj = insert_skeleton_predicate(iff, loop, proj, predicate_proj, upper_bound_proj, scale, offset, init, limit, stride, rng, overflow, reason);
+    }
+
+#ifndef PRODUCT
+    if (TraceLoopOpts && !TraceLoopPredicate) {
+      tty->print("Predicate RC ");
+      loop->dump_head();
+    }
+#endif
+  } else {
+    // Loop variant check (for example, range check in non-counted loop)
+    // with uncommon trap.
+    return false;
+  }
+  assert(new_predicate_proj != NULL, "sanity");
+  // Success - attach condition (new_predicate_bol) to predicate if
+  invar.map_ctrl(proj, new_predicate_proj); // so that subsequent invariance checks use the new predicate's control
+
+  // Eliminate the old If in the loop body
+  dominated_by( new_predicate_proj, iff, proj->_con != new_predicate_proj->_con );
+
+  C->set_major_progress();
+  return true;
+}
+
+
 // After pre/main/post loops are created, we'll put a copy of some
-// range checks between the pre and main loop to validate the initial
-// value of the induction variable for the main loop. Make a copy of
-// the predicates here with an opaque node as a place holder for the
-// initial value.
+// range checks between the pre and main loop to validate the value
+// of the main loop induction variable. Make a copy of the predicates
+// here with an opaque node as a place holder for the value (will be
+// updated by PhaseIdealLoop::update_skeleton_predicate()).
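+// (The Opaque1 node hides the placeholder value from the optimizer,
+// and the Opaque4 node wrapping the test defaults to true once loop
+// opts are over, so the copied predicates eventually fold away; they
+// only exist to keep the IR graph consistent in the meantime.)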
 ProjNode* PhaseIdealLoop::insert_skeleton_predicate(IfNode* iff, IdealLoopTree *loop,
                                                     ProjNode* proj, ProjNode *predicate_proj,
                                                     ProjNode* upper_bound_proj,
                                                     int scale, Node* offset,
                                                     Node* init, Node* limit, jint stride,
-                                                    Node* rng, bool &overflow) {
+                                                    Node* rng, bool &overflow,
+                                                    Deoptimization::DeoptReason reason) {
   assert(proj->_con && predicate_proj->_con, "not a range check?");
   Node* opaque_init = new Opaque1Node(C, init);
   register_new_node(opaque_init, upper_bound_proj);
   BoolNode* bol = rc_predicate(loop, upper_bound_proj, scale, offset, opaque_init, limit, stride, rng, (stride > 0) != (scale > 0), overflow);
   Node* opaque_bol = new Opaque4Node(C, bol, _igvn.intcon(1)); // This will go away once loop opts are over
   register_new_node(opaque_bol, upper_bound_proj);
-  ProjNode* new_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate, overflow ? Op_If : iff->Opcode());
+  ProjNode* new_proj = create_new_if_for_predicate(predicate_proj, NULL, reason, overflow ? Op_If : iff->Opcode());
   _igvn.replace_input_of(new_proj->in(0), 1, opaque_bol);
   assert(opaque_init->outcnt() > 0, "should be used");
   return new_proj;
@@ -821,13 +1267,32 @@
   }
 
   Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
+  ProjNode *loop_limit_proj = NULL;
   ProjNode *predicate_proj = NULL;
+  ProjNode *profile_predicate_proj = NULL;
   // Loop limit check predicate should be near the loop.
-  predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
-  if (predicate_proj != NULL)
-    entry = predicate_proj->in(0)->in(0);
+  loop_limit_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
+  if (loop_limit_proj != NULL) {
+    entry = loop_limit_proj->in(0)->in(0);
+  }
+  bool has_profile_predicates = false;
+  profile_predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
+  if (profile_predicate_proj != NULL) {
+    Node* n = skip_loop_predicates(entry);
+    // Check if predicates were already added to the profile predicate
+    // block
+    if (n != entry->in(0)->in(0)) {
+      has_profile_predicates = true;
+    }
+    entry = n;
+  }
   predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
-  if (!predicate_proj) {
+
+  float loop_trip_cnt = -1;
+  bool follow_branches = loop_predication_should_follow_branches(loop, profile_predicate_proj, loop_trip_cnt);
+  assert(!follow_branches || loop_trip_cnt >= 0, "negative trip count?");
+
+  if (predicate_proj == NULL && !follow_branches) {
 #ifndef PRODUCT
     if (TraceLoopPredicate) {
       tty->print("missing predicate:");
@@ -846,7 +1311,11 @@
   // Create list of if-projs such that a newer proj dominates all older
   // projs in the list, and they all dominate loop->tail()
   Node_List if_proj_list(area);
+  Node_List regions(area);
   Node *current_proj = loop->tail(); //start from tail
+
+  Node_List controls(area);
   while (current_proj != head) {
     if (loop == get_loop(current_proj) && // still in the loop ?
         current_proj->is_Proj()        && // is a projection  ?
@@ -854,161 +1323,79 @@
          current_proj->in(0)->Opcode() == Op_RangeCheck)) { // is a if projection ?
       if_proj_list.push(current_proj);
     }
+    if (follow_branches &&
+        current_proj->Opcode() == Op_Region &&
+        loop == get_loop(current_proj)) {
+      regions.push(current_proj);
+    }
     current_proj = idom(current_proj);
   }
 
   bool hoisted = false; // true if at least one proj is promoted
-  while (if_proj_list.size() > 0) {
-    // Following are changed to nonnull when a predicate can be hoisted
-    ProjNode* new_predicate_proj = NULL;
 
-    ProjNode* proj = if_proj_list.pop()->as_Proj();
-    IfNode*   iff  = proj->in(0)->as_If();
+  if (!has_profile_predicates) {
+    while (if_proj_list.size() > 0) {
+      Node* n = if_proj_list.pop();
 
-    if (!proj->is_uncommon_trap_if_pattern(Deoptimization::Reason_none)) {
-      if (loop->is_loop_exit(iff)) {
-        // stop processing the remaining projs in the list because the execution of them
-        // depends on the condition of "iff" (iff->in(1)).
+      ProjNode* proj = n->as_Proj();
+      IfNode*   iff  = proj->in(0)->as_If();
+
+      CallStaticJavaNode* call = proj->is_uncommon_trap_if_pattern(Deoptimization::Reason_none);
+      if (call == NULL) {
+        if (loop->is_loop_exit(iff)) {
+          // stop processing the remaining projs in the list because their
+          // execution depends on the condition of "iff" (iff->in(1)).
+          break;
+        } else {
+          // Both arms are inside the loop. There are two cases:
+          // (1) there is one backward branch. In this case, any remaining proj
+          //     in the if_proj list post-dominates "iff". So, the condition of "iff"
+          //     does not directly determine the execution of the remaining
+          //     projs, and we can safely continue.
+          // (2) both arms are forward branches, i.e. a diamond shape. In this
+          //     case, "proj" does not dominate loop->tail(), so it cannot be
+          //     in the if_proj list.
+          continue;
+        }
+      }
+      Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(call->uncommon_trap_request());
+      if (reason == Deoptimization::Reason_predicate) {
         break;
-      } else {
-        // Both arms are inside the loop. There are two cases:
-        // (1) there is one backward branch. In this case, any remaining proj
-        //     in the if_proj list post-dominates "iff". So, the condition of "iff"
-        //     does not determine the execution the remining projs directly, and we
-        //     can safely continue.
-        // (2) both arms are forwarded, i.e. a diamond shape. In this case, "proj"
-        //     does not dominate loop->tail(), so it can not be in the if_proj list.
-        continue;
+      }
+
+      if (predicate_proj != NULL) {
+        hoisted = loop_predication_impl_helper(loop, proj, predicate_proj, cl, zero, invar, Deoptimization::Reason_predicate) | hoisted;
+      }
+    } // end while
+  }
+
+  Node_List if_proj_list_freq(area);
+  if (follow_branches) {
+    PathFrequency pf(loop->_head, this);
+
+    // Some projections were skipped during regular predication because
+    // of an early loop exit. Try them again with profile data.
+    while (if_proj_list.size() > 0) {
+      Node* proj = if_proj_list.pop();
+      float f = pf.to(proj);
+      if (proj->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none) &&
+          f * loop_trip_cnt >= 1) {
+        hoisted = loop_predication_impl_helper(loop, proj->as_Proj(), profile_predicate_proj, cl, zero, invar, Deoptimization::Reason_profile_predicate) | hoisted;
       }
     }
 
-    Node*     test = iff->in(1);
-    if (!test->is_Bool()){ //Conv2B, ...
-      continue;
+    // And look into all branches
+    Node_Stack stack(0);
+    VectorSet seen(Thread::current()->resource_area());
+    while (regions.size() > 0) {
+      Node* c = regions.pop();
+      loop_predication_follow_branches(c, loop, loop_trip_cnt, pf, stack, seen, if_proj_list_freq);
     }
-    BoolNode* bol = test->as_Bool();
-    if (invar.is_invariant(bol)) {
-      // Invariant test
-      new_predicate_proj = create_new_if_for_predicate(predicate_proj, NULL,
-                                                       Deoptimization::Reason_predicate,
-                                                       iff->Opcode());
-      Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0);
-      BoolNode* new_predicate_bol = invar.clone(bol, ctrl)->as_Bool();
 
-      // Negate test if necessary
-      bool negated = false;
-      if (proj->_con != predicate_proj->_con) {
-        new_predicate_bol = new BoolNode(new_predicate_bol->in(1), new_predicate_bol->_test.negate());
-        register_new_node(new_predicate_bol, ctrl);
-        negated = true;
-      }
-      IfNode* new_predicate_iff = new_predicate_proj->in(0)->as_If();
-      _igvn.hash_delete(new_predicate_iff);
-      new_predicate_iff->set_req(1, new_predicate_bol);
-#ifndef PRODUCT
-      if (TraceLoopPredicate) {
-        tty->print("Predicate invariant if%s: %d ", negated ? " negated" : "", new_predicate_iff->_idx);
-        loop->dump_head();
-      } else if (TraceLoopOpts) {
-        tty->print("Predicate IC ");
-        loop->dump_head();
-      }
-#endif
-    } else if (cl != NULL && loop->is_range_check_if(iff, this, invar)) {
-      // Range check for counted loops
-      const Node*    cmp    = bol->in(1)->as_Cmp();
-      Node*          idx    = cmp->in(1);
-      assert(!invar.is_invariant(idx), "index is variant");
-      Node* rng = cmp->in(2);
-      assert(rng->Opcode() == Op_LoadRange || iff->is_RangeCheck() || _igvn.type(rng)->is_int()->_lo >= 0, "must be");
-      assert(invar.is_invariant(rng), "range must be invariant");
-      int scale    = 1;
-      Node* offset = zero;
-      bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset);
-      assert(ok, "must be index expression");
-
-      Node* init    = cl->init_trip();
-      // Limit is not exact.
-      // Calculate exact limit here.
-      // Note, counted loop's test is '<' or '>'.
-      Node* limit   = exact_limit(loop);
-      int  stride   = cl->stride()->get_int();
-
-      // Build if's for the upper and lower bound tests.  The
-      // lower_bound test will dominate the upper bound test and all
-      // cloned or created nodes will use the lower bound test as
-      // their declared control.
-
-      // Perform cloning to keep Invariance state correct since the
-      // late schedule will place invariant things in the loop.
-      Node *ctrl = predicate_proj->in(0)->as_If()->in(0);
-      rng = invar.clone(rng, ctrl);
-      if (offset && offset != zero) {
-        assert(invar.is_invariant(offset), "offset must be loop invariant");
-        offset = invar.clone(offset, ctrl);
-      }
-      // If predicate expressions may overflow in the integer range, longs are used.
-      bool overflow = false;
-
-      // Test the lower bound
-      BoolNode* lower_bound_bol = rc_predicate(loop, ctrl, scale, offset, init, limit, stride, rng, false, overflow);
-      // Negate test if necessary
-      bool negated = false;
-      if (proj->_con != predicate_proj->_con) {
-        lower_bound_bol = new BoolNode(lower_bound_bol->in(1), lower_bound_bol->_test.negate());
-        register_new_node(lower_bound_bol, ctrl);
-        negated = true;
-      }
-      ProjNode* lower_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate, overflow ? Op_If : iff->Opcode());
-      IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If();
-      _igvn.hash_delete(lower_bound_iff);
-      lower_bound_iff->set_req(1, lower_bound_bol);
-      if (TraceLoopPredicate) tty->print_cr("lower bound check if: %s %d ", negated ? " negated" : "", lower_bound_iff->_idx);
-
-      // Test the upper bound
-      BoolNode* upper_bound_bol = rc_predicate(loop, lower_bound_proj, scale, offset, init, limit, stride, rng, true, overflow);
-      negated = false;
-      if (proj->_con != predicate_proj->_con) {
-        upper_bound_bol = new BoolNode(upper_bound_bol->in(1), upper_bound_bol->_test.negate());
-        register_new_node(upper_bound_bol, ctrl);
-        negated = true;
-      }
-      ProjNode* upper_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate, overflow ? Op_If : iff->Opcode());
-      assert(upper_bound_proj->in(0)->as_If()->in(0) == lower_bound_proj, "should dominate");
-      IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If();
-      _igvn.hash_delete(upper_bound_iff);
-      upper_bound_iff->set_req(1, upper_bound_bol);
-      if (TraceLoopPredicate) tty->print_cr("upper bound check if: %s %d ", negated ? " negated" : "", lower_bound_iff->_idx);
-
-      // Fall through into rest of the clean up code which will move
-      // any dependent nodes onto the upper bound test.
-      new_predicate_proj = upper_bound_proj;
-
-      if (iff->is_RangeCheck()) {
-        new_predicate_proj = insert_skeleton_predicate(iff, loop, proj, predicate_proj, upper_bound_proj, scale, offset, init, limit, stride, rng, overflow);
-      }
-
-#ifndef PRODUCT
-      if (TraceLoopOpts && !TraceLoopPredicate) {
-        tty->print("Predicate RC ");
-        loop->dump_head();
-      }
-#endif
-    } else {
-      // Loop variant check (for example, range check in non-counted loop)
-      // with uncommon trap.
-      continue;
+    for (uint i = 0; i < if_proj_list_freq.size(); i++) {
+      ProjNode* proj = if_proj_list_freq.at(i)->as_Proj();
+      hoisted = loop_predication_impl_helper(loop, proj, profile_predicate_proj, cl, zero, invar, Deoptimization::Reason_profile_predicate) | hoisted;
     }
-    assert(new_predicate_proj != NULL, "sanity");
-    // Success - attach condition (new_predicate_bol) to predicate if
-    invar.map_ctrl(proj, new_predicate_proj); // so that invariance test can be appropriate
-
-    // Eliminate the old If in the loop body
-    dominated_by( new_predicate_proj, iff, proj->_con != new_predicate_proj->_con );
-
-    hoisted = true;
-    C->set_major_progress();
-  } // end while
+  }
 
 #ifndef PRODUCT
   // report that the loop predication has been actually performed
--- a/src/hotspot/share/opto/loopTransform.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/opto/loopTransform.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -137,11 +137,45 @@
 //------------------------------compute_profile_trip_cnt----------------------------
 // Compute loop trip count from profile data as
 //    (backedge_count + loop_exit_count) / loop_exit_count
-void IdealLoopTree::compute_profile_trip_cnt( PhaseIdealLoop *phase ) {
-  if (!_head->is_CountedLoop()) {
+
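+// Helper: profile-estimated number of times control leaves the loop
+// through node n per loop invocation, or 0 if n is not a profiled
+// exit. For illustration, an If executed 1000 times with an exit
+// probability of 0.01 contributes 1000 * 0.01 = 10 to the loop's
+// total exit count.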
+float IdealLoopTree::compute_profile_trip_cnt_helper(Node* n) {
+  if (n->is_If()) {
+    IfNode *iff = n->as_If();
+    if (iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN) {
+      Node *exit = is_loop_exit(iff);
+      if (exit) {
+        float exit_prob = iff->_prob;
+        if (exit->Opcode() == Op_IfFalse) exit_prob = 1.0 - exit_prob;
+        if (exit_prob > PROB_MIN) {
+          float exit_cnt = iff->_fcnt * exit_prob;
+          return exit_cnt;
+        }
+      }
+    }
+  }
+  if (n->is_Jump()) {
+    JumpNode *jmp = n->as_Jump();
+    if (jmp->_fcnt != COUNT_UNKNOWN) {
+      float* probs = jmp->_probs;
+      float exit_prob = 0;
+      for (DUIterator_Fast imax, i = jmp->fast_outs(imax); i < imax; i++) {
+        JumpProjNode* u = jmp->fast_out(i)->as_JumpProj();
+        if (!is_member(_phase->get_loop(u))) {
+          exit_prob += probs[u->_con];
+        }
+      }
+      return exit_prob * jmp->_fcnt;
+    }
+  }
+  return 0;
+}
+
+void IdealLoopTree::compute_profile_trip_cnt(PhaseIdealLoop *phase) {
+  if (!_head->is_Loop()) {
     return;
   }
-  CountedLoopNode* head = _head->as_CountedLoop();
+  LoopNode* head = _head->as_Loop();
   if (head->profile_trip_cnt() != COUNT_UNKNOWN) {
     return; // Already computed
   }
@@ -153,7 +187,8 @@
         back->in(0) &&
         back->in(0)->is_If() &&
         back->in(0)->as_If()->_fcnt != COUNT_UNKNOWN &&
-        back->in(0)->as_If()->_prob != PROB_UNKNOWN) {
+        back->in(0)->as_If()->_prob != PROB_UNKNOWN &&
+        (back->Opcode() == Op_IfTrue ? 1-back->in(0)->as_If()->_prob : back->in(0)->as_If()->_prob) > PROB_MIN) {
       break;
     }
     back = phase->idom(back);
@@ -162,26 +197,34 @@
     assert((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) &&
            back->in(0), "if-projection exists");
     IfNode* back_if = back->in(0)->as_If();
-    float loop_back_cnt = back_if->_fcnt * back_if->_prob;
+    float loop_back_cnt = back_if->_fcnt * (back->Opcode() == Op_IfTrue ? back_if->_prob : (1 - back_if->_prob));
 
     // Now compute a loop exit count
     float loop_exit_cnt = 0.0f;
-    for( uint i = 0; i < _body.size(); i++ ) {
-      Node *n = _body[i];
-      if( n->is_If() ) {
-        IfNode *iff = n->as_If();
-        if( iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN ) {
-          Node *exit = is_loop_exit(iff);
-          if( exit ) {
-            float exit_prob = iff->_prob;
-            if (exit->Opcode() == Op_IfFalse) exit_prob = 1.0 - exit_prob;
-            if (exit_prob > PROB_MIN) {
-              float exit_cnt = iff->_fcnt * exit_prob;
-              loop_exit_cnt += exit_cnt;
+    if (_child == NULL) {
+      for( uint i = 0; i < _body.size(); i++ ) {
+        Node *n = _body[i];
+        loop_exit_cnt += compute_profile_trip_cnt_helper(n);
+      }
+    } else {
+      ResourceMark rm;
+      Unique_Node_List wq;
+      wq.push(back);
+      for (uint i = 0; i < wq.size(); i++) {
+        Node *n = wq.at(i);
+        assert(n->is_CFG(), "only control nodes");
+        if (n != head) {
+          if (n->is_Region()) {
+            for (uint j = 1; j < n->req(); j++) {
+              wq.push(n->in(j));
             }
+          } else {
+            loop_exit_cnt += compute_profile_trip_cnt_helper(n);
+            wq.push(n->in(0));
           }
         }
       }
     }
     if (loop_exit_cnt > 0.0f) {
       trip_cnt = (loop_back_cnt + loop_exit_cnt) / loop_exit_cnt;
@@ -189,6 +232,8 @@
       // No exit count so use
       trip_cnt = loop_back_cnt;
     }
+  } else {
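+    // Record that no usable profile data was found for this loop
+    // (no profiled If dominates the backedge).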
+    head->mark_profile_trip_failed();
   }
 #ifndef PRODUCT
   if (TraceProfileTripCount) {
@@ -1014,11 +1059,139 @@
 // loop is never executed). When that happens, range check
 // CastII/ConvI2L nodes cause some data paths to die. For consistency,
 // the control paths must die too but the range checks were removed by
-// predication. The range checks that we add here guarantee that they
-// do.
-void PhaseIdealLoop::duplicate_predicates(CountedLoopNode* pre_head, Node* min_taken, Node* castii,
-                                          IdealLoopTree* outer_loop, LoopNode* outer_main_head,
-                                          uint dd_main_head) {
+// predication. The range checks that we add here guarantee that they do.
+void PhaseIdealLoop::duplicate_predicates_helper(Node* predicate, Node* castii, IdealLoopTree* outer_loop,
+                                                 LoopNode* outer_main_head, uint dd_main_head) {
+  if (predicate != NULL) {
+    IfNode* iff = predicate->in(0)->as_If();
+    ProjNode* uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con);
+    Node* rgn = uncommon_proj->unique_ctrl_out();
+    assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
+    assert(iff->in(1)->in(1)->Opcode() == Op_Opaque1, "unexpected predicate shape");
+    predicate = iff->in(0);
+    Node* current_proj = outer_main_head->in(LoopNode::EntryControl);
+    Node* prev_proj = current_proj;
+    while (predicate != NULL && predicate->is_Proj() && predicate->in(0)->is_If()) {
+      iff = predicate->in(0)->as_If();
+      uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con);
+      if (uncommon_proj->unique_ctrl_out() != rgn)
+        break;
+      if (iff->in(1)->Opcode() == Op_Opaque4) {
+        // Clone the predicate twice and initialize one with the initial
+        // value of the loop induction variable. Leave the other predicate
+        // to be initialized when increasing the stride during loop unrolling.
+        prev_proj = update_skeleton_predicate(iff, castii, predicate, uncommon_proj, current_proj, outer_loop, prev_proj);
+        Node* value = new Opaque1Node(C, castii);
+        register_new_node(value, current_proj);
+        prev_proj = update_skeleton_predicate(iff, value, predicate, uncommon_proj, current_proj, outer_loop, prev_proj);
+        // Remove the skeleton predicate from the pre-loop
+        _igvn.replace_input_of(iff, 1, _igvn.intcon(1));
+      }
+      predicate = predicate->in(0)->in(0);
+    }
+    _igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj);
+    set_idom(outer_main_head, prev_proj, dd_main_head);
+  }
+}
+
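+// Either clone the skeleton predicate's test with its Opaque1 input
+// replaced by 'value' and wire the clone in after 'prev_proj' (when
+// outer_loop is non-NULL), or update the existing Opaque1 input in
+// place (when outer_loop is NULL, as done when unrolling changes the
+// stride).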
+Node* PhaseIdealLoop::update_skeleton_predicate(Node* iff, Node* value, Node* predicate, Node* uncommon_proj,
+                                                Node* current_proj, IdealLoopTree* outer_loop, Node* prev_proj) {
+  bool clone = (outer_loop != NULL); // Clone the predicate?
+  Node_Stack to_clone(2);
+  to_clone.push(iff->in(1), 1);
+  uint current = C->unique();
+  Node* result = NULL;
+  // Look for the opaque node to replace with the new value
+  // and clone everything in between. We keep the Opaque4 node
+  // so the duplicated predicates are eliminated once loop
+  // opts are over: they are here only to keep the IR graph
+  // consistent.
+  do {
+    Node* n = to_clone.node();
+    uint i = to_clone.index();
+    Node* m = n->in(i);
+    int op = m->Opcode();
+    if (m->is_Bool() ||
+        m->is_Cmp() ||
+        op == Op_AndL ||
+        op == Op_OrL ||
+        op == Op_RShiftL ||
+        op == Op_LShiftL ||
+        op == Op_AddL ||
+        op == Op_AddI ||
+        op == Op_MulL ||
+        op == Op_MulI ||
+        op == Op_SubL ||
+        op == Op_SubI ||
+        op == Op_ConvI2L) {
+      to_clone.push(m, 1);
+      continue;
+    }
+    if (op == Op_Opaque1) {
+      if (!clone) {
+        // Update the input of the Opaque1Node and exit
+        _igvn.replace_input_of(m, 1, value);
+        return prev_proj;
+      }
+      if (n->_idx < current) {
+        n = n->clone();
+      }
+      n->set_req(i, value);
+      register_new_node(n, current_proj);
+      to_clone.set_node(n);
+    }
+    for (;;) {
+      Node* cur = to_clone.node();
+      uint j = to_clone.index();
+      if (j+1 < cur->req()) {
+        to_clone.set_index(j+1);
+        break;
+      }
+      to_clone.pop();
+      if (to_clone.size() == 0) {
+        result = cur;
+        break;
+      }
+      Node* next = to_clone.node();
+      j = to_clone.index();
+      if (clone && cur->_idx >= current) {
+        if (next->_idx < current) {
+          next = next->clone();
+          register_new_node(next, current_proj);
+          to_clone.set_node(next);
+        }
+        assert(next->in(j) != cur, "input should have been cloned");
+        next->set_req(j, cur);
+      }
+    }
+  } while (result == NULL);
+  if (!clone) {
+    return NULL;
+  }
+  assert(result->_idx >= current, "new node expected");
+
+  Node* proj = predicate->clone();
+  Node* other_proj = uncommon_proj->clone();
+  Node* new_iff = iff->clone();
+  new_iff->set_req(1, result);
+  proj->set_req(0, new_iff);
+  other_proj->set_req(0, new_iff);
+  Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
+  register_new_node(frame, C->start());
+  // It's impossible for the predicate to fail at runtime. Use a Halt node.
+  Node* halt = new HaltNode(other_proj, frame);
+  C->root()->add_req(halt);
+  new_iff->set_req(0, prev_proj);
+
+  register_control(new_iff, outer_loop->_parent, prev_proj);
+  register_control(proj, outer_loop->_parent, new_iff);
+  register_control(other_proj, _ltree_root, new_iff);
+  register_control(halt, _ltree_root, other_proj);
+  return proj;
+}
+
+void PhaseIdealLoop::duplicate_predicates(CountedLoopNode* pre_head, Node* castii, IdealLoopTree* outer_loop,
+                                          LoopNode* outer_main_head, uint dd_main_head) {
   if (UseLoopPredicate) {
     Node* entry = pre_head->in(LoopNode::EntryControl);
     Node* predicate = NULL;
@@ -1026,112 +1199,16 @@
     if (predicate != NULL) {
       entry = entry->in(0)->in(0);
     }
+    Node* profile_predicate = NULL;
+    if (UseProfiledLoopPredicate) {
+      profile_predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
+      if (profile_predicate != NULL) {
+        entry = skip_loop_predicates(entry);
+      }
+    }
     predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
-    if (predicate != NULL) {
-      IfNode* iff = entry->in(0)->as_If();
-      ProjNode* uncommon_proj = iff->proj_out(1 - entry->as_Proj()->_con);
-      Node* rgn = uncommon_proj->unique_ctrl_out();
-      assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
-      assert(iff->in(1)->in(1)->Opcode() == Op_Opaque1, "unexpected predicate shape");
-      entry = entry->in(0)->in(0);
-      Node* prev_proj = min_taken;
-      while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) {
-        uncommon_proj = entry->in(0)->as_If()->proj_out(1 - entry->as_Proj()->_con);
-        if (uncommon_proj->unique_ctrl_out() != rgn)
-          break;
-        iff = entry->in(0)->as_If();
-        if (iff->in(1)->Opcode() == Op_Opaque4) {
-          Node_Stack to_clone(2);
-          to_clone.push(iff->in(1), 1);
-          uint current = C->unique();
-          Node* result = NULL;
-          // Look for the opaque node to replace with the init value
-          // and clone everything in between. We keep the Opaque4 node
-          // so the duplicated predicates are eliminated once loop
-          // opts are over: they are here only to keep the IR graph
-          // consistent.
-          do {
-            Node* n = to_clone.node();
-            uint i = to_clone.index();
-            Node* m = n->in(i);
-            int op = m->Opcode();
-            if (m->is_Bool() ||
-                m->is_Cmp() ||
-                op == Op_AndL ||
-                op == Op_OrL ||
-                op == Op_RShiftL ||
-                op == Op_LShiftL ||
-                op == Op_AddL ||
-                op == Op_AddI ||
-                op == Op_MulL ||
-                op == Op_MulI ||
-                op == Op_SubL ||
-                op == Op_SubI ||
-                op == Op_ConvI2L) {
-              to_clone.push(m, 1);
-              continue;
-            }
-            if (op == Op_Opaque1) {
-              if (n->_idx < current) {
-                n = n->clone();
-              }
-              n->set_req(i, castii);
-              register_new_node(n, min_taken);
-              to_clone.set_node(n);
-            }
-            for (;;) {
-              Node* cur = to_clone.node();
-              uint j = to_clone.index();
-              if (j+1 < cur->req()) {
-                to_clone.set_index(j+1);
-                break;
-              }
-              to_clone.pop();
-              if (to_clone.size() == 0) {
-                result = cur;
-                break;
-              }
-              Node* next = to_clone.node();
-              j = to_clone.index();
-              if (cur->_idx >= current) {
-                if (next->_idx < current) {
-                  next = next->clone();
-                  register_new_node(next, min_taken);
-                  to_clone.set_node(next);
-                }
-                assert(next->in(j) != cur, "input should have been cloned");
-                next->set_req(j, cur);
-              }
-            }
-          } while (result == NULL);
-          assert(result->_idx >= current, "new node expected");
-
-          Node* proj = entry->clone();
-          Node* other_proj = uncommon_proj->clone();
-          Node* new_iff = iff->clone();
-          new_iff->set_req(1, result);
-          proj->set_req(0, new_iff);
-          other_proj->set_req(0, new_iff);
-          Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
-          register_new_node(frame, C->start());
-          // It's impossible for the predicate to fail at runtime. Use
-          // an Halt node.
-          Node* halt = new HaltNode(other_proj, frame);
-          C->root()->add_req(halt);
-          new_iff->set_req(0, prev_proj);
-
-          register_control(new_iff, outer_loop->_parent, prev_proj);
-          register_control(proj, outer_loop->_parent, new_iff);
-          register_control(other_proj, _ltree_root, new_iff);
-          register_control(halt, _ltree_root, other_proj);
-
-          prev_proj = proj;
-        }
-        entry = entry->in(0)->in(0);
-      }
-      _igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj);
-      set_idom(outer_main_head, prev_proj, dd_main_head);
-    }
+    duplicate_predicates_helper(predicate, castii, outer_loop, outer_main_head, dd_main_head);
+    duplicate_predicates_helper(profile_predicate, castii, outer_loop, outer_main_head, dd_main_head);
   }
 }
 
@@ -1278,7 +1355,7 @@
   // CastII for the main loop:
   Node* castii = cast_incr_before_loop( pre_incr, min_taken, main_head );
   assert(castii != NULL, "no castII inserted");
-  duplicate_predicates(pre_head, min_taken, castii, outer_loop, outer_main_head, dd_main_head);
+  duplicate_predicates(pre_head, castii, outer_loop, outer_main_head, dd_main_head);
 
   // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
   // RCE and alignment may change this later.
@@ -1622,6 +1699,30 @@
   assert(old_trip_count > 1 &&
       (!adjust_min_trip || stride_p <= (1<<3)*loop_head->unrolled_count()), "sanity");
 
+  if (UseLoopPredicate) {
+    // Search for skeleton predicates and update them according to the new stride
+    Node* entry = ctrl;
+    while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) {
+      IfNode* iff = entry->in(0)->as_If();
+      ProjNode* proj = iff->proj_out(1 - entry->as_Proj()->_con);
+      if (proj->unique_ctrl_out()->Opcode() != Op_Halt) {
+        break;
+      }
+      if (iff->in(1)->Opcode() == Op_Opaque4) {
+        // Compute the value of the loop induction variable at the end of the
+        // first iteration of the unrolled loop: init + new_stride_con - init_inc
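+        // (e.g. for a positive stride going from 4 to 8 with an
+        // original increment of 1, the predicate is re-checked for
+        // init + 8 - 1, the largest iv value the first unrolled
+        // iteration will use)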
+        int init_inc = stride_con/loop_head->unrolled_count();
+        assert(init_inc != 0, "invalid loop increment");
+        int new_stride_con = stride_con * 2;
+        Node* max_value = _igvn.intcon(new_stride_con - init_inc);
+        max_value = new AddINode(init, max_value);
+        register_new_node(max_value, get_ctrl(iff->in(1)));
+        update_skeleton_predicate(iff, max_value);
+      }
+      entry = entry->in(0)->in(0);
+    }
+  }
+
   // Adjust loop limit to keep valid iterations number after unroll.
   // Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride
   // which may overflow.
@@ -2815,7 +2916,7 @@
   }
   if (needs_guard) {
     // Check for an obvious zero trip guard.
-    Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->skip_predicates());
+    Node* inctrl = PhaseIdealLoop::skip_all_loop_predicates(cl->skip_predicates());
     if (inctrl->Opcode() == Op_IfTrue || inctrl->Opcode() == Op_IfFalse) {
       bool maybe_swapped = (inctrl->Opcode() == Op_IfFalse);
       // The test should look like just the backedge of a CountedLoop
--- a/src/hotspot/share/opto/loopUnswitch.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/opto/loopUnswitch.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -138,9 +138,19 @@
   Node* uniqc = proj_true->unique_ctrl_out();
   Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
   Node* predicate = find_predicate(entry);
+  if (predicate != NULL) {
+    entry = skip_loop_predicates(entry);
+  }
   if (predicate != NULL && UseLoopPredicate) {
     // We may have two predicates, find first.
-    entry = find_predicate(entry->in(0)->in(0));
+    Node* n = find_predicate(entry);
+    if (n != NULL) {
+      predicate = n;
+      entry = skip_loop_predicates(entry);
+    }
+  }
+  if (predicate != NULL && UseProfiledLoopPredicate) {
+    entry = find_predicate(entry);
     if (entry != NULL) predicate = entry;
   }
   if (predicate != NULL) predicate = predicate->in(0);
--- a/src/hotspot/share/opto/loopnode.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/opto/loopnode.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -1281,9 +1281,7 @@
   return l->outer_safepoint();
 }
 
-Node* CountedLoopNode::skip_predicates() {
-  if (is_main_loop()) {
-    Node* ctrl = skip_strip_mined()->in(LoopNode::EntryControl);
+Node* CountedLoopNode::skip_predicates_from_entry(Node* ctrl) {
     while (ctrl != NULL && ctrl->is_Proj() && ctrl->in(0)->is_If() &&
            ctrl->in(0)->as_If()->proj_out(1-ctrl->as_Proj()->_con)->outcnt() == 1 &&
            ctrl->in(0)->as_If()->proj_out(1-ctrl->as_Proj()->_con)->unique_out()->Opcode() == Op_Halt) {
@@ -1292,6 +1290,13 @@
 
     return ctrl;
   }
+
+Node* CountedLoopNode::skip_predicates() {
+  if (is_main_loop()) {
+    Node* ctrl = skip_strip_mined()->in(LoopNode::EntryControl);
+
+    return skip_predicates_from_entry(ctrl);
+  }
   return in(LoopNode::EntryControl);
 }
 
@@ -2400,6 +2405,13 @@
     entry = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
     if (entry != NULL) {
       tty->print(" predicated");
+      entry = PhaseIdealLoop::skip_loop_predicates(entry);
+    }
+  }
+  if (UseProfiledLoopPredicate) {
+    entry = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
+    if (entry != NULL) {
+      tty->print(" profile_predicated");
     }
   }
   if (_head->is_CountedLoop()) {
@@ -2507,11 +2519,18 @@
     if (predicate_proj != NULL ) { // right pattern that can be used by loop predication
       assert(entry->in(0)->in(1)->in(1)->Opcode() == Op_Opaque1, "must be");
       useful_predicates.push(entry->in(0)->in(1)->in(1)); // good one
-      entry = entry->in(0)->in(0);
+      entry = skip_loop_predicates(entry);
     }
     predicate_proj = find_predicate(entry); // Predicate
     if (predicate_proj != NULL ) {
       useful_predicates.push(entry->in(0)->in(1)->in(1)); // good one
+      entry = skip_loop_predicates(entry);
+    }
+    if (UseProfiledLoopPredicate) {
+      predicate_proj = find_predicate(entry); // Predicate
+      if (predicate_proj != NULL ) {
+        useful_predicates.push(entry->in(0)->in(1)->in(1)); // good one
+      }
     }
   }
 
@@ -4205,12 +4224,33 @@
   // which can inhibit range check elimination.
   if (least != early) {
     Node* ctrl_out = least->unique_ctrl_out();
-    if (ctrl_out && ctrl_out->is_Loop() &&
-        least == ctrl_out->in(LoopNode::EntryControl) &&
-        (ctrl_out->is_CountedLoop() || ctrl_out->is_OuterStripMinedLoop())) {
-      Node* least_dom = idom(least);
-      if (get_loop(least_dom)->is_member(get_loop(least))) {
-        least = least_dom;
+    if (ctrl_out && ctrl_out->is_CountedLoop() &&
+        least == ctrl_out->in(LoopNode::EntryControl)) {
+      Node* new_ctrl = least;
+      // Move the node above predicates so a following pass of loop
+      // predication doesn't hoist a predicate that depends on it
+      // above that node.
+      if (find_predicate_insertion_point(new_ctrl, Deoptimization::Reason_loop_limit_check) != NULL) {
+        new_ctrl = new_ctrl->in(0)->in(0);
+        assert(is_dominator(early, new_ctrl), "least != early so we can move up the dominator tree");
+      }
+      if (find_predicate_insertion_point(new_ctrl, Deoptimization::Reason_profile_predicate) != NULL) {
+        Node* c = new_ctrl->in(0)->in(0);
+        assert(is_dominator(early, c), "least != early so we can move up the dominator tree");
+        new_ctrl = c;
+      }
+      if (find_predicate_insertion_point(new_ctrl, Deoptimization::Reason_predicate) != NULL) {
+        Node* c = new_ctrl->in(0)->in(0);
+        assert(is_dominator(early, c), "least != early so we can move up the dominator tree");
+        new_ctrl = c;
+      }
+      if (new_ctrl != ctrl_out) {
+        least = new_ctrl;
+      } else if (ctrl_out->is_CountedLoop() || ctrl_out->is_OuterStripMinedLoop()) {
+        Node* least_dom = idom(least);
+        if (get_loop(least_dom)->is_member(get_loop(least))) {
+          least = least_dom;
+        }
       }
     }
   }
--- a/src/hotspot/share/opto/loopnode.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/opto/loopnode.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -38,6 +38,7 @@
 class LoopNode;
 class Node;
 class OuterStripMinedLoopEndNode;
+class PathFrequency;
 class PhaseIdealLoop;
 class CountedLoopReserveKit;
 class VectorSet;
@@ -57,7 +58,7 @@
   // the semantics so it does not appear in the hash & cmp functions.
   virtual uint size_of() const { return sizeof(*this); }
 protected:
-  short _loop_flags;
+  uint _loop_flags;
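+  // (widened from short: ProfileTripFailed below uses bit 16)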
   // Names for flag bitfields
   enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
          MainHasNoPreLoop=4,
@@ -73,26 +74,31 @@
          HasAtomicPostLoop=4096,
          HasRangeChecks=8192,
          IsMultiversioned=16384,
-         StripMined=32768};
+         StripMined=32768,
+         ProfileTripFailed=65536};
   char _unswitch_count;
   enum { _unswitch_max=3 };
   char _postloop_flags;
   enum { LoopNotRCEChecked = 0, LoopRCEChecked = 1, RCEPostLoop = 2 };
 
+  // Expected trip count from profile data
+  float _profile_trip_cnt;
+
 public:
   // Names for edge indices
   enum { Self=0, EntryControl, LoopBackControl };
 
-  int is_inner_loop() const { return _loop_flags & InnerLoop; }
+  bool is_inner_loop() const { return _loop_flags & InnerLoop; }
   void set_inner_loop() { _loop_flags |= InnerLoop; }
 
-  int range_checks_present() const { return _loop_flags & HasRangeChecks; }
-  int is_multiversioned() const { return _loop_flags & IsMultiversioned; }
-  int is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
-  int is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
+  bool range_checks_present() const { return _loop_flags & HasRangeChecks; }
+  bool is_multiversioned() const { return _loop_flags & IsMultiversioned; }
+  bool is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
+  bool is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
   void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
-  int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
-  int is_strip_mined() const { return _loop_flags & StripMined; }
+  bool partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
+  bool is_strip_mined() const { return _loop_flags & StripMined; }
+  bool is_profile_trip_failed() const { return _loop_flags & ProfileTripFailed; }
 
   void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
   void mark_has_reductions() { _loop_flags |= HasReductions; }
@@ -105,6 +111,7 @@
   void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }
   void mark_strip_mined() { _loop_flags |= StripMined; }
   void clear_strip_mined() { _loop_flags &= ~StripMined; }
+  void mark_profile_trip_failed() { _loop_flags |= ProfileTripFailed; }
 
   int unswitch_max() { return _unswitch_max; }
   int unswitch_count() { return _unswitch_count; }
@@ -119,7 +126,12 @@
     _unswitch_count = val;
   }
 
-  LoopNode(Node *entry, Node *backedge) : RegionNode(3), _loop_flags(0), _unswitch_count(0), _postloop_flags(0) {
+  void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
+  float profile_trip_cnt()             { return _profile_trip_cnt; }
+
+  LoopNode(Node *entry, Node *backedge)
+    : RegionNode(3), _loop_flags(0), _unswitch_count(0),
+      _postloop_flags(0), _profile_trip_cnt(COUNT_UNKNOWN)  {
     init_class_id(Class_Loop);
     init_req(EntryControl, entry);
     init_req(LoopBackControl, backedge);
@@ -186,9 +198,6 @@
   // Known trip count calculated by compute_exact_trip_count()
   uint  _trip_count;
 
-  // Expected trip count from profile data
-  float _profile_trip_cnt;
-
   // Log2 of original loop bodies in unrolled loop
   int _unrolled_count_log2;
 
@@ -203,8 +212,8 @@
 public:
   CountedLoopNode( Node *entry, Node *backedge )
     : LoopNode(entry, backedge), _main_idx(0), _trip_count(max_juint),
-      _profile_trip_cnt(COUNT_UNKNOWN), _unrolled_count_log2(0),
-      _node_count_before_unroll(0), _slp_maximum_unroll_factor(0) {
+      _unrolled_count_log2(0), _node_count_before_unroll(0),
+      _slp_maximum_unroll_factor(0) {
     init_class_id(Class_CountedLoop);
     // Initialize _trip_count to the largest possible value.
     // Will be reset (lower) if the loop's trip count is known.
@@ -245,16 +254,16 @@
 
   // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
   // Aligned, may be missing it's pre-loop.
-  int is_normal_loop   () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
-  int is_pre_loop      () const { return (_loop_flags&PreMainPostFlagsMask) == Pre;    }
-  int is_main_loop     () const { return (_loop_flags&PreMainPostFlagsMask) == Main;   }
-  int is_post_loop     () const { return (_loop_flags&PreMainPostFlagsMask) == Post;   }
-  int is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; }
-  int was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; }
-  int has_passed_slp   () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
-  int do_unroll_only      () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; }
-  int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
-  int has_atomic_post_loop  () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; }
+  bool is_normal_loop   () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
+  bool is_pre_loop      () const { return (_loop_flags&PreMainPostFlagsMask) == Pre;    }
+  bool is_main_loop     () const { return (_loop_flags&PreMainPostFlagsMask) == Main;   }
+  bool is_post_loop     () const { return (_loop_flags&PreMainPostFlagsMask) == Post;   }
+  bool is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; }
+  bool was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; }
+  bool has_passed_slp   () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
+  bool do_unroll_only      () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; }
+  bool is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
+  bool has_atomic_post_loop  () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; }
   void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; }
 
   int main_idx() const { return _main_idx; }
@@ -280,9 +289,6 @@
     _loop_flags &= ~PassedSlpAnalysis;
   }
 
-  void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
-  float profile_trip_cnt()             { return _profile_trip_cnt; }
-
   void double_unrolled_count() { _unrolled_count_log2++; }
   int  unrolled_count()        { return 1 << MIN2(_unrolled_count_log2, BitsPerInt-3); }
 
@@ -301,6 +307,7 @@
   // If this is a main loop in a pre/main/post loop nest, walk over
   // the predicates that were inserted by
   // duplicate_predicates()/add_range_check_predicate()
+  static Node* skip_predicates_from_entry(Node* ctrl);
   Node* skip_predicates();
 
 #ifndef PRODUCT
@@ -588,6 +595,7 @@
   void compute_trip_count(PhaseIdealLoop* phase);
 
   // Compute loop trip count from profile data
+  float compute_profile_trip_cnt_helper(Node* n);
   void compute_profile_trip_cnt( PhaseIdealLoop *phase );
 
   // Reassociate invariant expressions.
@@ -732,9 +740,12 @@
   }
 
   Node* cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop);
-  void duplicate_predicates(CountedLoopNode* pre_head, Node *min_taken, Node* castii,
-                            IdealLoopTree* outer_loop, LoopNode* outer_main_head,
-                            uint dd_main_head);
+  void duplicate_predicates_helper(Node* predicate, Node* castii, IdealLoopTree* outer_loop,
+                                   LoopNode* outer_main_head, uint dd_main_head);
+  void duplicate_predicates(CountedLoopNode* pre_head, Node* castii, IdealLoopTree* outer_loop,
+                            LoopNode* outer_main_head, uint dd_main_head);
+  Node* update_skeleton_predicate(Node* iff, Node* value, Node* predicate = NULL, Node* uncommon_proj = NULL,
+                                  Node* current_proj = NULL, IdealLoopTree* outer_loop = NULL, Node* prev_proj = NULL);
 
 public:
 
@@ -1073,6 +1084,7 @@
                                          PhaseIterGVN* igvn);
   Node* clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check);
 
+  static Node* skip_all_loop_predicates(Node* entry);
   static Node* skip_loop_predicates(Node* entry);
 
   // Find a good location to insert a predicate
@@ -1087,12 +1099,20 @@
 
   // Implementation of the loop predication to promote checks outside the loop
   bool loop_predication_impl(IdealLoopTree *loop);
+  bool loop_predication_impl_helper(IdealLoopTree *loop, ProjNode* proj, ProjNode *predicate_proj,
+                                    CountedLoopNode *cl, ConNode* zero, Invariance& invar,
+                                    Deoptimization::DeoptReason reason);
+  bool loop_predication_should_follow_branches(IdealLoopTree *loop, ProjNode *predicate_proj, float& loop_trip_cnt);
+  void loop_predication_follow_branches(Node *c, IdealLoopTree *loop, float loop_trip_cnt,
+                                        PathFrequency& pf, Node_Stack& stack, VectorSet& seen,
+                                        Node_List& if_proj_list);
   ProjNode* insert_skeleton_predicate(IfNode* iff, IdealLoopTree *loop,
                                       ProjNode* proj, ProjNode *predicate_proj,
                                       ProjNode* upper_bound_proj,
                                       int scale, Node* offset,
                                       Node* init, Node* limit, jint stride,
-                                      Node* rng, bool& overflow);
+                                      Node* rng, bool& overflow,
+                                      Deoptimization::DeoptReason reason);
   Node* add_range_check_predicate(IdealLoopTree* loop, CountedLoopNode* cl,
                                   Node* predicate_proj, int scale_con, Node* offset,
                                   Node* limit, jint stride_con);
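
Aside: the accessors changed from int to bool above all share one mask-test
idiom. A minimal standalone sketch of why (_loop_flags & Mask) == Mask is the
right test for multi-bit masks, rather than a bare bitwise AND; the enum
values below are illustrative, not HotSpot's actual flag layout.

#include <cassert>

// Hypothetical flag layout, loosely modeled on CountedLoopNode::_loop_flags.
enum Flags {
  Normal          = 0,
  Pre             = 1,
  Main            = 2,
  Post            = 3,
  PreMainPostMask = 3,     // low two bits encode the loop kind
  HasReductions   = 1 << 2
};

struct LoopSketch {
  int _flags;
  explicit LoopSketch(int f) : _flags(f) {}
  // Exact-match test: true only if the masked bits equal the wanted value.
  bool is_main_loop() const      { return (_flags & PreMainPostMask) == Main; }
  bool is_reduction_loop() const { return (_flags & HasReductions) == HasReductions; }
};

int main() {
  LoopSketch main_loop(Main | HasReductions);
  assert(main_loop.is_main_loop());
  assert(main_loop.is_reduction_loop());
  LoopSketch post_loop(Post);         // Post (0b11) overlaps Main (0b10) bit-wise,
  assert(!post_loop.is_main_loop());  // so a bare '_flags & Main' would wrongly fire
  return 0;
}
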
--- a/src/hotspot/share/opto/node.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/opto/node.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -73,6 +73,7 @@
 class FastLockNode;
 class FastUnlockNode;
 class IfNode;
+class IfProjNode;
 class IfFalseNode;
 class IfTrueNode;
 class InitializeNode;
@@ -676,8 +677,9 @@
     DEFINE_CLASS_ID(Proj,  Node, 3)
       DEFINE_CLASS_ID(CatchProj, Proj, 0)
       DEFINE_CLASS_ID(JumpProj,  Proj, 1)
-      DEFINE_CLASS_ID(IfTrue,    Proj, 2)
-      DEFINE_CLASS_ID(IfFalse,   Proj, 3)
+      DEFINE_CLASS_ID(IfProj,    Proj, 2)
+        DEFINE_CLASS_ID(IfTrue,    IfProj, 0)
+        DEFINE_CLASS_ID(IfFalse,   IfProj, 1)
       DEFINE_CLASS_ID(Parm,      Proj, 4)
       DEFINE_CLASS_ID(MachProj,  Proj, 5)
 
@@ -818,6 +820,7 @@
   DEFINE_CLASS_QUERY(FastUnlock)
   DEFINE_CLASS_QUERY(If)
   DEFINE_CLASS_QUERY(RangeCheck)
+  DEFINE_CLASS_QUERY(IfProj)
   DEFINE_CLASS_QUERY(IfFalse)
   DEFINE_CLASS_QUERY(IfTrue)
   DEFINE_CLASS_QUERY(Initialize)
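
Aside: making IfProj the parent class ID of IfTrue and IfFalse means a single
is_IfProj() query now matches both projections. A simplified standalone model
of the DEFINE_CLASS_ID prefix-matching scheme; the bit widths and values here
are illustrative only, not HotSpot's actual encoding.

#include <cassert>
typedef unsigned int juint;

// Each level of the hierarchy appends bits, so a subclass id carries its
// parent's id as a bit prefix and one mask test answers "is this node of
// class X or any subclass of X?".
const juint Class_Proj    = 0x1;                  // prefix for all projections
const juint Class_IfProj  = Class_Proj   | 0x4;   // Proj prefix + IfProj bits
const juint Class_IfTrue  = Class_IfProj | 0x8;   // IfProj prefix + subclass bit
const juint Class_IfFalse = Class_IfProj | 0x10;
const juint Mask_IfProj   = 0x7;                  // covers the Proj+IfProj prefix

static bool is_IfProj(juint id) { return (id & Mask_IfProj) == Class_IfProj; }

int main() {
  assert(is_IfProj(Class_IfProj));
  assert(is_IfProj(Class_IfTrue));   // subclasses match the parent query
  assert(is_IfProj(Class_IfFalse));
  assert(!is_IfProj(Class_Proj));    // a bare Proj is not an IfProj
  return 0;
}
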
--- a/src/hotspot/share/opto/parse.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/opto/parse.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -161,6 +161,7 @@
     bool               _has_merged_backedge; // does this block have merged backedge?
     SafePointNode*     _start_map;      // all values flowing into this block
     MethodLivenessResult _live_locals;  // lazily initialized liveness bitmap
+    bool               _has_predicates; // Were predicates added before parsing of the loop head?
 
     int                _num_successors; // Includes only normal control flow.
     int                _all_successors; // Include exception paths also.
@@ -203,6 +204,9 @@
     // True when all non-exception predecessors have been parsed.
     bool is_ready() const                  { return preds_parsed() == pred_count(); }
 
+    bool has_predicates() const            { return _has_predicates; }
+    void set_has_predicates()              { _has_predicates = true; }
+
     int num_successors() const             { return _num_successors; }
     int all_successors() const             { return _all_successors; }
     Block* successor_at(int i) const {
@@ -552,6 +556,7 @@
   void    sharpen_type_after_if(BoolTest::mask btest,
                                 Node* con, const Type* tcon,
                                 Node* val, const Type* tval);
+  void    maybe_add_predicate_after_if(Block* path);
   IfNode* jump_if_fork_int(Node* a, Node* b, BoolTest::mask mask, float prob, float cnt);
   Node*   jump_if_join(Node* iffalse, Node* iftrue);
   void    jump_if_true_fork(IfNode *ifNode, int dest_bci_if_true, int prof_table_index, bool unc);
--- a/src/hotspot/share/opto/parse1.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/opto/parse1.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -666,10 +666,13 @@
         if (block->is_SEL_head()) {
           // Add predicate to single entry (not irreducible) loop head.
           assert(!block->has_merged_backedge(), "only entry paths should be merged for now");
-          // Need correct bci for predicate.
-          // It is fine to set it here since do_one_block() will set it anyway.
-          set_parse_bci(block->start());
-          add_predicate();
+          // Predicates may have been added after a dominating if
+          if (!block->has_predicates()) {
+            // Need correct bci for predicate.
+            // It is fine to set it here since do_one_block() will set it anyway.
+            set_parse_bci(block->start());
+            add_predicate();
+          }
           // Add new region for back branches.
           int edges = block->pred_count() - block->preds_parsed() + 1; // +1 for original region
           RegionNode *r = new RegionNode(edges+1);
@@ -1262,6 +1265,7 @@
   _is_handler = false;
   _has_merged_backedge = false;
   _start_map = NULL;
+  _has_predicates = false;
   _num_successors = 0;
   _all_successors = 0;
   _successors = NULL;
--- a/src/hotspot/share/opto/parse2.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/opto/parse2.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -1649,6 +1649,18 @@
   return (seems_never_taken(prob) && seems_stable_comparison());
 }
 
+void Parse::maybe_add_predicate_after_if(Block* path) {
+  if (path->is_SEL_head() && path->preds_parsed() == 0) {
+    // Add predicates at bci of if dominating the loop so traps can be
+    // recorded on the if's profile data
+    int bc_depth = repush_if_args();
+    add_predicate();
+    dec_sp(bc_depth);
+    path->set_has_predicates();
+  }
+}
+
+
 //----------------------------adjust_map_after_if------------------------------
 // Adjust the JVM state to reflect the result of taking this path.
 // Basically, it means inspecting the CmpNode controlling this
@@ -1657,8 +1669,14 @@
 // as graph nodes in the current abstract interpretation map.
 void Parse::adjust_map_after_if(BoolTest::mask btest, Node* c, float prob,
                                 Block* path, Block* other_path) {
-  if (stopped() || !c->is_Cmp() || btest == BoolTest::illegal)
+  if (!c->is_Cmp()) {
+    maybe_add_predicate_after_if(path);
+    return;
+  }
+
+  if (stopped() || btest == BoolTest::illegal) {
     return;                             // nothing to do
+  }
 
   bool is_fallthrough = (path == successor_for_bci(iter().next_bci()));
 
@@ -1690,10 +1708,13 @@
       have_con = false;
     }
   }
-  if (!have_con)                        // remaining adjustments need a con
+  if (!have_con) {                        // remaining adjustments need a con
+    maybe_add_predicate_after_if(path);
     return;
+  }
 
   sharpen_type_after_if(btest, con, tcon, val, tval);
+  maybe_add_predicate_after_if(path);
 }
 
 
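
Aside: a minimal standalone model of how the two parse changes above
cooperate: maybe_add_predicate_after_if() places predicates at the if that
dominates a not-yet-parsed single-entry loop head and flags the block, so
the SEL-head path in parse1.cpp does not add them a second time. The types
and helpers below are reductions for illustration, not HotSpot code.

#include <cassert>

// Block reduced to the three fields involved in the handshake.
struct BlockSketch {
  bool sel_head;        // single-entry loop head?
  int  preds_parsed;    // how many predecessors parsed so far
  bool has_predicates;  // set once predicates are in place
};

static int predicates_added = 0;
static void add_predicate() { predicates_added++; }

// At the if dominating the loop: add predicates here so traps are
// recorded on the if's profile data, and flag the block.
static void maybe_add_predicate_after_if(BlockSketch* path) {
  if (path->sel_head && path->preds_parsed == 0) {
    add_predicate();
    path->has_predicates = true;
  }
}

// Later, at the loop head itself: add predicates only if nobody has yet.
static void parse_loop_head(BlockSketch* block) {
  if (!block->has_predicates) {
    add_predicate();
  }
}

int main() {
  BlockSketch loop_head = { true, 0, false };
  maybe_add_predicate_after_if(&loop_head);  // placed at the dominating if
  parse_loop_head(&loop_head);               // and not duplicated here
  assert(predicates_added == 1);
  return 0;
}
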
--- a/src/hotspot/share/prims/jni.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/prims/jni.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -2630,7 +2630,17 @@
     if (v == NULL || v->is_a(ObjArrayKlass::cast(a->klass())->element_klass())) {
       a->obj_at_put(index, v);
     } else {
-      THROW(vmSymbols::java_lang_ArrayStoreException());
+      ResourceMark rm(THREAD);
+      stringStream ss;
+      Klass *bottom_kl = ObjArrayKlass::cast(a->klass())->bottom_klass();
+      ss.print("type mismatch: can not store %s to %s[%d]",
+               v->klass()->external_name(),
+               bottom_kl->is_typeArray_klass() ? type2name_tab[ArrayKlass::cast(bottom_kl)->element_type()] : bottom_kl->external_name(),
+               index);
+      for (int dims = ArrayKlass::cast(a->klass())->dimension(); dims > 1; --dims) {
+        ss.print("[]");
+      }
+      THROW_MSG(vmSymbols::java_lang_ArrayStoreException(), ss.as_string());
     }
   } else {
     char buf[jintAsStringSize];
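
Aside: a standalone sketch of the message the new ArrayStoreException path
builds, using plain snprintf in place of HotSpot's stringStream; the class
names, index, and dimension below are hypothetical inputs, not taken from
the patch.

#include <cstdio>

int main() {
  // Hypothetical scenario: storing a java.lang.String into row 3 of a
  // two-dimensional java.lang.Integer array.
  const char* value_class   = "java.lang.String";
  const char* element_class = "java.lang.Integer";
  int index     = 3;
  int dimension = 2;  // each dimension beyond the first appends one "[]"

  char buf[256];
  int n = snprintf(buf, sizeof(buf), "type mismatch: can not store %s to %s[%d]",
                   value_class, element_class, index);
  for (int dims = dimension; dims > 1 && n < (int)sizeof(buf) - 3; --dims) {
    n += snprintf(buf + n, sizeof(buf) - n, "[]");
  }
  // Prints: type mismatch: can not store java.lang.String to java.lang.Integer[3][]
  printf("%s\n", buf);
  return 0;
}
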
--- a/src/hotspot/share/prims/jvmtiEnv.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/prims/jvmtiEnv.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -657,7 +657,11 @@
 
     // add the jar file to the bootclasspath
     log_info(class, load)("opened: %s", zip_entry->name());
+#if INCLUDE_CDS
     ClassLoaderExt::append_boot_classpath(zip_entry);
+#else
+    ClassLoader::add_to_boot_append_entries(zip_entry);
+#endif
     return JVMTI_ERROR_NONE;
   } else {
     return JVMTI_ERROR_WRONG_PHASE;
--- a/src/hotspot/share/prims/jvmtiTagMap.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/prims/jvmtiTagMap.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -90,11 +90,11 @@
 
   // accessor methods
   inline oop* object_addr() { return &_object; }
-  inline oop object()       { return RootAccess<ON_PHANTOM_OOP_REF>::oop_load(object_addr()); }
+  inline oop object()       { return NativeAccess<ON_PHANTOM_OOP_REF>::oop_load(object_addr()); }
   // Peek at the object without keeping it alive. The returned object must be
   // kept alive using a normal access if it leaks out of a thread transition from VM.
   inline oop object_peek()  {
-    return RootAccess<ON_PHANTOM_OOP_REF | AS_NO_KEEPALIVE>::oop_load(object_addr());
+    return NativeAccess<ON_PHANTOM_OOP_REF | AS_NO_KEEPALIVE>::oop_load(object_addr());
   }
   inline jlong tag() const  { return _tag; }
 
--- a/src/hotspot/share/runtime/deoptimization.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/runtime/deoptimization.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -2170,6 +2170,7 @@
   "array_check",
   "intrinsic" JVMCI_ONLY("_or_type_checked_inlining"),
   "bimorphic" JVMCI_ONLY("_or_optimized_type_check"),
+  "profile_predicate",
   "unloaded",
   "uninitialized",
   "unreached",
--- a/src/hotspot/share/runtime/deoptimization.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/runtime/deoptimization.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -41,7 +41,7 @@
   enum DeoptReason {
     Reason_many = -1,             // indicates presence of several reasons
     Reason_none = 0,              // indicates absence of a relevant deopt.
-    // Next 7 reasons are recorded per bytecode in DataLayout::trap_bits.
+    // Next 8 reasons are recorded per bytecode in DataLayout::trap_bits.
     // This is more complicated for JVMCI as JVMCI may deoptimize to *some* bytecode before the
     // bytecode that actually caused the deopt (with inlining, JVMCI may even deoptimize to a
     // bytecode in another method):
@@ -62,6 +62,8 @@
     Reason_optimized_type_check   = Reason_bimorphic,
 #endif
 
+    Reason_profile_predicate,     // compiler-generated predicate moved from a frequent branch in a loop failed
+
     // recorded per method
     Reason_unloaded,              // unloaded class or constant pool entry
     Reason_uninitialized,         // bad class state (uninitialized)
@@ -92,8 +94,8 @@
     Reason_LIMIT,
 
     // Note:  Keep this enum in sync. with _trap_reason_name.
-    Reason_RECORDED_LIMIT = Reason_bimorphic  // some are not recorded per bc
-    // Note:  Reason_RECORDED_LIMIT should be < 8 to fit into 3 bits of
+    Reason_RECORDED_LIMIT = Reason_profile_predicate  // some are not recorded per bc
+    // Note:  Reason_RECORDED_LIMIT should fit into 31 bits of
     // DataLayout::trap_bits.  This dependency is enforced indirectly
     // via asserts, to avoid excessive direct header-to-header dependencies.
     // See Deoptimization::trap_state_reason and class DataLayout.
--- a/src/hotspot/share/runtime/handshake.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/runtime/handshake.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -28,6 +28,7 @@
 #include "memory/resourceArea.hpp"
 #include "runtime/handshake.hpp"
 #include "runtime/interfaceSupport.inline.hpp"
+#include "runtime/orderAccess.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/semaphore.inline.hpp"
 #include "runtime/task.hpp"
@@ -44,19 +45,26 @@
 };
 
 class HandshakeThreadsOperation: public HandshakeOperation {
-  Semaphore _done;
+  static Semaphore _done;
   ThreadClosure* _thread_cl;
 
 public:
-  HandshakeThreadsOperation(ThreadClosure* cl) : _done(0), _thread_cl(cl) {}
+  HandshakeThreadsOperation(ThreadClosure* cl) : _thread_cl(cl) {}
   void do_handshake(JavaThread* thread);
   void cancel_handshake(JavaThread* thread) { _done.signal(); };
 
   bool thread_has_completed() { return _done.trywait(); }
+
+#ifdef ASSERT
+  void check_state() {
+    assert(!_done.trywait(), "Must be zero");
+  }
+#endif
 };
 
+Semaphore HandshakeThreadsOperation::_done(0);
+
 class VM_Handshake: public VM_Operation {
-  HandshakeThreadsOperation* const _op;
   const jlong _handshake_timeout;
  public:
   bool evaluate_at_safepoint() const { return false; }
@@ -64,6 +72,7 @@
   bool evaluate_concurrently() const { return false; }
 
  protected:
+  HandshakeThreadsOperation* const _op;
 
   VM_Handshake(HandshakeThreadsOperation* op) :
       _op(op),
@@ -102,7 +111,6 @@
   fatal("Handshake operation timed out");
 }
 
-
 class VM_HandshakeOneThread: public VM_Handshake {
   JavaThread* _target;
   bool _thread_alive;
@@ -111,6 +119,7 @@
     VM_Handshake(op), _target(target), _thread_alive(false) {}
 
   void doit() {
+    DEBUG_ONLY(_op->check_state();)
     TraceTime timer("Performing single-target operation (vmoperation doit)", TRACETIME_LOG(Info, handshake));
 
     {
@@ -155,6 +164,7 @@
         // then we hang here, which is good for debugging.
       }
     } while (!poll_for_completed_thread());
+    DEBUG_ONLY(_op->check_state();)
   }
 
   VMOp_Type type() const { return VMOp_HandshakeOneThread; }
@@ -167,6 +177,7 @@
   VM_HandshakeAllThreads(HandshakeThreadsOperation* op) : VM_Handshake(op) {}
 
   void doit() {
+    DEBUG_ONLY(_op->check_state();)
     TraceTime timer("Performing operation (vmoperation doit)", TRACETIME_LOG(Info, handshake));
 
     int number_of_threads_issued = 0;
@@ -213,7 +224,9 @@
         number_of_threads_completed++;
       }
 
-    } while (number_of_threads_issued != number_of_threads_completed);
+    } while (number_of_threads_issued > number_of_threads_completed);
+    assert(number_of_threads_issued == number_of_threads_completed, "Must be the same");
+    DEBUG_ONLY(_op->check_state();)
   }
 
   VMOp_Type type() const { return VMOp_HandshakeAllThreads; }
@@ -245,8 +258,6 @@
   bool thread_alive() const { return _thread_alive; }
 };
 
-#undef ALL_JAVA_THREADS
-
 void HandshakeThreadsOperation::do_handshake(JavaThread* thread) {
   ResourceMark rm;
   FormatBufferResource message("Operation for thread " PTR_FORMAT ", is_vm_thread: %s",
@@ -282,7 +293,7 @@
   }
 }
 
-HandshakeState::HandshakeState() : _operation(NULL), _semaphore(1), _vmthread_holds_semaphore(false), _thread_in_process_handshake(false) {}
+HandshakeState::HandshakeState() : _operation(NULL), _semaphore(1), _thread_in_process_handshake(false) {}
 
 void HandshakeState::set_operation(JavaThread* target, HandshakeOperation* op) {
   _operation = op;
@@ -296,17 +307,23 @@
 
 void HandshakeState::process_self_inner(JavaThread* thread) {
   assert(Thread::current() == thread, "should call from thread");
+
+  if (thread->is_terminated()) {
+    // If the thread is not on the threads list but armed, cancel the handshake.
+    thread->cancel_handshake();
+    return;
+  }
+
   CautiouslyPreserveExceptionMark pem(thread);
   ThreadInVMForHandshake tivm(thread);
   if (!_semaphore.trywait()) {
     _semaphore.wait_with_safepoint_check(thread);
   }
-  if (has_operation()) {
-    HandshakeOperation* op = _operation;
+  HandshakeOperation* op = OrderAccess::load_acquire(&_operation);
+  if (op != NULL) {
+    // Disarm before executing the operation
     clear_handshake(thread);
-    if (op != NULL) {
-      op->do_handshake(thread);
-    }
+    op->do_handshake(thread);
   }
   _semaphore.signal();
 }
@@ -314,12 +331,6 @@
 void HandshakeState::cancel_inner(JavaThread* thread) {
   assert(Thread::current() == thread, "should call from thread");
   assert(thread->thread_state() == _thread_in_vm, "must be in vm state");
-#ifdef DEBUG
-  {
-    ThreadsListHandle tlh;
-    assert(!tlh.includes(_target), "java thread must not be on threads list");
-  }
-#endif
   HandshakeOperation* op = _operation;
   clear_handshake(thread);
   if (op != NULL) {
@@ -332,14 +343,14 @@
 }
 
 bool HandshakeState::claim_handshake_for_vmthread() {
-  if (_semaphore.trywait()) {
-    if (has_operation()) {
-      _vmthread_holds_semaphore = true;
-    } else {
-      _semaphore.signal();
-    }
+  if (!_semaphore.trywait()) {
+    return false;
   }
-  return _vmthread_holds_semaphore;
+  if (has_operation()) {
+    return true;
+  }
+  _semaphore.signal();
+  return false;
 }
 
 void HandshakeState::process_by_vmthread(JavaThread* target) {
@@ -355,16 +366,22 @@
     return;
   }
 
+  // Claim the semaphore if there is still an operation to be executed.
+  if (!claim_handshake_for_vmthread()) {
+    return;
+  }
+
   // If we own the semaphore at this point, and while owning it we can
   // observe a safe state, then the thread cannot possibly continue
   // without getting caught by the semaphore.
-  if (claim_handshake_for_vmthread() && vmthread_can_process_handshake(target)) {
+  if (vmthread_can_process_handshake(target)) {
     guarantee(!_semaphore.trywait(), "we should already own the semaphore");
 
     _operation->do_handshake(target);
+    // Disarm after the VM thread has executed the operation.
     clear_handshake(target);
-    _vmthread_holds_semaphore = false;
     // Release the thread
-    _semaphore.signal();
   }
+
+  _semaphore.signal();
 }
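
Aside: with _vmthread_holds_semaphore removed, claim_handshake_for_vmthread()
reduces to a plain try-acquire / re-check / release-on-no-work pattern. A
standalone sketch of that pattern; the binary "semaphore" below is a C++11
atomic stand-in for HotSpot's Semaphore, an assumption for illustration.

#include <atomic>
#include <cassert>

// Binary semaphore model: trywait() tries to take the single permit,
// signal() gives it back.
struct BinarySem {
  std::atomic<bool> available{true};
  bool trywait() { bool expect = true; return available.compare_exchange_strong(expect, false); }
  void signal()  { available.store(true); }
};

struct HandshakeStateSketch {
  BinarySem _semaphore;
  std::atomic<void*> _operation{nullptr};
  bool has_operation() { return _operation.load() != nullptr; }

  // Returns true only if we both hold the semaphore AND there is work;
  // otherwise the permit is handed straight back.
  bool claim_for_vmthread() {
    if (!_semaphore.trywait()) return false;  // the target thread owns it
    if (has_operation())       return true;   // keep the permit, do the work
    _semaphore.signal();                      // nothing to do: release
    return false;
  }
};

int main() {
  HandshakeStateSketch hs;
  assert(!hs.claim_for_vmthread());   // no operation -> permit returned
  int op;
  hs._operation.store(&op);
  assert(hs.claim_for_vmthread());    // operation present -> claimed
  assert(!hs._semaphore.trywait());   // and we still hold the permit
  return 0;
}
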
--- a/src/hotspot/share/runtime/handshake.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/runtime/handshake.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -54,7 +54,6 @@
   HandshakeOperation* volatile _operation;
 
   Semaphore _semaphore;
-  bool _vmthread_holds_semaphore;
   bool _thread_in_process_handshake;
 
   bool claim_handshake_for_vmthread();
--- a/src/hotspot/share/runtime/jniHandles.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/runtime/jniHandles.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -109,7 +109,7 @@
     // Return NULL on allocation failure.
     if (ptr != NULL) {
       assert(*ptr == NULL, "invariant");
-      RootAccess<IN_CONCURRENT_ROOT>::oop_store(ptr, obj());
+      NativeAccess<IN_CONCURRENT_ROOT>::oop_store(ptr, obj());
       res = reinterpret_cast<jobject>(ptr);
     } else {
       report_handle_allocation_failure(alloc_failmode, "global");
@@ -133,7 +133,7 @@
     // Return NULL on allocation failure.
     if (ptr != NULL) {
       assert(*ptr == NULL, "invariant");
-      RootAccess<ON_PHANTOM_OOP_REF>::oop_store(ptr, obj());
+      NativeAccess<ON_PHANTOM_OOP_REF>::oop_store(ptr, obj());
       char* tptr = reinterpret_cast<char*>(ptr) + weak_tag_value;
       res = reinterpret_cast<jobject>(tptr);
     } else {
@@ -160,14 +160,14 @@
 oop JNIHandles::resolve_jweak(jweak handle) {
   assert(handle != NULL, "precondition");
   assert(is_jweak(handle), "precondition");
-  return RootAccess<ON_PHANTOM_OOP_REF>::oop_load(jweak_ptr(handle));
+  return NativeAccess<ON_PHANTOM_OOP_REF>::oop_load(jweak_ptr(handle));
 }
 
 bool JNIHandles::is_global_weak_cleared(jweak handle) {
   assert(handle != NULL, "precondition");
   assert(is_jweak(handle), "not a weak handle");
   oop* oop_ptr = jweak_ptr(handle);
-  oop value = RootAccess<ON_PHANTOM_OOP_REF | AS_NO_KEEPALIVE>::oop_load(oop_ptr);
+  oop value = NativeAccess<ON_PHANTOM_OOP_REF | AS_NO_KEEPALIVE>::oop_load(oop_ptr);
   return value == NULL;
 }
 
@@ -175,7 +175,7 @@
   if (handle != NULL) {
     assert(!is_jweak(handle), "wrong method for destroying jweak");
     oop* oop_ptr = jobject_ptr(handle);
-    RootAccess<IN_CONCURRENT_ROOT>::oop_store(oop_ptr, (oop)NULL);
+    NativeAccess<IN_CONCURRENT_ROOT>::oop_store(oop_ptr, (oop)NULL);
     global_handles()->release(oop_ptr);
   }
 }
@@ -185,7 +185,7 @@
   if (handle != NULL) {
     assert(is_jweak(handle), "JNI handle not jweak");
     oop* oop_ptr = jweak_ptr(handle);
-    RootAccess<ON_PHANTOM_OOP_REF>::oop_store(oop_ptr, (oop)NULL);
+    NativeAccess<ON_PHANTOM_OOP_REF>::oop_store(oop_ptr, (oop)NULL);
     weak_global_handles()->release(oop_ptr);
   }
 }
@@ -517,7 +517,7 @@
   // Try last block
   if (_last->_top < block_size_in_oops) {
     oop* handle = &(_last->_handles)[_last->_top++];
-    RootAccess<AS_DEST_NOT_INITIALIZED>::oop_store(handle, obj);
+    NativeAccess<AS_DEST_NOT_INITIALIZED>::oop_store(handle, obj);
     return (jobject) handle;
   }
 
@@ -525,7 +525,7 @@
   if (_free_list != NULL) {
     oop* handle = _free_list;
     _free_list = (oop*) *_free_list;
-    RootAccess<AS_DEST_NOT_INITIALIZED>::oop_store(handle, obj);
+    NativeAccess<AS_DEST_NOT_INITIALIZED>::oop_store(handle, obj);
     return (jobject) handle;
   }
   // Check if an unused block follows the last
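
Aside: the RootAccess-to-NativeAccess renames above keep the same
decorator-parameterized shape: the access kind is a compile-time bit set and
the implementation picks the appropriate GC barriers from it. A toy
standalone model of that shape; the printf dispatch is a stand-in for the
real barrier selection, and the decorator values are illustrative.

#include <cstdint>
#include <cstdio>

typedef uint64_t DecoratorSet;
const DecoratorSet ON_PHANTOM_OOP_REF = 1 << 0;
const DecoratorSet AS_NO_KEEPALIVE    = 1 << 1;

template <DecoratorSet decorators>
struct NativeAccessSketch {
  template <typename T>
  static T load(T* addr) {
    if (decorators & AS_NO_KEEPALIVE) {
      printf("peek (no keep-alive barrier)\n");   // cf. object_peek() above
    } else {
      printf("load with keep-alive barrier\n");
    }
    return *addr;
  }
  template <typename T>
  static void store(T* addr, T value) { *addr = value; }
};

int main() {
  int slot = 42;  // stands in for an oop slot in native memory
  int v = NativeAccessSketch<ON_PHANTOM_OOP_REF | AS_NO_KEEPALIVE>::load(&slot);
  NativeAccessSketch<ON_PHANTOM_OOP_REF>::store(&slot, v + 1);
  return 0;
}
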
--- a/src/hotspot/share/runtime/jniHandles.inline.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/runtime/jniHandles.inline.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -57,7 +57,7 @@
   if (is_jweak(handle)) {       // Unlikely
     result = resolve_jweak(handle);
   } else {
-    result = RootAccess<IN_CONCURRENT_ROOT>::oop_load(jobject_ptr(handle));
+    result = NativeAccess<IN_CONCURRENT_ROOT>::oop_load(jobject_ptr(handle));
     // Construction of jobjects canonicalizes a null value into a null
     // jobject, so for non-jweak the pointee should never be null.
     assert(external_guard || result != NULL, "Invalid JNI handle");
@@ -83,7 +83,7 @@
 inline void JNIHandles::destroy_local(jobject handle) {
   if (handle != NULL) {
     assert(!is_jweak(handle), "Invalid JNI local handle");
-    RootAccess<>::oop_store(jobject_ptr(handle), (oop)NULL);
+    NativeAccess<>::oop_store(jobject_ptr(handle), (oop)NULL);
   }
 }
 
--- a/src/hotspot/share/runtime/safepointMechanism.inline.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/runtime/safepointMechanism.inline.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -59,12 +59,11 @@
   bool armed = local_poll_armed(thread); // load acquire, polling page -> op / global state
   if(armed) {
     // We could be armed for either a handshake operation or a safepoint
+    if (global_poll()) {
+      SafepointSynchronize::block(thread);
+    }
     if (thread->has_handshake()) {
       thread->handshake_process_by_self();
-    } else {
-      if (global_poll()) {
-        SafepointSynchronize::block(thread);
-      }
     }
   }
 }
--- a/src/hotspot/share/runtime/thread.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/runtime/thread.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -4219,6 +4219,9 @@
   before_exit(thread);
 
   thread->exit(true);
+  // This thread will never call smr_delete; instead of the implicit cancel
+  // in wait_for_vm_thread_exit, we cancel the handshake explicitly.
+  thread->cancel_handshake();
 
   // Stop VM thread.
   {
--- a/src/hotspot/share/runtime/threadHeapSampler.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/runtime/threadHeapSampler.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -47,7 +47,7 @@
   const uint64_t PrngMult = 0x5DEECE66DLL;
   const uint64_t PrngAdd = 0xB;
   const uint64_t PrngModPower = 48;
-  const uint64_t PrngModMask = right_n_bits(PrngModPower);
+  const uint64_t PrngModMask = ((uint64_t)1 << PrngModPower) - 1;
   //assert(IS_SAFE_SIZE_MUL(PrngMult, rnd), "Overflow on multiplication.");
   //assert(IS_SAFE_SIZE_ADD(PrngMult * rnd, PrngAdd), "Overflow on addition.");
   return (PrngMult * rnd + PrngAdd) & PrngModMask;
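
Aside: the replaced right_n_bits(PrngModPower) computes its mask in
word-sized (intptr_t) arithmetic, which cannot represent a 48-bit mask on
32-bit platforms; the new expression builds the mask explicitly in uint64_t.
A standalone sketch of the resulting 48-bit linear congruential step (the
multiplier and addend are those of java.util.Random).

#include <cstdint>
#include <cassert>

static uint64_t next_random(uint64_t rnd) {
  const uint64_t PrngMult     = 0x5DEECE66DULL;
  const uint64_t PrngAdd      = 0xB;
  const uint64_t PrngModPower = 48;
  // Built in 64-bit arithmetic, so the shift by 48 is well defined.
  const uint64_t PrngModMask  = ((uint64_t)1 << PrngModPower) - 1;
  return (PrngMult * rnd + PrngAdd) & PrngModMask;
}

int main() {
  uint64_t r = 1;
  for (int i = 0; i < 4; i++) {
    r = next_random(r);
    assert(r < ((uint64_t)1 << 48));  // always stays within 48 bits
  }
  return 0;
}
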
--- a/src/hotspot/share/runtime/vmStructs.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/runtime/vmStructs.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -1117,11 +1117,11 @@
   /* FileMapInfo fields (CDS archive related) */                                                                                     \
   /********************************************/                                                                                     \
                                                                                                                                      \
-  nonstatic_field(FileMapInfo,                 _header,                                       FileMapInfo::FileMapHeader*)           \
-     static_field(FileMapInfo,                 _current_info,                                 FileMapInfo*)                          \
-  nonstatic_field(FileMapInfo::FileMapHeader,  _space[0],                                     FileMapInfo::FileMapHeader::space_info)\
-  nonstatic_field(FileMapInfo::FileMapHeader::space_info, _addr._base,                        char*)                                 \
-  nonstatic_field(FileMapInfo::FileMapHeader::space_info, _used,                              size_t)                                \
+  CDS_ONLY(nonstatic_field(FileMapInfo,                            _header,                   FileMapInfo::FileMapHeader*))          \
+  CDS_ONLY(   static_field(FileMapInfo,                            _current_info,             FileMapInfo*))                         \
+  CDS_ONLY(nonstatic_field(FileMapInfo::FileMapHeader,             _space[0],                 FileMapInfo::FileMapHeader::space_info))\
+  CDS_ONLY(nonstatic_field(FileMapInfo::FileMapHeader::space_info, _addr._base,               char*))                                \
+  CDS_ONLY(nonstatic_field(FileMapInfo::FileMapHeader::space_info, _used,                     size_t))                               \
                                                                                                                                      \
   /******************/                                                                                                               \
   /* VMError fields */                                                                                                               \
@@ -2381,6 +2381,7 @@
   declare_constant(Deoptimization::Reason_array_check)                    \
   declare_constant(Deoptimization::Reason_intrinsic)                      \
   declare_constant(Deoptimization::Reason_bimorphic)                      \
+  declare_constant(Deoptimization::Reason_profile_predicate)              \
   declare_constant(Deoptimization::Reason_unloaded)                       \
   declare_constant(Deoptimization::Reason_uninitialized)                  \
   declare_constant(Deoptimization::Reason_unreached)                      \
--- a/src/hotspot/share/services/diagnosticArgument.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/services/diagnosticArgument.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -68,27 +68,27 @@
   set_is_set(true);
 }
 
-void GenDCmdArgument::to_string(jlong l, char* buf, size_t len) {
+void GenDCmdArgument::to_string(jlong l, char* buf, size_t len) const {
   jio_snprintf(buf, len, INT64_FORMAT, l);
 }
 
-void GenDCmdArgument::to_string(bool b, char* buf, size_t len) {
+void GenDCmdArgument::to_string(bool b, char* buf, size_t len) const {
   jio_snprintf(buf, len, b ? "true" : "false");
 }
 
-void GenDCmdArgument::to_string(NanoTimeArgument n, char* buf, size_t len) {
+void GenDCmdArgument::to_string(NanoTimeArgument n, char* buf, size_t len) const {
   jio_snprintf(buf, len, INT64_FORMAT, n._nanotime);
 }
 
-void GenDCmdArgument::to_string(MemorySizeArgument m, char* buf, size_t len) {
+void GenDCmdArgument::to_string(MemorySizeArgument m, char* buf, size_t len) const {
   jio_snprintf(buf, len, INT64_FORMAT, m._size);
 }
 
-void GenDCmdArgument::to_string(char* c, char* buf, size_t len) {
+void GenDCmdArgument::to_string(char* c, char* buf, size_t len) const {
   jio_snprintf(buf, len, "%s", (c != NULL) ? c : "");
 }
 
-void GenDCmdArgument::to_string(StringArrayArgument* f, char* buf, size_t len) {
+void GenDCmdArgument::to_string(StringArrayArgument* f, char* buf, size_t len) const {
   int length = f->array()->length();
   size_t written = 0;
   buf[0] = 0;
--- a/src/hotspot/share/services/diagnosticArgument.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/services/diagnosticArgument.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -62,40 +62,35 @@
 class GenDCmdArgument : public ResourceObj {
 protected:
   GenDCmdArgument* _next;
-  const char*      _name;
-  const char*      _description;
-  const char*      _type;
-  const char*      _default_string;
-  bool             _is_set;
-  bool             _is_mandatory;
+  const char* const _name;
+  const char* const _description;
+  const char* const _type;
+  const char* const _default_string;
+  bool              _is_set;
+  const bool        _is_mandatory;
   bool             _allow_multiple;
   GenDCmdArgument(const char* name, const char* description, const char* type,
-                  const char* default_string, bool mandatory) {
-    _name = name;
-    _description = description;
-    _type = type;
-    _default_string = default_string;
-    _is_mandatory = mandatory;
-    _is_set = false;
-    _allow_multiple = false;
-  };
+                  const char* default_string, bool mandatory)
+    : _next(NULL), _name(name), _description(description), _type(type),
+      _default_string(default_string), _is_set(false), _is_mandatory(mandatory),
+      _allow_multiple(false) {}
 public:
-  const char* name() { return _name; }
-  const char* description() { return _description; }
-  const char* type() { return _type; }
-  const char* default_string() { return _default_string; }
-  bool is_set() { return _is_set; }
-  void set_is_set(bool b) { _is_set = b; }
-  bool allow_multiple() { return _allow_multiple; }
-  bool is_mandatory() { return _is_mandatory; }
-  bool has_value() { return _is_set || _default_string != NULL; }
-  bool has_default() { return _default_string != NULL; }
+  const char* name() const        { return _name; }
+  const char* description() const { return _description; }
+  const char* type() const        { return _type; }
+  const char* default_string() const { return _default_string; }
+  bool is_set() const             { return _is_set; }
+  void set_is_set(bool b)         { _is_set = b; }
+  bool allow_multiple() const     { return _allow_multiple; }
+  bool is_mandatory() const       { return _is_mandatory; }
+  bool has_value() const          { return _is_set || _default_string != NULL; }
+  bool has_default() const        { return _default_string != NULL; }
   void read_value(const char* str, size_t len, TRAPS);
   virtual void parse_value(const char* str, size_t len, TRAPS) = 0;
   virtual void init_value(TRAPS) = 0;
   virtual void reset(TRAPS) = 0;
   virtual void cleanup() = 0;
-  virtual void value_as_str(char* buf, size_t len) = 0;
+  virtual void value_as_str(char* buf, size_t len) const = 0;
   void set_next(GenDCmdArgument* arg) {
     _next = arg;
   }
@@ -103,12 +98,12 @@
     return _next;
   }
 
-  void to_string(jlong l, char* buf, size_t len);
-  void to_string(bool b, char* buf, size_t len);
-  void to_string(char* c, char* buf, size_t len);
-  void to_string(NanoTimeArgument n, char* buf, size_t len);
-  void to_string(MemorySizeArgument f, char* buf, size_t len);
-  void to_string(StringArrayArgument* s, char* buf, size_t len);
+  void to_string(jlong l, char* buf, size_t len) const;
+  void to_string(bool b, char* buf, size_t len) const;
+  void to_string(char* c, char* buf, size_t len) const;
+  void to_string(NanoTimeArgument n, char* buf, size_t len) const;
+  void to_string(MemorySizeArgument f, char* buf, size_t len) const;
+  void to_string(StringArrayArgument* s, char* buf, size_t len) const;
 };
 
 template <class ArgType> class DCmdArgument: public GenDCmdArgument {
@@ -123,7 +118,7 @@
                GenDCmdArgument(name, description, type, defaultvalue, mandatory)
                { }
   ~DCmdArgument() { destroy_value(); }
-  ArgType value() { return _value;}
+  ArgType value() const { return _value;}
   void set_value(ArgType v) { _value = v; }
   void reset(TRAPS) {
     destroy_value();
@@ -136,7 +131,7 @@
   void parse_value(const char* str, size_t len, TRAPS);
   void init_value(TRAPS);
   void destroy_value();
-  void value_as_str(char *buf, size_t len) { return to_string(_value, buf, len);}
+  void value_as_str(char *buf, size_t len) const { to_string(_value, buf, len);}
 };
 
 #endif  /* SHARE_VM_SERVICES_DIAGNOSTICARGUMENT_HPP */
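
Aside: the header refactoring above consistently replaces assign-in-body
constructors with member initializer lists so that fields can be declared
const. A minimal sketch of why const members force this style; the class
below is a reduction for illustration, not the real GenDCmdArgument.

#include <cstddef>

class ArgSketch {
  const char* const _name;   // immutable after construction
  bool              _is_set; // still mutable state
public:
  ArgSketch(const char* name)
    : _name(name), _is_set(false) {}   // OK: initializer list
  // ArgSketch(const char* name) { _name = name; }  // would not compile:
  //                                                // assignment to const
  const char* name() const { return _name; }
  bool is_set() const      { return _is_set; }
  void set_is_set(bool b)  { _is_set = b; }
};

int main() {
  ArgSketch a("timeout");
  a.set_is_set(true);
  return (a.is_set() && a.name() != NULL) ? 0 : 1;
}
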
--- a/src/hotspot/share/services/diagnosticFramework.cpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/services/diagnosticFramework.cpp	Tue Jun 19 09:22:38 2018 -0700
@@ -34,7 +34,9 @@
 #include "services/diagnosticFramework.hpp"
 #include "services/management.hpp"
 
-CmdLine::CmdLine(const char* line, size_t len, bool no_command_name) {
+CmdLine::CmdLine(const char* line, size_t len, bool no_command_name)
+  : _cmd(line), _cmd_len(0), _args(NULL), _args_len(0)
+{
   assert(line != NULL, "Command line string should not be NULL");
   const char* line_end;
   const char* cmd_end;
@@ -251,7 +253,7 @@
   }
 }
 
-void DCmdParser::print_help(outputStream* out, const char* cmd_name) {
+void DCmdParser::print_help(outputStream* out, const char* cmd_name) const {
   out->print("Syntax : %s %s", cmd_name, _options == NULL ? "" : "[options]");
   GenDCmdArgument* arg = _arguments_list;
   while (arg != NULL) {
@@ -323,7 +325,7 @@
   }
 }
 
-int DCmdParser::num_arguments() {
+int DCmdParser::num_arguments() const {
   GenDCmdArgument* arg = _arguments_list;
   int count = 0;
   while (arg != NULL) {
@@ -338,7 +340,7 @@
   return count;
 }
 
-GrowableArray<const char *>* DCmdParser::argument_name_array() {
+GrowableArray<const char *>* DCmdParser::argument_name_array() const {
   int count = num_arguments();
   GrowableArray<const char *>* array = new GrowableArray<const char *>(count);
   GenDCmdArgument* arg = _arguments_list;
@@ -354,7 +356,7 @@
   return array;
 }
 
-GrowableArray<DCmdArgumentInfo*>* DCmdParser::argument_info_array() {
+GrowableArray<DCmdArgumentInfo*>* DCmdParser::argument_info_array() const {
   int count = num_arguments();
   GrowableArray<DCmdArgumentInfo*>* array = new GrowableArray<DCmdArgumentInfo *>(count);
   int idx = 0;
@@ -399,6 +401,7 @@
       break;
     }
     if (line.is_executable()) {
+      ResourceMark rm;
       DCmd* command = DCmdFactory::create_local_DCmd(source, line, out, CHECK);
       assert(command != NULL, "command error must be handled before this line");
       DCmdMark mark(command);
@@ -413,7 +416,7 @@
   _dcmdparser.parse(line, delim, CHECK);
 }
 
-void DCmdWithParser::print_help(const char* name) {
+void DCmdWithParser::print_help(const char* name) const {
   _dcmdparser.print_help(output(), name);
 }
 
@@ -425,11 +428,11 @@
   _dcmdparser.cleanup();
 }
 
-GrowableArray<const char*>* DCmdWithParser::argument_name_array() {
+GrowableArray<const char*>* DCmdWithParser::argument_name_array() const {
   return _dcmdparser.argument_name_array();
 }
 
-GrowableArray<DCmdArgumentInfo*>* DCmdWithParser::argument_info_array() {
+GrowableArray<DCmdArgumentInfo*>* DCmdWithParser::argument_info_array() const {
   return _dcmdparser.argument_info_array();
 }
 
@@ -519,20 +522,6 @@
   return 0; // Actually, there's no checks for duplicates
 }
 
-DCmd* DCmdFactory::create_global_DCmd(DCmdSource source, CmdLine &line,
-                                      outputStream* out, TRAPS) {
-  DCmdFactory* f = factory(source, line.cmd_addr(), line.cmd_len());
-  if (f != NULL) {
-    if (f->is_enabled()) {
-      THROW_MSG_NULL(vmSymbols::java_lang_IllegalArgumentException(),
-                     f->disabled_message());
-    }
-    return f->create_Cheap_instance(out);
-  }
-  THROW_MSG_NULL(vmSymbols::java_lang_IllegalArgumentException(),
-             "Unknown diagnostic command");
-}
-
 DCmd* DCmdFactory::create_local_DCmd(DCmdSource source, CmdLine &line,
                                      outputStream* out, TRAPS) {
   DCmdFactory* f = factory(source, line.cmd_addr(), line.cmd_len());
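
Aside: on the ResourceMark added inside the command loop above:
resource-area allocation is bump-pointer style, and a mark records the
allocation level on construction and rolls back to it on destruction, so
each command's resource-allocated DCmd is reclaimed per iteration. A toy
standalone model of that mechanism; the arena below is a simplification,
not HotSpot's ResourceArea.

#include <cassert>
#include <cstddef>

struct ArenaSketch {
  char buf[1024];
  size_t top = 0;
  void* alloc(size_t n) { void* p = buf + top; top += n; return p; }
};

struct MarkSketch {
  ArenaSketch& a;
  size_t saved;
  MarkSketch(ArenaSketch& arena) : a(arena), saved(arena.top) {}
  ~MarkSketch() { a.top = saved; }  // frees everything allocated since the mark
};

int main() {
  ArenaSketch arena;
  for (int i = 0; i < 100; i++) {
    MarkSketch rm(arena);   // like 'ResourceMark rm;' per command
    arena.alloc(64);        // per-command scratch (e.g. a DCmd instance)
  }                         // rolled back here on each iteration
  assert(arena.top == 0);   // no accumulation across iterations
  return 0;
}
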
--- a/src/hotspot/share/services/diagnosticFramework.hpp	Fri Jun 15 13:07:46 2018 -0700
+++ b/src/hotspot/share/services/diagnosticFramework.hpp	Tue Jun 19 09:22:38 2018 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -61,13 +61,13 @@
   size_t      _args_len;
 public:
   CmdLine(const char* line, size_t len, bool no_command_name);
-  const char* args_addr() const { return _args; }
-  size_t args_len() const { return _args_len; }
-  const char* cmd_addr() const { return _cmd; }
-  size_t cmd_len() const { return _cmd_len; }
-  bool is_empty() { return _cmd_len == 0; }
-  bool is_executable() { return is_empty() || _cmd[0] != '#'; }
-  bool is_stop() { return !is_empty() && strncmp("stop", _cmd, _cmd_len) == 0; }
+  const char* args_addr() const   { return _args; }
+  size_t args_len() const         { return _args_len; }
+  const char* cmd_addr() const    { return _cmd; }
+  size_t cmd_len() const          { return _cmd_len; }
+  bool is_empty() const           { return _cmd_len == 0; }
+  bool is_executable() const      { return is_empty() || _cmd[0] != '#'; }
+  bool is_stop() const            { return !is_empty() && strncmp("stop", _cmd, _cmd_len) == 0; }
 };
 
 // Iterator class taking a character string in input and returning a CmdLine
@@ -75,19 +75,16 @@
 class DCmdIter : public StackObj {
   friend class DCmd;
 private:
-  const char* _str;
-  char        _delim;
-  size_t      _len;
+  const char* const _str;
+  const char        _delim;
+  const size_t      _len;
   size_t      _cursor;
 public:
 
-  DCmdIter(const char* str, char delim) {
-    _str = str;
-    _delim = delim;
-    _len = strlen(str);
-    _cursor = 0;
-  }
-  bool has_next() { return _cursor < _len; }
+  DCmdIter(const char* str, char delim)
+   : _str(str), _delim(delim), _len(::strlen(str)),
+     _cursor(0) {}
+  bool has_next() const { return _cursor < _len; }
   CmdLine next() {
     assert(_cursor <= _len, "Cannot iterate more");
     size_t n = _cursor;
@@ -102,58 +99,51 @@
 
 // Iterator class to iterate over diagnostic command arguments
 class DCmdArgIter : public ResourceObj {
-  const char* _buffer;
-  size_t      _len;
+  const char* const _buffer;
+  const size_t      _len;
   size_t      _cursor;
   const char* _key_addr;
   size_t      _key_len;
   const char* _value_addr;
   size_t      _value_len;
-  char        _delim;
+  const char  _delim;
 public:
-  DCmdArgIter(const char* buf, size_t len, char delim) {
-    _buffer = buf;
-    _len = len;
-    _delim = delim;
-    _cursor = 0;
-  }
+  DCmdArgIter(const char* buf, size_t len, char delim)
+    : _buffer(buf), _len(len), _cursor(0), _key_addr(NULL),
+      _key_len(0), _value_addr(NULL), _value_len(0), _delim(delim) {}
+
   bool next(TRAPS);
-  const char* key_addr() { return _key_addr; }
-  size_t key_length() { return _key_len; }
-  const char* value_addr() { return _value_addr; }
-  size_t value_length() { return _value_len; }
+  const char* key_addr() const    { return _key_addr; }
+  size_t key_length() const       { return _key_len; }
+  const char* value_addr() const  { return _value_addr; }
+  size_t value_length() const     { return _value_len; }
 };
 
 // A DCmdInfo instance provides a description of a diagnostic command. It is
 // used to export the description to the JMX interface of the framework.
 class DCmdInfo : public ResourceObj {
 protected:
-  const char* _name;           /* Name of the diagnostic command */
-  const char* _description;    /* Short description */
-  const char* _impact;         /* Impact on the JVM */
-  JavaPermission _permission;  /* Java Permission required to execute this command if any */
-  int         _num_arguments;  /* Number of supported options or arguments */
-  bool        _is_enabled;     /* True if the diagnostic command can be invoked, false otherwise */
+  const char* const _name;           /* Name of the diagnostic command */
+  const char* const _description;    /* Short description */
+  const char* const _impact;         /* Impact on the JVM */
+  const JavaPermission _permission;  /* Java Permission required to execute this command if any */
+  const int         _num_arguments;  /* Number of supported options or arguments */
+  const bool        _is_enabled;     /* True if the diagnostic command can be invoked, false otherwise */
 public:
   DCmdInfo(const char* name,
           const char* description,
           const char* impact,
           JavaPermission permission,
           int num_arguments,
-          bool enabled) {
-    this->_name = name;
-    this->_description = description;
-    this->_impact = impact;
-    this->_permission = permission;
-    this->_num_arguments = num_arguments;
-    this->_is_enabled = enabled;
-  }
-  const char* name() const { return _name; }
-  const char* description() const { return _description; }
-  const char* impact() const { return _impact; }
-  JavaPermission permission() const { return _permission; }
-  int num_arguments() const { return _num_arguments; }
-  bool is_enabled() const { return _is_enabled; }
+          bool enabled)
+  : _name(name), _description(description), _impact(impact), _permission(permission),
+    _num_arguments(num_arguments), _is_enabled(enabled) {}
+  const char* name() const          { return _name; }
+  const char* description() const   { return _description; }
+  const char* impact() const        { return _impact; }
+  const JavaPermission& permission() const { return _permission; }
+  int num_arguments() const         { return _num_arguments; }
+  bool is_enabled() const           { return _is_enabled; }
 
   static bool by_name(void* name, DCmdInfo* info);
 };
@@ -163,51 +153,32 @@
 // framework.
 class DCmdArgumentInfo : public ResourceObj {
 protected:
-  const char* _name;            /* Option/Argument name*/
-  const char* _description;     /* Short description */
-  const char* _type;            /* Type: STRING, BOOLEAN, etc. */
-  const char* _default_string;  /* Default value in a parsable string */
-  bool        _mandatory;       /* True if the option/argument is mandatory */
-  bool        _option;          /* True if it is an option, false if it is an argument */
+  const char* const _name;            /* Option/Argument name*/
+  const char* const _description;     /* Short description */
+  const char* const _type;            /* Type: STRING, BOOLEAN, etc. */
+  const char* const _default_string;  /* Default value in a parsable string */
+  const bool        _mandatory;       /* True if the option/argument is mandatory */
+  const bool        _option;          /* True if it is an option, false if it is an argument */
                                 /* (see diagnosticFramework.hpp for option/argument definitions) */
-  bool        _multiple;        /* True is the option can be specified several time */
-  int         _position;        /* Expected position for this argument (this field is */
+  const bool        _multiple;        /* True if the option can be specified several times */
+  const int         _position;        /* Expected position for this argument (this field is */
                                 /* meaningless for options) */
 public:
   DCmdArgumentInfo(const char* name, const char* description, const char* type,
                    const char* default_string, bool mandatory, bool option,
-                   bool multiple) {
-    this->_name = name;
-    this->_description = description;
-    this->_type = type;
-    this->_default_string = default_string;
-    this->_option = option;
-    this->_mandatory = mandatory;
-    this->_option = option;
-    this->_multiple = multiple;
-    this->_position = -1;
-  }
-  DCmdArgumentInfo(const char* name, const char* description, const char* type,
-                   const char* default_string, bool mandatory, bool option,
-                   bool multiple, int position) {
-    this->_name = name;
-    this->_description = description;
-    this->_type = type;
-    this->_default_string = default_string;
-    this->_option = option;
-    this->_mandatory = mandatory;
-    this->_option = option;
-    this->_multiple = multiple;
-    this->_position = position;
-  }
-  const char* name() const { return _name; }
+                   bool multiple, int position = -1)
+    : _name(name), _description(description), _type(type),
+      _default_string(default_string), _mandatory(mandatory), _option(option),
+      _multiple(multiple), _position(position) {}
+
+  const char* name() const        { return _name; }
   const char* description() const { return _description; }
-  const char* type() const { return _type; }
+  const char* type() const        { return _type; }
   const char* default_string() const { return _default_string; }
-  bool is_mandatory() const { return _mandatory; }
-  bool is_option() const { return _option; }
-  bool is_multiple() const { return _multiple; }
-  int position() const { return _position; }
+  bool is_mandatory() const       { return _mandatory; }
+  bool is_option() const          { return _option; }
+  bool is_multiple() const        { return _multiple; }
+  int position() const            { return _position; }
 };
 
 // The DCmdParser class can be used to create an argument parser for a
@@ -233,25 +204,21 @@
 private:
   GenDCmdArgument* _options;
   GenDCmdArgument* _arguments_list;
-  char             _delim;
 public:
-  DCmdParser() {
-    _options = NULL;
-    _arguments_list = NULL;
-    _delim = ' ';
-  }
+  DCmdParser()
+    : _options(NULL), _arguments_list(NULL) {}
   void add_dcmd_option(GenDCmdArgument* arg);
   void add_dcmd_argument(GenDCmdArgument* arg);
   GenDCmdArgument* lookup_dcmd_option(const char* name, size_t len);
-  GenDCmdArgument* arguments_list() { return _arguments_list; };
+  GenDCmdArgument* arguments_list() const { return _arguments_list; };
   void check(TRAPS);
   void parse(CmdLine* line, char delim, TRAPS);
-  void print_help(outputStream* out, const char* cmd_name);
+  void print_help(outputStream* out, const char* cmd_name) const;
   void reset(TRAPS);
   void cleanup();
-  int num_arguments();
-  GrowableArray<const char*>* argument_name_array();
-  GrowableArray<DCmdArgumentInfo*>* argument_info_array();
+  int num_arguments() const;
+  GrowableArray<const char*>* argument_name_array() const;
+  GrowableArray<DCmdArgumentInfo*>* argument_info_array() const;
 };
 
 // The DCmd class is the parent class of all diagnostic commands
@@ -270,17 +237,18 @@
 // thread that will access the instance.
 class DCmd : public ResourceObj {
 protected:
-  outputStream* _output;
-  bool          _is_heap_allocated;
+  outputStream* const _output;
+  const bool          _is_heap_allocated;
 public:
-  DCmd(outputStream* output, bool heap_allocated) {
-    _output = output;
-    _is_heap_allocated = heap_allocated;
-  }
+  DCmd(outputStream* output, bool heap_allocated)
+   : _output(output), _is_heap_allocated(heap_allocated) {}
 
-  static const char* name() { return "No Name";}
-  static const char* description() { return "No Help";}
+  // Child classes: please always provide these methods:
+  //  static const char* name()             { return "<command name>";}
+  //  static const char* description()      { return "<command help>";}
+
   static const char* disabled_message() { return "Diagnostic command currently disabled"; }
+
   // The impact() method returns a description of the intrusiveness of the diagnostic
  // command on the Java Virtual Machine behavior. The rationale for this method is that some
   // diagnostic commands can seriously disrupt the behavior of the Java Virtual Machine
@@ -291,7 +259,8 @@
   // where the impact level is selected among this list: {Low, Medium, High}. The optional
   // longer description can provide more specific details like the fact that Thread Dump
   // impact depends on the heap size.
-  static const char* impact() { return "Low: No impact"; }
+  static const char* impact()       { return "Low: No impact"; }
+
   // The permission() method returns the description of Java Permission. This
   // permission is required when the diagnostic command is invoked via the
   // DiagnosticCommandMBean. The rationale for this permission check is that
@@ -305,10 +274,10 @@
     JavaPermission p = {NULL, NULL, NULL};
     return p;
   }
-  static int num_arguments() { return 0; }
-  outputStream* output() { return _output; }
-  bool is_heap_allocated()  { return _is_heap_allocated; }
-  virtual void print_help(const char* name) {
+  static int num_arguments()        { return 0; }
+  outputStream* output() const      { return _output; }
+  bool is_heap_allocated() const    { return _is_heap_allocated; }
+  virtual void print_help(const char* name) const {
     output()->print_cr("Syntax: %s", name);
   }
   virtual void parse(CmdLine* line, char delim, TRAPS) {
@@ -324,11 +293,11 @@
   virtual void cleanup() { }
 
   // support for the JMX interface
-  virtual GrowableArray<const char*>* argument_name_array() {
+  virtual GrowableArray<const char*>* argument_name_array() const {
     GrowableArray<const char*>* array = new GrowableArray<const char*>(0);
     return array;
   }
-  virtual GrowableArray<DCmdArgumentInfo*>* argument_info_array() {
+  virtual GrowableArray<DCmdArgumentInfo*>* argument_info_array() const {
     GrowableArray<DCmdArgumentInfo*>* array = new GrowableArray<DCmdArgumentInfo*>(0);
     return array;
   }
@@ -343,25 +312,21 @@
   DCmdParser _dcmdparser;
 public:
   DCmdWithParser (outputStream *output, bool heap=false) : DCmd(output, heap) { }
-  static const char* name() { return "No Name";}
-  static const char* description() { return "No Help";}
   static const char* disabled_message() { return "Diagnostic command currently disabled"; }
-  static const char* impact() { return "Low: No impact"; }
-  static const JavaPermission permission() {JavaPermission p = {NULL, NULL, NULL}; return p; }
-  static int num_arguments() { return 0; }
+  static const char* impact()         { return "Low: No impact"; }
   virtual void parse(CmdLine *line, char delim, TRAPS);
   virtual void execute(DCmdSource source, TRAPS) { }
   virtual void reset(TRAPS);
   virtual void cleanup();
-  virtual void print_help(const char* name);
-  virtual GrowableArray<const char*>* argument_name_array();
-  virtual GrowableArray<DCmdArgumentInfo*>* argument_info_array();
+  virtual void print_help(const char* name) const;
+  virtual GrowableArray<const char*>* argument_name_array() const;
+  virtual GrowableArray<DCmdArgumentInfo*>* argument_info_array() const;
 };
 
 class DCmdMark : public StackObj {
-  DCmd* _ref;
+  DCmd* const _ref;
 public:
-  DCmdMark(DCmd* cmd) { _ref = cmd; }
+  DCmdMark(DCmd* cmd) : _ref(cmd) {}
   ~DCmdMark() {
     if (_ref != NULL) {
       _ref->cleanup();
@@ -382,37 +347,31 @@
   static Mutex*       _dcmdFactory_lock;
   static bool         _send_jmx_notification;
   static bool         _has_pending_jmx_notification;
+  static DCmdFactory* _DCmdFactoryList;
+
   // Pointer to the next factory in the singly-linked list of registered
   // diagnostic commands
   DCmdFactory*        _next;
   // When disabled, a diagnostic command cannot be executed. Any attempt to
   // execute it will result in the printing of the disabled message without
   // instantiating the command.
-  bool                _enabled;
+  const bool          _enabled;
   // When hidden, a diagnostic command doesn't appear in the list of commands
   // provided by the 'help' command.
-  bool                _hidden;
-  uint32_t            _export_flags;
-  int                 _num_arguments;
-  static DCmdFactory* _DCmdFactoryList;
+  const bool          _hidden;
+  const uint32_t      _export_flags;
+  const int           _num_arguments;
+
 public:
-  DCmdFactory(int num_arguments, uint32_t flags, bool enabled, bool hidden) {
-    _next = NULL;
-    _enabled = enabled;
-    _hidden = hidden;
-    _export_flags = flags;
-    _num_arguments = num_arguments;
-  }
-  bool is_enabled() const { return _enabled; }
-  void set_enabled(bool b) { _enabled = b; }
-  bool is_hidden() const { return _hidden; }
-  void set_hidden(bool b) { _hidden = b; }
-  uint32_t export_flags() { return _export_flags; }
-  void set_export_flags(uint32_t f) { _export_flags = f; }
-  int num_arguments() { return _num_arguments; }
-  DCmdFactory* next() { return _next; }
-  virtual DCmd* create_Cheap_instance(outputStream* output) = 0;
-  virtual DCmd* create_resource_instance(outputStream* output) = 0;
+  DCmdFactory(int num_arguments, uint32_t flags, bool enabled, bool hidden)
+    : _next(NULL), _enabled(enabled), _hidden(hidden),
+      _export_flags(flags), _num_arguments(num_arguments) {}
+  bool is_enabled() const       { return _enabled; }
+  bool is_hidden() const        { return _hidden; }
+  uint32_t export_flags() const { return _export_flags; }
+  int num_arguments() const     { return _num_arguments; }
+  DCmdFactory* next() const     { return _next; }
+  virtual DCmd* create_resource_instance(outputStream* output) const = 0;
   virtual const char* name() const = 0;
   virtual const char* description() const = 0;
   virtual const char* impact() const = 0;
@@ -424,8 +383,6 @@