changeset 57415:114caf2fa7b9 lworld

Merge
author dsimms
date Fri, 11 Oct 2019 10:39:58 +0200
parents 6eaf377afed9 e84d8379815b
children 85f25a764824
files .hgtags src/hotspot/cpu/aarch64/aarch64.ad src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp src/hotspot/cpu/x86/globals_x86.hpp src/hotspot/cpu/x86/macroAssembler_x86.cpp src/hotspot/cpu/x86/stubGenerator_x86_64.cpp src/hotspot/cpu/x86/vm_version_x86.cpp src/hotspot/cpu/x86/x86_64.ad src/hotspot/share/adlc/formssel.cpp src/hotspot/share/ci/ciEnv.cpp src/hotspot/share/ci/ciEnv.hpp src/hotspot/share/classfile/classFileParser.cpp src/hotspot/share/classfile/classLoader.cpp src/hotspot/share/classfile/javaClasses.cpp src/hotspot/share/classfile/javaClasses.hpp src/hotspot/share/classfile/systemDictionary.cpp src/hotspot/share/classfile/verificationType.cpp src/hotspot/share/classfile/verifier.cpp src/hotspot/share/code/compiledIC.cpp src/hotspot/share/code/compiledIC.hpp src/hotspot/share/gc/shared/c2/barrierSetC2.hpp src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp src/hotspot/share/include/jvm.h src/hotspot/share/memory/universe.cpp src/hotspot/share/oops/cpCache.cpp src/hotspot/share/oops/instanceKlass.cpp src/hotspot/share/oops/klass.cpp src/hotspot/share/oops/klassVtable.cpp src/hotspot/share/oops/klassVtable.hpp src/hotspot/share/oops/method.cpp src/hotspot/share/opto/c2compiler.cpp src/hotspot/share/opto/classes.cpp src/hotspot/share/opto/classes.hpp src/hotspot/share/opto/compile.cpp src/hotspot/share/opto/compile.hpp src/hotspot/share/opto/loopopts.cpp src/hotspot/share/opto/machnode.hpp src/hotspot/share/opto/matcher.cpp src/hotspot/share/opto/memnode.cpp src/hotspot/share/opto/memnode.hpp src/hotspot/share/opto/node.cpp src/hotspot/share/opto/node.hpp src/hotspot/share/opto/output.cpp src/hotspot/share/opto/phaseX.cpp src/hotspot/share/opto/split_if.cpp src/hotspot/share/prims/jniCheck.cpp src/hotspot/share/prims/jvm.cpp src/hotspot/share/prims/jvmtiEnv.cpp src/hotspot/share/prims/jvmtiExport.cpp src/hotspot/share/prims/jvmtiRedefineClasses.cpp src/hotspot/share/runtime/arguments.cpp src/hotspot/share/runtime/globals.hpp src/hotspot/share/runtime/mutexLocker.cpp src/hotspot/share/runtime/thread.cpp src/hotspot/share/runtime/thread.hpp src/hotspot/share/runtime/vmStructs.cpp src/java.base/share/classes/java/lang/reflect/Proxy.java src/java.base/share/native/libjava/Class.c src/java.base/share/native/libjava/check_classname.c src/java.base/share/native/libjava/verify_stub.c src/java.base/share/native/libverify/check_code.c src/java.base/share/native/libverify/check_format.c src/java.base/unix/native/libjava/jdk_util_md.c src/jdk.compiler/share/classes/com/sun/tools/javac/code/Types.java src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/toolkit/util/Utils.java test/hotspot/jtreg/compiler/graalunit/EA9Test.java test/hotspot/jtreg/compiler/graalunit/com.oracle.mxtool.junit/com/oracle/mxtool/junit/JLModule.java test/hotspot/jtreg/serviceability/jvmti/RedefineClasses/RedefineDeleteJmethod.java test/hotspot/jtreg/serviceability/jvmti/RedefineClasses/libRedefineDeleteJmethod.c test/jdk/ProblemList.txt test/jtreg-ext/requires/VMProps.java
diffstat 300 files changed, 7174 insertions(+), 5993 deletions(-) [+]
line wrap: on
line diff
--- a/.hgtags	Thu Oct 10 14:11:18 2019 +0200
+++ b/.hgtags	Fri Oct 11 10:39:58 2019 +0200
@@ -595,3 +595,4 @@
 9c250a7600e12bdb1e611835250af3204d4aa152 jdk-13-ga
 778fc2dcbdaa8981e07e929a2cacef979c72261e jdk-14+15
 d29f0181ba424a95d881aba5eabf2e393abcc70f jdk-14+16
+5c83830390baafb76a1fbe33443c57620bd45fb9 jdk-14+17
--- a/make/CreateJmods.gmk	Thu Oct 10 14:11:18 2019 +0200
+++ b/make/CreateJmods.gmk	Fri Oct 11 10:39:58 2019 +0200
@@ -86,16 +86,18 @@
 # from there. These files were explicitly filtered or modified in <module>-copy
 # targets. For the rest, just pick up everything from the source legal dirs.
 LEGAL_NOTICES := \
-    $(SUPPORT_OUTPUTDIR)/modules_legal/common \
+    $(wildcard $(SUPPORT_OUTPUTDIR)/modules_legal/common) \
     $(if $(wildcard $(SUPPORT_OUTPUTDIR)/modules_legal/$(MODULE)), \
       $(wildcard $(SUPPORT_OUTPUTDIR)/modules_legal/$(MODULE)), \
       $(call FindModuleLegalSrcDirs, $(MODULE)) \
     )
 
-LEGAL_NOTICES_PATH := $(call PathList, $(LEGAL_NOTICES))
-DEPS += $(call FindFiles, $(LEGAL_NOTICES))
+ifneq ($(strip $(LEGAL_NOTICES)), )
+  LEGAL_NOTICES_PATH := $(call PathList, $(LEGAL_NOTICES))
+  DEPS += $(call FindFiles, $(LEGAL_NOTICES))
 
-JMOD_FLAGS += --legal-notices $(LEGAL_NOTICES_PATH)
+  JMOD_FLAGS += --legal-notices $(LEGAL_NOTICES_PATH)
+endif
 
 ifeq ($(filter-out jdk.incubator.%, $(MODULE)), )
   JMOD_FLAGS += --do-not-resolve-by-default
--- a/make/gensrc/Gensrc-jdk.internal.vm.compiler.management.gmk	Thu Oct 10 14:11:18 2019 +0200
+++ b/make/gensrc/Gensrc-jdk.internal.vm.compiler.management.gmk	Fri Oct 11 10:39:58 2019 +0200
@@ -73,7 +73,7 @@
 	($(CD) $(GENSRC_DIR)/META-INF/providers && \
 	    p=""; \
 	    impl=""; \
-	    for i in $$($(GREP) '^' * | $(SORT) -t ':' -k 2 | $(SED) 's/:.*//'); do \
+	    for i in $$($(NAWK) '$$0=FILENAME" "$$0' * | $(SORT) -k 2 | $(SED) 's/ .*//'); do \
 	      c=$$($(CAT) $$i | $(TR) -d '\n\r'); \
 	      if test x$$p != x$$c; then \
                 if test x$$p != x; then \
--- a/make/lib/CoreLibraries.gmk	Thu Oct 10 14:11:18 2019 +0200
+++ b/make/lib/CoreLibraries.gmk	Fri Oct 11 10:39:58 2019 +0200
@@ -23,8 +23,6 @@
 # questions.
 #
 
-WIN_VERIFY_LIB := $(SUPPORT_OUTPUTDIR)/native/$(MODULE)/libverify/verify.lib
-
 # Hook to include the corresponding custom file, if present.
 $(eval $(call IncludeCustomExtension, lib/CoreLibraries.gmk))
 
@@ -110,14 +108,14 @@
     LDFLAGS_macosx := -L$(SUPPORT_OUTPUTDIR)/native/$(MODULE)/, \
     LDFLAGS_windows := -delayload:shell32.dll, \
     LIBS := $(BUILD_LIBFDLIBM_TARGET), \
-    LIBS_unix := -ljvm -lverify, \
+    LIBS_unix := -ljvm, \
     LIBS_linux := $(LIBDL), \
     LIBS_solaris := -lsocket -lnsl -lscf $(LIBDL), \
     LIBS_aix := $(LIBDL) $(LIBM),\
     LIBS_macosx := -framework CoreFoundation \
         -framework Foundation \
         -framework SystemConfiguration, \
-    LIBS_windows := jvm.lib $(WIN_VERIFY_LIB) \
+    LIBS_windows := jvm.lib \
         shell32.lib delayimp.lib \
         advapi32.lib version.lib, \
 ))
--- a/src/hotspot/cpu/aarch64/aarch64.ad	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/aarch64/aarch64.ad	Fri Oct 11 10:39:58 2019 +0200
@@ -2526,17 +2526,8 @@
     __ INSN(REG, as_Register(BASE));                                    \
   }
 
-typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
-typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
-typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
-                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
-
-  // Used for all non-volatile memory accesses.  The use of
-  // $mem->opcode() to discover whether this pattern uses sign-extended
-  // offsets is something of a kludge.
-  static void loadStore(MacroAssembler masm, mem_insn insn,
-                         Register reg, int opcode,
-                         Register base, int index, int size, int disp)
+
+static Address mem2address(int opcode, Register base, int index, int size, int disp)
   {
     Address::extend scale;
 
@@ -2555,16 +2546,34 @@
     }
 
     if (index == -1) {
-      (masm.*insn)(reg, Address(base, disp));
+      return Address(base, disp);
     } else {
       assert(disp == 0, "unsupported address mode: disp = %d", disp);
-      (masm.*insn)(reg, Address(base, as_Register(index), scale));
+      return Address(base, as_Register(index), scale);
     }
   }
 
+
+typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
+typedef void (MacroAssembler::* mem_insn2)(Register Rt, Register adr);
+typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
+typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
+                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
+
+  // Used for all non-volatile memory accesses.  The use of
+  // $mem->opcode() to discover whether this pattern uses sign-extended
+  // offsets is something of a kludge.
+  static void loadStore(MacroAssembler masm, mem_insn insn,
+                        Register reg, int opcode,
+                        Register base, int index, int size, int disp)
+  {
+    Address addr = mem2address(opcode, base, index, size, disp);
+    (masm.*insn)(reg, addr);
+  }
+
   static void loadStore(MacroAssembler masm, mem_float_insn insn,
-                         FloatRegister reg, int opcode,
-                         Register base, int index, int size, int disp)
+                        FloatRegister reg, int opcode,
+                        Register base, int index, int size, int disp)
   {
     Address::extend scale;
 
@@ -2586,8 +2595,8 @@
   }
 
   static void loadStore(MacroAssembler masm, mem_vector_insn insn,
-                         FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
-                         int opcode, Register base, int index, int size, int disp)
+                        FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
+                        int opcode, Register base, int index, int size, int disp)
   {
     if (index == -1) {
       (masm.*insn)(reg, T, Address(base, disp));
@@ -3804,7 +3813,7 @@
     static const int hi[Op_RegL + 1] = { // enum name
       0,                                 // Op_Node
       0,                                 // Op_Set
-      OptoReg::Bad,                       // Op_RegN
+      OptoReg::Bad,                      // Op_RegN
       OptoReg::Bad,                      // Op_RegI
       R0_H_num,                          // Op_RegP
       OptoReg::Bad,                      // Op_RegF
@@ -6936,7 +6945,7 @@
 instruct loadP(iRegPNoSp dst, memory mem)
 %{
   match(Set dst (LoadP mem));
-  predicate(!needs_acquiring_load(n));
+  predicate(!needs_acquiring_load(n) && (n->as_Load()->barrier_data() == 0));
 
   ins_cost(4 * INSN_COST);
   format %{ "ldr  $dst, $mem\t# ptr" %}
@@ -7629,6 +7638,7 @@
 instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
 %{
   match(Set dst (LoadP mem));
+  predicate(n->as_Load()->barrier_data() == 0);
 
   ins_cost(VOLATILE_REF_COST);
   format %{ "ldar  $dst, $mem\t# ptr" %}
@@ -8611,6 +8621,7 @@
 instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
 
   match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+  predicate(n->as_LoadStore()->barrier_data() == 0);
   ins_cost(2 * VOLATILE_REF_COST);
 
   effect(KILL cr);
@@ -8724,7 +8735,7 @@
 
 instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
 
-  predicate(needs_acquiring_load_exclusive(n));
+  predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
   match(Set res (CompareAndSwapP mem (Binary oldval newval)));
   ins_cost(VOLATILE_REF_COST);
 
@@ -8855,6 +8866,7 @@
 %}
 
 instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
+  predicate(n->as_LoadStore()->barrier_data() == 0);
   match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
   ins_cost(2 * VOLATILE_REF_COST);
   effect(TEMP_DEF res, KILL cr);
@@ -8954,7 +8966,7 @@
 %}
 
 instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
-  predicate(needs_acquiring_load_exclusive(n));
+  predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
   match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
   ins_cost(VOLATILE_REF_COST);
   effect(TEMP_DEF res, KILL cr);
@@ -9055,6 +9067,7 @@
 %}
 
 instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
+  predicate(n->as_LoadStore()->barrier_data() == 0);
   match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
   ins_cost(2 * VOLATILE_REF_COST);
   effect(KILL cr);
@@ -9162,8 +9175,8 @@
 %}
 
 instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
-  predicate(needs_acquiring_load_exclusive(n));
   match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+  predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
   ins_cost(VOLATILE_REF_COST);
   effect(KILL cr);
   format %{
@@ -9213,6 +9226,7 @@
 %}
 
 instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
+  predicate(n->as_LoadStore()->barrier_data() == 0);
   match(Set prev (GetAndSetP mem newv));
   ins_cost(2 * VOLATILE_REF_COST);
   format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
@@ -9256,7 +9270,7 @@
 %}
 
 instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) %{
-  predicate(needs_acquiring_load_exclusive(n));
+  predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
   match(Set prev (GetAndSetP mem newv));
   ins_cost(VOLATILE_REF_COST);
   format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
--- a/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -162,16 +162,12 @@
   // Creation also verifies the object.
   NativeMovConstReg* method_holder
     = nativeMovConstReg_at(stub + NativeInstruction::instruction_size);
-#ifndef PRODUCT
+
+#ifdef ASSERT
   NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address());
+  verify_mt_safe(callee, entry, method_holder, jump);
+#endif
 
-  // read the value once
-  volatile intptr_t data = method_holder->data();
-  assert(data == 0 || data == (intptr_t)callee(),
-         "a) MT-unsafe modification of inline cache");
-  assert(data == 0 || jump->jump_destination() == entry,
-         "b) MT-unsafe modification of inline cache");
-#endif
   // Update stub.
   method_holder->set_data((intptr_t)callee());
   NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry);
--- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -24,22 +24,23 @@
 #include "precompiled.hpp"
 #include "asm/macroAssembler.inline.hpp"
 #include "code/codeBlob.hpp"
+#include "code/vmreg.inline.hpp"
 #include "gc/z/zBarrier.inline.hpp"
 #include "gc/z/zBarrierSet.hpp"
 #include "gc/z/zBarrierSetAssembler.hpp"
 #include "gc/z/zBarrierSetRuntime.hpp"
+#include "gc/z/zThreadLocalData.hpp"
 #include "memory/resourceArea.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "utilities/macros.hpp"
 #ifdef COMPILER1
 #include "c1/c1_LIRAssembler.hpp"
 #include "c1/c1_MacroAssembler.hpp"
 #include "gc/z/c1/zBarrierSetC1.hpp"
 #endif // COMPILER1
-
-#include "gc/z/zThreadLocalData.hpp"
-
-ZBarrierSetAssembler::ZBarrierSetAssembler() :
-    _load_barrier_slow_stub(),
-    _load_barrier_weak_slow_stub() {}
+#ifdef COMPILER2
+#include "gc/z/c2/zBarrierSetC2.hpp"
+#endif // COMPILER2
 
 #ifdef PRODUCT
 #define BLOCK_COMMENT(str) /* nothing */
@@ -66,7 +67,7 @@
   assert_different_registers(rscratch1, rscratch2, src.base());
   assert_different_registers(rscratch1, rscratch2, dst);
 
-  RegSet savedRegs = RegSet::range(r0,r28) - RegSet::of(dst, rscratch1, rscratch2);
+  RegSet savedRegs = RegSet::range(r0, r28) - RegSet::of(dst, rscratch1, rscratch2);
 
   Label done;
 
@@ -207,7 +208,8 @@
 
   // The Address offset is too large to direct load - -784. Our range is +127, -128.
   __ mov(tmp, (long int)(in_bytes(ZThreadLocalData::address_bad_mask_offset()) -
-      in_bytes(JavaThread::jni_environment_offset())));
+              in_bytes(JavaThread::jni_environment_offset())));
+
   // Load address bad mask
   __ add(tmp, jni_env, tmp);
   __ ldr(tmp, Address(tmp));
@@ -295,12 +297,12 @@
   __ prologue("zgc_load_barrier stub", false);
 
   // We don't use push/pop_clobbered_registers() - we need to pull out the result from r0.
-  for (int i = 0; i < 32; i +=2) {
-    __ stpd(as_FloatRegister(i), as_FloatRegister(i+1), Address(__ pre(sp,-16)));
+  for (int i = 0; i < 32; i += 2) {
+    __ stpd(as_FloatRegister(i), as_FloatRegister(i + 1), Address(__ pre(sp,-16)));
   }
 
-  RegSet saveRegs = RegSet::range(r0,r28) - RegSet::of(r0);
-  __ push(saveRegs, sp);
+  const RegSet save_regs = RegSet::range(r1, r28);
+  __ push(save_regs, sp);
 
   // Setup arguments
   __ load_parameter(0, c_rarg0);
@@ -308,98 +310,161 @@
 
   __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2);
 
-  __ pop(saveRegs, sp);
+  __ pop(save_regs, sp);
 
-  for (int i = 30; i >0; i -=2) {
-      __ ldpd(as_FloatRegister(i), as_FloatRegister(i+1), Address(__ post(sp, 16)));
-    }
+  for (int i = 30; i >= 0; i -= 2) {
+    __ ldpd(as_FloatRegister(i), as_FloatRegister(i + 1), Address(__ post(sp, 16)));
+  }
 
   __ epilogue();
 }
 #endif // COMPILER1
 
-#undef __
-#define __ cgen->assembler()->
+#ifdef COMPILER2
 
-// Generates a register specific stub for calling
-// ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded() or
-// ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded().
-//
-// The raddr register serves as both input and output for this stub. When the stub is
-// called the raddr register contains the object field address (oop*) where the bad oop
-// was loaded from, which caused the slow path to be taken. On return from the stub the
-// raddr register contains the good/healed oop returned from
-// ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded() or
-// ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded().
-static address generate_load_barrier_stub(StubCodeGenerator* cgen, Register raddr, DecoratorSet decorators) {
-  // Don't generate stub for invalid registers
-  if (raddr == zr || raddr == r29 || raddr == r30) {
-    return NULL;
+OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
+  if (!OptoReg::is_reg(opto_reg)) {
+    return OptoReg::Bad;
   }
 
-  // Create stub name
-  char name[64];
-  const bool weak = (decorators & ON_WEAK_OOP_REF) != 0;
-  os::snprintf(name, sizeof(name), "zgc_load_barrier%s_stub_%s", weak ? "_weak" : "", raddr->name());
-
-  __ align(CodeEntryAlignment);
-  StubCodeMark mark(cgen, "StubRoutines", os::strdup(name, mtCode));
-  address start = __ pc();
-
-  // Save live registers
-  RegSet savedRegs = RegSet::range(r0,r18) - RegSet::of(raddr);
-
-  __ enter();
-  __ push(savedRegs, sp);
-
-  // Setup arguments
-  if (raddr != c_rarg1) {
-    __ mov(c_rarg1, raddr);
+  const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
+  if (vm_reg->is_FloatRegister()) {
+    return opto_reg & ~1;
   }
 
-  __ ldr(c_rarg0, Address(raddr));
+  return opto_reg;
+}
 
-  // Call barrier function
-  __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), c_rarg0, c_rarg1);
+#undef __
+#define __ _masm->
 
-  // Move result returned in r0 to raddr, if needed
-  if (raddr != r0) {
-    __ mov(raddr, r0);
+class ZSaveLiveRegisters {
+private:
+  MacroAssembler* const _masm;
+  RegSet                _gp_regs;
+  RegSet                _fp_regs;
+
+public:
+  void initialize(ZLoadBarrierStubC2* stub) {
+    // Create mask of live registers
+    RegMask live = stub->live();
+
+    // Record registers that needs to be saved/restored
+    while (live.is_NotEmpty()) {
+      const OptoReg::Name opto_reg = live.find_first_elem();
+      live.Remove(opto_reg);
+      if (OptoReg::is_reg(opto_reg)) {
+        const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
+        if (vm_reg->is_Register()) {
+          _gp_regs += RegSet::of(vm_reg->as_Register());
+        } else if (vm_reg->is_FloatRegister()) {
+          _fp_regs += RegSet::of((Register)vm_reg->as_FloatRegister());
+        } else {
+          fatal("Unknown register type");
+        }
+      }
+    }
+
+    // Remove C-ABI SOE registers, scratch regs and _ref register that will be updated
+    _gp_regs -= RegSet::range(r19, r30) + RegSet::of(r8, r9, stub->ref());
   }
 
-  __ pop(savedRegs, sp);
-  __ leave();
-  __ ret(lr);
+  ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
+      _masm(masm),
+      _gp_regs(),
+      _fp_regs() {
 
-  return start;
+    // Figure out what registers to save/restore
+    initialize(stub);
+
+    // Save registers
+    __ push(_gp_regs, sp);
+    __ push_fp(_fp_regs, sp);
+  }
+
+  ~ZSaveLiveRegisters() {
+    // Restore registers
+    __ pop_fp(_fp_regs, sp);
+    __ pop(_gp_regs, sp);
+  }
+};
+
+#undef __
+#define __ _masm->
+
+class ZSetupArguments {
+private:
+  MacroAssembler* const _masm;
+  const Register        _ref;
+  const Address         _ref_addr;
+
+public:
+  ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
+      _masm(masm),
+      _ref(stub->ref()),
+      _ref_addr(stub->ref_addr()) {
+
+    // Setup arguments
+    if (_ref_addr.base() == noreg) {
+      // No self healing
+      if (_ref != c_rarg0) {
+        __ mov(c_rarg0, _ref);
+      }
+      __ mov(c_rarg1, 0);
+    } else {
+      // Self healing
+      if (_ref == c_rarg0) {
+        // _ref is already at correct place
+        __ lea(c_rarg1, _ref_addr);
+      } else if (_ref != c_rarg1) {
+        // _ref is in wrong place, but not in c_rarg1, so fix it first
+        __ lea(c_rarg1, _ref_addr);
+        __ mov(c_rarg0, _ref);
+      } else if (_ref_addr.base() != c_rarg0 && _ref_addr.index() != c_rarg0) {
+        assert(_ref == c_rarg1, "Mov ref first, vacating c_rarg0");
+        __ mov(c_rarg0, _ref);
+        __ lea(c_rarg1, _ref_addr);
+      } else {
+        assert(_ref == c_rarg1, "Need to vacate c_rarg1 and _ref_addr is using c_rarg0");
+        if (_ref_addr.base() == c_rarg0 || _ref_addr.index() == c_rarg0) {
+          __ mov(rscratch2, c_rarg1);
+          __ lea(c_rarg1, _ref_addr);
+          __ mov(c_rarg0, rscratch2);
+        } else {
+          ShouldNotReachHere();
+        }
+      }
+    }
+  }
+
+  ~ZSetupArguments() {
+    // Transfer result
+    if (_ref != r0) {
+      __ mov(_ref, r0);
+    }
+  }
+};
+
+#undef __
+#define __ masm->
+
+void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const {
+  BLOCK_COMMENT("ZLoadBarrierStubC2");
+
+  // Stub entry
+  __ bind(*stub->entry());
+
+  {
+    ZSaveLiveRegisters save_live_registers(masm, stub);
+    ZSetupArguments setup_arguments(masm, stub);
+    __ mov(rscratch1, stub->slow_path());
+    __ blr(rscratch1);
+  }
+
+  // Stub exit
+  __ b(*stub->continuation());
 }
 
 #undef __
 
-static void barrier_stubs_init_inner(const char* label, const DecoratorSet decorators, address* stub) {
-  const int nregs = 28;              // Exclude FP, XZR, SP from calculation.
-  const int code_size = nregs * 254; // Rough estimate of code size
-
-  ResourceMark rm;
-
-  CodeBuffer buf(BufferBlob::create(label, code_size));
-  StubCodeGenerator cgen(&buf);
-
-  for (int i = 0; i < nregs; i++) {
-    const Register reg = as_Register(i);
-    stub[i] = generate_load_barrier_stub(&cgen, reg, decorators);
-  }
-}
-
-void ZBarrierSetAssembler::barrier_stubs_init() {
-  barrier_stubs_init_inner("zgc_load_barrier_stubs", ON_STRONG_OOP_REF, _load_barrier_slow_stub);
-  barrier_stubs_init_inner("zgc_load_barrier_weak_stubs", ON_WEAK_OOP_REF, _load_barrier_weak_slow_stub);
-}
-
-address ZBarrierSetAssembler::load_barrier_slow_stub(Register reg) {
-  return _load_barrier_slow_stub[reg->encoding()];
-}
-
-address ZBarrierSetAssembler::load_barrier_weak_slow_stub(Register reg) {
-  return _load_barrier_weak_slow_stub[reg->encoding()];
-}
+#endif // COMPILER2
--- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -24,6 +24,12 @@
 #ifndef CPU_AARCH64_GC_Z_ZBARRIERSETASSEMBLER_AARCH64_HPP
 #define CPU_AARCH64_GC_Z_ZBARRIERSETASSEMBLER_AARCH64_HPP
 
+#include "code/vmreg.hpp"
+#include "oops/accessDecorators.hpp"
+#ifdef COMPILER2
+#include "opto/optoreg.hpp"
+#endif // COMPILER2
+
 #ifdef COMPILER1
 class LIR_Assembler;
 class LIR_OprDesc;
@@ -32,14 +38,13 @@
 class ZLoadBarrierStubC1;
 #endif // COMPILER1
 
+#ifdef COMPILER2
+class Node;
+class ZLoadBarrierStubC2;
+#endif // COMPILER2
+
 class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase {
-private:
-  address _load_barrier_slow_stub[RegisterImpl::number_of_registers];
-  address _load_barrier_weak_slow_stub[RegisterImpl::number_of_registers];
-
 public:
-  ZBarrierSetAssembler();
-
   virtual void load_at(MacroAssembler* masm,
                        DecoratorSet decorators,
                        BasicType type,
@@ -84,10 +89,13 @@
                                              DecoratorSet decorators) const;
 #endif // COMPILER1
 
-  virtual void barrier_stubs_init();
+#ifdef COMPILER2
+  OptoReg::Name refine_register(const Node* node,
+                                OptoReg::Name opto_reg);
 
-  address load_barrier_slow_stub(Register reg);
-  address load_barrier_weak_slow_stub(Register reg);
+  void generate_c2_load_barrier_stub(MacroAssembler* masm,
+                                     ZLoadBarrierStubC2* stub) const;
+#endif // COMPILER2
 };
 
 #endif // CPU_AARCH64_GC_Z_ZBARRIERSETASSEMBLER_AARCH64_HPP
--- a/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -40,7 +40,7 @@
 //  +--------------------------------+ 0x0000014000000000 (20TB)
 //  |         Remapped View          |
 //  +--------------------------------+ 0x0000010000000000 (16TB)
-//  |     (Reserved, but unused)     |
+//  .                                .
 //  +--------------------------------+ 0x00000c0000000000 (12TB)
 //  |         Marked1 View           |
 //  +--------------------------------+ 0x0000080000000000 (8TB)
@@ -75,7 +75,7 @@
 //  +--------------------------------+ 0x0000280000000000 (40TB)
 //  |         Remapped View          |
 //  +--------------------------------+ 0x0000200000000000 (32TB)
-//  |     (Reserved, but unused)     |
+//  .                                .
 //  +--------------------------------+ 0x0000180000000000 (24TB)
 //  |         Marked1 View           |
 //  +--------------------------------+ 0x0000100000000000 (16TB)
@@ -110,7 +110,7 @@
 //  +--------------------------------+ 0x0000500000000000 (80TB)
 //  |         Remapped View          |
 //  +--------------------------------+ 0x0000400000000000 (64TB)
-//  |     (Reserved, but unused)     |
+//  .                                .
 //  +--------------------------------+ 0x0000300000000000 (48TB)
 //  |         Marked1 View           |
 //  +--------------------------------+ 0x0000200000000000 (32TB)
--- a/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -36,7 +36,6 @@
 //  ------------------------------------------------------------------
 //
 const size_t ZPlatformGranuleSizeShift      = 21; // 2MB
-const size_t ZPlatformMaxHeapSizeShift      = 46; // 16TB
 const size_t ZPlatformNMethodDisarmedOffset = 4;
 const size_t ZPlatformCacheLineSize         = 64;
 
--- a/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad	Fri Oct 11 10:39:58 2019 +0200
@@ -24,155 +24,244 @@
 source_hpp %{
 
 #include "gc/z/c2/zBarrierSetC2.hpp"
+#include "gc/z/zThreadLocalData.hpp"
 
 %}
 
 source %{
 
-#include "gc/z/zBarrierSetAssembler.hpp"
+static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) {
+  ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak);
+  __ ldr(tmp, Address(rthread, ZThreadLocalData::address_bad_mask_offset()));
+  __ andr(tmp, tmp, ref);
+  __ cbnz(tmp, *stub->entry());
+  __ bind(*stub->continuation());
+}
 
-static void z_load_barrier_slow_reg(MacroAssembler& _masm, Register dst, 
-                                    Register base, int index, int scale, 
-                                    int disp, bool weak) {
-  const address stub = weak ? ZBarrierSet::assembler()->load_barrier_weak_slow_stub(dst)
-                            : ZBarrierSet::assembler()->load_barrier_slow_stub(dst);
-
-  if (index == -1) {
-    if (disp != 0) {
-      __ lea(dst, Address(base, disp));
-    } else {
-       __ mov(dst, base);
-    }
-  } else {
-    Register index_reg = as_Register(index);
-    if (disp == 0) {
-      __ lea(dst, Address(base, index_reg, Address::lsl(scale)));
-    } else {
-      __ lea(dst, Address(base, disp));
-      __ lea(dst, Address(dst, index_reg, Address::lsl(scale)));
-    }
-  }
-
-  __ far_call(RuntimeAddress(stub));
+static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
+  ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */);
+  __ b(*stub->entry());
+  __ bind(*stub->continuation());
 }
 
 %}
 
-//
-// Execute ZGC load barrier (strong) slow path
-//
-instruct loadBarrierSlowReg(iRegP dst, memory src, rFlagsReg cr,
-    vRegD_V0 v0, vRegD_V1 v1, vRegD_V2 v2, vRegD_V3 v3, vRegD_V4 v4,
-    vRegD_V5 v5, vRegD_V6 v6, vRegD_V7 v7, vRegD_V8 v8, vRegD_V9 v9,
-    vRegD_V10 v10, vRegD_V11 v11, vRegD_V12 v12, vRegD_V13 v13, vRegD_V14 v14,
-    vRegD_V15 v15, vRegD_V16 v16, vRegD_V17 v17, vRegD_V18 v18, vRegD_V19 v19,
-    vRegD_V20 v20, vRegD_V21 v21, vRegD_V22 v22, vRegD_V23 v23, vRegD_V24 v24,
-    vRegD_V25 v25, vRegD_V26 v26, vRegD_V27 v27, vRegD_V28 v28, vRegD_V29 v29,
-    vRegD_V30 v30, vRegD_V31 v31) %{
-  match(Set dst (LoadBarrierSlowReg src dst));
-  predicate(!n->as_LoadBarrierSlowReg()->is_weak());
+// Load Pointer
+instruct zLoadP(iRegPNoSp dst, memory mem, rFlagsReg cr)
+%{
+  match(Set dst (LoadP mem));
+  predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierStrong));
+  effect(TEMP dst, KILL cr);
 
-  effect(KILL cr,
-     KILL v0, KILL v1, KILL v2, KILL v3, KILL v4, KILL v5, KILL v6, KILL v7,
-     KILL v8, KILL v9, KILL v10, KILL v11, KILL v12, KILL v13, KILL v14,
-     KILL v15, KILL v16, KILL v17, KILL v18, KILL v19, KILL v20, KILL v21,
-     KILL v22, KILL v23, KILL v24, KILL v25, KILL v26, KILL v27, KILL v28,
-     KILL v29, KILL v30, KILL v31);
+  ins_cost(4 * INSN_COST);
 
-  format %{ "lea $dst, $src\n\t"
-            "call #ZLoadBarrierSlowPath" %}
+  format %{ "ldr  $dst, $mem" %}
 
   ins_encode %{
-    z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$base$$Register,
-                            $src$$index, $src$$scale, $src$$disp, false);
+    const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+    __ ldr($dst$$Register, ref_addr);
+    if (barrier_data() != ZLoadBarrierElided) {
+      z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, false /* weak */);
+    }
   %}
-  ins_pipe(pipe_slow);
+
+  ins_pipe(iload_reg_mem);
 %}
 
-//
-// Execute ZGC load barrier (weak) slow path
-//
-instruct loadBarrierWeakSlowReg(iRegP dst, memory src, rFlagsReg cr,
-    vRegD_V0 v0, vRegD_V1 v1, vRegD_V2 v2, vRegD_V3 v3, vRegD_V4 v4,
-    vRegD_V5 v5, vRegD_V6 v6, vRegD_V7 v7, vRegD_V8 v8, vRegD_V9 v9,
-    vRegD_V10 v10, vRegD_V11 v11, vRegD_V12 v12, vRegD_V13 v13, vRegD_V14 v14,
-    vRegD_V15 v15, vRegD_V16 v16, vRegD_V17 v17, vRegD_V18 v18, vRegD_V19 v19,
-    vRegD_V20 v20, vRegD_V21 v21, vRegD_V22 v22, vRegD_V23 v23, vRegD_V24 v24,
-    vRegD_V25 v25, vRegD_V26 v26, vRegD_V27 v27, vRegD_V28 v28, vRegD_V29 v29,
-    vRegD_V30 v30, vRegD_V31 v31) %{
-  match(Set dst (LoadBarrierSlowReg src dst));
-  predicate(n->as_LoadBarrierSlowReg()->is_weak());
+// Load Weak Pointer
+instruct zLoadWeakP(iRegPNoSp dst, memory mem, rFlagsReg cr)
+%{
+  match(Set dst (LoadP mem));
+  predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierWeak));
+  effect(TEMP dst, KILL cr);
 
-  effect(KILL cr,
-     KILL v0, KILL v1, KILL v2, KILL v3, KILL v4, KILL v5, KILL v6, KILL v7,
-     KILL v8, KILL v9, KILL v10, KILL v11, KILL v12, KILL v13, KILL v14,
-     KILL v15, KILL v16, KILL v17, KILL v18, KILL v19, KILL v20, KILL v21,
-     KILL v22, KILL v23, KILL v24, KILL v25, KILL v26, KILL v27, KILL v28,
-     KILL v29, KILL v30, KILL v31);
+  ins_cost(4 * INSN_COST);
 
-  format %{ "lea $dst, $src\n\t"
-            "call #ZLoadBarrierSlowPath" %}
+  format %{ "ldr  $dst, $mem" %}
 
   ins_encode %{
-    z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$base$$Register,
-                            $src$$index, $src$$scale, $src$$disp, true);
+    const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+    __ ldr($dst$$Register, ref_addr);
+    z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, true /* weak */);
   %}
-  ins_pipe(pipe_slow);
+
+  ins_pipe(iload_reg_mem);
 %}
 
+// Load Pointer Volatile
+instruct zLoadPVolatile(iRegPNoSp dst, indirect mem /* sync_memory */, rFlagsReg cr)
+%{
+  match(Set dst (LoadP mem));
+  predicate(UseZGC && needs_acquiring_load(n) && n->as_Load()->barrier_data() == ZLoadBarrierStrong);
+  effect(TEMP dst, KILL cr);
 
-// Specialized versions of compareAndExchangeP that adds a keepalive that is consumed
-// but doesn't affect output.
+  ins_cost(VOLATILE_REF_COST);
 
-instruct z_compareAndExchangeP(iRegPNoSp res, indirect mem,
-                               iRegP oldval, iRegP newval, iRegP keepalive,
-                               rFlagsReg cr) %{
-  match(Set res (ZCompareAndExchangeP (Binary mem keepalive) (Binary oldval newval)));
-  ins_cost(2 * VOLATILE_REF_COST);
-  effect(TEMP_DEF res, KILL cr);
-  format %{
-    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
+  format %{ "ldar  $dst, $mem\t" %}
+
+  ins_encode %{
+    __ ldar($dst$$Register, $mem$$Register);
+    if (barrier_data() != ZLoadBarrierElided) {
+      z_load_barrier(_masm, this, Address($mem$$Register), $dst$$Register, rscratch2 /* tmp */, false /* weak */);
+    }
   %}
-  ins_encode %{
-    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
-               Assembler::xword, /*acquire*/ false, /*release*/ true,
-               /*weak*/ false, $res$$Register);
-  %}
-  ins_pipe(pipe_slow);
+
+  ins_pipe(pipe_serial);
 %}
 
-instruct z_compareAndSwapP(iRegINoSp res,
-                           indirect mem,
-                           iRegP oldval, iRegP newval, iRegP keepalive,
-                            rFlagsReg cr) %{
-
-  match(Set res (ZCompareAndSwapP (Binary mem keepalive) (Binary oldval newval)));
-  match(Set res (ZWeakCompareAndSwapP (Binary mem keepalive) (Binary oldval newval)));
+instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
+  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+  predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
+  effect(KILL cr, TEMP_DEF res);
 
   ins_cost(2 * VOLATILE_REF_COST);
 
-  effect(KILL cr);
+  format %{ "cmpxchg $mem, $oldval, $newval\n\t"
+            "cset    $res, EQ" %}
 
- format %{
-    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
-    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
- %}
-
- ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
-            aarch64_enc_cset_eq(res));
+  ins_encode %{
+    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
+    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+               false /* acquire */, true /* release */, false /* weak */, rscratch2);
+    __ cset($res$$Register, Assembler::EQ);
+    if (barrier_data() != ZLoadBarrierElided) {
+      Label good;
+      __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset()));
+      __ andr(rscratch1, rscratch1, rscratch2);
+      __ cbz(rscratch1, good);
+      z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */);
+      __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+                 false /* acquire */, true /* release */, false /* weak */, rscratch2);
+      __ cset($res$$Register, Assembler::EQ);
+      __ bind(good);
+    }
+  %}
 
   ins_pipe(pipe_slow);
 %}
 
-
-instruct z_get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev,
-                        iRegP keepalive) %{
-  match(Set prev (ZGetAndSetP mem (Binary newv keepalive)));
+instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
+  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+  predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong));
+  effect(KILL cr, TEMP_DEF res);
 
   ins_cost(2 * VOLATILE_REF_COST);
+
+ format %{ "cmpxchg $mem, $oldval, $newval\n\t"
+           "cset    $res, EQ" %}
+
+  ins_encode %{
+    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
+    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+               true /* acquire */, true /* release */, false /* weak */, rscratch2);
+    __ cset($res$$Register, Assembler::EQ);
+    if (barrier_data() != ZLoadBarrierElided) {
+      Label good;
+      __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset()));
+      __ andr(rscratch1, rscratch1, rscratch2);
+      __ cbz(rscratch1, good);
+      z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */ );
+      __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+                 true /* acquire */, true /* release */, false /* weak */, rscratch2);
+      __ cset($res$$Register, Assembler::EQ);
+      __ bind(good);
+    }
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
+  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+  predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
+  effect(TEMP_DEF res, KILL cr);
+
+  ins_cost(2 * VOLATILE_REF_COST);
+
+  format %{ "cmpxchg $res = $mem, $oldval, $newval" %}
+
+  ins_encode %{
+    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
+    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+               false /* acquire */, true /* release */, false /* weak */, $res$$Register);
+    if (barrier_data() != ZLoadBarrierElided) {
+      Label good;
+      __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset()));
+      __ andr(rscratch1, rscratch1, $res$$Register);
+      __ cbz(rscratch1, good);
+      z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */);
+      __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+                 false /* acquire */, true /* release */, false /* weak */, $res$$Register);
+      __ bind(good);
+    }
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
+  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+  predicate(UseZGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
+  effect(TEMP_DEF res, KILL cr);
+
+  ins_cost(2 * VOLATILE_REF_COST);
+
+  format %{ "cmpxchg $res = $mem, $oldval, $newval" %}
+
+  ins_encode %{
+    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
+    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+               true /* acquire */, true /* release */, false /* weak */, $res$$Register);
+    if (barrier_data() != ZLoadBarrierElided) {
+      Label good;
+      __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset()));
+      __ andr(rscratch1, rscratch1, $res$$Register);
+      __ cbz(rscratch1, good);
+      z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */);
+      __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+                 true /* acquire */, true /* release */, false /* weak */, $res$$Register);
+      __ bind(good);
+    }
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{
+  match(Set prev (GetAndSetP mem newv));
+  predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
+  effect(TEMP_DEF prev, KILL cr);
+
+  ins_cost(2 * VOLATILE_REF_COST);
+
   format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
+
   ins_encode %{
-    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
+    __ atomic_xchg($prev$$Register, $newv$$Register, $mem$$Register);
+    if (barrier_data() != ZLoadBarrierElided) {
+      z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */);
+    }
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{
+  match(Set prev (GetAndSetP mem newv));
+  predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong));
+  effect(TEMP_DEF prev, KILL cr);
+
+  ins_cost(VOLATILE_REF_COST);
+
+  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
+
+  ins_encode %{
+    __ atomic_xchgal($prev$$Register, $newv$$Register, $mem$$Register);
+    if (barrier_data() != ZLoadBarrierElided) {
+      z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */);
+    }
   %}
   ins_pipe(pipe_serial);
 %}
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -2178,6 +2178,65 @@
 
   return count;
 }
+
+// Push lots of registers in the bit set supplied.  Don't push sp.
+// Return the number of words pushed
+int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
+  int words_pushed = 0;
+
+  // Scan bitset to accumulate register pairs
+  unsigned char regs[32];
+  int count = 0;
+  for (int reg = 0; reg <= 31; reg++) {
+    if (1 & bitset)
+      regs[count++] = reg;
+    bitset >>= 1;
+  }
+  regs[count++] = zr->encoding_nocheck();
+  count &= ~1;  // Only push an even number of regs
+
+  // Always pushing full 128 bit registers.
+  if (count) {
+    stpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(pre(stack, -count * wordSize * 2)));
+    words_pushed += 2;
+  }
+  for (int i = 2; i < count; i += 2) {
+    stpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2));
+    words_pushed += 2;
+  }
+
+  assert(words_pushed == count, "oops, pushed != count");
+  return count;
+}
+
+int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
+  int words_pushed = 0;
+
+  // Scan bitset to accumulate register pairs
+  unsigned char regs[32];
+  int count = 0;
+  for (int reg = 0; reg <= 31; reg++) {
+    if (1 & bitset)
+      regs[count++] = reg;
+    bitset >>= 1;
+  }
+  regs[count++] = zr->encoding_nocheck();
+  count &= ~1;
+
+  for (int i = 2; i < count; i += 2) {
+    ldpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2));
+    words_pushed += 2;
+  }
+  if (count) {
+    ldpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(post(stack, count * wordSize * 2)));
+    words_pushed += 2;
+  }
+
+  assert(words_pushed == count, "oops, pushed != count");
+
+  return count;
+}
+
 #ifdef ASSERT
 void MacroAssembler::verify_heapbase(const char* msg) {
 #if 0
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -447,12 +447,18 @@
   int push(unsigned int bitset, Register stack);
   int pop(unsigned int bitset, Register stack);
 
+  int push_fp(unsigned int bitset, Register stack);
+  int pop_fp(unsigned int bitset, Register stack);
+
   void mov(Register dst, Address a);
 
 public:
   void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
   void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }
 
+  void push_fp(RegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
+  void pop_fp(RegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); }
+
   // Push and pop everything that might be clobbered by a native
   // runtime call except rscratch1 and rscratch2.  (They are always
   // scratch, so we don't have to protect them.)  Only save the lower
--- a/src/hotspot/cpu/aarch64/register_aarch64.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/aarch64/register_aarch64.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -230,6 +230,11 @@
     return *this;
   }
 
+  RegSet &operator-=(const RegSet aSet) {
+    *this = *this - aSet;
+    return *this;
+  }
+
   static RegSet of(Register r1) {
     return RegSet(r1);
   }
--- a/src/hotspot/cpu/arm/compiledIC_arm.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/arm/compiledIC_arm.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -115,16 +115,7 @@
   // Creation also verifies the object.
   NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);
   NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
-
-#ifdef ASSERT
-  // read the value once
-  volatile intptr_t data = method_holder->data();
-  volatile address destination = jump->jump_destination();
-  assert(data == 0 || data == (intptr_t)callee(),
-         "a) MT-unsafe modification of inline cache");
-  assert(destination == (address)-1 || destination == entry,
-         "b) MT-unsafe modification of inline cache");
-#endif
+  verify_mt_safe(callee, entry, method_holder, jump);
 
   // Update stub.
   method_holder->set_data((intptr_t)callee());
--- a/src/hotspot/cpu/ppc/compiledIC_ppc.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/ppc/compiledIC_ppc.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2015 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -178,15 +178,7 @@
   NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + IC_pos_in_java_to_interp_stub);
   NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
 
-#ifdef ASSERT
-  // read the value once
-  volatile intptr_t data = method_holder->data();
-  volatile address destination = jump->jump_destination();
-  assert(data == 0 || data == (intptr_t)callee(),
-         "a) MT-unsafe modification of inline cache");
-  assert(destination == (address)-1 || destination == entry,
-         "b) MT-unsafe modification of inline cache");
-#endif
+  verify_mt_safe(callee, entry, method_holder, jump);
 
   // Update stub.
   method_holder->set_data((intptr_t)callee());
--- a/src/hotspot/cpu/s390/compiledIC_s390.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/s390/compiledIC_s390.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2016 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -104,19 +104,7 @@
   // Creation also verifies the object.
   NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeCall::get_IC_pos_in_java_to_interp_stub());
   NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
-
-#ifdef ASSERT
-  // A generated lambda form might be deleted from the Lambdaform
-  // cache in MethodTypeForm.  If a jit compiled lambdaform method
-  // becomes not entrant and the cache access returns null, the new
-  // resolve will lead to a new generated LambdaForm.
-  volatile intptr_t data = method_holder->data();
-  volatile address destination = jump->jump_destination();
-  assert(data == 0 || data == (intptr_t)callee() || callee->is_compiled_lambda_form(),
-         "a) MT-unsafe modification of inline cache");
-  assert(destination == (address)-1 || destination == entry,
-         "b) MT-unsafe modification of inline cache");
-#endif
+  verify_mt_safe(callee, entry, method_holder, jump);
 
   // Update stub.
   method_holder->set_data((intptr_t)callee(), relocInfo::metadata_type);
--- a/src/hotspot/cpu/sparc/compiledIC_sparc.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/sparc/compiledIC_sparc.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -104,16 +104,7 @@
   // Creation also verifies the object.
   NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);
   NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
-
-#ifdef ASSERT
-  // read the value once
-  volatile intptr_t data = method_holder->data();
-  volatile address destination = jump->jump_destination();
-  assert(data == 0 || data == (intptr_t)callee(),
-         "a) MT-unsafe modification of inline cache");
-  assert(destination == (address)-1 || destination == entry,
-         "b) MT-unsafe modification of inline cache");
-#endif
+  verify_mt_safe(callee, entry, method_holder, jump);
 
   // Update stub.
   method_holder->set_data((intptr_t)callee());
--- a/src/hotspot/cpu/x86/compiledIC_x86.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/x86/compiledIC_x86.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -157,16 +157,7 @@
   // Creation also verifies the object.
   NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);
   NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
-
-#ifdef ASSERT
-  Method* old_method = reinterpret_cast<Method*>(method_holder->data());
-  address destination = jump->jump_destination();
-  assert(old_method == NULL || old_method == callee() ||
-         !old_method->method_holder()->is_loader_alive(),
-         "a) MT-unsafe modification of inline cache");
-  assert(destination == (address)-1 || destination == entry,
-         "b) MT-unsafe modification of inline cache");
-#endif
+  verify_mt_safe(callee, entry, method_holder, jump);
 
   // Update stub.
   method_holder->set_data((intptr_t)callee());
--- a/src/hotspot/cpu/x86/gc/z/zArguments_x86.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/x86/gc/z/zArguments_x86.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -23,20 +23,7 @@
 
 #include "precompiled.hpp"
 #include "gc/z/zArguments.hpp"
-#include "runtime/globals.hpp"
-#include "runtime/globals_extension.hpp"
-#include "utilities/debug.hpp"
 
 void ZArguments::initialize_platform() {
-#ifdef COMPILER2
-  // The C2 barrier slow path expects vector registers to be least
-  // 16 bytes wide, which is the minimum width available on all
-  // x86-64 systems. However, the user could have speficied a lower
-  // number on the command-line, in which case we print a warning
-  // and raise it to 16.
-  if (MaxVectorSize < 16) {
-    warning("ZGC requires MaxVectorSize to be at least 16");
-    FLAG_SET_DEFAULT(MaxVectorSize, 16);
-  }
-#endif
+  // Does nothing
 }
--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -24,22 +24,22 @@
 #include "precompiled.hpp"
 #include "asm/macroAssembler.inline.hpp"
 #include "code/codeBlob.hpp"
+#include "code/vmreg.inline.hpp"
 #include "gc/z/zBarrier.inline.hpp"
 #include "gc/z/zBarrierSet.hpp"
 #include "gc/z/zBarrierSetAssembler.hpp"
 #include "gc/z/zBarrierSetRuntime.hpp"
 #include "memory/resourceArea.hpp"
-#include "runtime/stubCodeGenerator.hpp"
+#include "runtime/sharedRuntime.hpp"
 #include "utilities/macros.hpp"
 #ifdef COMPILER1
 #include "c1/c1_LIRAssembler.hpp"
 #include "c1/c1_MacroAssembler.hpp"
 #include "gc/z/c1/zBarrierSetC1.hpp"
 #endif // COMPILER1
-
-ZBarrierSetAssembler::ZBarrierSetAssembler() :
-    _load_barrier_slow_stub(),
-    _load_barrier_weak_slow_stub() {}
+#ifdef COMPILER2
+#include "gc/z/c2/zBarrierSetC2.hpp"
+#endif // COMPILER2
 
 #ifdef PRODUCT
 #define BLOCK_COMMENT(str) /* nothing */
@@ -346,137 +346,327 @@
 
 #endif // COMPILER1
 
-#undef __
-#define __ cgen->assembler()->
+#ifdef COMPILER2
 
-// Generates a register specific stub for calling
-// ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded() or
-// ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded().
-//
-// The raddr register serves as both input and output for this stub. When the stub is
-// called the raddr register contains the object field address (oop*) where the bad oop
-// was loaded from, which caused the slow path to be taken. On return from the stub the
-// raddr register contains the good/healed oop returned from
-// ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded() or
-// ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded().
-static address generate_load_barrier_stub(StubCodeGenerator* cgen, Register raddr, DecoratorSet decorators) {
-  // Don't generate stub for invalid registers
-  if (raddr == rsp || raddr == r15) {
-    return NULL;
+OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
+  if (!OptoReg::is_reg(opto_reg)) {
+    return OptoReg::Bad;
   }
 
-  // Create stub name
-  char name[64];
-  const bool weak = (decorators & ON_WEAK_OOP_REF) != 0;
-  os::snprintf(name, sizeof(name), "zgc_load_barrier%s_stub_%s", weak ? "_weak" : "", raddr->name());
-
-  __ align(CodeEntryAlignment);
-  StubCodeMark mark(cgen, "StubRoutines", os::strdup(name, mtCode));
-  address start = __ pc();
-
-  // Save live registers
-  if (raddr != rax) {
-    __ push(rax);
-  }
-  if (raddr != rcx) {
-    __ push(rcx);
-  }
-  if (raddr != rdx) {
-    __ push(rdx);
-  }
-  if (raddr != rsi) {
-    __ push(rsi);
-  }
-  if (raddr != rdi) {
-    __ push(rdi);
-  }
-  if (raddr != r8) {
-    __ push(r8);
-  }
-  if (raddr != r9) {
-    __ push(r9);
-  }
-  if (raddr != r10) {
-    __ push(r10);
-  }
-  if (raddr != r11) {
-    __ push(r11);
+  const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
+  if (vm_reg->is_XMMRegister()) {
+    opto_reg &= ~15;
+    switch (node->ideal_reg()) {
+      case Op_VecX:
+        opto_reg |= 2;
+        break;
+      case Op_VecY:
+        opto_reg |= 4;
+        break;
+      case Op_VecZ:
+        opto_reg |= 8;
+        break;
+      default:
+        opto_reg |= 1;
+        break;
+    }
   }
 
-  // Setup arguments
-  if (raddr != c_rarg1) {
-    __ movq(c_rarg1, raddr);
-  }
-  __ movq(c_rarg0, Address(raddr, 0));
+  return opto_reg;
+}
 
-  // Call barrier function
-  __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), c_rarg0, c_rarg1);
+// We use the vec_spill_helper from the x86.ad file to avoid reinventing this wheel
+extern int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
+                            int stack_offset, int reg, uint ireg, outputStream* st);
 
-  // Move result returned in rax to raddr, if needed
-  if (raddr != rax) {
-    __ movq(raddr, rax);
+#undef __
+#define __ _masm->
+
+class ZSaveLiveRegisters {
+private:
+  struct XMMRegisterData {
+    XMMRegister _reg;
+    int         _size;
+
+    // Used by GrowableArray::find()
+    bool operator == (const XMMRegisterData& other) {
+      return _reg == other._reg;
+    }
+  };
+
+  MacroAssembler* const          _masm;
+  GrowableArray<Register>        _gp_registers;
+  GrowableArray<XMMRegisterData> _xmm_registers;
+  int                            _spill_size;
+  int                            _spill_offset;
+
+  static int xmm_compare_register_size(XMMRegisterData* left, XMMRegisterData* right) {
+    if (left->_size == right->_size) {
+      return 0;
+    }
+
+    return (left->_size < right->_size) ? -1 : 1;
   }
 
-  // Restore saved registers
-  if (raddr != r11) {
-    __ pop(r11);
-  }
-  if (raddr != r10) {
-    __ pop(r10);
-  }
-  if (raddr != r9) {
-    __ pop(r9);
-  }
-  if (raddr != r8) {
-    __ pop(r8);
-  }
-  if (raddr != rdi) {
-    __ pop(rdi);
-  }
-  if (raddr != rsi) {
-    __ pop(rsi);
-  }
-  if (raddr != rdx) {
-    __ pop(rdx);
-  }
-  if (raddr != rcx) {
-    __ pop(rcx);
-  }
-  if (raddr != rax) {
-    __ pop(rax);
+  static int xmm_slot_size(OptoReg::Name opto_reg) {
+    // The low order 4 bytes denote what size of the XMM register is live
+    return (opto_reg & 15) << 3;
   }
 
-  __ ret(0);
+  static uint xmm_ideal_reg_for_size(int reg_size) {
+    switch (reg_size) {
+    case 8:
+      return Op_VecD;
+    case 16:
+      return Op_VecX;
+    case 32:
+      return Op_VecY;
+    case 64:
+      return Op_VecZ;
+    default:
+      fatal("Invalid register size %d", reg_size);
+      return 0;
+    }
+  }
 
-  return start;
+  bool xmm_needs_vzeroupper() const {
+    return _xmm_registers.is_nonempty() && _xmm_registers.at(0)._size > 16;
+  }
+
+  void xmm_register_save(const XMMRegisterData& reg_data) {
+    const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
+    const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
+    _spill_offset -= reg_data._size;
+    vec_spill_helper(__ code(), false /* do_size */, false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
+  }
+
+  void xmm_register_restore(const XMMRegisterData& reg_data) {
+    const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
+    const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
+    vec_spill_helper(__ code(), false /* do_size */, true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
+    _spill_offset += reg_data._size;
+  }
+
+  void gp_register_save(Register reg) {
+    _spill_offset -= 8;
+    __ movq(Address(rsp, _spill_offset), reg);
+  }
+
+  void gp_register_restore(Register reg) {
+    __ movq(reg, Address(rsp, _spill_offset));
+    _spill_offset += 8;
+  }
+
+  void initialize(ZLoadBarrierStubC2* stub) {
+    // Create mask of caller saved registers that need to
+    // be saved/restored if live
+    RegMask caller_saved;
+    caller_saved.Insert(OptoReg::as_OptoReg(rax->as_VMReg()));
+    caller_saved.Insert(OptoReg::as_OptoReg(rcx->as_VMReg()));
+    caller_saved.Insert(OptoReg::as_OptoReg(rdx->as_VMReg()));
+    caller_saved.Insert(OptoReg::as_OptoReg(rsi->as_VMReg()));
+    caller_saved.Insert(OptoReg::as_OptoReg(rdi->as_VMReg()));
+    caller_saved.Insert(OptoReg::as_OptoReg(r8->as_VMReg()));
+    caller_saved.Insert(OptoReg::as_OptoReg(r9->as_VMReg()));
+    caller_saved.Insert(OptoReg::as_OptoReg(r10->as_VMReg()));
+    caller_saved.Insert(OptoReg::as_OptoReg(r11->as_VMReg()));
+    caller_saved.Remove(OptoReg::as_OptoReg(stub->ref()->as_VMReg()));
+
+    // Create mask of live registers
+    RegMask live = stub->live();
+    if (stub->tmp() != noreg) {
+      live.Insert(OptoReg::as_OptoReg(stub->tmp()->as_VMReg()));
+    }
+
+    int gp_spill_size = 0;
+    int xmm_spill_size = 0;
+
+    // Record registers that needs to be saved/restored
+    while (live.is_NotEmpty()) {
+      const OptoReg::Name opto_reg = live.find_first_elem();
+      const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
+
+      live.Remove(opto_reg);
+
+      if (vm_reg->is_Register()) {
+        if (caller_saved.Member(opto_reg)) {
+          _gp_registers.append(vm_reg->as_Register());
+          gp_spill_size += 8;
+        }
+      } else if (vm_reg->is_XMMRegister()) {
+        // We encode in the low order 4 bits of the opto_reg, how large part of the register is live
+        const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~15);
+        const int reg_size = xmm_slot_size(opto_reg);
+        const XMMRegisterData reg_data = { vm_reg_base->as_XMMRegister(), reg_size };
+        const int reg_index = _xmm_registers.find(reg_data);
+        if (reg_index == -1) {
+          // Not previously appended
+          _xmm_registers.append(reg_data);
+          xmm_spill_size += reg_size;
+        } else {
+          // Previously appended, update size
+          const int reg_size_prev = _xmm_registers.at(reg_index)._size;
+          if (reg_size > reg_size_prev) {
+            _xmm_registers.at_put(reg_index, reg_data);
+            xmm_spill_size += reg_size - reg_size_prev;
+          }
+        }
+      } else {
+        fatal("Unexpected register type");
+      }
+    }
+
+    // Sort by size, largest first
+    _xmm_registers.sort(xmm_compare_register_size);
+
+    // Stack pointer must be 16 bytes aligned for the call
+    _spill_offset = _spill_size = align_up(xmm_spill_size + gp_spill_size, 16);
+  }
+
+public:
+  ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
+      _masm(masm),
+      _gp_registers(),
+      _xmm_registers(),
+      _spill_size(0),
+      _spill_offset(0) {
+
+    //
+    // Stack layout after registers have been spilled:
+    //
+    // | ...            | original rsp, 16 bytes aligned
+    // ------------------
+    // | zmm0 high      |
+    // | ...            |
+    // | zmm0 low       | 16 bytes aligned
+    // | ...            |
+    // | ymm1 high      |
+    // | ...            |
+    // | ymm1 low       | 16 bytes aligned
+    // | ...            |
+    // | xmmN high      |
+    // | ...            |
+    // | xmmN low       | 8 bytes aligned
+    // | reg0           | 8 bytes aligned
+    // | reg1           |
+    // | ...            |
+    // | regN           | new rsp, if 16 bytes aligned
+    // | <padding>      | else new rsp, 16 bytes aligned
+    // ------------------
+    //
+
+    // Figure out what registers to save/restore
+    initialize(stub);
+
+    // Allocate stack space
+    if (_spill_size > 0) {
+      __ subptr(rsp, _spill_size);
+    }
+
+    // Save XMM/YMM/ZMM registers
+    for (int i = 0; i < _xmm_registers.length(); i++) {
+      xmm_register_save(_xmm_registers.at(i));
+    }
+
+    if (xmm_needs_vzeroupper()) {
+      __ vzeroupper();
+    }
+
+    // Save general purpose registers
+    for (int i = 0; i < _gp_registers.length(); i++) {
+      gp_register_save(_gp_registers.at(i));
+    }
+  }
+
+  ~ZSaveLiveRegisters() {
+    // Restore general purpose registers
+    for (int i = _gp_registers.length() - 1; i >= 0; i--) {
+      gp_register_restore(_gp_registers.at(i));
+    }
+
+    __ vzeroupper();
+
+    // Restore XMM/YMM/ZMM registers
+    for (int i = _xmm_registers.length() - 1; i >= 0; i--) {
+      xmm_register_restore(_xmm_registers.at(i));
+    }
+
+    // Free stack space
+    if (_spill_size > 0) {
+      __ addptr(rsp, _spill_size);
+    }
+  }
+};
+
+class ZSetupArguments {
+private:
+  MacroAssembler* const _masm;
+  const Register        _ref;
+  const Address         _ref_addr;
+
+public:
+  ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
+      _masm(masm),
+      _ref(stub->ref()),
+      _ref_addr(stub->ref_addr()) {
+
+    // Setup arguments
+    if (_ref_addr.base() == noreg) {
+      // No self healing
+      if (_ref != c_rarg0) {
+        __ movq(c_rarg0, _ref);
+      }
+      __ xorq(c_rarg1, c_rarg1);
+    } else {
+      // Self healing
+      if (_ref == c_rarg0) {
+        __ lea(c_rarg1, _ref_addr);
+      } else if (_ref != c_rarg1) {
+        __ lea(c_rarg1, _ref_addr);
+        __ movq(c_rarg0, _ref);
+      } else if (_ref_addr.base() != c_rarg0 && _ref_addr.index() != c_rarg0) {
+        __ movq(c_rarg0, _ref);
+        __ lea(c_rarg1, _ref_addr);
+      } else {
+        __ xchgq(c_rarg0, c_rarg1);
+        if (_ref_addr.base() == c_rarg0) {
+          __ lea(c_rarg1, Address(c_rarg1, _ref_addr.index(), _ref_addr.scale(), _ref_addr.disp()));
+        } else if (_ref_addr.index() == c_rarg0) {
+          __ lea(c_rarg1, Address(_ref_addr.base(), c_rarg1, _ref_addr.scale(), _ref_addr.disp()));
+        } else {
+          ShouldNotReachHere();
+        }
+      }
+    }
+  }
+
+  ~ZSetupArguments() {
+    // Transfer result
+    if (_ref != rax) {
+      __ movq(_ref, rax);
+    }
+  }
+};
+
+#undef __
+#define __ masm->
+
+void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const {
+  BLOCK_COMMENT("ZLoadBarrierStubC2");
+
+  // Stub entry
+  __ bind(*stub->entry());
+
+  {
+    ZSaveLiveRegisters save_live_registers(masm, stub);
+    ZSetupArguments setup_arguments(masm, stub);
+    __ call(RuntimeAddress(stub->slow_path()));
+  }
+
+  // Stub exit
+  __ jmp(*stub->continuation());
 }
 
 #undef __
 
-static void barrier_stubs_init_inner(const char* label, const DecoratorSet decorators, address* stub) {
-  const int nregs = RegisterImpl::number_of_registers;
-  const int code_size = nregs * 128; // Rough estimate of code size
-
-  ResourceMark rm;
-
-  CodeBuffer buf(BufferBlob::create(label, code_size));
-  StubCodeGenerator cgen(&buf);
-
-  for (int i = 0; i < nregs; i++) {
-    const Register reg = as_Register(i);
-    stub[i] = generate_load_barrier_stub(&cgen, reg, decorators);
-  }
-}
-
-void ZBarrierSetAssembler::barrier_stubs_init() {
-  barrier_stubs_init_inner("zgc_load_barrier_stubs", ON_STRONG_OOP_REF, _load_barrier_slow_stub);
-  barrier_stubs_init_inner("zgc_load_barrier_weak_stubs", ON_WEAK_OOP_REF, _load_barrier_weak_slow_stub);
-}
-
-address ZBarrierSetAssembler::load_barrier_slow_stub(Register reg) {
-  return _load_barrier_slow_stub[reg->encoding()];
-}
-
-address ZBarrierSetAssembler::load_barrier_weak_slow_stub(Register reg) {
-  return _load_barrier_weak_slow_stub[reg->encoding()];
-}
+#endif // COMPILER2
--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -24,6 +24,14 @@
 #ifndef CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP
 #define CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP
 
+#include "code/vmreg.hpp"
+#include "oops/accessDecorators.hpp"
+#ifdef COMPILER2
+#include "opto/optoreg.hpp"
+#endif // COMPILER2
+
+class MacroAssembler;
+
 #ifdef COMPILER1
 class LIR_Assembler;
 class LIR_OprDesc;
@@ -32,14 +40,13 @@
 class ZLoadBarrierStubC1;
 #endif // COMPILER1
 
+#ifdef COMPILER2
+class Node;
+class ZLoadBarrierStubC2;
+#endif // COMPILER2
+
 class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase {
-private:
-  address _load_barrier_slow_stub[RegisterImpl::number_of_registers];
-  address _load_barrier_weak_slow_stub[RegisterImpl::number_of_registers];
-
 public:
-  ZBarrierSetAssembler();
-
   virtual void load_at(MacroAssembler* masm,
                        DecoratorSet decorators,
                        BasicType type,
@@ -83,10 +90,13 @@
                                              DecoratorSet decorators) const;
 #endif // COMPILER1
 
-  virtual void barrier_stubs_init();
+#ifdef COMPILER2
+  OptoReg::Name refine_register(const Node* node,
+                                OptoReg::Name opto_reg);
 
-  address load_barrier_slow_stub(Register reg);
-  address load_barrier_weak_slow_stub(Register reg);
+  void generate_c2_load_barrier_stub(MacroAssembler* masm,
+                                     ZLoadBarrierStubC2* stub) const;
+#endif // COMPILER2
 };
 
 #endif // CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP
--- a/src/hotspot/cpu/x86/gc/z/zGlobals_x86.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/x86/gc/z/zGlobals_x86.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -40,7 +40,7 @@
 //  +--------------------------------+ 0x0000014000000000 (20TB)
 //  |         Remapped View          |
 //  +--------------------------------+ 0x0000010000000000 (16TB)
-//  |     (Reserved, but unused)     |
+//  .                                .
 //  +--------------------------------+ 0x00000c0000000000 (12TB)
 //  |         Marked1 View           |
 //  +--------------------------------+ 0x0000080000000000 (8TB)
@@ -75,7 +75,7 @@
 //  +--------------------------------+ 0x0000280000000000 (40TB)
 //  |         Remapped View          |
 //  +--------------------------------+ 0x0000200000000000 (32TB)
-//  |     (Reserved, but unused)     |
+//  .                                .
 //  +--------------------------------+ 0x0000180000000000 (24TB)
 //  |         Marked1 View           |
 //  +--------------------------------+ 0x0000100000000000 (16TB)
@@ -110,7 +110,7 @@
 //  +--------------------------------+ 0x0000500000000000 (80TB)
 //  |         Remapped View          |
 //  +--------------------------------+ 0x0000400000000000 (64TB)
-//  |     (Reserved, but unused)     |
+//  .                                .
 //  +--------------------------------+ 0x0000300000000000 (48TB)
 //  |         Marked1 View           |
 //  +--------------------------------+ 0x0000200000000000 (32TB)
--- a/src/hotspot/cpu/x86/gc/z/zGlobals_x86.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/x86/gc/z/zGlobals_x86.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -36,7 +36,6 @@
 //  ------------------------------------------------------------------
 //
 const size_t ZPlatformGranuleSizeShift      = 21; // 2MB
-const size_t ZPlatformMaxHeapSizeShift      = 46; // 16TB
 const size_t ZPlatformNMethodDisarmedOffset = 4;
 const size_t ZPlatformCacheLineSize         = 64;
 
--- a/src/hotspot/cpu/x86/gc/z/z_x86_64.ad	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/x86/gc/z/z_x86_64.ad	Fri Oct 11 10:39:58 2019 +0200
@@ -24,190 +24,144 @@
 source_hpp %{
 
 #include "gc/z/c2/zBarrierSetC2.hpp"
+#include "gc/z/zThreadLocalData.hpp"
 
 %}
 
 source %{
 
-#include "gc/z/zBarrierSetAssembler.hpp"
+static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) {
+  ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak);
+  __ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
+  __ jcc(Assembler::notZero, *stub->entry());
+  __ bind(*stub->continuation());
+}
 
-static void z_load_barrier_slow_reg(MacroAssembler& _masm, Register dst, Address src, bool weak) {
-  assert(dst != rsp, "Invalid register");
-  assert(dst != r15, "Invalid register");
-
-  const address stub = weak ? ZBarrierSet::assembler()->load_barrier_weak_slow_stub(dst)
-                            : ZBarrierSet::assembler()->load_barrier_slow_stub(dst);
-  __ lea(dst, src);
-  __ call(RuntimeAddress(stub));
+static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
+  ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */);
+  __ jmp(*stub->entry());
+  __ bind(*stub->continuation());
 }
 
 %}
 
-// For XMM and YMM enabled processors
-instruct zLoadBarrierSlowRegXmmAndYmm(rRegP dst, memory src, rFlagsReg cr,
-                                      rxmm0 x0, rxmm1 x1, rxmm2 x2, rxmm3 x3,
-                                      rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7,
-                                      rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
-                                      rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{
-  match(Set dst (LoadBarrierSlowReg src dst));
-  predicate(UseAVX <= 2 && !n->as_LoadBarrierSlowReg()->is_weak());
+// Load Pointer
+instruct zLoadP(rRegP dst, memory mem, rFlagsReg cr)
+%{
+  predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierStrong);
+  match(Set dst (LoadP mem));
+  effect(KILL cr, TEMP dst);
 
-  effect(KILL cr,
-         KILL x0, KILL x1, KILL x2, KILL x3,
-         KILL x4, KILL x5, KILL x6, KILL x7,
-         KILL x8, KILL x9, KILL x10, KILL x11,
-         KILL x12, KILL x13, KILL x14, KILL x15);
+  ins_cost(125);
 
-  format %{ "lea $dst, $src\n\t"
-            "call #ZLoadBarrierSlowPath" %}
+  format %{ "movq     $dst, $mem" %}
 
   ins_encode %{
-    z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$Address, false /* weak */);
+    __ movptr($dst$$Register, $mem$$Address);
+    if (barrier_data() != ZLoadBarrierElided) {
+      z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, false /* weak */);
+    }
   %}
-  ins_pipe(pipe_slow);
+
+  ins_pipe(ialu_reg_mem);
 %}
 
-// For ZMM enabled processors
-instruct zLoadBarrierSlowRegZmm(rRegP dst, memory src, rFlagsReg cr,
-                                rxmm0 x0, rxmm1 x1, rxmm2 x2, rxmm3 x3,
-                                rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7,
-                                rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
-                                rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15,
-                                rxmm16 x16, rxmm17 x17, rxmm18 x18, rxmm19 x19,
-                                rxmm20 x20, rxmm21 x21, rxmm22 x22, rxmm23 x23,
-                                rxmm24 x24, rxmm25 x25, rxmm26 x26, rxmm27 x27,
-                                rxmm28 x28, rxmm29 x29, rxmm30 x30, rxmm31 x31) %{
+// Load Weak Pointer
+instruct zLoadWeakP(rRegP dst, memory mem, rFlagsReg cr)
+%{
+  predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierWeak);
+  match(Set dst (LoadP mem));
+  effect(KILL cr, TEMP dst);
 
-  match(Set dst (LoadBarrierSlowReg src dst));
-  predicate(UseAVX == 3 && !n->as_LoadBarrierSlowReg()->is_weak());
+  ins_cost(125);
 
-  effect(KILL cr,
-         KILL x0, KILL x1, KILL x2, KILL x3,
-         KILL x4, KILL x5, KILL x6, KILL x7,
-         KILL x8, KILL x9, KILL x10, KILL x11,
-         KILL x12, KILL x13, KILL x14, KILL x15,
-         KILL x16, KILL x17, KILL x18, KILL x19,
-         KILL x20, KILL x21, KILL x22, KILL x23,
-         KILL x24, KILL x25, KILL x26, KILL x27,
-         KILL x28, KILL x29, KILL x30, KILL x31);
-
-  format %{ "lea $dst, $src\n\t"
-            "call #ZLoadBarrierSlowPath" %}
+  format %{ "movq     $dst, $mem" %}
 
   ins_encode %{
-    z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$Address, false /* weak */);
+    __ movptr($dst$$Register, $mem$$Address);
+    z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, true /* weak */);
   %}
-  ins_pipe(pipe_slow);
+
+  ins_pipe(ialu_reg_mem);
 %}
 
-// For XMM and YMM enabled processors
-instruct zLoadBarrierWeakSlowRegXmmAndYmm(rRegP dst, memory src, rFlagsReg cr,
-                                          rxmm0 x0, rxmm1 x1, rxmm2 x2, rxmm3 x3,
-                                          rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7,
-                                          rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
-                                          rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{
-  match(Set dst (LoadBarrierSlowReg src dst));
-  predicate(UseAVX <= 2 && n->as_LoadBarrierSlowReg()->is_weak());
+instruct zCompareAndExchangeP(memory mem, rax_RegP oldval, rRegP newval, rRegP tmp, rFlagsReg cr) %{
+  match(Set oldval (CompareAndExchangeP mem (Binary oldval newval)));
+  predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
+  effect(KILL cr, TEMP tmp);
 
-  effect(KILL cr,
-         KILL x0, KILL x1, KILL x2, KILL x3,
-         KILL x4, KILL x5, KILL x6, KILL x7,
-         KILL x8, KILL x9, KILL x10, KILL x11,
-         KILL x12, KILL x13, KILL x14, KILL x15);
-
-  format %{ "lea $dst, $src\n\t"
-            "call #ZLoadBarrierSlowPath" %}
+  format %{ "lock\n\t"
+            "cmpxchgq $newval, $mem" %}
 
   ins_encode %{
-    z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$Address, true /* weak */);
+    if (barrier_data() != ZLoadBarrierElided) {
+      __ movptr($tmp$$Register, $oldval$$Register);
+    }
+    __ lock();
+    __ cmpxchgptr($newval$$Register, $mem$$Address);
+    if (barrier_data() != ZLoadBarrierElided) {
+      Label good;
+      __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
+      __ jcc(Assembler::zero, good);
+      z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register);
+      __ movptr($oldval$$Register, $tmp$$Register);
+      __ lock();
+      __ cmpxchgptr($newval$$Register, $mem$$Address);
+      __ bind(good);
+    }
   %}
-  ins_pipe(pipe_slow);
+
+  ins_pipe(pipe_cmpxchg);
 %}
 
-// For ZMM enabled processors
-instruct zLoadBarrierWeakSlowRegZmm(rRegP dst, memory src, rFlagsReg cr,
-                                    rxmm0 x0, rxmm1 x1, rxmm2 x2, rxmm3 x3,
-                                    rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7,
-                                    rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
-                                    rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15,
-                                    rxmm16 x16, rxmm17 x17, rxmm18 x18, rxmm19 x19,
-                                    rxmm20 x20, rxmm21 x21, rxmm22 x22, rxmm23 x23,
-                                    rxmm24 x24, rxmm25 x25, rxmm26 x26, rxmm27 x27,
-                                    rxmm28 x28, rxmm29 x29, rxmm30 x30, rxmm31 x31) %{
+instruct zCompareAndSwapP(rRegI res, memory mem, rRegP newval, rRegP tmp, rFlagsReg cr, rax_RegP oldval) %{
+  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+  predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
+  effect(KILL cr, KILL oldval, TEMP tmp);
 
-  match(Set dst (LoadBarrierSlowReg src dst));
-  predicate(UseAVX == 3 && n->as_LoadBarrierSlowReg()->is_weak());
-
-  effect(KILL cr,
-         KILL x0, KILL x1, KILL x2, KILL x3,
-         KILL x4, KILL x5, KILL x6, KILL x7,
-         KILL x8, KILL x9, KILL x10, KILL x11,
-         KILL x12, KILL x13, KILL x14, KILL x15,
-         KILL x16, KILL x17, KILL x18, KILL x19,
-         KILL x20, KILL x21, KILL x22, KILL x23,
-         KILL x24, KILL x25, KILL x26, KILL x27,
-         KILL x28, KILL x29, KILL x30, KILL x31);
-
-  format %{ "lea $dst, $src\n\t"
-            "call #ZLoadBarrierSlowPath" %}
+  format %{ "lock\n\t"
+            "cmpxchgq $newval, $mem\n\t"
+            "sete     $res\n\t"
+            "movzbl   $res, $res" %}
 
   ins_encode %{
-    z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$Address, true /* weak */);
+    if (barrier_data() != ZLoadBarrierElided) {
+      __ movptr($tmp$$Register, $oldval$$Register);
+    }
+    __ lock();
+    __ cmpxchgptr($newval$$Register, $mem$$Address);
+    if (barrier_data() != ZLoadBarrierElided) {
+      Label good;
+      __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
+      __ jcc(Assembler::zero, good);
+      z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register);
+      __ movptr($oldval$$Register, $tmp$$Register);
+      __ lock();
+      __ cmpxchgptr($newval$$Register, $mem$$Address);
+      __ bind(good);
+      __ cmpptr($tmp$$Register, $oldval$$Register);
+    }
+    __ setb(Assembler::equal, $res$$Register);
+    __ movzbl($res$$Register, $res$$Register);
   %}
-  ins_pipe(pipe_slow);
+
+  ins_pipe(pipe_cmpxchg);
 %}
 
-// Specialized versions of compareAndExchangeP that adds a keepalive that is consumed
-// but doesn't affect output.
+instruct zXChgP(memory mem, rRegP newval, rFlagsReg cr) %{
+  match(Set newval (GetAndSetP mem newval));
+  predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
+  effect(KILL cr);
 
-instruct z_compareAndExchangeP(
-        memory mem_ptr,
-        rax_RegP oldval, rRegP newval, rRegP keepalive,
-        rFlagsReg cr) %{
-    predicate(VM_Version::supports_cx8());
-    match(Set oldval (ZCompareAndExchangeP (Binary mem_ptr keepalive) (Binary oldval newval)));
-    effect(KILL cr);
+  format %{ "xchgq    $newval, $mem" %}
 
-    format %{ "cmpxchgq $mem_ptr,$newval\t# "
-              "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
-    opcode(0x0F, 0xB1);
-    ins_encode(lock_prefix,
-            REX_reg_mem_wide(newval, mem_ptr),
-            OpcP, OpcS,
-            reg_mem(newval, mem_ptr)  // lock cmpxchg
-    );
-    ins_pipe( pipe_cmpxchg );
+  ins_encode %{
+    __ xchgptr($newval$$Register, $mem$$Address);
+    if (barrier_data() != ZLoadBarrierElided) {
+      z_load_barrier(_masm, this, Address(noreg, 0), $newval$$Register, noreg /* tmp */, false /* weak */);
+    }
+  %}
+
+  ins_pipe(pipe_cmpxchg);
 %}
-
-instruct z_compareAndSwapP(rRegI res,
-                         memory mem_ptr,
-                         rax_RegP oldval, rRegP newval, rRegP keepalive,
-                         rFlagsReg cr) %{
-  predicate(VM_Version::supports_cx8());
-  match(Set res (ZCompareAndSwapP (Binary mem_ptr keepalive) (Binary oldval newval)));
-  match(Set res (ZWeakCompareAndSwapP (Binary mem_ptr keepalive) (Binary oldval newval)));
-  effect(KILL cr, KILL oldval);
-
-  format %{ "cmpxchgq $mem_ptr,$newval\t# "
-            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
-            "sete    $res\n\t"
-            "movzbl  $res, $res" %}
-  opcode(0x0F, 0xB1);
-  ins_encode(lock_prefix,
-          REX_reg_mem_wide(newval, mem_ptr),
-          OpcP, OpcS,
-          reg_mem(newval, mem_ptr),
-          REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
-          REX_reg_breg(res, res), // movzbl
-          Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
-  ins_pipe( pipe_cmpxchg );
-%}
-
-instruct z_xchgP( memory mem, rRegP newval, rRegP keepalive) %{
-  match(Set newval (ZGetAndSetP mem (Binary newval keepalive)));
-  format %{ "XCHGQ  $newval,[$mem]" %}
-  ins_encode %{
-    __ xchgq($newval$$Register, $mem$$Address);
-  %}
-  ins_pipe( pipe_cmpxchg );
-%}
--- a/src/hotspot/cpu/x86/globals_x86.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/x86/globals_x86.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -214,5 +214,15 @@
           "Use BMI2 instructions")                                          \
                                                                             \
   diagnostic(bool, UseLibmIntrinsic, true,                                  \
-          "Use Libm Intrinsics")
+          "Use Libm Intrinsics")                                            \
+                                                                            \
+  /* Minimum array size in bytes to use AVX512 intrinsics */                \
+  /* for copy, inflate and fill which don't bail out early based on any */  \
+  /* condition. When this value is set to zero compare operations like */   \
+  /* compare, vectorizedMismatch, compress can also use AVX512 intrinsics.*/\
+  diagnostic(int, AVX3Threshold, 4096,                                      \
+             "Minimum array size in bytes to use AVX512 intrinsics"         \
+             "for copy, inflate and fill. When this value is set as zero"   \
+             "compare operations can also use AVX512 intrinsics.")          \
+          range(0, max_jint)
 #endif // CPU_X86_GLOBALS_X86_HPP
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -7061,7 +7061,7 @@
     bind(COMPARE_WIDE_VECTORS_LOOP);
 
 #ifdef _LP64
-    if (VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop
+    if ((AVX3Threshold == 0) && VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop
       cmpl(cnt2, stride2x2);
       jccb(Assembler::below, COMPARE_WIDE_VECTORS_LOOP_AVX2);
       testl(cnt2, stride2x2-1);   // cnt2 holds the vector count
@@ -7321,7 +7321,7 @@
   testl(len, len);
   jcc(Assembler::zero, FALSE_LABEL);
 
-  if ((UseAVX > 2) && // AVX512
+  if ((AVX3Threshold == 0) && (UseAVX > 2) && // AVX512
     VM_Version::supports_avx512vlbw() &&
     VM_Version::supports_bmi2()) {
 
@@ -7394,7 +7394,7 @@
   } else {
     movl(result, len); // copy
 
-    if (UseAVX == 2 && UseSSE >= 2) {
+    if (UseAVX >= 2 && UseSSE >= 2) {
       // With AVX2, use 32-byte vector compare
       Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
 
@@ -7567,14 +7567,12 @@
     lea(ary2, Address(ary2, limit, Address::times_1));
     negptr(limit);
 
-    bind(COMPARE_WIDE_VECTORS);
-
 #ifdef _LP64
-    if (VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop
+    if ((AVX3Threshold == 0) && VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop
       Label COMPARE_WIDE_VECTORS_LOOP_AVX2, COMPARE_WIDE_VECTORS_LOOP_AVX3;
 
       cmpl(limit, -64);
-      jccb(Assembler::greater, COMPARE_WIDE_VECTORS_LOOP_AVX2);
+      jcc(Assembler::greater, COMPARE_WIDE_VECTORS_LOOP_AVX2);
 
       bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop
 
@@ -7607,7 +7605,7 @@
 
     }//if (VM_Version::supports_avx512vlbw())
 #endif //_LP64
-
+    bind(COMPARE_WIDE_VECTORS);
     vmovdqu(vec1, Address(ary1, limit, Address::times_1));
     vmovdqu(vec2, Address(ary2, limit, Address::times_1));
     vpxor(vec1, vec2);
@@ -7833,32 +7831,33 @@
       assert( UseSSE >= 2, "supported cpu only" );
       Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
       movdl(xtmp, value);
-      if (UseAVX > 2 && UseUnalignedLoadStores) {
+      if (UseAVX >= 2 && UseUnalignedLoadStores) {
+        Label L_check_fill_32_bytes;
+        if (UseAVX > 2) {
+          // Fill 64-byte chunks
+          Label L_fill_64_bytes_loop_avx3, L_check_fill_64_bytes_avx2;
+
+          // If number of bytes to fill < AVX3Threshold, perform fill using AVX2
+          cmpl(count, AVX3Threshold);
+          jccb(Assembler::below, L_check_fill_64_bytes_avx2);
+
+          vpbroadcastd(xtmp, xtmp, Assembler::AVX_512bit);
+
+          subl(count, 16 << shift);
+          jccb(Assembler::less, L_check_fill_32_bytes);
+          align(16);
+
+          BIND(L_fill_64_bytes_loop_avx3);
+          evmovdqul(Address(to, 0), xtmp, Assembler::AVX_512bit);
+          addptr(to, 64);
+          subl(count, 16 << shift);
+          jcc(Assembler::greaterEqual, L_fill_64_bytes_loop_avx3);
+          jmpb(L_check_fill_32_bytes);
+
+          BIND(L_check_fill_64_bytes_avx2);
+        }
         // Fill 64-byte chunks
-        Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
-        vpbroadcastd(xtmp, xtmp, Assembler::AVX_512bit);
-
-        subl(count, 16 << shift);
-        jcc(Assembler::less, L_check_fill_32_bytes);
-        align(16);
-
-        BIND(L_fill_64_bytes_loop);
-        evmovdqul(Address(to, 0), xtmp, Assembler::AVX_512bit);
-        addptr(to, 64);
-        subl(count, 16 << shift);
-        jcc(Assembler::greaterEqual, L_fill_64_bytes_loop);
-
-        BIND(L_check_fill_32_bytes);
-        addl(count, 8 << shift);
-        jccb(Assembler::less, L_check_fill_8_bytes);
-        vmovdqu(Address(to, 0), xtmp);
-        addptr(to, 32);
-        subl(count, 8 << shift);
-
-        BIND(L_check_fill_8_bytes);
-      } else if (UseAVX == 2 && UseUnalignedLoadStores) {
-        // Fill 64-byte chunks
-        Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
+        Label L_fill_64_bytes_loop;
         vpbroadcastd(xtmp, xtmp, Assembler::AVX_256bit);
 
         subl(count, 16 << shift);
@@ -8572,12 +8571,13 @@
   shlq(length);
   xorq(result, result);
 
-  if ((UseAVX > 2) &&
+  if ((AVX3Threshold == 0) && (UseAVX > 2) &&
       VM_Version::supports_avx512vlbw()) {
     Label VECTOR64_LOOP, VECTOR64_NOT_EQUAL, VECTOR32_TAIL;
 
     cmpq(length, 64);
     jcc(Assembler::less, VECTOR32_TAIL);
+
     movq(tmp1, length);
     andq(tmp1, 0x3F);      // tail count
     andq(length, ~(0x3F)); //vector count
@@ -10034,7 +10034,7 @@
   // save length for return
   push(len);
 
-  if ((UseAVX > 2) && // AVX512
+  if ((AVX3Threshold == 0) && (UseAVX > 2) && // AVX512
     VM_Version::supports_avx512vlbw() &&
     VM_Version::supports_bmi2()) {
 
@@ -10226,7 +10226,7 @@
 //   }
 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
   XMMRegister tmp1, Register tmp2) {
-  Label copy_chars_loop, done, below_threshold;
+  Label copy_chars_loop, done, below_threshold, avx3_threshold;
   // rsi: src
   // rdi: dst
   // rdx: len
@@ -10236,7 +10236,7 @@
   // rdi holds start addr of destination char[]
   // rdx holds length
   assert_different_registers(src, dst, len, tmp2);
-
+  movl(tmp2, len);
   if ((UseAVX > 2) && // AVX512
     VM_Version::supports_avx512vlbw() &&
     VM_Version::supports_bmi2()) {
@@ -10248,9 +10248,11 @@
     testl(len, -16);
     jcc(Assembler::zero, below_threshold);
 
+    testl(len, -1 * AVX3Threshold);
+    jcc(Assembler::zero, avx3_threshold);
+
     // In order to use only one arithmetic operation for the main loop we use
     // this pre-calculation
-    movl(tmp2, len);
     andl(tmp2, (32 - 1)); // tail count (in chars), 32 element wide loop
     andl(len, -32);     // vector count
     jccb(Assembler::zero, copy_tail);
@@ -10281,12 +10283,11 @@
     evmovdquw(Address(dst, 0), k2, tmp1, Assembler::AVX_512bit);
 
     jmp(done);
+    bind(avx3_threshold);
   }
   if (UseSSE42Intrinsics) {
     Label copy_16_loop, copy_8_loop, copy_bytes, copy_new_tail, copy_tail;
 
-    movl(tmp2, len);
-
     if (UseAVX > 1) {
       andl(tmp2, (16 - 1));
       andl(len, -16);
@@ -10311,13 +10312,7 @@
 
       bind(below_threshold);
       bind(copy_new_tail);
-      if ((UseAVX > 2) &&
-        VM_Version::supports_avx512vlbw() &&
-        VM_Version::supports_bmi2()) {
-        movl(tmp2, len);
-      } else {
-        movl(len, tmp2);
-      }
+      movl(len, tmp2);
       andl(tmp2, 0x00000007);
       andl(len, 0xFFFFFFF8);
       jccb(Assembler::zero, copy_tail);
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -1304,30 +1304,58 @@
     if (UseUnalignedLoadStores) {
       Label L_end;
       // Copy 64-bytes per iteration
-      __ BIND(L_loop);
       if (UseAVX > 2) {
+        Label L_loop_avx512, L_loop_avx2, L_32_byte_head, L_above_threshold, L_below_threshold;
+
+        __ BIND(L_copy_bytes);
+        __ cmpptr(qword_count, (-1 * AVX3Threshold / 8));
+        __ jccb(Assembler::less, L_above_threshold);
+        __ jmpb(L_below_threshold);
+
+        __ bind(L_loop_avx512);
         __ evmovdqul(xmm0, Address(end_from, qword_count, Address::times_8, -56), Assembler::AVX_512bit);
         __ evmovdqul(Address(end_to, qword_count, Address::times_8, -56), xmm0, Assembler::AVX_512bit);
-      } else if (UseAVX == 2) {
+        __ bind(L_above_threshold);
+        __ addptr(qword_count, 8);
+        __ jcc(Assembler::lessEqual, L_loop_avx512);
+        __ jmpb(L_32_byte_head);
+
+        __ bind(L_loop_avx2);
         __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
         __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
         __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
         __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
+        __ bind(L_below_threshold);
+        __ addptr(qword_count, 8);
+        __ jcc(Assembler::lessEqual, L_loop_avx2);
+
+        __ bind(L_32_byte_head);
+        __ subptr(qword_count, 4);  // sub(8) and add(4)
+        __ jccb(Assembler::greater, L_end);
       } else {
-        __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
-        __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
-        __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
-        __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
-        __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
-        __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
-        __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
-        __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
+        __ BIND(L_loop);
+        if (UseAVX == 2) {
+          __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
+          __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
+          __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
+          __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
+        } else {
+          __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
+          __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
+          __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
+          __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
+          __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
+          __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
+          __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
+          __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
+        }
+
+        __ BIND(L_copy_bytes);
+        __ addptr(qword_count, 8);
+        __ jcc(Assembler::lessEqual, L_loop);
+        __ subptr(qword_count, 4);  // sub(8) and add(4)
+        __ jccb(Assembler::greater, L_end);
       }
-      __ BIND(L_copy_bytes);
-      __ addptr(qword_count, 8);
-      __ jcc(Assembler::lessEqual, L_loop);
-      __ subptr(qword_count, 4);  // sub(8) and add(4)
-      __ jccb(Assembler::greater, L_end);
       // Copy trailing 32 bytes
       if (UseAVX >= 2) {
         __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
@@ -1384,31 +1412,59 @@
     if (UseUnalignedLoadStores) {
       Label L_end;
       // Copy 64-bytes per iteration
-      __ BIND(L_loop);
       if (UseAVX > 2) {
+        Label L_loop_avx512, L_loop_avx2, L_32_byte_head, L_above_threshold, L_below_threshold;
+
+        __ BIND(L_copy_bytes);
+        __ cmpptr(qword_count, (AVX3Threshold / 8));
+        __ jccb(Assembler::greater, L_above_threshold);
+        __ jmpb(L_below_threshold);
+
+        __ BIND(L_loop_avx512);
         __ evmovdqul(xmm0, Address(from, qword_count, Address::times_8, 0), Assembler::AVX_512bit);
         __ evmovdqul(Address(dest, qword_count, Address::times_8, 0), xmm0, Assembler::AVX_512bit);
-      } else if (UseAVX == 2) {
+        __ bind(L_above_threshold);
+        __ subptr(qword_count, 8);
+        __ jcc(Assembler::greaterEqual, L_loop_avx512);
+        __ jmpb(L_32_byte_head);
+
+        __ bind(L_loop_avx2);
         __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
         __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
-        __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
-        __ vmovdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
+        __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
+        __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
+        __ bind(L_below_threshold);
+        __ subptr(qword_count, 8);
+        __ jcc(Assembler::greaterEqual, L_loop_avx2);
+
+        __ bind(L_32_byte_head);
+        __ addptr(qword_count, 4);  // add(8) and sub(4)
+        __ jccb(Assembler::less, L_end);
       } else {
-        __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
-        __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
-        __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
-        __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
-        __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
-        __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
-        __ movdqu(xmm3, Address(from, qword_count, Address::times_8,  0));
-        __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm3);
+        __ BIND(L_loop);
+        if (UseAVX == 2) {
+          __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
+          __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
+          __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
+          __ vmovdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
+        } else {
+          __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
+          __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
+          __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
+          __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
+          __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
+          __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
+          __ movdqu(xmm3, Address(from, qword_count, Address::times_8,  0));
+          __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm3);
+        }
+
+        __ BIND(L_copy_bytes);
+        __ subptr(qword_count, 8);
+        __ jcc(Assembler::greaterEqual, L_loop);
+
+        __ addptr(qword_count, 4);  // add(8) and sub(4)
+        __ jccb(Assembler::less, L_end);
       }
-      __ BIND(L_copy_bytes);
-      __ subptr(qword_count, 8);
-      __ jcc(Assembler::greaterEqual, L_loop);
-
-      __ addptr(qword_count, 4);  // add(8) and sub(4)
-      __ jccb(Assembler::less, L_end);
       // Copy trailing 32 bytes
       if (UseAVX >= 2) {
         __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0));
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -381,6 +381,10 @@
     __ cmpl(rax, 0xE0);
     __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
+    __ movl(rax, Address(rsi, 0));
+    __ cmpl(rax, 0x50654);              // If it is Skylake
+    __ jcc(Assembler::equal, legacy_setup);
     // If UseAVX is unitialized or is set by the user to include EVEX
     if (use_evex) {
       // EVEX setup: run in lowest evex mode
@@ -465,6 +469,11 @@
     __ cmpl(rax, 0xE0);
     __ jcc(Assembler::notEqual, legacy_save_restore);
 
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
+    __ movl(rax, Address(rsi, 0));
+    __ cmpl(rax, 0x50654);              // If it is Skylake
+    __ jcc(Assembler::equal, legacy_save_restore);
+
     // If UseAVX is unitialized or is set by the user to include EVEX
     if (use_evex) {
       // EVEX check: run in lowest evex mode
@@ -660,6 +669,9 @@
   }
   if (FLAG_IS_DEFAULT(UseAVX)) {
     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
+    if (is_intel_family_core() && _model == CPU_MODEL_SKYLAKE && _stepping < 5) {
+      FLAG_SET_DEFAULT(UseAVX, 2);  //Set UseAVX=2 for Skylake
+    }
   } else if (UseAVX > use_avx_limit) {
     warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", (int) UseAVX, use_avx_limit);
     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
@@ -1059,6 +1071,13 @@
   }
 #endif // COMPILER2 && ASSERT
 
+  if (!FLAG_IS_DEFAULT(AVX3Threshold)) {
+    if (!is_power_of_2(AVX3Threshold)) {
+      warning("AVX3Threshold must be a power of 2");
+      FLAG_SET_DEFAULT(AVX3Threshold, 4096);
+    }
+  }
+
 #ifdef _LP64
   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
     UseMultiplyToLenIntrinsic = true;
--- a/src/hotspot/cpu/x86/vm_version_x86.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/x86/vm_version_x86.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -366,7 +366,7 @@
     CPU_MODEL_HASWELL_E3     = 0x3c,
     CPU_MODEL_HASWELL_E7     = 0x3f,
     CPU_MODEL_BROADWELL      = 0x3d,
-    CPU_MODEL_SKYLAKE        = CPU_MODEL_HASWELL_E3
+    CPU_MODEL_SKYLAKE        = 0x55
   };
 
   // cpuid information block.  All info derived from executing cpuid with
--- a/src/hotspot/cpu/x86/x86.ad	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/x86/x86.ad	Fri Oct 11 10:39:58 2019 +0200
@@ -1097,138 +1097,6 @@
 reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 
-reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
-reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h);
-reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p);
-
-reg_class xmm1_reg(XMM1, XMM1b, XMM1c, XMM1d);
-reg_class ymm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h);
-reg_class zmm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p);
-
-reg_class xmm2_reg(XMM2, XMM2b, XMM2c, XMM2d);
-reg_class ymm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h);
-reg_class zmm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p);
-
-reg_class xmm3_reg(XMM3, XMM3b, XMM3c, XMM3d);
-reg_class ymm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h);
-reg_class zmm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p);
-
-reg_class xmm4_reg(XMM4, XMM4b, XMM4c, XMM4d);
-reg_class ymm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h);
-reg_class zmm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p);
-
-reg_class xmm5_reg(XMM5, XMM5b, XMM5c, XMM5d);
-reg_class ymm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h);
-reg_class zmm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p);
-
-reg_class xmm6_reg(XMM6, XMM6b, XMM6c, XMM6d);
-reg_class ymm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h);
-reg_class zmm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p);
-
-reg_class xmm7_reg(XMM7, XMM7b, XMM7c, XMM7d);
-reg_class ymm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h);
-reg_class zmm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p);
-
-#ifdef _LP64
-
-reg_class xmm8_reg(XMM8, XMM8b, XMM8c, XMM8d);
-reg_class ymm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h);
-reg_class zmm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p);
-
-reg_class xmm9_reg(XMM9, XMM9b, XMM9c, XMM9d);
-reg_class ymm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h);
-reg_class zmm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p);
-
-reg_class xmm10_reg(XMM10, XMM10b, XMM10c, XMM10d);
-reg_class ymm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h);
-reg_class zmm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p);
-
-reg_class xmm11_reg(XMM11, XMM11b, XMM11c, XMM11d);
-reg_class ymm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h);
-reg_class zmm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p);
-
-reg_class xmm12_reg(XMM12, XMM12b, XMM12c, XMM12d);
-reg_class ymm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h);
-reg_class zmm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p);
-
-reg_class xmm13_reg(XMM13, XMM13b, XMM13c, XMM13d);
-reg_class ymm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h);
-reg_class zmm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p);
-
-reg_class xmm14_reg(XMM14, XMM14b, XMM14c, XMM14d);
-reg_class ymm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h);
-reg_class zmm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p);
-
-reg_class xmm15_reg(XMM15, XMM15b, XMM15c, XMM15d);
-reg_class ymm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
-reg_class zmm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
-
-reg_class xmm16_reg(XMM16, XMM16b, XMM16c, XMM16d);
-reg_class ymm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h);
-reg_class zmm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p);
-
-reg_class xmm17_reg(XMM17, XMM17b, XMM17c, XMM17d);
-reg_class ymm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h);
-reg_class zmm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p);
-
-reg_class xmm18_reg(XMM18, XMM18b, XMM18c, XMM18d);
-reg_class ymm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h);
-reg_class zmm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p);
-
-reg_class xmm19_reg(XMM19, XMM19b, XMM19c, XMM19d);
-reg_class ymm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h);
-reg_class zmm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p);
-
-reg_class xmm20_reg(XMM20, XMM20b, XMM20c, XMM20d);
-reg_class ymm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h);
-reg_class zmm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p);
-
-reg_class xmm21_reg(XMM21, XMM21b, XMM21c, XMM21d);
-reg_class ymm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h);
-reg_class zmm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p);
-
-reg_class xmm22_reg(XMM22, XMM22b, XMM22c, XMM22d);
-reg_class ymm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h);
-reg_class zmm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p);
-
-reg_class xmm23_reg(XMM23, XMM23b, XMM23c, XMM23d);
-reg_class ymm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h);
-reg_class zmm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p);
-
-reg_class xmm24_reg(XMM24, XMM24b, XMM24c, XMM24d);
-reg_class ymm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h);
-reg_class zmm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p);
-
-reg_class xmm25_reg(XMM25, XMM25b, XMM25c, XMM25d);
-reg_class ymm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h);
-reg_class zmm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p);
-
-reg_class xmm26_reg(XMM26, XMM26b, XMM26c, XMM26d);
-reg_class ymm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h);
-reg_class zmm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p);
-
-reg_class xmm27_reg(XMM27, XMM27b, XMM27c, XMM27d);
-reg_class ymm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h);
-reg_class zmm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p);
-
-reg_class xmm28_reg(XMM28, XMM28b, XMM28c, XMM28d);
-reg_class ymm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h);
-reg_class zmm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p);
-
-reg_class xmm29_reg(XMM29, XMM29b, XMM29c, XMM29d);
-reg_class ymm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h);
-reg_class zmm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p);
-
-reg_class xmm30_reg(XMM30, XMM30b, XMM30c, XMM30d);
-reg_class ymm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h);
-reg_class zmm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p);
-
-reg_class xmm31_reg(XMM31, XMM31b, XMM31c, XMM31d);
-reg_class ymm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
-reg_class zmm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
-
-#endif
-
 %}
 
 
@@ -1800,8 +1668,8 @@
   return (UseAVX > 2) ? 6 : 4;
 }
 
-static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
-                            int stack_offset, int reg, uint ireg, outputStream* st) {
+int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
+                     int stack_offset, int reg, uint ireg, outputStream* st) {
   // In 64-bit VM size calculation is very complex. Emitting instructions
   // into scratch buffer is used to get size in 64-bit VM.
   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
@@ -2863,6 +2731,7 @@
 %}
 
 
+#ifdef _LP64
 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
   predicate(UseSSE>=4);
   match(Set dst (RoundDoubleMode src rmode));
@@ -2963,6 +2832,7 @@
   %}
   ins_pipe( pipe_slow );
 %}
+#endif // _LP64
 
 instruct onspinwait() %{
   match(OnSpinWait);
@@ -3859,7 +3729,7 @@
 %}
 
 instruct Repl2F_zero(vecD dst, immF0 zero) %{
-  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
+  predicate(n->as_Vector()->length() == 2);
   match(Set dst (ReplicateF zero));
   format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
   ins_encode %{
@@ -3869,7 +3739,7 @@
 %}
 
 instruct Repl4F_zero(vecX dst, immF0 zero) %{
-  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
+  predicate(n->as_Vector()->length() == 4);
   match(Set dst (ReplicateF zero));
   format %{ "xorps   $dst,$dst\t! replicate4F zero" %}
   ins_encode %{
@@ -3879,7 +3749,7 @@
 %}
 
 instruct Repl8F_zero(vecY dst, immF0 zero) %{
-  predicate(n->as_Vector()->length() == 8 && UseAVX < 3);
+  predicate(n->as_Vector()->length() == 8 && UseAVX > 0);
   match(Set dst (ReplicateF zero));
   format %{ "vxorps  $dst,$dst,$dst\t! replicate8F zero" %}
   ins_encode %{
@@ -3953,7 +3823,7 @@
 
 // Replicate double (8 byte) scalar zero to be vector
 instruct Repl2D_zero(vecX dst, immD0 zero) %{
-  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
+  predicate(n->as_Vector()->length() == 2);
   match(Set dst (ReplicateD zero));
   format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
   ins_encode %{
@@ -3963,7 +3833,7 @@
 %}
 
 instruct Repl4D_zero(vecY dst, immD0 zero) %{
-  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
+  predicate(n->as_Vector()->length() == 4 && UseAVX > 0);
   match(Set dst (ReplicateD zero));
   format %{ "vxorpd  $dst,$dst,$dst,vect256\t! replicate4D zero" %}
   ins_encode %{
@@ -4888,42 +4758,6 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{
-  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
-  match(Set dst (ReplicateF zero));
-  format %{ "vpxor  $dst k0,$dst,$dst\t! replicate2F zero" %}
-  ins_encode %{
-    // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation
-    int vector_len = 2;
-    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{
-  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
-  match(Set dst (ReplicateF zero));
-  format %{ "vpxor  $dst k0,$dst,$dst\t! replicate4F zero" %}
-  ins_encode %{
-    // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation
-    int vector_len = 2;
-    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{
-  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
-  match(Set dst (ReplicateF zero));
-  format %{ "vpxor  $dst k0,$dst,$dst\t! replicate8F zero" %}
-  ins_encode %{
-    // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation
-    int vector_len = 2;
-    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{
   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
   match(Set dst (ReplicateF zero));
@@ -4980,30 +4814,6 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{
-  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
-  match(Set dst (ReplicateD zero));
-  format %{ "vpxor  $dst k0,$dst,$dst\t! replicate2D zero" %}
-  ins_encode %{
-    // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation
-    int vector_len = 2;
-    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{
-  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
-  match(Set dst (ReplicateD zero));
-  format %{ "vpxor  $dst k0,$dst,$dst\t! replicate4D zero" %}
-  ins_encode %{
-    // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation
-    int vector_len = 2;
-    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{
   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
   match(Set dst (ReplicateD zero));
--- a/src/hotspot/cpu/x86/x86_64.ad	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/cpu/x86/x86_64.ad	Fri Oct 11 10:39:58 2019 +0200
@@ -1035,8 +1035,8 @@
 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 
-static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
-                            int stack_offset, int reg, uint ireg, outputStream* st);
+int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
+                     int stack_offset, int reg, uint ireg, outputStream* st);
 
 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
                                       int dst_offset, uint ireg, outputStream* st) {
@@ -4270,200 +4270,6 @@
   %}
 %}
 
-// Operands for bound floating pointer register arguments
-operand rxmm0() %{
-  constraint(ALLOC_IN_RC(xmm0_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm1() %{
-  constraint(ALLOC_IN_RC(xmm1_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm2() %{
-  constraint(ALLOC_IN_RC(xmm2_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm3() %{
-  constraint(ALLOC_IN_RC(xmm3_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm4() %{
-  constraint(ALLOC_IN_RC(xmm4_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm5() %{
-  constraint(ALLOC_IN_RC(xmm5_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm6() %{
-  constraint(ALLOC_IN_RC(xmm6_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm7() %{
-  constraint(ALLOC_IN_RC(xmm7_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm8() %{
-  constraint(ALLOC_IN_RC(xmm8_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm9() %{
-  constraint(ALLOC_IN_RC(xmm9_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm10() %{
-  constraint(ALLOC_IN_RC(xmm10_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm11() %{
-  constraint(ALLOC_IN_RC(xmm11_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm12() %{
-  constraint(ALLOC_IN_RC(xmm12_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm13() %{
-  constraint(ALLOC_IN_RC(xmm13_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm14() %{
-  constraint(ALLOC_IN_RC(xmm14_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm15() %{
-  constraint(ALLOC_IN_RC(xmm15_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm16() %{
-  constraint(ALLOC_IN_RC(xmm16_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm17() %{
-  constraint(ALLOC_IN_RC(xmm17_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm18() %{
-  constraint(ALLOC_IN_RC(xmm18_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm19() %{
-  constraint(ALLOC_IN_RC(xmm19_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm20() %{
-  constraint(ALLOC_IN_RC(xmm20_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm21() %{
-  constraint(ALLOC_IN_RC(xmm21_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm22() %{
-  constraint(ALLOC_IN_RC(xmm22_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm23() %{
-  constraint(ALLOC_IN_RC(xmm23_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm24() %{
-  constraint(ALLOC_IN_RC(xmm24_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm25() %{
-  constraint(ALLOC_IN_RC(xmm25_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm26() %{
-  constraint(ALLOC_IN_RC(xmm26_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm27() %{
-  constraint(ALLOC_IN_RC(xmm27_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm28() %{
-  constraint(ALLOC_IN_RC(xmm28_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm29() %{
-  constraint(ALLOC_IN_RC(xmm29_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm30() %{
-  constraint(ALLOC_IN_RC(xmm30_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-operand rxmm31() %{
-  constraint(ALLOC_IN_RC(xmm31_reg));
-  match(VecX);
-  format%{%}
-  interface(REG_INTER);
-%}
-
 //----------OPERAND CLASSES----------------------------------------------------
 // Operand Classes are groups of operands that are used as to simplify
 // instruction definitions by not requiring the AD writer to specify separate
@@ -5356,6 +5162,7 @@
 instruct loadP(rRegP dst, memory mem)
 %{
   match(Set dst (LoadP mem));
+  predicate(n->as_Load()->barrier_data() == 0);
 
   ins_cost(125); // XXX
   format %{ "movq    $dst, $mem\t# ptr" %}
@@ -7844,6 +7651,7 @@
                            rax_RegP oldval, rRegP newval,
                            rFlagsReg cr)
 %{
+  predicate(n->as_LoadStore()->barrier_data() == 0);
   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
 
   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
@@ -7895,7 +7703,7 @@
                          rax_RegP oldval, rRegP newval,
                          rFlagsReg cr)
 %{
-  predicate(VM_Version::supports_cx8());
+  predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
   effect(KILL cr, KILL oldval);
@@ -8137,7 +7945,7 @@
                          rax_RegP oldval, rRegP newval,
                          rFlagsReg cr)
 %{
-  predicate(VM_Version::supports_cx8());
+  predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
   effect(KILL cr);
 
@@ -8282,6 +8090,7 @@
 
 instruct xchgP( memory mem, rRegP newval) %{
   match(Set newval (GetAndSetP mem newval));
+  predicate(n->as_LoadStore()->barrier_data() == 0);
   format %{ "XCHGQ  $newval,[$mem]" %}
   ins_encode %{
     __ xchgq($newval$$Register, $mem$$Address);
@@ -12136,6 +11945,7 @@
 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
 %{
   match(Set cr (CmpP op1 (LoadP op2)));
+  predicate(n->in(2)->as_Load()->barrier_data() == 0);
 
   ins_cost(500); // XXX
   format %{ "cmpq    $op1, $op2\t# ptr" %}
@@ -12161,7 +11971,8 @@
 // and raw pointers have no anti-dependencies.
 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
 %{
-  predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none);
+  predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
+            n->in(2)->as_Load()->barrier_data() == 0);
   match(Set cr (CmpP op1 (LoadP op2)));
 
   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
@@ -12186,7 +11997,8 @@
 // any compare to a zero should be eq/neq.
 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
 %{
-  predicate(!UseCompressedOops || (CompressedOops::base() != NULL));
+  predicate((!UseCompressedOops || (CompressedOops::base() != NULL)) &&
+            n->in(1)->as_Load()->barrier_data() == 0);
   match(Set cr (CmpP (LoadP op) zero));
 
   ins_cost(500); // XXX
@@ -12199,7 +12011,9 @@
 
 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
 %{
-  predicate(UseCompressedOops && (CompressedOops::base() == NULL) && (CompressedKlassPointers::base() == NULL));
+  predicate(UseCompressedOops && (CompressedOops::base() == NULL) &&
+            (CompressedKlassPointers::base() == NULL) &&
+            n->in(1)->as_Load()->barrier_data() == 0);
   match(Set cr (CmpP (LoadP mem) zero));
 
   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
--- a/src/hotspot/os/aix/os_aix.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/os/aix/os_aix.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -132,18 +132,6 @@
 #define ERROR_MP_VMGETINFO_CLAIMS_NO_SUPPORT_FOR_64K 103
 
 // excerpts from systemcfg.h that might be missing on older os levels
-#ifndef PV_5_Compat
-  #define PV_5_Compat 0x0F8000   /* Power PC 5 */
-#endif
-#ifndef PV_6
-  #define PV_6 0x100000          /* Power PC 6 */
-#endif
-#ifndef PV_6_1
-  #define PV_6_1 0x100001        /* Power PC 6 DD1.x */
-#endif
-#ifndef PV_6_Compat
-  #define PV_6_Compat 0x108000   /* Power PC 6 */
-#endif
 #ifndef PV_7
   #define PV_7 0x200000          /* Power PC 7 */
 #endif
@@ -156,6 +144,13 @@
 #ifndef PV_8_Compat
   #define PV_8_Compat 0x308000   /* Power PC 8 */
 #endif
+#ifndef PV_9
+  #define PV_9 0x400000          /* Power PC 9 */
+#endif
+#ifndef PV_9_Compat
+  #define PV_9_Compat  0x408000  /* Power PC 9 */
+#endif
+
 
 static address resolve_function_descriptor_to_code_pointer(address p);
 
@@ -1386,15 +1381,7 @@
 void os::print_os_info(outputStream* st) {
   st->print("OS:");
 
-  st->print("uname:");
-  struct utsname name;
-  uname(&name);
-  st->print(name.sysname); st->print(" ");
-  st->print(name.nodename); st->print(" ");
-  st->print(name.release); st->print(" ");
-  st->print(name.version); st->print(" ");
-  st->print(name.machine);
-  st->cr();
+  os::Posix::print_uname_info(st);
 
   uint32_t ver = os::Aix::os_version();
   st->print_cr("AIX kernel version %u.%u.%u.%u",
@@ -1402,16 +1389,12 @@
 
   os::Posix::print_rlimit_info(st);
 
+  os::Posix::print_load_average(st);
+
   // _SC_THREAD_THREADS_MAX is the maximum number of threads within a process.
   long tmax = sysconf(_SC_THREAD_THREADS_MAX);
   st->print_cr("maximum #threads within a process:%ld", tmax);
 
-  // load average
-  st->print("load average:");
-  double loadavg[3] = {-1.L, -1.L, -1.L};
-  os::loadavg(loadavg, 3);
-  st->print_cr("%0.02f %0.02f %0.02f", loadavg[0], loadavg[1], loadavg[2]);
-
   // print wpar info
   libperfstat::wparinfo_t wi;
   if (libperfstat::get_wparinfo(&wi)) {
@@ -1504,6 +1487,9 @@
 void os::get_summary_cpu_info(char* buf, size_t buflen) {
   // read _system_configuration.version
   switch (_system_configuration.version) {
+  case PV_9:
+    strncpy(buf, "Power PC 9", buflen);
+    break;
   case PV_8:
     strncpy(buf, "Power PC 8", buflen);
     break;
@@ -1537,6 +1523,9 @@
   case PV_8_Compat:
     strncpy(buf, "PV_8_Compat", buflen);
     break;
+  case PV_9_Compat:
+    strncpy(buf, "PV_9_Compat", buflen);
+    break;
   default:
     strncpy(buf, "unknown", buflen);
   }
--- a/src/hotspot/os/posix/gc/z/zVirtualMemory_posix.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/os/posix/gc/z/zVirtualMemory_posix.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -43,7 +43,7 @@
 
   if ((uintptr_t)res != start) {
     // Failed to reserve memory at the requested address
-    unmap(start, size);
+    unmap((uintptr_t)res, size);
     return false;
   }
 
--- a/src/hotspot/os/posix/os_posix.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/os/posix/os_posix.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -373,8 +373,12 @@
 void os::Posix::print_load_average(outputStream* st) {
   st->print("load average:");
   double loadavg[3];
-  os::loadavg(loadavg, 3);
-  st->print("%0.02f %0.02f %0.02f", loadavg[0], loadavg[1], loadavg[2]);
+  int res = os::loadavg(loadavg, 3);
+  if (res != -1) {
+    st->print("%0.02f %0.02f %0.02f", loadavg[0], loadavg[1], loadavg[2]);
+  } else {
+    st->print(" Unavailable");
+  }
   st->cr();
 }
 
--- a/src/hotspot/os/windows/os_windows.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/os/windows/os_windows.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -4189,8 +4189,6 @@
     if (::isalpha(buf[0]) && !::IsDBCSLeadByte(buf[0]) && buf[1] == ':' && buf[2] == '\\') {
       prefix = L"\\\\?\\";
     } else if (buf[0] == '\\' && buf[1] == '\\') {
-      assert(buf[2] != '\\');
-
       if (buf[2] == '?' && buf[3] == '\\') {
         prefix = L"";
         needs_fullpath = false;
--- a/src/hotspot/share/adlc/formssel.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/adlc/formssel.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -774,11 +774,6 @@
        !strcmp(_matrule->_rChild->_opType,"CheckCastPP")  ||
        !strcmp(_matrule->_rChild->_opType,"GetAndSetP")   ||
        !strcmp(_matrule->_rChild->_opType,"GetAndSetN")   ||
-#if INCLUDE_ZGC
-       !strcmp(_matrule->_rChild->_opType,"ZGetAndSetP") ||
-       !strcmp(_matrule->_rChild->_opType,"ZCompareAndExchangeP") ||
-       !strcmp(_matrule->_rChild->_opType,"LoadBarrierSlowReg") ||
-#endif
 #if INCLUDE_SHENANDOAHGC
        !strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeP") ||
        !strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeN") ||
@@ -3512,9 +3507,6 @@
     "StoreCM",
     "GetAndSetB", "GetAndSetS", "GetAndAddI", "GetAndSetI", "GetAndSetP",
     "GetAndAddB", "GetAndAddS", "GetAndAddL", "GetAndSetL", "GetAndSetN",
-#if INCLUDE_ZGC
-    "ZGetAndSetP", "ZCompareAndSwapP", "ZCompareAndExchangeP", "ZWeakCompareAndSwapP",
-#endif
     "ClearArray"
   };
   int cnt = sizeof(needs_ideal_memory_list)/sizeof(char*);
--- a/src/hotspot/share/ci/ciEnv.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/ci/ciEnv.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -239,6 +239,7 @@
   _jvmti_can_access_local_variables     = JvmtiExport::can_access_local_variables();
   _jvmti_can_post_on_exceptions         = JvmtiExport::can_post_on_exceptions();
   _jvmti_can_pop_frame                  = JvmtiExport::can_pop_frame();
+  _jvmti_can_get_owned_monitor_info     = JvmtiExport::can_get_owned_monitor_info();
 }
 
 bool ciEnv::jvmti_state_changed() const {
@@ -263,6 +264,10 @@
       JvmtiExport::can_pop_frame()) {
     return true;
   }
+  if (!_jvmti_can_get_owned_monitor_info &&
+      JvmtiExport::can_get_owned_monitor_info()) {
+    return true;
+  }
 
   return false;
 }
--- a/src/hotspot/share/ci/ciEnv.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/ci/ciEnv.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -73,6 +73,7 @@
   bool  _jvmti_can_access_local_variables;
   bool  _jvmti_can_post_on_exceptions;
   bool  _jvmti_can_pop_frame;
+  bool  _jvmti_can_get_owned_monitor_info; // includes can_get_owned_monitor_stack_depth_info
 
   // Cache DTrace flags
   bool  _dtrace_extended_probes;
@@ -353,6 +354,7 @@
   }
   bool  jvmti_can_hotswap_or_post_breakpoint() const { return _jvmti_can_hotswap_or_post_breakpoint; }
   bool  jvmti_can_post_on_exceptions()         const { return _jvmti_can_post_on_exceptions; }
+  bool  jvmti_can_get_owned_monitor_info()     const { return _jvmti_can_get_owned_monitor_info; }
 
   // Cache DTrace flags
   void  cache_dtrace_flags();
--- a/src/hotspot/share/classfile/classFileParser.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/classfile/classFileParser.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -3077,7 +3077,7 @@
   // We temporarily use the vtable_index field in the Method* to store the
   // class file index, so we can read in after calling qsort.
   // Put the method ordering in the shared archive.
-  if (JvmtiExport::can_maintain_original_method_order() || DumpSharedSpaces) {
+  if (JvmtiExport::can_maintain_original_method_order() || Arguments::is_dumping_archive()) {
     for (int index = 0; index < length; index++) {
       Method* const m = methods->at(index);
       assert(!m->valid_vtable_index(), "vtable index should not be set");
@@ -3091,7 +3091,7 @@
   intArray* method_ordering = NULL;
   // If JVMTI original method ordering or sharing is enabled construct int
   // array remembering the original ordering
-  if (JvmtiExport::can_maintain_original_method_order() || DumpSharedSpaces) {
+  if (JvmtiExport::can_maintain_original_method_order() || Arguments::is_dumping_archive()) {
     method_ordering = new intArray(length, length, -1);
     for (int index = 0; index < length; index++) {
       Method* const m = methods->at(index);
--- a/src/hotspot/share/classfile/classLoader.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/classfile/classLoader.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -462,7 +462,7 @@
 
 #if INCLUDE_CDS
 void ClassLoader::exit_with_path_failure(const char* error, const char* message) {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "only called at dump time");
+  Arguments::assert_is_dumping_archive();
   tty->print_cr("Hint: enable -Xlog:class+path=info to diagnose the failure");
   vm_exit_during_initialization(error, message);
 }
@@ -532,7 +532,7 @@
 
 #if INCLUDE_CDS
 void ClassLoader::setup_app_search_path(const char *class_path) {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "Sanity");
+  Arguments::assert_is_dumping_archive();
 
   ResourceMark rm;
   ClasspathStream cp_stream(class_path);
@@ -546,7 +546,7 @@
 void ClassLoader::add_to_module_path_entries(const char* path,
                                              ClassPathEntry* entry) {
   assert(entry != NULL, "ClassPathEntry should not be NULL");
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "dump time only");
+  Arguments::assert_is_dumping_archive();
 
   // The entry does not exist, add to the list
   if (_module_path_entries == NULL) {
@@ -560,7 +560,7 @@
 
 // Add a module path to the _module_path_entries list.
 void ClassLoader::update_module_path_entry_list(const char *path, TRAPS) {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "dump time only");
+  Arguments::assert_is_dumping_archive();
   struct stat st;
   if (os::stat(path, &st) != 0) {
     tty->print_cr("os::stat error %d (%s). CDS dump aborted (path was \"%s\").",
@@ -656,7 +656,7 @@
   bool set_base_piece = true;
 
 #if INCLUDE_CDS
-  if (DumpSharedSpaces || DynamicDumpSharedSpaces) {
+  if (Arguments::is_dumping_archive()) {
     if (!Arguments::has_jimage()) {
       vm_exit_during_initialization("CDS is not supported in exploded JDK build", NULL);
     }
@@ -1360,7 +1360,7 @@
 // Record the shared classpath index and loader type for classes loaded
 // by the builtin loaders at dump time.
 void ClassLoader::record_result(InstanceKlass* ik, const ClassFileStream* stream, TRAPS) {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "sanity");
+  Arguments::assert_is_dumping_archive();
   assert(stream != NULL, "sanity");
 
   if (ik->is_unsafe_anonymous()) {
@@ -1537,13 +1537,13 @@
 
 #if INCLUDE_CDS
 void ClassLoader::initialize_shared_path() {
-  if (DumpSharedSpaces || DynamicDumpSharedSpaces) {
+  if (Arguments::is_dumping_archive()) {
     ClassLoaderExt::setup_search_paths();
   }
 }
 
 void ClassLoader::initialize_module_path(TRAPS) {
-  if (DumpSharedSpaces || DynamicDumpSharedSpaces) {
+  if (Arguments::is_dumping_archive()) {
     ClassLoaderExt::setup_module_paths(THREAD);
     FileMapInfo::allocate_shared_path_table();
   }
--- a/src/hotspot/share/classfile/classLoader.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/classfile/classLoader.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -26,6 +26,7 @@
 #define SHARE_CLASSFILE_CLASSLOADER_HPP
 
 #include "jimage.hpp"
+#include "runtime/arguments.hpp"
 #include "runtime/handles.hpp"
 #include "runtime/perfData.hpp"
 #include "utilities/exceptions.hpp"
@@ -236,6 +237,8 @@
   CDS_ONLY(static ClassPathEntry* app_classpath_entries() {return _app_classpath_entries;})
   CDS_ONLY(static ClassPathEntry* module_path_entries() {return _module_path_entries;})
 
+  static bool has_bootclasspath_append() { return _first_append_entry != NULL; }
+
  protected:
   // Initialization:
   //   - setup the boot loader's system class path
@@ -395,8 +398,7 @@
   // Helper function used by CDS code to get the number of module path
   // entries during shared classpath setup time.
   static int num_module_path_entries() {
-    assert(DumpSharedSpaces || DynamicDumpSharedSpaces,
-           "Should only be called at CDS dump time");
+    Arguments::assert_is_dumping_archive();
     int num_entries = 0;
     ClassPathEntry* e= ClassLoader::_module_path_entries;
     while (e != NULL) {
--- a/src/hotspot/share/classfile/classLoader.inline.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/classfile/classLoader.inline.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -62,8 +62,7 @@
 // entries during shared classpath setup time.
 
 inline int ClassLoader::num_boot_classpath_entries() {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces,
-         "Should only be called at CDS dump time");
+  Arguments::assert_is_dumping_archive();
   assert(has_jrt_entry(), "must have a java runtime image");
   int num_entries = 1; // count the runtime image
   ClassPathEntry* e = ClassLoader::_first_append_entry;
@@ -85,8 +84,7 @@
 // Helper function used by CDS code to get the number of app classpath
 // entries during shared classpath setup time.
 inline int ClassLoader::num_app_classpath_entries() {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces,
-         "Should only be called at CDS dump time");
+  Arguments::assert_is_dumping_archive();
   int num_entries = 0;
   ClassPathEntry* e= ClassLoader::_app_classpath_entries;
   while (e != NULL) {
--- a/src/hotspot/share/classfile/classLoaderExt.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/classfile/classLoaderExt.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -62,8 +62,7 @@
 }
 
 void ClassLoaderExt::setup_app_search_path() {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces,
-         "this function is only used at CDS dump time");
+  Arguments::assert_is_dumping_archive();
   _app_class_paths_start_index = ClassLoader::num_boot_classpath_entries();
   char* app_class_path = os::strdup(Arguments::get_appclasspath());
 
@@ -92,8 +91,7 @@
   }
 }
 void ClassLoaderExt::setup_module_paths(TRAPS) {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces,
-         "this function is only used with CDS dump time");
+  Arguments::assert_is_dumping_archive();
   _app_module_paths_start_index = ClassLoader::num_boot_classpath_entries() +
                               ClassLoader::num_app_classpath_entries();
   Handle system_class_loader (THREAD, SystemDictionary::java_system_loader());
@@ -231,7 +229,7 @@
 void ClassLoaderExt::record_result(const s2 classpath_index,
                                    InstanceKlass* result,
                                    TRAPS) {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "Sanity");
+  Arguments::assert_is_dumping_archive();
 
   // We need to remember where the class comes from during dumping.
   oop loader = result->class_loader();
--- a/src/hotspot/share/classfile/compactHashtable.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/classfile/compactHashtable.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -42,7 +42,7 @@
 //
 CompactHashtableWriter::CompactHashtableWriter(int num_entries,
                                                CompactHashtableStats* stats) {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "dump-time only");
+  Arguments::assert_is_dumping_archive();
   assert(num_entries >= 0, "sanity");
   _num_buckets = calculate_num_buckets(num_entries);
   assert(_num_buckets > 0, "no buckets");
--- a/src/hotspot/share/classfile/dictionary.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/classfile/dictionary.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -246,7 +246,7 @@
 
 // Used to scan and relocate the classes during CDS archive dump.
 void Dictionary::classes_do(MetaspaceClosure* it) {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "dump-time only");
+  Arguments::assert_is_dumping_archive();
   for (int index = 0; index < table_size(); index++) {
     for (DictionaryEntry* probe = bucket(index);
                           probe != NULL;
--- a/src/hotspot/share/classfile/javaClasses.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/classfile/javaClasses.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -379,7 +379,7 @@
 
   if (_to_java_string_fn == NULL) {
     void *lib_handle = os::native_java_library();
-    _to_java_string_fn = CAST_TO_FN_PTR(to_java_string_fn_t, os::dll_lookup(lib_handle, "NewStringPlatform"));
+    _to_java_string_fn = CAST_TO_FN_PTR(to_java_string_fn_t, os::dll_lookup(lib_handle, "JNU_NewStringPlatform"));
     if (_to_java_string_fn == NULL) {
       fatal("NewStringPlatform missing");
     }
--- a/src/hotspot/share/classfile/javaClasses.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/classfile/javaClasses.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -202,7 +202,6 @@
 
   // Conversion between '.' and '/' formats
   static Handle externalize_classname(Handle java_string, TRAPS) { return char_converter(java_string, '/', '.', THREAD); }
-  static Handle internalize_classname(Handle java_string, TRAPS) { return char_converter(java_string, '.', '/', THREAD); }
 
   // Conversion
   static Symbol* as_symbol(oop java_string);
--- a/src/hotspot/share/classfile/klassFactory.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/classfile/klassFactory.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -218,7 +218,7 @@
   JFR_ONLY(ON_KLASS_CREATION(result, parser, THREAD);)
 
 #if INCLUDE_CDS
-  if (DumpSharedSpaces || DynamicDumpSharedSpaces) {
+  if (Arguments::is_dumping_archive()) {
     ClassLoader::record_result(result, stream, THREAD);
   }
 #endif // INCLUDE_CDS
--- a/src/hotspot/share/classfile/symbolTable.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/classfile/symbolTable.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -220,7 +220,7 @@
   assert (len <= Symbol::max_length(), "should be checked by caller");
 
   Symbol* sym;
-  if (DumpSharedSpaces || DynamicDumpSharedSpaces) {
+  if (Arguments::is_dumping_archive()) {
     c_heap = false;
   }
   if (c_heap) {
@@ -283,7 +283,7 @@
 };
 
 void SymbolTable::metaspace_pointers_do(MetaspaceClosure* it) {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "called only during dump time");
+  Arguments::assert_is_dumping_archive();
   MetaspacePointersDo mpd(it);
   _local_table->do_safepoint_scan(mpd);
 }
--- a/src/hotspot/share/classfile/systemDictionary.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/classfile/systemDictionary.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -1251,10 +1251,8 @@
   TempNewSymbol pkg_name = NULL;
   PackageEntry* pkg_entry = NULL;
   ModuleEntry* mod_entry = NULL;
-  const char* pkg_string = NULL;
   pkg_name = InstanceKlass::package_from_name(class_name, CHECK_false);
   if (pkg_name != NULL) {
-    pkg_string = pkg_name->as_C_string();
     if (loader_data != NULL) {
       pkg_entry = loader_data->packages()->lookup_only(pkg_name);
     }
@@ -1291,7 +1289,7 @@
     // 3. or, the class is from an unamed module
     if (!ent->is_modules_image() && ik->is_shared_boot_class()) {
       // the class is from the -Xbootclasspath/a
-      if (pkg_string == NULL ||
+      if (pkg_name == NULL ||
           pkg_entry == NULL ||
           pkg_entry->in_unnamed_module()) {
         assert(mod_entry == NULL ||
@@ -1303,8 +1301,7 @@
     return false;
   } else {
     bool res = SystemDictionaryShared::is_shared_class_visible_for_classloader(
-              ik, class_loader, pkg_string, pkg_name,
-              pkg_entry, mod_entry, CHECK_(false));
+              ik, class_loader, pkg_name, pkg_entry, mod_entry, CHECK_(false));
     return res;
   }
 }
@@ -1478,6 +1475,11 @@
         // a named package within the unnamed module.  In all cases,
         // limit visibility to search for the class only in the boot
         // loader's append path.
+        if (!ClassLoader::has_bootclasspath_append()) {
+           // If there is no bootclasspath append entry, no need to continue
+           // searching.
+           return NULL;
+        }
         search_only_bootloader_append = true;
       }
     }
--- a/src/hotspot/share/classfile/systemDictionaryShared.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/classfile/systemDictionaryShared.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -657,7 +657,6 @@
 bool SystemDictionaryShared::is_shared_class_visible_for_classloader(
                                                      InstanceKlass* ik,
                                                      Handle class_loader,
-                                                     const char* pkg_string,
                                                      Symbol* pkg_name,
                                                      PackageEntry* pkg_entry,
                                                      ModuleEntry* mod_entry,
@@ -684,7 +683,7 @@
     }
   } else if (SystemDictionary::is_system_class_loader(class_loader())) {
     assert(ent != NULL, "shared class for system loader should have valid SharedClassPathEntry");
-    if (pkg_string == NULL) {
+    if (pkg_name == NULL) {
       // The archived class is in the unnamed package. Currently, the boot image
       // does not contain any class in the unnamed package.
       assert(!ent->is_modules_image(), "Class in the unnamed package must be from the classpath");
@@ -1029,7 +1028,7 @@
 }
 
 void SystemDictionaryShared::set_shared_class_misc_info(InstanceKlass* k, ClassFileStream* cfs) {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "only when dumping");
+  Arguments::assert_is_dumping_archive();
   assert(!is_builtin(k), "must be unregistered class");
   DumpTimeSharedClassInfo* info = find_or_allocate_info_for(k);
   info->_clsfile_size  = cfs->length();
@@ -1185,7 +1184,7 @@
 
 bool SystemDictionaryShared::is_excluded_class(InstanceKlass* k) {
   assert(_no_class_loading_should_happen, "sanity");
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "only when dumping");
+  Arguments::assert_is_dumping_archive();
   return find_or_allocate_info_for(k)->is_excluded();
 }
 
@@ -1209,7 +1208,7 @@
 
 bool SystemDictionaryShared::add_verification_constraint(InstanceKlass* k, Symbol* name,
          Symbol* from_name, bool from_field_is_protected, bool from_is_array, bool from_is_object) {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "called at dump time only");
+  Arguments::assert_is_dumping_archive();
   DumpTimeSharedClassInfo* info = find_or_allocate_info_for(k);
   info->add_verification_constraint(k, name, from_name, from_field_is_protected,
                                     from_is_array, from_is_object);
--- a/src/hotspot/share/classfile/systemDictionaryShared.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/classfile/systemDictionaryShared.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -240,7 +240,6 @@
   static bool is_sharing_possible(ClassLoaderData* loader_data);
   static bool is_shared_class_visible_for_classloader(InstanceKlass* ik,
                                                       Handle class_loader,
-                                                      const char* pkg_string,
                                                       Symbol* pkg_name,
                                                       PackageEntry* pkg_entry,
                                                       ModuleEntry* mod_entry,
--- a/src/hotspot/share/classfile/verificationType.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/classfile/verificationType.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -95,7 +95,7 @@
       return true;
     }
 
-    if (DumpSharedSpaces || DynamicDumpSharedSpaces) {
+    if (Arguments::is_dumping_archive()) {
       if (SystemDictionaryShared::add_verification_constraint(klass,
               name(), from.name(), from_field_is_protected, from.is_array(),
               from.is_object())) {
--- a/src/hotspot/share/classfile/verifier.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/classfile/verifier.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -64,29 +64,39 @@
 #define VALUETYPE_MAJOR_VERSION                        56
 #define MAX_ARRAY_DIMENSIONS 255
 
-// Access to external entry for VerifyClassCodes - old byte code verifier
+// Access to external entry for VerifyClassForMajorVersion - old byte code verifier
 
 extern "C" {
-  typedef jboolean (*verify_byte_codes_fn_t)(JNIEnv *, jclass, char *, jint);
-  typedef jboolean (*verify_byte_codes_fn_new_t)(JNIEnv *, jclass, char *, jint, jint);
+  typedef jboolean (*verify_byte_codes_fn_t)(JNIEnv *, jclass, char *, jint, jint);
 }
 
-static void* volatile _verify_byte_codes_fn = NULL;
+static verify_byte_codes_fn_t volatile _verify_byte_codes_fn = NULL;
 
-static volatile jint _is_new_verify_byte_codes_fn = (jint) true;
+static verify_byte_codes_fn_t verify_byte_codes_fn() {
 
-static void* verify_byte_codes_fn() {
-  if (OrderAccess::load_acquire(&_verify_byte_codes_fn) == NULL) {
-    void *lib_handle = os::native_java_library();
-    void *func = os::dll_lookup(lib_handle, "VerifyClassCodesForMajorVersion");
-    OrderAccess::release_store(&_verify_byte_codes_fn, func);
-    if (func == NULL) {
-      _is_new_verify_byte_codes_fn = false;
-      func = os::dll_lookup(lib_handle, "VerifyClassCodes");
-      OrderAccess::release_store(&_verify_byte_codes_fn, func);
-    }
-  }
-  return (void*)_verify_byte_codes_fn;
+  if (_verify_byte_codes_fn != NULL)
+    return _verify_byte_codes_fn;
+
+  MutexLocker locker(Verify_lock);
+
+  if (_verify_byte_codes_fn != NULL)
+    return _verify_byte_codes_fn;
+
+  // Load verify dll
+  char buffer[JVM_MAXPATHLEN];
+  char ebuf[1024];
+  if (!os::dll_locate_lib(buffer, sizeof(buffer), Arguments::get_dll_dir(), "verify"))
+    return NULL; // Caller will throw VerifyError
+
+  void *lib_handle = os::dll_load(buffer, ebuf, sizeof(ebuf));
+  if (lib_handle == NULL)
+    return NULL; // Caller will throw VerifyError
+
+  void *fn = os::dll_lookup(lib_handle, "VerifyClassForMajorVersion");
+  if (fn == NULL)
+    return NULL; // Caller will throw VerifyError
+
+  return _verify_byte_codes_fn = CAST_TO_FN_PTR(verify_byte_codes_fn_t, fn);
 }
 
 
@@ -283,7 +293,7 @@
   JavaThread* thread = (JavaThread*)THREAD;
   JNIEnv *env = thread->jni_environment();
 
-  void* verify_func = verify_byte_codes_fn();
+  verify_byte_codes_fn_t verify_func = verify_byte_codes_fn();
 
   if (verify_func == NULL) {
     jio_snprintf(message, message_len, "Could not link verifier");
@@ -302,16 +312,7 @@
     // ThreadToNativeFromVM takes care of changing thread_state, so safepoint
     // code knows that we have left the VM
 
-    if (_is_new_verify_byte_codes_fn) {
-      verify_byte_codes_fn_new_t func =
-        CAST_TO_FN_PTR(verify_byte_codes_fn_new_t, verify_func);
-      result = (*func)(env, cls, message, (int)message_len,
-          klass->major_version());
-    } else {
-      verify_byte_codes_fn_t func =
-        CAST_TO_FN_PTR(verify_byte_codes_fn_t, verify_func);
-      result = (*func)(env, cls, message, (int)message_len);
-    }
+    result = (*verify_func)(env, cls, message, (int)message_len, klass->major_version());
   }
 
   JNIHandles::destroy_local(cls);
--- a/src/hotspot/share/code/compiledIC.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/code/compiledIC.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -756,4 +756,22 @@
   tty->cr();
 }
 
+void CompiledDirectStaticCall::verify_mt_safe(const methodHandle& callee, address entry,
+                                              NativeMovConstReg* method_holder,
+                                              NativeJump*        jump) {
+  // A generated lambda form might be deleted from the Lambdaform
+  // cache in MethodTypeForm.  If a jit compiled lambdaform method
+  // becomes not entrant and the cache access returns null, the new
+  // resolve will lead to a new generated LambdaForm.
+  Method* old_method = reinterpret_cast<Method*>(method_holder->data());
+  assert(old_method == NULL || old_method == callee() ||
+         callee->is_compiled_lambda_form() ||
+         !old_method->method_holder()->is_loader_alive() ||
+         old_method->is_old(),  // may be race patching deoptimized nmethod due to redefinition.
+         "a) MT-unsafe modification of inline cache");
+
+  address destination = jump->jump_destination();
+  assert(destination == (address)-1 || destination == entry,
+         "b) MT-unsafe modification of inline cache");
+}
 #endif // !PRODUCT
--- a/src/hotspot/share/code/compiledIC.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/code/compiledIC.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -402,6 +402,9 @@
 
   // Also used by CompiledIC
   void set_to_interpreted(const methodHandle& callee, address entry);
+  void verify_mt_safe(const methodHandle& callee, address entry,
+                      NativeMovConstReg* method_holder,
+                      NativeJump*        jump) PRODUCT_RETURN;
 #if INCLUDE_AOT
   void set_to_far(const methodHandle& callee, address entry);
 #endif
--- a/src/hotspot/share/compiler/compilerDirectives.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/compiler/compilerDirectives.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -66,8 +66,7 @@
     cflags(VectorizeDebug,          uintx, 0, VectorizeDebug) \
     cflags(CloneMapDebug,           bool, false, CloneMapDebug) \
     cflags(IGVPrintLevel,           intx, PrintIdealGraphLevel, IGVPrintLevel) \
-    cflags(MaxNodeLimit,            intx, MaxNodeLimit, MaxNodeLimit) \
-ZGC_ONLY(cflags(ZTraceLoadBarriers, bool, false, ZTraceLoadBarriers))
+    cflags(MaxNodeLimit,            intx, MaxNodeLimit, MaxNodeLimit)
 #else
   #define compilerdirectives_c2_flags(cflags)
 #endif
--- a/src/hotspot/share/gc/g1/g1Analytics.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1Analytics.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -78,6 +78,8 @@
     _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _prev_collection_pause_end_ms(0.0),
     _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _concurrent_refine_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _logged_cards_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _cost_per_logged_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _cost_scan_hcc_seq(new TruncatedSeq(TruncatedSeqLength)),
     _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
@@ -102,6 +104,10 @@
   int index = MIN2(ParallelGCThreads - 1, 7u);
 
   _rs_length_diff_seq->add(rs_length_diff_defaults[index]);
+  // Start with inverse of maximum STW cost.
+  _concurrent_refine_rate_ms_seq->add(1/cost_per_logged_card_ms_defaults[0]);
+  // Some applications have very low rates for logging cards.
+  _logged_cards_rate_ms_seq->add(0.0);
   _cost_per_logged_card_ms_seq->add(cost_per_logged_card_ms_defaults[index]);
   _cost_scan_hcc_seq->add(0.0);
   _young_cards_per_entry_ratio_seq->add(young_cards_per_entry_ratio_defaults[index]);
@@ -159,6 +165,14 @@
     (pause_time_ms * _recent_prev_end_times_for_all_gcs_sec->num()) / interval_ms;
 }
 
+void G1Analytics::report_concurrent_refine_rate_ms(double cards_per_ms) {
+  _concurrent_refine_rate_ms_seq->add(cards_per_ms);
+}
+
+void G1Analytics::report_logged_cards_rate_ms(double cards_per_ms) {
+  _logged_cards_rate_ms_seq->add(cards_per_ms);
+}
+
 void G1Analytics::report_cost_per_logged_card_ms(double cost_per_logged_card_ms) {
   _cost_per_logged_card_ms_seq->add(cost_per_logged_card_ms);
 }
@@ -223,6 +237,14 @@
   return get_new_prediction(_alloc_rate_ms_seq);
 }
 
+double G1Analytics::predict_concurrent_refine_rate_ms() const {
+  return get_new_prediction(_concurrent_refine_rate_ms_seq);
+}
+
+double G1Analytics::predict_logged_cards_rate_ms() const {
+  return get_new_prediction(_logged_cards_rate_ms_seq);
+}
+
 double G1Analytics::predict_cost_per_logged_card_ms() const {
   return get_new_prediction(_cost_per_logged_card_ms_seq);
 }
--- a/src/hotspot/share/gc/g1/g1Analytics.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1Analytics.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -46,6 +46,8 @@
   double        _prev_collection_pause_end_ms;
 
   TruncatedSeq* _rs_length_diff_seq;
+  TruncatedSeq* _concurrent_refine_rate_ms_seq;
+  TruncatedSeq* _logged_cards_rate_ms_seq;
   TruncatedSeq* _cost_per_logged_card_ms_seq;
   TruncatedSeq* _cost_scan_hcc_seq;
   TruncatedSeq* _young_cards_per_entry_ratio_seq;
@@ -99,6 +101,8 @@
   void report_concurrent_mark_remark_times_ms(double ms);
   void report_concurrent_mark_cleanup_times_ms(double ms);
   void report_alloc_rate_ms(double alloc_rate);
+  void report_concurrent_refine_rate_ms(double cards_per_ms);
+  void report_logged_cards_rate_ms(double cards_per_ms);
   void report_cost_per_logged_card_ms(double cost_per_logged_card_ms);
   void report_cost_scan_hcc(double cost_scan_hcc);
   void report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc);
@@ -116,6 +120,8 @@
   double predict_alloc_rate_ms() const;
   int num_alloc_rate_ms() const;
 
+  double predict_concurrent_refine_rate_ms() const;
+  double predict_logged_cards_rate_ms() const;
   double predict_cost_per_logged_card_ms() const;
 
   double predict_scan_hcc_ms() const;
--- a/src/hotspot/share/gc/g1/g1CollectionSet.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1CollectionSet.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -409,7 +409,7 @@
   guarantee(target_pause_time_ms > 0.0,
             "target_pause_time_ms = %1.6lf should be positive", target_pause_time_ms);
 
-  size_t pending_cards = _policy->pending_cards();
+  size_t pending_cards = _policy->pending_cards_at_gc_start();
   double base_time_ms = _policy->predict_base_elapsed_time_ms(pending_cards);
   double time_remaining_ms = MAX2(target_pause_time_ms - base_time_ms, 0.0);
 
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -412,6 +412,22 @@
   dcqs.notify_if_necessary();
 }
 
+G1ConcurrentRefine::RefinementStats G1ConcurrentRefine::total_refinement_stats() const {
+  struct CollectData : public ThreadClosure {
+    Tickspan _total_time;
+    size_t _total_cards;
+    CollectData() : _total_time(), _total_cards(0) {}
+    virtual void do_thread(Thread* t) {
+      G1ConcurrentRefineThread* crt = static_cast<G1ConcurrentRefineThread*>(t);
+      _total_time += crt->total_refinement_time();
+      _total_cards += crt->total_refined_cards();
+    }
+  } collector;
+  // Cast away const so we can call non-modifying closure on threads.
+  const_cast<G1ConcurrentRefine*>(this)->threads_do(&collector);
+  return RefinementStats(collector._total_time, collector._total_cards);
+}
+
 size_t G1ConcurrentRefine::activation_threshold(uint worker_id) const {
   Thresholds thresholds = calc_thresholds(_green_zone, _yellow_zone, worker_id);
   return activation_level(thresholds);
@@ -432,7 +448,8 @@
   }
 }
 
-bool G1ConcurrentRefine::do_refinement_step(uint worker_id) {
+bool G1ConcurrentRefine::do_refinement_step(uint worker_id,
+                                            size_t* total_refined_cards) {
   G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
 
   size_t curr_cards = dcqs.num_cards();
@@ -448,5 +465,6 @@
 
   // Process the next buffer, if there are enough left.
   return dcqs.refine_completed_buffer_concurrently(worker_id + worker_id_offset(),
-                                                   deactivation_threshold(worker_id));
+                                                   deactivation_threshold(worker_id),
+                                                   total_refined_cards);
 }
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -27,6 +27,7 @@
 
 #include "memory/allocation.hpp"
 #include "utilities/globalDefinitions.hpp"
+#include "utilities/ticks.hpp"
 
 // Forward decl
 class G1ConcurrentRefine;
@@ -118,11 +119,22 @@
   // Adjust refinement thresholds based on work done during the pause and the goal time.
   void adjust(double logged_cards_scan_time, size_t processed_logged_cards, double goal_ms);
 
+  struct RefinementStats {
+    Tickspan _time;
+    size_t _cards;
+    RefinementStats(Tickspan time, size_t cards) : _time(time), _cards(cards) {}
+  };
+
+  RefinementStats total_refinement_stats() const;
+
   // Cards in the dirty card queue set.
   size_t activation_threshold(uint worker_id) const;
   size_t deactivation_threshold(uint worker_id) const;
-  // Perform a single refinement step. Called by the refinement threads when woken up.
-  bool do_refinement_step(uint worker_id);
+
+  // Perform a single refinement step; called by the refinement
+  // threads.  Returns true if there was refinement work available.
+  // Increments *total_refined_cards.
+  bool do_refinement_step(uint worker_id, size_t* total_refined_cards);
 
   // Iterate over all concurrent refinement threads applying the given closure.
   void threads_do(ThreadClosure *tc);
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -37,6 +37,8 @@
   ConcurrentGCThread(),
   _vtime_start(0.0),
   _vtime_accum(0.0),
+  _total_refinement_time(),
+  _total_refined_cards(0),
   _worker_id(worker_id),
   _active(false),
   _monitor(NULL),
@@ -101,11 +103,12 @@
       break;
     }
 
-    size_t buffers_processed = 0;
     log_debug(gc, refine)("Activated worker %d, on threshold: " SIZE_FORMAT ", current: " SIZE_FORMAT,
                           _worker_id, _cr->activation_threshold(_worker_id),
                           G1BarrierSet::dirty_card_queue_set().num_cards());
 
+    size_t start_total_refined_cards = _total_refined_cards; // For logging.
+
     {
       SuspendibleThreadSetJoiner sts_join;
 
@@ -115,20 +118,22 @@
           continue;             // Re-check for termination after yield delay.
         }
 
-        if (!_cr->do_refinement_step(_worker_id)) {
-          break;
+        Ticks start_time = Ticks::now();
+        if (!_cr->do_refinement_step(_worker_id, &_total_refined_cards)) {
+          break;                // No cards to process.
         }
-        ++buffers_processed;
+        _total_refinement_time += (Ticks::now() - start_time);
       }
     }
 
     deactivate();
     log_debug(gc, refine)("Deactivated worker %d, off threshold: " SIZE_FORMAT
-                          ", current: " SIZE_FORMAT ", buffers processed: "
-                          SIZE_FORMAT,
+                          ", current: " SIZE_FORMAT ", refined cards: "
+                          SIZE_FORMAT ", total refined cards: " SIZE_FORMAT,
                           _worker_id, _cr->deactivation_threshold(_worker_id),
                           G1BarrierSet::dirty_card_queue_set().num_cards(),
-                          buffers_processed);
+                          _total_refined_cards - start_total_refined_cards,
+                          _total_refined_cards);
 
     if (os::supports_vtime()) {
       _vtime_accum = (os::elapsedVTime() - _vtime_start);
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -26,6 +26,7 @@
 #define SHARE_GC_G1_G1CONCURRENTREFINETHREAD_HPP
 
 #include "gc/shared/concurrentGCThread.hpp"
+#include "utilities/ticks.hpp"
 
 // Forward Decl.
 class G1ConcurrentRefine;
@@ -38,6 +39,10 @@
 
   double _vtime_start;  // Initial virtual time.
   double _vtime_accum;  // Accumulated virtual time.
+
+  Tickspan _total_refinement_time;
+  size_t _total_refined_cards;
+
   uint _worker_id;
 
   bool _active;
@@ -61,6 +66,9 @@
   // Activate this thread.
   void activate();
 
+  Tickspan total_refinement_time() const { return _total_refinement_time; }
+  size_t total_refined_cards() const { return _total_refined_cards; }
+
   // Total virtual time so far.
   double vtime_accum() { return _vtime_accum; }
 };
--- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -37,6 +37,7 @@
 #include "runtime/atomic.hpp"
 #include "runtime/flags/flagSetting.hpp"
 #include "runtime/mutexLocker.hpp"
+#include "runtime/os.hpp"
 #include "runtime/safepoint.hpp"
 #include "runtime/thread.inline.hpp"
 #include "runtime/threadSMR.hpp"
@@ -62,6 +63,9 @@
   }
 }
 
+// Assumed to be zero by concurrent threads.
+static uint par_ids_start() { return 0; }
+
 G1DirtyCardQueueSet::G1DirtyCardQueueSet(Monitor* cbl_mon,
                                          BufferNode::Allocator* allocator) :
   PtrQueueSet(allocator),
@@ -73,15 +77,16 @@
   _process_completed_buffers(false),
   _max_cards(MaxCardsUnlimited),
   _max_cards_padding(0),
-  _free_ids(0, num_par_ids()),
-  _processed_buffers_mut(0),
-  _processed_buffers_rs_thread(0)
+  _free_ids(par_ids_start(), num_par_ids()),
+  _mutator_refined_cards_counters(NEW_C_HEAP_ARRAY(size_t, num_par_ids(), mtGC))
 {
+  ::memset(_mutator_refined_cards_counters, 0, num_par_ids() * sizeof(size_t));
   _all_active = true;
 }
 
 G1DirtyCardQueueSet::~G1DirtyCardQueueSet() {
   abandon_completed_buffers();
+  FREE_C_HEAP_ARRAY(size_t, _mutator_refined_cards_counters);
 }
 
 // Determines how many mutator threads can process the buffers in parallel.
@@ -89,6 +94,14 @@
   return (uint)os::initial_active_processor_count();
 }
 
+size_t G1DirtyCardQueueSet::total_mutator_refined_cards() const {
+  size_t sum = 0;
+  for (uint i = 0; i < num_par_ids(); ++i) {
+    sum += _mutator_refined_cards_counters[i];
+  }
+  return sum;
+}
+
 void G1DirtyCardQueueSet::handle_zero_index_for_thread(Thread* t) {
   G1ThreadLocalData::dirty_card_queue(t).handle_zero_index();
 }
@@ -213,7 +226,9 @@
   return result;
 }
 
-bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node, uint worker_id) {
+bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node,
+                                        uint worker_id,
+                                        size_t* total_refined_cards) {
   G1RemSet* rem_set = G1CollectedHeap::heap()->rem_set();
   size_t size = buffer_size();
   void** buffer = BufferNode::make_buffer_from_node(node);
@@ -223,6 +238,7 @@
     CardTable::CardValue* cp = static_cast<CardTable::CardValue*>(buffer[i]);
     rem_set->refine_card_concurrently(cp, worker_id);
   }
+  *total_refined_cards += (i - node->index());
   node->set_index(i);
   return i == size;
 }
@@ -260,25 +276,27 @@
 
 bool G1DirtyCardQueueSet::mut_process_buffer(BufferNode* node) {
   uint worker_id = _free_ids.claim_par_id(); // temporarily claim an id
-  bool result = refine_buffer(node, worker_id);
+  uint counter_index = worker_id - par_ids_start();
+  size_t* counter = &_mutator_refined_cards_counters[counter_index];
+  bool result = refine_buffer(node, worker_id, counter);
   _free_ids.release_par_id(worker_id); // release the id
 
   if (result) {
     assert_fully_consumed(node, buffer_size());
-    Atomic::inc(&_processed_buffers_mut);
   }
   return result;
 }
 
-bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id, size_t stop_at) {
+bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id,
+                                                               size_t stop_at,
+                                                               size_t* total_refined_cards) {
   BufferNode* node = get_completed_buffer(stop_at);
   if (node == NULL) {
     return false;
-  } else if (refine_buffer(node, worker_id)) {
+  } else if (refine_buffer(node, worker_id, total_refined_cards)) {
     assert_fully_consumed(node, buffer_size());
     // Done with fully processed buffer.
     deallocate_buffer(node);
-    Atomic::inc(&_processed_buffers_rs_thread);
     return true;
   } else {
     // Return partially processed buffer to the queue.
--- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -78,14 +78,15 @@
 
   void abandon_completed_buffers();
 
-  // Refine the cards in "node" from it's index to buffer_size.
+  // Refine the cards in "node" from its index to buffer_size.
   // Stops processing if SuspendibleThreadSet::should_yield() is true.
   // Returns true if the entire buffer was processed, false if there
   // is a pending yield request.  The node's index is updated to exclude
   // the processed elements, e.g. up to the element before processing
   // stopped, or one past the last element if the entire buffer was
-  // processed.
-  bool refine_buffer(BufferNode* node, uint worker_id);
+  // processed. Increments *total_refined_cards by the number of cards
+  // processed and removed from the buffer.
+  bool refine_buffer(BufferNode* node, uint worker_id, size_t* total_refined_cards);
 
   bool mut_process_buffer(BufferNode* node);
 
@@ -97,10 +98,9 @@
 
   G1FreeIdSet _free_ids;
 
-  // The number of completed buffers processed by mutator and rs thread,
-  // respectively.
-  jint _processed_buffers_mut;
-  jint _processed_buffers_rs_thread;
+  // Array of cumulative dirty cards refined by mutator threads.
+  // Array has an entry per id in _free_ids.
+  size_t* _mutator_refined_cards_counters;
 
 public:
   G1DirtyCardQueueSet(Monitor* cbl_mon, BufferNode::Allocator* allocator);
@@ -158,7 +158,12 @@
   // Stops processing a buffer if SuspendibleThreadSet::should_yield(),
   // returning the incompletely processed buffer to the completed buffer
   // list, for later processing of the remainder.
-  bool refine_completed_buffer_concurrently(uint worker_id, size_t stop_at);
+  //
+  // Increments *total_refined_cards by the number of cards processed and
+  // removed from the buffer.
+  bool refine_completed_buffer_concurrently(uint worker_id,
+                                            size_t stop_at,
+                                            size_t* total_refined_cards);
 
   // If a full collection is happening, reset partial logs, and release
   // completed ones: the full collection will make them all irrelevant.
@@ -181,13 +186,8 @@
     return _max_cards_padding;
   }
 
-  jint processed_buffers_mut() {
-    return _processed_buffers_mut;
-  }
-  jint processed_buffers_rs_thread() {
-    return _processed_buffers_rs_thread;
-  }
-
+  // Total dirty cards refined by mutator threads.
+  size_t total_mutator_refined_cards() const;
 };
 
 inline G1DirtyCardQueueSet* G1DirtyCardQueue::dirty_card_qset() const {
--- a/src/hotspot/share/gc/g1/g1Policy.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1Policy.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -70,7 +70,11 @@
   _free_regions_at_end_of_collection(0),
   _max_rs_length(0),
   _rs_length_prediction(0),
-  _pending_cards(0),
+  _pending_cards_at_gc_start(0),
+  _pending_cards_at_prev_gc_end(0),
+  _total_mutator_refined_cards(0),
+  _total_concurrent_refined_cards(0),
+  _total_concurrent_refinement_time(),
   _bytes_allocated_in_old_since_last_gc(0),
   _initial_mark_to_mixed(),
   _collection_set(NULL),
@@ -442,6 +446,7 @@
   collector_state()->set_in_young_only_phase(false);
   collector_state()->set_in_full_gc(true);
   _collection_set->clear_candidates();
+  record_concurrent_refinement_data(true /* is_full_collection */);
 }
 
 void G1Policy::record_full_collection_end() {
@@ -472,12 +477,67 @@
   _survivor_surv_rate_group->reset();
   update_young_list_max_and_target_length();
   update_rs_length_prediction();
+  _pending_cards_at_prev_gc_end = _g1h->pending_card_num();
 
   _bytes_allocated_in_old_since_last_gc = 0;
 
   record_pause(FullGC, _full_collection_start_sec, end_sec);
 }
 
+void G1Policy::record_concurrent_refinement_data(bool is_full_collection) {
+  _pending_cards_at_gc_start = _g1h->pending_card_num();
+
+  // Record info about concurrent refinement thread processing.
+  G1ConcurrentRefine* cr = _g1h->concurrent_refine();
+  G1ConcurrentRefine::RefinementStats cr_stats = cr->total_refinement_stats();
+
+  Tickspan cr_time = cr_stats._time - _total_concurrent_refinement_time;
+  _total_concurrent_refinement_time = cr_stats._time;
+
+  size_t cr_cards = cr_stats._cards - _total_concurrent_refined_cards;
+  _total_concurrent_refined_cards = cr_stats._cards;
+
+  // Don't update rate if full collection.  We could be in an implicit full
+  // collection after a non-full collection failure, in which case there
+  // wasn't any mutator/cr-thread activity since last recording.  And if
+  // we're in an explicit full collection, the time since the last GC can
+  // be arbitrarily short, so not a very good sample.  Similarly, don't
+  // update the rate if the current sample is empty or time is zero.
+  if (!is_full_collection && (cr_cards > 0) && (cr_time > Tickspan())) {
+    double rate = cr_cards / (cr_time.seconds() * MILLIUNITS);
+    _analytics->report_concurrent_refine_rate_ms(rate);
+  }
+
+  // Record info about mutator thread processing.
+  G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
+  size_t mut_total_cards = dcqs.total_mutator_refined_cards();
+  size_t mut_cards = mut_total_cards - _total_mutator_refined_cards;
+  _total_mutator_refined_cards = mut_total_cards;
+
+  // Record mutator's card logging rate.
+  // Don't update if full collection; see above.
+  if (!is_full_collection) {
+    size_t total_cards = _pending_cards_at_gc_start + cr_cards + mut_cards;
+    assert(_pending_cards_at_prev_gc_end <= total_cards,
+           "untracked cards: last pending: " SIZE_FORMAT
+           ", pending: " SIZE_FORMAT ", conc refine: " SIZE_FORMAT
+           ", mut refine:" SIZE_FORMAT,
+           _pending_cards_at_prev_gc_end, _pending_cards_at_gc_start,
+           cr_cards, mut_cards);
+    size_t logged_cards = total_cards - _pending_cards_at_prev_gc_end;
+    double logging_start_time = _analytics->prev_collection_pause_end_ms();
+    double logging_end_time = Ticks::now().seconds() * MILLIUNITS;
+    double logging_time = logging_end_time - logging_start_time;
+    // Unlike above for conc-refine rate, here we should not require a
+    // non-empty sample, since an application could go some time with only
+    // young-gen or filtered out writes.  But we'll ignore unusually short
+    // sample periods, as they may just pollute the predictions.
+    if (logging_time > 1.0) {   // Require > 1ms sample time.
+      _analytics->report_logged_cards_rate_ms(logged_cards / logging_time);
+    }
+  }
+}
+
 void G1Policy::record_collection_pause_start(double start_time_sec) {
   // We only need to do this here as the policy will only be applied
   // to the GC we're about to start. so, no point is calculating this
@@ -490,7 +550,8 @@
   assert_used_and_recalculate_used_equal(_g1h);
 
   phase_times()->record_cur_collection_start_sec(start_time_sec);
-  _pending_cards = _g1h->pending_card_num();
+
+  record_concurrent_refinement_data(false /* is_full_collection */);
 
   _collection_set->reset_bytes_used_before();
   _bytes_copied_during_gc = 0;
@@ -744,7 +805,7 @@
     // after the mixed gc phase.
     // During mixed gc we do not use them for young gen sizing.
     if (this_pause_was_young_only) {
-      _analytics->report_pending_cards((double) _pending_cards);
+      _analytics->report_pending_cards((double) _pending_cards_at_gc_start);
       _analytics->report_rs_length((double) _max_rs_length);
     }
   }
@@ -798,6 +859,7 @@
     scan_logged_cards_time_goal_ms -= scan_hcc_time_ms;
   }
 
+  _pending_cards_at_prev_gc_end = _g1h->pending_card_num();
   double const logged_cards_time = logged_cards_processing_time();
 
   log_debug(gc, ergo, refine)("Concurrent refinement times: Logged Cards Scan time goal: %1.2fms Logged Cards Scan time: %1.2fms HCC time: %1.2fms",
--- a/src/hotspot/share/gc/g1/g1Policy.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1Policy.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -100,7 +100,11 @@
 
   size_t _rs_length_prediction;
 
-  size_t _pending_cards;
+  size_t _pending_cards_at_gc_start;
+  size_t _pending_cards_at_prev_gc_end;
+  size_t _total_mutator_refined_cards;
+  size_t _total_concurrent_refined_cards;
+  Tickspan _total_concurrent_refinement_time;
 
   // The amount of allocated bytes in old gen during the last mutator and the following
   // young GC phase.
@@ -244,7 +248,15 @@
                         uint base_free_regions, double target_pause_time_ms) const;
 
 public:
-  size_t pending_cards() const { return _pending_cards; }
+  size_t pending_cards_at_gc_start() const { return _pending_cards_at_gc_start; }
+
+  size_t total_concurrent_refined_cards() const {
+    return _total_concurrent_refined_cards;
+  }
+
+  size_t total_mutator_refined_cards() const {
+    return _total_mutator_refined_cards;
+  }
 
   // Calculate the minimum number of old regions we'll add to the CSet
   // during a mixed GC.
@@ -283,6 +295,9 @@
   void record_pause(PauseKind kind, double start, double end);
   // Indicate that we aborted marking before doing any mixed GCs.
   void abort_time_to_mixed_tracking();
+
+  void record_concurrent_refinement_data(bool is_full_collection);
+
 public:
 
   G1Policy(STWGCTimer* gc_timer);
--- a/src/hotspot/share/gc/g1/g1RemSet.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1RemSet.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -489,7 +489,6 @@
   _scan_state(new G1RemSetScanState()),
   _prev_period_summary(),
   _g1h(g1h),
-  _num_conc_refined_cards(0),
   _ct(ct),
   _g1p(_g1h->policy()),
   _hot_card_cache(hot_card_cache) {
@@ -1377,7 +1376,6 @@
 
   G1ConcurrentRefineOopClosure conc_refine_cl(_g1h, worker_id);
   if (r->oops_on_memregion_seq_iterate_careful<false>(dirty_region, &conc_refine_cl) != NULL) {
-    _num_conc_refined_cards++; // Unsynchronized update, only used for logging.
     return;
   }
 
--- a/src/hotspot/share/gc/g1/g1RemSet.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1RemSet.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -61,7 +61,6 @@
   G1RemSetSummary _prev_period_summary;
 
   G1CollectedHeap* _g1h;
-  size_t _num_conc_refined_cards; // Number of cards refined concurrently to the mutator.
 
   G1CardTable*           _ct;
   G1Policy*              _g1p;
@@ -125,8 +124,6 @@
   // Print accumulated summary info from the last time called.
   void print_periodic_summary_info(const char* header, uint period_count);
 
-  size_t num_conc_refined_cards() const { return _num_conc_refined_cards; }
-
   // Rebuilds the remembered set by scanning from bottom to TARS for all regions
   // using the given work gang.
   void rebuild_rem_set(G1ConcurrentMark* cm, WorkGang* workers, uint worker_id_offset);
--- a/src/hotspot/share/gc/g1/g1RemSetSummary.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1RemSetSummary.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -27,6 +27,7 @@
 #include "gc/g1/g1ConcurrentRefine.hpp"
 #include "gc/g1/g1ConcurrentRefineThread.hpp"
 #include "gc/g1/g1DirtyCardQueue.hpp"
+#include "gc/g1/g1Policy.hpp"
 #include "gc/g1/g1RemSet.hpp"
 #include "gc/g1/g1RemSetSummary.hpp"
 #include "gc/g1/g1YoungRemSetSamplingThread.hpp"
@@ -53,18 +54,17 @@
 };
 
 void G1RemSetSummary::update() {
-  _num_conc_refined_cards = _rem_set->num_conc_refined_cards();
-  G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
-  _num_processed_buf_mutator = dcqs.processed_buffers_mut();
-  _num_processed_buf_rs_threads = dcqs.processed_buffers_rs_thread();
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  const G1Policy* policy = g1h->policy();
+  _total_mutator_refined_cards = policy->total_mutator_refined_cards();
+  _total_concurrent_refined_cards = policy->total_concurrent_refined_cards();
 
   _num_coarsenings = HeapRegionRemSet::n_coarsenings();
 
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  G1ConcurrentRefine* cg1r = g1h->concurrent_refine();
   if (_rs_threads_vtimes != NULL) {
     GetRSThreadVTimeClosure p(this);
-    cg1r->threads_do(&p);
+    g1h->concurrent_refine()->threads_do(&p);
   }
   set_sampling_thread_vtime(g1h->sampling_thread()->vtime_accum());
 }
@@ -83,9 +83,8 @@
 
 G1RemSetSummary::G1RemSetSummary() :
   _rem_set(NULL),
-  _num_conc_refined_cards(0),
-  _num_processed_buf_mutator(0),
-  _num_processed_buf_rs_threads(0),
+  _total_mutator_refined_cards(0),
+  _total_concurrent_refined_cards(0),
   _num_coarsenings(0),
   _num_vtimes(G1ConcurrentRefine::max_num_threads()),
   _rs_threads_vtimes(NEW_C_HEAP_ARRAY(double, _num_vtimes, mtGC)),
@@ -96,9 +95,8 @@
 
 G1RemSetSummary::G1RemSetSummary(G1RemSet* rem_set) :
   _rem_set(rem_set),
-  _num_conc_refined_cards(0),
-  _num_processed_buf_mutator(0),
-  _num_processed_buf_rs_threads(0),
+  _total_mutator_refined_cards(0),
+  _total_concurrent_refined_cards(0),
   _num_coarsenings(0),
   _num_vtimes(G1ConcurrentRefine::max_num_threads()),
   _rs_threads_vtimes(NEW_C_HEAP_ARRAY(double, _num_vtimes, mtGC)),
@@ -114,12 +112,10 @@
   assert(other != NULL, "just checking");
   assert(_num_vtimes == other->_num_vtimes, "just checking");
 
-  _num_conc_refined_cards = other->num_conc_refined_cards();
+  _total_mutator_refined_cards = other->total_mutator_refined_cards();
+  _total_concurrent_refined_cards = other->total_concurrent_refined_cards();
 
-  _num_processed_buf_mutator = other->num_processed_buf_mutator();
-  _num_processed_buf_rs_threads = other->num_processed_buf_rs_threads();
-
-  _num_coarsenings = other->_num_coarsenings;
+  _num_coarsenings = other->num_coarsenings();
 
   memcpy(_rs_threads_vtimes, other->_rs_threads_vtimes, sizeof(double) * _num_vtimes);
 
@@ -130,10 +126,8 @@
   assert(other != NULL, "just checking");
   assert(_num_vtimes == other->_num_vtimes, "just checking");
 
-  _num_conc_refined_cards = other->num_conc_refined_cards() - _num_conc_refined_cards;
-
-  _num_processed_buf_mutator = other->num_processed_buf_mutator() - _num_processed_buf_mutator;
-  _num_processed_buf_rs_threads = other->num_processed_buf_rs_threads() - _num_processed_buf_rs_threads;
+  _total_mutator_refined_cards = other->total_mutator_refined_cards() - _total_mutator_refined_cards;
+  _total_concurrent_refined_cards = other->total_concurrent_refined_cards() - _total_concurrent_refined_cards;
 
   _num_coarsenings = other->num_coarsenings() - _num_coarsenings;
 
@@ -356,16 +350,15 @@
 
 void G1RemSetSummary::print_on(outputStream* out) {
   out->print_cr(" Recent concurrent refinement statistics");
-  out->print_cr("  Processed " SIZE_FORMAT " cards concurrently", num_conc_refined_cards());
-  out->print_cr("  Of " SIZE_FORMAT " completed buffers:", num_processed_buf_total());
-  out->print_cr("     " SIZE_FORMAT_W(8) " (%5.1f%%) by concurrent RS threads.",
-                num_processed_buf_total(),
-                percent_of(num_processed_buf_rs_threads(), num_processed_buf_total()));
+  out->print_cr("  Of " SIZE_FORMAT " refined cards:", total_refined_cards());
+  out->print_cr("     " SIZE_FORMAT_W(8) " (%5.1f%%) by concurrent refinement threads.",
+                total_concurrent_refined_cards(),
+                percent_of(total_concurrent_refined_cards(), total_refined_cards()));
   out->print_cr("     " SIZE_FORMAT_W(8) " (%5.1f%%) by mutator threads.",
-                num_processed_buf_mutator(),
-                percent_of(num_processed_buf_mutator(), num_processed_buf_total()));
+                total_mutator_refined_cards(),
+                percent_of(total_mutator_refined_cards(), total_refined_cards()));
   out->print_cr("  Did " SIZE_FORMAT " coarsenings.", num_coarsenings());
-  out->print_cr("  Concurrent RS threads times (s)");
+  out->print_cr("  Concurrent refinement threads times (s)");
   out->print("     ");
   for (uint i = 0; i < _num_vtimes; i++) {
     out->print("    %5.2f", rs_thread_vtime(i));
--- a/src/hotspot/share/gc/g1/g1RemSetSummary.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1RemSetSummary.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -38,9 +38,8 @@
 
   G1RemSet* _rem_set;
 
-  size_t _num_conc_refined_cards;
-  size_t _num_processed_buf_mutator;
-  size_t _num_processed_buf_rs_threads;
+  size_t _total_mutator_refined_cards;
+  size_t _total_concurrent_refined_cards;
 
   size_t _num_coarsenings;
 
@@ -76,20 +75,16 @@
     return _sampling_thread_vtime;
   }
 
-  size_t num_conc_refined_cards() const {
-    return _num_conc_refined_cards;
+  size_t total_mutator_refined_cards() const {
+    return _total_mutator_refined_cards;
   }
 
-  size_t num_processed_buf_mutator() const {
-    return _num_processed_buf_mutator;
+  size_t total_concurrent_refined_cards() const {
+    return _total_concurrent_refined_cards;
   }
 
-  size_t num_processed_buf_rs_threads() const {
-    return _num_processed_buf_rs_threads;
-  }
-
-  size_t num_processed_buf_total() const {
-    return num_processed_buf_mutator() + num_processed_buf_rs_threads();
+  size_t total_refined_cards() const {
+    return total_mutator_refined_cards() + total_concurrent_refined_cards();
   }
 
   size_t num_coarsenings() const {
--- a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -268,7 +268,7 @@
   virtual void clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const;
 
   // Support for GC barriers emitted during parsing
-  virtual bool has_load_barriers() const { return false; }
+  virtual bool has_load_barrier_nodes() const { return false; }
   virtual bool is_gc_barrier_node(Node* node) const { return false; }
   virtual Node* step_over_gc_barrier(Node* c) const { return c; }
   virtual Node* step_over_gc_barrier_ctrl(Node* c) const { return c; }
@@ -291,13 +291,9 @@
   virtual bool is_gc_specific_loop_opts_pass(LoopOptsMode mode) const { return false; }
 
   virtual bool has_special_unique_user(const Node* node) const { return false; }
-  virtual bool needs_anti_dependence_check(const Node* node) const { return true; }
-
-  virtual void barrier_insertion_phase(Compile* C, PhaseIterGVN &igvn) const { }
 
   enum CompilePhase {
     BeforeOptimize,
-    BeforeLateInsertion,
     BeforeMacroExpand,
     BeforeCodeGen
   };
@@ -324,6 +320,10 @@
   virtual Node* split_if_pre(PhaseIdealLoop* phase, Node* n) const { return NULL; }
   virtual bool build_loop_late_post(PhaseIdealLoop* phase, Node* n) const { return false; }
   virtual bool sink_node(PhaseIdealLoop* phase, Node* n, Node* x, Node* x_ctrl, Node* n_ctrl) const { return false; }
+
+  virtual void late_barrier_analysis() const { }
+  virtual int estimate_stub_size() const { return 0; }
+  virtual void emit_stubs(CodeBuffer& cb) const { }
 };
 
 #endif // SHARE_GC_SHARED_C2_BARRIERSETC2_HPP
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -103,7 +103,7 @@
   static const TypeFunc* write_ref_field_pre_entry_Type();
   static const TypeFunc* shenandoah_clone_barrier_Type();
   static const TypeFunc* shenandoah_load_reference_barrier_Type();
-  virtual bool has_load_barriers() const { return true; }
+  virtual bool has_load_barrier_nodes() const { return true; }
 
   // This is the entry-point for the backend to perform accesses through the Access API.
   virtual void clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const;
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -1275,12 +1275,32 @@
         // The rethrow call may have too many projections to be
         // properly handled here. Given there's no reason for a
         // barrier to depend on the call, move it above the call
-        if (phase->get_ctrl(val) == ctrl) {
-          assert(val->Opcode() == Op_DecodeN, "unexpected node");
-          assert(phase->is_dominator(phase->get_ctrl(val->in(1)), call->in(0)), "Load is too low");
-          phase->set_ctrl(val, call->in(0));
-        }
-        phase->set_ctrl(lrb, call->in(0));
+        stack.push(lrb, 0);
+        do {
+          Node* n = stack.node();
+          uint idx = stack.index();
+          if (idx < n->req()) {
+            Node* in = n->in(idx);
+            stack.set_index(idx+1);
+            if (in != NULL) {
+              if (phase->has_ctrl(in)) {
+                if (phase->is_dominator(call, phase->get_ctrl(in))) {
+#ifdef ASSERT
+                  for (uint i = 0; i < stack.size(); i++) {
+                    assert(stack.node_at(i) != in, "node shouldn't have been seen yet");
+                  }
+#endif
+                  stack.push(in, 0);
+                }
+              } else {
+                assert(phase->is_dominator(in, call->in(0)), "no dependency on the call");
+              }
+            }
+          } else {
+            phase->set_ctrl(n, call->in(0));
+            stack.pop();
+          }
+        } while(stack.size() > 0);
         continue;
       }
       CallProjections projs = call->extract_projections(false, false);
--- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -69,7 +69,8 @@
   // enough, but we also do not want to steal too much CPU from the concurrently running
   // application. Using 1/4 of available threads for concurrent GC seems a good
   // compromise here.
-  if (FLAG_IS_DEFAULT(ConcGCThreads)) {
+  bool ergo_conc = FLAG_IS_DEFAULT(ConcGCThreads);
+  if (ergo_conc) {
     FLAG_SET_DEFAULT(ConcGCThreads, MAX2(1, os::processor_count() / 4));
   }
 
@@ -82,7 +83,8 @@
   // that will overwhelm the OS scheduler. Using 1/2 of available threads seems to be a fair
   // compromise here. Due to implementation constraints, it should not be lower than
   // the number of concurrent threads.
-  if (FLAG_IS_DEFAULT(ParallelGCThreads)) {
+  bool ergo_parallel = FLAG_IS_DEFAULT(ParallelGCThreads);
+  if (ergo_parallel) {
     FLAG_SET_DEFAULT(ParallelGCThreads, MAX2(1, os::processor_count() / 2));
   }
 
@@ -90,9 +92,21 @@
     vm_exit_during_initialization("Shenandoah expects ParallelGCThreads > 0, check -XX:ParallelGCThreads=#");
   }
 
+  // Make sure ergonomic decisions do not break the thread count invariants.
+  // This may happen when user overrides one of the flags, but not the other.
+  // When that happens, we want to adjust the setting that was set ergonomically.
   if (ParallelGCThreads < ConcGCThreads) {
-    warning("Shenandoah expects ConcGCThreads <= ParallelGCThreads, adjusting ParallelGCThreads automatically");
-    FLAG_SET_DEFAULT(ParallelGCThreads, ConcGCThreads);
+    if (ergo_conc && !ergo_parallel) {
+      FLAG_SET_DEFAULT(ConcGCThreads, ParallelGCThreads);
+    } else if (!ergo_conc && ergo_parallel) {
+      FLAG_SET_DEFAULT(ParallelGCThreads, ConcGCThreads);
+    } else if (ergo_conc && ergo_parallel) {
+      // Should not happen, check the ergonomic computation above. Fail with relevant error.
+      vm_exit_during_initialization("Shenandoah thread count ergonomic error");
+    } else {
+      // User settings error, report and ask user to rectify.
+      vm_exit_during_initialization("Shenandoah expects ConcGCThreads <= ParallelGCThreads, check -XX:ParallelGCThreads, -XX:ConcGCThreads");
+    }
   }
 
   if (FLAG_IS_DEFAULT(ParallelRefProcEnabled)) {
--- a/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -314,9 +314,6 @@
   diagnostic(bool, ShenandoahTerminationTrace, false,                       \
           "Tracing task termination timings")                               \
                                                                             \
-  develop(bool, ShenandoahVerifyObjectEquals, false,                        \
-          "Verify that == and != are not used on oops. Only in fastdebug")  \
-                                                                            \
   diagnostic(bool, ShenandoahAlwaysPreTouch, false,                         \
           "Pre-touch heap memory, overrides global AlwaysPreTouch")         \
                                                                             \
--- a/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -22,451 +22,157 @@
  */
 
 #include "precompiled.hpp"
-#include "opto/castnode.hpp"
+#include "classfile/javaClasses.hpp"
+#include "gc/z/c2/zBarrierSetC2.hpp"
+#include "gc/z/zBarrierSet.hpp"
+#include "gc/z/zBarrierSetAssembler.hpp"
+#include "gc/z/zBarrierSetRuntime.hpp"
+#include "opto/block.hpp"
 #include "opto/compile.hpp"
-#include "opto/escape.hpp"
 #include "opto/graphKit.hpp"
-#include "opto/loopnode.hpp"
 #include "opto/machnode.hpp"
-#include "opto/macro.hpp"
 #include "opto/memnode.hpp"
-#include "opto/movenode.hpp"
 #include "opto/node.hpp"
-#include "opto/phase.hpp"
-#include "opto/phaseX.hpp"
+#include "opto/regalloc.hpp"
 #include "opto/rootnode.hpp"
-#include "opto/type.hpp"
-#include "utilities/copy.hpp"
 #include "utilities/growableArray.hpp"
 #include "utilities/macros.hpp"
-#include "gc/z/zBarrierSet.hpp"
-#include "gc/z/c2/zBarrierSetC2.hpp"
-#include "gc/z/zThreadLocalData.hpp"
-#include "gc/z/zBarrierSetRuntime.hpp"
 
-ZBarrierSetC2State::ZBarrierSetC2State(Arena* comp_arena) :
-    _load_barrier_nodes(new (comp_arena) GrowableArray<LoadBarrierNode*>(comp_arena, 8,  0, NULL)) {}
+class ZBarrierSetC2State : public ResourceObj {
+private:
+  GrowableArray<ZLoadBarrierStubC2*>* _stubs;
+  Node_Array                          _live;
 
-int ZBarrierSetC2State::load_barrier_count() const {
-  return _load_barrier_nodes->length();
+public:
+  ZBarrierSetC2State(Arena* arena) :
+    _stubs(new (arena) GrowableArray<ZLoadBarrierStubC2*>(arena, 8,  0, NULL)),
+    _live(arena) {}
+
+  GrowableArray<ZLoadBarrierStubC2*>* stubs() {
+    return _stubs;
+  }
+
+  RegMask* live(const Node* node) {
+    if (!node->is_Mach()) {
+      // Don't need liveness for non-MachNodes
+      return NULL;
+    }
+
+    const MachNode* const mach = node->as_Mach();
+    if (mach->barrier_data() != ZLoadBarrierStrong &&
+        mach->barrier_data() != ZLoadBarrierWeak) {
+      // Don't need liveness data for nodes without barriers
+      return NULL;
+    }
+
+    RegMask* live = (RegMask*)_live[node->_idx];
+    if (live == NULL) {
+      live = new (Compile::current()->comp_arena()->Amalloc_D(sizeof(RegMask))) RegMask();
+      _live.map(node->_idx, (Node*)live);
+    }
+
+    return live;
+  }
+};
+
+static ZBarrierSetC2State* barrier_set_state() {
+  return reinterpret_cast<ZBarrierSetC2State*>(Compile::current()->barrier_set_state());
 }
 
-void ZBarrierSetC2State::add_load_barrier_node(LoadBarrierNode * n) {
-  assert(!_load_barrier_nodes->contains(n), " duplicate entry in expand list");
-  _load_barrier_nodes->append(n);
+ZLoadBarrierStubC2* ZLoadBarrierStubC2::create(const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) {
+  ZLoadBarrierStubC2* const stub = new (Compile::current()->comp_arena()) ZLoadBarrierStubC2(node, ref_addr, ref, tmp, weak);
+  if (!Compile::current()->in_scratch_emit_size()) {
+    barrier_set_state()->stubs()->append(stub);
+  }
+
+  return stub;
 }
 
-void ZBarrierSetC2State::remove_load_barrier_node(LoadBarrierNode * n) {
-  // this function may be called twice for a node so check
-  // that the node is in the array before attempting to remove it
-  if (_load_barrier_nodes->contains(n)) {
-    _load_barrier_nodes->remove(n);
-  }
+ZLoadBarrierStubC2::ZLoadBarrierStubC2(const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) :
+    _node(node),
+    _ref_addr(ref_addr),
+    _ref(ref),
+    _tmp(tmp),
+    _weak(weak),
+    _entry(),
+    _continuation() {
+  assert_different_registers(ref, ref_addr.base());
+  assert_different_registers(ref, ref_addr.index());
 }
 
-LoadBarrierNode* ZBarrierSetC2State::load_barrier_node(int idx) const {
-  return _load_barrier_nodes->at(idx);
+Address ZLoadBarrierStubC2::ref_addr() const {
+  return _ref_addr;
+}
+
+Register ZLoadBarrierStubC2::ref() const {
+  return _ref;
+}
+
+Register ZLoadBarrierStubC2::tmp() const {
+  return _tmp;
+}
+
+address ZLoadBarrierStubC2::slow_path() const {
+  const DecoratorSet decorators = _weak ? ON_WEAK_OOP_REF : ON_STRONG_OOP_REF;
+  return ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators);
+}
+
+RegMask& ZLoadBarrierStubC2::live() const {
+  return *barrier_set_state()->live(_node);
+}
+
+Label* ZLoadBarrierStubC2::entry() {
+  // The _entry will never be bound when in_scratch_emit_size() is true.
+  // However, we still need to return a label that is not bound now, but
+  // will eventually be bound. Any lable will do, as it will only act as
+  // a placeholder, so we return the _continuation label.
+  return Compile::current()->in_scratch_emit_size() ? &_continuation : &_entry;
+}
+
+Label* ZLoadBarrierStubC2::continuation() {
+  return &_continuation;
 }
 
 void* ZBarrierSetC2::create_barrier_state(Arena* comp_arena) const {
-  return new(comp_arena) ZBarrierSetC2State(comp_arena);
+  return new (comp_arena) ZBarrierSetC2State(comp_arena);
 }
 
-ZBarrierSetC2State* ZBarrierSetC2::state() const {
-  return reinterpret_cast<ZBarrierSetC2State*>(Compile::current()->barrier_set_state());
+void ZBarrierSetC2::late_barrier_analysis() const {
+  analyze_dominating_barriers();
+  compute_liveness_at_stubs();
 }
 
-bool ZBarrierSetC2::is_gc_barrier_node(Node* node) const {
-  // 1. This step follows potential oop projections of a load barrier before expansion
-  if (node->is_Proj()) {
-    node = node->in(0);
+void ZBarrierSetC2::emit_stubs(CodeBuffer& cb) const {
+  MacroAssembler masm(&cb);
+  GrowableArray<ZLoadBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
+
+  for (int i = 0; i < stubs->length(); i++) {
+    // Make sure there is enough space in the code buffer
+    if (cb.insts()->maybe_expand_to_ensure_remaining(Compile::MAX_inst_size) && cb.blob() == NULL) {
+      ciEnv::current()->record_failure("CodeCache is full");
+      return;
+    }
+
+    ZBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, stubs->at(i));
   }
 
-  // 2. This step checks for unexpanded load barriers
-  if (node->is_LoadBarrier()) {
-    return true;
+  masm.flush();
+}
+
+int ZBarrierSetC2::estimate_stub_size() const {
+  Compile* const C = Compile::current();
+  BufferBlob* const blob = C->scratch_buffer_blob();
+  GrowableArray<ZLoadBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
+  int size = 0;
+
+  for (int i = 0; i < stubs->length(); i++) {
+    CodeBuffer cb(blob->content_begin(), (address)C->scratch_locs_memory() - blob->content_begin());
+    MacroAssembler masm(&cb);
+    ZBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, stubs->at(i));
+    size += cb.insts_size();
   }
 
-  // 3. This step checks for the phi corresponding to an optimized load barrier expansion
-  if (node->is_Phi()) {
-    PhiNode* phi = node->as_Phi();
-    Node* n = phi->in(1);
-    if (n != NULL && n->is_LoadBarrierSlowReg()) {
-      return true;
-    }
-  }
-
-  return false;
-}
-
-void ZBarrierSetC2::register_potential_barrier_node(Node* node) const {
-  if (node->is_LoadBarrier()) {
-    state()->add_load_barrier_node(node->as_LoadBarrier());
-  }
-}
-
-void ZBarrierSetC2::unregister_potential_barrier_node(Node* node) const {
-  if (node->is_LoadBarrier()) {
-    state()->remove_load_barrier_node(node->as_LoadBarrier());
-  }
-}
-
-void ZBarrierSetC2::eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const {
-  // Remove useless LoadBarrier nodes
-  ZBarrierSetC2State* s = state();
-  for (int i = s->load_barrier_count()-1; i >= 0; i--) {
-    LoadBarrierNode* n = s->load_barrier_node(i);
-    if (!useful.member(n)) {
-      unregister_potential_barrier_node(n);
-    }
-  }
-}
-
-void ZBarrierSetC2::enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const {
-  if (node->is_LoadBarrier() && !node->as_LoadBarrier()->has_true_uses()) {
-    igvn->_worklist.push(node);
-  }
-}
-
-const uint NoBarrier       = 0;
-const uint RequireBarrier  = 1;
-const uint WeakBarrier     = 2;
-const uint ExpandedBarrier = 4;
-
-static bool load_require_barrier(LoadNode* load)      { return (load->barrier_data() & RequireBarrier)  == RequireBarrier; }
-static bool load_has_weak_barrier(LoadNode* load)     { return (load->barrier_data() & WeakBarrier)     == WeakBarrier; }
-static bool load_has_expanded_barrier(LoadNode* load) { return (load->barrier_data() & ExpandedBarrier) == ExpandedBarrier; }
-static void load_set_expanded_barrier(LoadNode* load) { return load->set_barrier_data(ExpandedBarrier); }
-
-static void load_set_barrier(LoadNode* load, bool weak) {
-  if (weak) {
-    load->set_barrier_data(RequireBarrier | WeakBarrier);
-  } else {
-    load->set_barrier_data(RequireBarrier);
-  }
-}
-
-// == LoadBarrierNode ==
-
-LoadBarrierNode::LoadBarrierNode(Compile* C,
-                                 Node* c,
-                                 Node* mem,
-                                 Node* val,
-                                 Node* adr,
-                                 bool weak) :
-    MultiNode(Number_of_Inputs),
-    _weak(weak) {
-  init_req(Control, c);
-  init_req(Memory, mem);
-  init_req(Oop, val);
-  init_req(Address, adr);
-  init_req(Similar, C->top());
-
-  init_class_id(Class_LoadBarrier);
-  BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
-  bs->register_potential_barrier_node(this);
-}
-
-uint LoadBarrierNode::size_of() const {
-  return sizeof(*this);
-}
-
-bool LoadBarrierNode::cmp(const Node& n) const {
-  ShouldNotReachHere();
-  return false;
-}
-
-const Type *LoadBarrierNode::bottom_type() const {
-  const Type** floadbarrier = (const Type **)(Compile::current()->type_arena()->Amalloc_4((Number_of_Outputs)*sizeof(Type*)));
-  Node* in_oop = in(Oop);
-  floadbarrier[Control] = Type::CONTROL;
-  floadbarrier[Memory] = Type::MEMORY;
-  floadbarrier[Oop] = in_oop == NULL ? Type::TOP : in_oop->bottom_type();
-  return TypeTuple::make(Number_of_Outputs, floadbarrier);
-}
-
-const TypePtr* LoadBarrierNode::adr_type() const {
-  ShouldNotReachHere();
-  return NULL;
-}
-
-const Type *LoadBarrierNode::Value(PhaseGVN *phase) const {
-  const Type** floadbarrier = (const Type **)(phase->C->type_arena()->Amalloc_4((Number_of_Outputs)*sizeof(Type*)));
-  const Type* val_t = phase->type(in(Oop));
-  floadbarrier[Control] = Type::CONTROL;
-  floadbarrier[Memory]  = Type::MEMORY;
-  floadbarrier[Oop]     = val_t;
-  return TypeTuple::make(Number_of_Outputs, floadbarrier);
-}
-
-bool LoadBarrierNode::is_dominator(PhaseIdealLoop* phase, bool linear_only, Node *d, Node *n) {
-  if (phase != NULL) {
-    return phase->is_dominator(d, n);
-  }
-
-  for (int i = 0; i < 10 && n != NULL; i++) {
-    n = IfNode::up_one_dom(n, linear_only);
-    if (n == d) {
-      return true;
-    }
-  }
-
-  return false;
-}
-
-LoadBarrierNode* LoadBarrierNode::has_dominating_barrier(PhaseIdealLoop* phase, bool linear_only, bool look_for_similar) {
-  if (is_weak()) {
-    // Weak barriers can't be eliminated
-    return NULL;
-  }
-
-  Node* val = in(LoadBarrierNode::Oop);
-  if (in(Similar)->is_Proj() && in(Similar)->in(0)->is_LoadBarrier()) {
-    LoadBarrierNode* lb = in(Similar)->in(0)->as_LoadBarrier();
-    assert(lb->in(Address) == in(Address), "");
-    // Load barrier on Similar edge dominates so if it now has the Oop field it can replace this barrier.
-    if (lb->in(Oop) == in(Oop)) {
-      return lb;
-    }
-    // Follow chain of load barrier through Similar edges
-    while (!lb->in(Similar)->is_top()) {
-      lb = lb->in(Similar)->in(0)->as_LoadBarrier();
-      assert(lb->in(Address) == in(Address), "");
-    }
-    if (lb != in(Similar)->in(0)) {
-      return lb;
-    }
-  }
-  for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) {
-    Node* u = val->fast_out(i);
-    if (u != this && u->is_LoadBarrier() && u->in(Oop) == val && u->as_LoadBarrier()->has_true_uses()) {
-      Node* this_ctrl = in(LoadBarrierNode::Control);
-      Node* other_ctrl = u->in(LoadBarrierNode::Control);
-      if (is_dominator(phase, linear_only, other_ctrl, this_ctrl)) {
-        return u->as_LoadBarrier();
-      }
-    }
-  }
-
-  if (can_be_eliminated()) {
-    return NULL;
-  }
-
-  if (!look_for_similar) {
-    return NULL;
-  }
-
-  Node* addr = in(LoadBarrierNode::Address);
-  for (DUIterator_Fast imax, i = addr->fast_outs(imax); i < imax; i++) {
-    Node* u = addr->fast_out(i);
-    if (u != this && u->is_LoadBarrier() && u->as_LoadBarrier()->has_true_uses()) {
-      Node* this_ctrl = in(LoadBarrierNode::Control);
-      Node* other_ctrl = u->in(LoadBarrierNode::Control);
-      if (is_dominator(phase, linear_only, other_ctrl, this_ctrl)) {
-        ResourceMark rm;
-        Unique_Node_List wq;
-        wq.push(in(LoadBarrierNode::Control));
-        bool ok = true;
-        bool dom_found = false;
-        for (uint next = 0; next < wq.size(); ++next) {
-          Node *n = wq.at(next);
-          if (n->is_top()) {
-            return NULL;
-          }
-          assert(n->is_CFG(), "");
-          if (n->is_SafePoint()) {
-            ok = false;
-            break;
-          }
-          if (n == u) {
-            dom_found = true;
-            continue;
-          }
-          if (n->is_Region()) {
-            for (uint i = 1; i < n->req(); i++) {
-              Node* m = n->in(i);
-              if (m != NULL) {
-                wq.push(m);
-              }
-            }
-          } else {
-            Node* m = n->in(0);
-            if (m != NULL) {
-              wq.push(m);
-            }
-          }
-        }
-        if (ok) {
-          assert(dom_found, "");
-          return u->as_LoadBarrier();
-        }
-        break;
-      }
-    }
-  }
-
-  return NULL;
-}
-
-void LoadBarrierNode::push_dominated_barriers(PhaseIterGVN* igvn) const {
-  // Change to that barrier may affect a dominated barrier so re-push those
-  assert(!is_weak(), "sanity");
-  Node* val = in(LoadBarrierNode::Oop);
-
-  for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) {
-    Node* u = val->fast_out(i);
-    if (u != this && u->is_LoadBarrier() && u->in(Oop) == val) {
-      Node* this_ctrl = in(Control);
-      Node* other_ctrl = u->in(Control);
-      if (is_dominator(NULL, false, this_ctrl, other_ctrl)) {
-        igvn->_worklist.push(u);
-      }
-    }
-
-    Node* addr = in(LoadBarrierNode::Address);
-    for (DUIterator_Fast imax, i = addr->fast_outs(imax); i < imax; i++) {
-      Node* u = addr->fast_out(i);
-      if (u != this && u->is_LoadBarrier() && u->in(Similar)->is_top()) {
-        Node* this_ctrl = in(Control);
-        Node* other_ctrl = u->in(Control);
-        if (is_dominator(NULL, false, this_ctrl, other_ctrl)) {
-          igvn->_worklist.push(u);
-        }
-      }
-    }
-  }
-}
-
-Node *LoadBarrierNode::Identity(PhaseGVN *phase) {
-  LoadBarrierNode* dominating_barrier = has_dominating_barrier(NULL, true, false);
-  if (dominating_barrier != NULL) {
-    assert(!is_weak(), "Weak barriers cant be eliminated");
-    assert(dominating_barrier->in(Oop) == in(Oop), "");
-    return dominating_barrier;
-  }
-
-  return this;
-}
-
-Node *LoadBarrierNode::Ideal(PhaseGVN *phase, bool can_reshape) {
-  if (remove_dead_region(phase, can_reshape)) {
-    return this;
-  }
-
-  Node *val = in(Oop);
-  Node *mem = in(Memory);
-  Node *ctrl = in(Control);
-
-  assert(val->Opcode() != Op_LoadN, "");
-  assert(val->Opcode() != Op_DecodeN, "");
-
-  if (mem->is_MergeMem()) {
-    Node *new_mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
-    set_req(Memory, new_mem);
-    if (mem->outcnt() == 0 && can_reshape) {
-      phase->is_IterGVN()->_worklist.push(mem);
-    }
-    return this;
-  }
-
-  LoadBarrierNode *dominating_barrier = NULL;
-  if (!is_weak()) {
-    dominating_barrier = has_dominating_barrier(NULL, !can_reshape, !phase->C->major_progress());
-    if (dominating_barrier != NULL && dominating_barrier->in(Oop) != in(Oop)) {
-      assert(in(Address) == dominating_barrier->in(Address), "");
-      set_req(Similar, dominating_barrier->proj_out(Oop));
-      return this;
-    }
-  }
-
-  bool eliminate = can_reshape && (dominating_barrier != NULL || !has_true_uses());
-  if (eliminate) {
-    if (can_reshape) {
-      PhaseIterGVN* igvn = phase->is_IterGVN();
-      Node* out_ctrl = proj_out_or_null(Control);
-      Node* out_res = proj_out_or_null(Oop);
-
-      if (out_ctrl != NULL) {
-        igvn->replace_node(out_ctrl, ctrl);
-      }
-
-      // That transformation may cause the Similar edge on the load barrier to be invalid
-      fix_similar_in_uses(igvn);
-      if (out_res != NULL) {
-        if (dominating_barrier != NULL) {
-          assert(!is_weak(), "Sanity");
-          igvn->replace_node(out_res, dominating_barrier->proj_out(Oop));
-        } else {
-          igvn->replace_node(out_res, val);
-        }
-      }
-    }
-    return new ConINode(TypeInt::ZERO);
-  }
-
-  // If the Similar edge is no longer a load barrier, clear it
-  Node* similar = in(Similar);
-  if (!similar->is_top() && !(similar->is_Proj() && similar->in(0)->is_LoadBarrier())) {
-    set_req(Similar, phase->C->top());
-    return this;
-  }
-
-  if (can_reshape && !is_weak()) {
-    // If this barrier is linked through the Similar edge by a
-    // dominated barrier and both barriers have the same Oop field,
-    // the dominated barrier can go away, so push it for reprocessing.
-    // We also want to avoid a barrier to depend on another dominating
-    // barrier through its Similar edge that itself depend on another
-    // barrier through its Similar edge and rather have the first
-    // depend on the third.
-    PhaseIterGVN* igvn = phase->is_IterGVN();
-    Node* out_res = proj_out(Oop);
-    for (DUIterator_Fast imax, i = out_res->fast_outs(imax); i < imax; i++) {
-      Node* u = out_res->fast_out(i);
-      if (u->is_LoadBarrier() && u->in(Similar) == out_res &&
-          (u->in(Oop) == val || !u->in(Similar)->is_top())) {
-        assert(!u->as_LoadBarrier()->is_weak(), "Sanity");
-        igvn->_worklist.push(u);
-      }
-    }
-    push_dominated_barriers(igvn);
-  }
-
-  return NULL;
-}
-
-uint LoadBarrierNode::match_edge(uint idx) const {
-  ShouldNotReachHere();
-  return 0;
-}
-
-void LoadBarrierNode::fix_similar_in_uses(PhaseIterGVN* igvn) {
-  Node* out_res = proj_out_or_null(Oop);
-  if (out_res == NULL) {
-    return;
-  }
-
-  for (DUIterator_Fast imax, i = out_res->fast_outs(imax); i < imax; i++) {
-    Node* u = out_res->fast_out(i);
-    if (u->is_LoadBarrier() && u->in(Similar) == out_res) {
-      igvn->replace_input_of(u, Similar, igvn->C->top());
-      --i;
-      --imax;
-    }
-  }
-}
-
-bool LoadBarrierNode::has_true_uses() const {
-  Node* out_res = proj_out_or_null(Oop);
-  if (out_res != NULL) {
-    for (DUIterator_Fast imax, i = out_res->fast_outs(imax); i < imax; i++) {
-      Node *u = out_res->fast_out(i);
-      if (!u->is_LoadBarrier() || u->in(Similar) != out_res) {
-        return true;
-      }
-    }
-  }
-  return false;
+  return size;
 }
 
 static bool barrier_needed(C2Access& access) {
@@ -474,1223 +180,252 @@
 }
 
 Node* ZBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
-  Node* p = BarrierSetC2::load_at_resolved(access, val_type);
-  if (!barrier_needed(access)) {
-    return p;
+  Node* result = BarrierSetC2::load_at_resolved(access, val_type);
+  if (barrier_needed(access) && access.raw_access()->is_Mem()) {
+    if ((access.decorators() & ON_WEAK_OOP_REF) != 0) {
+      access.raw_access()->as_Load()->set_barrier_data(ZLoadBarrierWeak);
+    } else {
+      access.raw_access()->as_Load()->set_barrier_data(ZLoadBarrierStrong);
+    }
   }
 
-  bool weak = (access.decorators() & ON_WEAK_OOP_REF) != 0;
-  if (p->isa_Load()) {
-    load_set_barrier(p->as_Load(), weak);
-  }
-  return p;
+  return result;
 }
 
 Node* ZBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                     Node* new_val, const Type* val_type) const {
   Node* result = BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type);
-  LoadStoreNode* lsn = result->as_LoadStore();
   if (barrier_needed(access)) {
-    lsn->set_has_barrier();
+    access.raw_access()->as_LoadStore()->set_barrier_data(ZLoadBarrierStrong);
   }
-  return lsn;
+  return result;
 }
 
 Node* ZBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                      Node* new_val, const Type* value_type) const {
   Node* result = BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
-  LoadStoreNode* lsn = result->as_LoadStore();
   if (barrier_needed(access)) {
-    lsn->set_has_barrier();
+    access.raw_access()->as_LoadStore()->set_barrier_data(ZLoadBarrierStrong);
   }
-  return lsn;
+  return result;
 }
 
 Node* ZBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* val_type) const {
   Node* result = BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type);
-  LoadStoreNode* lsn = result->as_LoadStore();
   if (barrier_needed(access)) {
-    lsn->set_has_barrier();
+    access.raw_access()->as_LoadStore()->set_barrier_data(ZLoadBarrierStrong);
   }
-  return lsn;
+  return result;
 }
 
-// == Macro Expansion ==
-
-// Optimized, low spill, loadbarrier variant using stub specialized on register used
-void ZBarrierSetC2::expand_loadbarrier_node(PhaseMacroExpand* phase, LoadBarrierNode* barrier) const {
-  PhaseIterGVN &igvn = phase->igvn();
-  float unlikely  = PROB_UNLIKELY(0.999);
-
-  Node* in_ctrl = barrier->in(LoadBarrierNode::Control);
-  Node* in_mem = barrier->in(LoadBarrierNode::Memory);
-  Node* in_val = barrier->in(LoadBarrierNode::Oop);
-  Node* in_adr = barrier->in(LoadBarrierNode::Address);
-
-  Node* out_ctrl = barrier->proj_out(LoadBarrierNode::Control);
-  Node* out_res = barrier->proj_out(LoadBarrierNode::Oop);
-
-  assert(barrier->in(LoadBarrierNode::Oop) != NULL, "oop to loadbarrier node cannot be null");
-
-  Node* jthread = igvn.transform(new ThreadLocalNode());
-  Node* adr = phase->basic_plus_adr(jthread, in_bytes(ZThreadLocalData::address_bad_mask_offset()));
-  Node* bad_mask = igvn.transform(LoadNode::make(igvn, in_ctrl, in_mem, adr,
-                                                 TypeRawPtr::BOTTOM, TypeX_X, TypeX_X->basic_type(),
-                                                 MemNode::unordered));
-  Node* cast = igvn.transform(new CastP2XNode(in_ctrl, in_val));
-  Node* obj_masked = igvn.transform(new AndXNode(cast, bad_mask));
-  Node* cmp = igvn.transform(new CmpXNode(obj_masked, igvn.zerocon(TypeX_X->basic_type())));
-  Node *bol = igvn.transform(new BoolNode(cmp, BoolTest::ne))->as_Bool();
-  IfNode* iff = igvn.transform(new IfNode(in_ctrl, bol, unlikely, COUNT_UNKNOWN))->as_If();
-  Node* then = igvn.transform(new IfTrueNode(iff));
-  Node* elsen = igvn.transform(new IfFalseNode(iff));
-
-  Node* new_loadp = igvn.transform(new LoadBarrierSlowRegNode(then, in_adr, in_val,
-                                                              (const TypePtr*) in_val->bottom_type(), barrier->is_weak()));
-
-  // Create the final region/phi pair to converge cntl/data paths to downstream code
-  Node* result_region = igvn.transform(new RegionNode(3));
-  result_region->set_req(1, then);
-  result_region->set_req(2, elsen);
-
-  Node* result_phi = igvn.transform(new PhiNode(result_region, TypeInstPtr::BOTTOM));
-  result_phi->set_req(1, new_loadp);
-  result_phi->set_req(2, barrier->in(LoadBarrierNode::Oop));
-
-  igvn.replace_node(out_ctrl, result_region);
-  igvn.replace_node(out_res, result_phi);
-
-  assert(barrier->outcnt() == 0,"LoadBarrier macro node has non-null outputs after expansion!");
-
-  igvn.remove_dead_node(barrier);
-  igvn.remove_dead_node(out_ctrl);
-  igvn.remove_dead_node(out_res);
-
-  assert(is_gc_barrier_node(result_phi), "sanity");
-  assert(step_over_gc_barrier(result_phi) == in_val, "sanity");
-
-  phase->C->print_method(PHASE_BARRIER_EXPANSION, 4, barrier->_idx);
+bool ZBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type,
+                                                    bool is_clone, ArrayCopyPhase phase) const {
+  return type == T_OBJECT || type == T_ARRAY;
 }
 
-bool ZBarrierSetC2::expand_barriers(Compile* C, PhaseIterGVN& igvn) const {
-  ZBarrierSetC2State* s = state();
-  if (s->load_barrier_count() > 0) {
-    PhaseMacroExpand macro(igvn);
+// == Dominating barrier elision ==
 
-    int skipped = 0;
-    while (s->load_barrier_count() > skipped) {
-      int load_barrier_count = s->load_barrier_count();
-      LoadBarrierNode * n = s->load_barrier_node(load_barrier_count-1-skipped);
-      if (igvn.type(n) == Type::TOP || (n->in(0) != NULL && n->in(0)->is_top())) {
-        // Node is unreachable, so don't try to expand it
-        s->remove_load_barrier_node(n);
-        continue;
-      }
-      if (!n->can_be_eliminated()) {
-        skipped++;
-        continue;
-      }
-      expand_loadbarrier_node(&macro, n);
-      assert(s->load_barrier_count() < load_barrier_count, "must have deleted a node from load barrier list");
-      if (C->failing()) {
-        return true;
-      }
-    }
-    while (s->load_barrier_count() > 0) {
-      int load_barrier_count = s->load_barrier_count();
-      LoadBarrierNode* n = s->load_barrier_node(load_barrier_count - 1);
-      assert(!(igvn.type(n) == Type::TOP || (n->in(0) != NULL && n->in(0)->is_top())), "should have been processed already");
-      assert(!n->can_be_eliminated(), "should have been processed already");
-      expand_loadbarrier_node(&macro, n);
-      assert(s->load_barrier_count() < load_barrier_count, "must have deleted a node from load barrier list");
-      if (C->failing()) {
-        return true;
-      }
-    }
-    igvn.set_delay_transform(false);
-    igvn.optimize();
-    if (C->failing()) {
+static bool block_has_safepoint(const Block* block, uint from, uint to) {
+  for (uint i = from; i < to; i++) {
+    if (block->get_node(i)->is_MachSafePoint()) {
+      // Safepoint found
       return true;
     }
   }
 
+  // Safepoint not found
   return false;
 }
 
-Node* ZBarrierSetC2::step_over_gc_barrier(Node* c) const {
-  Node* node = c;
+static bool block_has_safepoint(const Block* block) {
+  return block_has_safepoint(block, 0, block->number_of_nodes());
+}
 
-  // 1. This step follows potential oop projections of a load barrier before expansion
-  if (node->is_Proj()) {
-    node = node->in(0);
+static uint block_index(const Block* block, const Node* node) {
+  for (uint j = 0; j < block->number_of_nodes(); ++j) {
+    if (block->get_node(j) == node) {
+      return j;
+    }
   }
+  ShouldNotReachHere();
+  return 0;
+}
 
-  // 2. This step checks for unexpanded load barriers
-  if (node->is_LoadBarrier()) {
-    return node->in(LoadBarrierNode::Oop);
-  }
+void ZBarrierSetC2::analyze_dominating_barriers() const {
+  ResourceMark rm;
+  Compile* const C = Compile::current();
+  PhaseCFG* const cfg = C->cfg();
+  Block_List worklist;
+  Node_List mem_ops;
+  Node_List barrier_loads;
 
-  // 3. This step checks for the phi corresponding to an optimized load barrier expansion
-  if (node->is_Phi()) {
-    PhiNode* phi = node->as_Phi();
-    Node* n = phi->in(1);
-    if (n != NULL && n->is_LoadBarrierSlowReg()) {
-      assert(c == node, "projections from step 1 should only be seen before macro expansion");
-      return phi->in(2);
+  // Step 1 - Find accesses, and track them in lists
+  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
+    const Block* const block = cfg->get_block(i);
+    for (uint j = 0; j < block->number_of_nodes(); ++j) {
+      const Node* const node = block->get_node(j);
+      if (!node->is_Mach()) {
+        continue;
+      }
+
+      MachNode* const mach = node->as_Mach();
+      switch (mach->ideal_Opcode()) {
+      case Op_LoadP:
+      case Op_CompareAndExchangeP:
+      case Op_CompareAndSwapP:
+      case Op_GetAndSetP:
+        if (mach->barrier_data() == ZLoadBarrierStrong) {
+          barrier_loads.push(mach);
+        }
+      case Op_StoreP:
+        mem_ops.push(mach);
+        break;
+
+      default:
+        break;
+      }
     }
   }
 
-  return c;
-}
+  // Step 2 - Find dominating accesses for each load
+  for (uint i = 0; i < barrier_loads.size(); i++) {
+    MachNode* const load = barrier_loads.at(i)->as_Mach();
+    const TypePtr* load_adr_type = NULL;
+    intptr_t load_offset = 0;
+    const Node* const load_obj = load->get_base_and_disp(load_offset, load_adr_type);
+    Block* const load_block = cfg->get_block_for_node(load);
+    const uint load_index = block_index(load_block, load);
 
-Node* ZBarrierSetC2::step_over_gc_barrier_ctrl(Node* c) const {
-  Node* node = c;
+    for (uint j = 0; j < mem_ops.size(); j++) {
+      MachNode* mem = mem_ops.at(j)->as_Mach();
+      const TypePtr* mem_adr_type = NULL;
+      intptr_t mem_offset = 0;
+      const Node* mem_obj = mem_obj = mem->get_base_and_disp(mem_offset, mem_adr_type);
+      Block* mem_block = cfg->get_block_for_node(mem);
+      uint mem_index = block_index(mem_block, mem);
 
-  // 1. This step follows potential ctrl projections of a load barrier before expansion
-  if (node->is_Proj()) {
-    node = node->in(0);
-  }
+      if (load_obj == NodeSentinel || mem_obj == NodeSentinel ||
+          load_obj == NULL || mem_obj == NULL ||
+          load_offset < 0 || mem_offset < 0) {
+        continue;
+      }
 
-  // 2. This step checks for unexpanded load barriers
-  if (node->is_LoadBarrier()) {
-    return node->in(LoadBarrierNode::Control);
-  }
+      if (mem_obj != load_obj || mem_offset != load_offset) {
+        // Not the same addresses, not a candidate
+        continue;
+      }
 
-  return c;
-}
+      if (load_block == mem_block) {
+        // Earlier accesses in the same block
+        if (mem_index < load_index && !block_has_safepoint(mem_block, mem_index + 1, load_index)) {
+          load->set_barrier_data(ZLoadBarrierElided);
+        }
+      } else if (mem_block->dominates(load_block)) {
+        // Dominating block? Look around for safepoints
+        ResourceMark rm;
+        Block_List stack;
+        VectorSet visited(Thread::current()->resource_area());
+        stack.push(load_block);
+        bool safepoint_found = block_has_safepoint(load_block);
+        while (!safepoint_found && stack.size() > 0) {
+          Block* block = stack.pop();
+          if (visited.test_set(block->_pre_order)) {
+            continue;
+          }
+          if (block_has_safepoint(block)) {
+            safepoint_found = true;
+            break;
+          }
+          if (block == mem_block) {
+            continue;
+          }
 
-bool ZBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, ArrayCopyPhase phase) const {
-  return is_reference_type(type);
-}
-
-bool ZBarrierSetC2::final_graph_reshaping(Compile* compile, Node* n, uint opcode) const {
-  switch (opcode) {
-    case Op_LoadBarrier:
-      assert(0, "There should be no load barriers left");
-    case Op_ZGetAndSetP:
-    case Op_ZCompareAndExchangeP:
-    case Op_ZCompareAndSwapP:
-    case Op_ZWeakCompareAndSwapP:
-#ifdef ASSERT
-      if (VerifyOptoOopOffsets) {
-        MemNode *mem = n->as_Mem();
-        // Check to see if address types have grounded out somehow.
-        const TypeInstPtr *tp = mem->in(MemNode::Address)->bottom_type()->isa_instptr();
-        ciInstanceKlass *k = tp->klass()->as_instance_klass();
-        bool oop_offset_is_sane = k->contains_field_offset(tp->offset());
-        assert(!tp || oop_offset_is_sane, "");
-      }
-#endif
-      return true;
-    default:
-      return false;
-  }
-}
-
-bool ZBarrierSetC2::matcher_find_shared_visit(Matcher* matcher, Matcher::MStack& mstack, Node* n, uint opcode, bool& mem_op, int& mem_addr_idx) const {
-  switch(opcode) {
-    case Op_CallLeaf:
-      if (n->as_Call()->entry_point() == ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr() ||
-          n->as_Call()->entry_point() == ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded_addr()) {
-        mem_op = true;
-        mem_addr_idx = TypeFunc::Parms + 1;
-        return true;
-      }
-      return false;
-    default:
-      return false;
-  }
-}
-
-bool ZBarrierSetC2::matcher_find_shared_post_visit(Matcher* matcher, Node* n, uint opcode) const {
-  switch(opcode) {
-    case Op_ZCompareAndExchangeP:
-    case Op_ZCompareAndSwapP:
-    case Op_ZWeakCompareAndSwapP: {
-      Node *mem = n->in(MemNode::Address);
-      Node *keepalive = n->in(5);
-      Node *pair1 = new BinaryNode(mem, keepalive);
-
-      Node *newval = n->in(MemNode::ValueIn);
-      Node *oldval = n->in(LoadStoreConditionalNode::ExpectedIn);
-      Node *pair2 = new BinaryNode(oldval, newval);
-
-      n->set_req(MemNode::Address, pair1);
-      n->set_req(MemNode::ValueIn, pair2);
-      n->del_req(5);
-      n->del_req(LoadStoreConditionalNode::ExpectedIn);
-      return true;
-    }
-    case Op_ZGetAndSetP: {
-      Node *keepalive = n->in(4);
-      Node *newval = n->in(MemNode::ValueIn);
-      Node *pair = new BinaryNode(newval, keepalive);
-      n->set_req(MemNode::ValueIn, pair);
-      n->del_req(4);
-      return true;
-    }
-
-    default:
-      return false;
-  }
-}
-
-// == Verification ==
-
-#ifdef ASSERT
-
-static void verify_slippery_safepoints_internal(Node* ctrl) {
-  // Given a CFG node, make sure it does not contain both safepoints and loads
-  // that have expanded barriers.
-  bool found_safepoint = false;
-  bool found_load = false;
-
-  for (DUIterator_Fast imax, i = ctrl->fast_outs(imax); i < imax; i++) {
-    Node* node = ctrl->fast_out(i);
-    if (node->in(0) != ctrl) {
-      // Skip outgoing precedence edges from ctrl.
-      continue;
-    }
-    if (node->is_SafePoint()) {
-      found_safepoint = true;
-    }
-    if (node->is_Load() && load_require_barrier(node->as_Load()) &&
-        load_has_expanded_barrier(node->as_Load())) {
-      found_load = true;
-    }
-  }
-  assert(!found_safepoint || !found_load, "found load and safepoint in same block");
-}
-
-static void verify_slippery_safepoints(Compile* C) {
-  ResourceArea *area = Thread::current()->resource_area();
-  Unique_Node_List visited(area);
-  Unique_Node_List checked(area);
-
-  // Recursively walk the graph.
-  visited.push(C->root());
-  while (visited.size() > 0) {
-    Node* node = visited.pop();
-
-    Node* ctrl = node;
-    if (!node->is_CFG()) {
-      ctrl = node->in(0);
-    }
-
-    if (ctrl != NULL && !checked.member(ctrl)) {
-      // For each block found in the graph, verify that it does not
-      // contain both a safepoint and a load requiring barriers.
-      verify_slippery_safepoints_internal(ctrl);
-
-      checked.push(ctrl);
-    }
-
-    checked.push(node);
-
-    for (DUIterator_Fast imax, i = node->fast_outs(imax); i < imax; i++) {
-      Node* use = node->fast_out(i);
-      if (checked.member(use))  continue;
-      if (visited.member(use))  continue;
-      visited.push(use);
-    }
-  }
-}
-
-void ZBarrierSetC2::verify_gc_barriers(Compile* compile, CompilePhase phase) const {
-  switch(phase) {
-    case BarrierSetC2::BeforeOptimize:
-    case BarrierSetC2::BeforeLateInsertion:
-      assert(state()->load_barrier_count() == 0, "No barriers inserted yet");
-      break;
-    case BarrierSetC2::BeforeMacroExpand:
-      // Barrier placement should be set by now.
-      verify_gc_barriers(false /*post_parse*/);
-      break;
-    case BarrierSetC2::BeforeCodeGen:
-      // Barriers has been fully expanded.
-      assert(state()->load_barrier_count() == 0, "No more macro barriers");
-      verify_slippery_safepoints(compile);
-      break;
-    default:
-      assert(0, "Phase without verification");
-  }
-}
-
-// post_parse implies that there might be load barriers without uses after parsing
-// That only applies when adding barriers at parse time.
-void ZBarrierSetC2::verify_gc_barriers(bool post_parse) const {
-  ZBarrierSetC2State* s = state();
-  Compile* C = Compile::current();
-  ResourceMark rm;
-  VectorSet visited(Thread::current()->resource_area());
-
-  for (int i = 0; i < s->load_barrier_count(); i++) {
-    LoadBarrierNode* n = s->load_barrier_node(i);
-
-    // The dominating barrier on the same address if it exists and
-    // this barrier must not be applied on the value from the same
-    // load otherwise the value is not reloaded before it's used the
-    // second time.
-    assert(n->in(LoadBarrierNode::Similar)->is_top() ||
-           (n->in(LoadBarrierNode::Similar)->in(0)->is_LoadBarrier() &&
-            n->in(LoadBarrierNode::Similar)->in(0)->in(LoadBarrierNode::Address) == n->in(LoadBarrierNode::Address) &&
-            n->in(LoadBarrierNode::Similar)->in(0)->in(LoadBarrierNode::Oop) != n->in(LoadBarrierNode::Oop)),
-           "broken similar edge");
-
-    assert(n->as_LoadBarrier()->has_true_uses(),
-           "found unneeded load barrier");
-
-    // Several load barrier nodes chained through their Similar edge
-    // break the code that remove the barriers in final graph reshape.
-    assert(n->in(LoadBarrierNode::Similar)->is_top() ||
-           (n->in(LoadBarrierNode::Similar)->in(0)->is_LoadBarrier() &&
-            n->in(LoadBarrierNode::Similar)->in(0)->in(LoadBarrierNode::Similar)->is_top()),
-           "chain of Similar load barriers");
-
-    if (!n->in(LoadBarrierNode::Similar)->is_top()) {
-      ResourceMark rm;
-      Unique_Node_List wq;
-      Node* other = n->in(LoadBarrierNode::Similar)->in(0);
-      wq.push(n);
-      for (uint next = 0; next < wq.size(); ++next) {
-        Node *nn = wq.at(next);
-        assert(nn->is_CFG(), "");
-        assert(!nn->is_SafePoint(), "");
-
-        if (nn == other) {
-          continue;
+          // Push predecessor blocks
+          for (uint p = 1; p < block->num_preds(); ++p) {
+            Block* pred = cfg->get_block_for_node(block->pred(p));
+            stack.push(pred);
+          }
         }
 
-        if (nn->is_Region()) {
-          for (uint i = 1; i < nn->req(); i++) {
-            Node* m = nn->in(i);
-            if (m != NULL) {
-              wq.push(m);
-            }
-          }
-        } else {
-          Node* m = nn->in(0);
-          if (m != NULL) {
-            wq.push(m);
-          }
+        if (!safepoint_found) {
+          load->set_barrier_data(ZLoadBarrierElided);
         }
       }
     }
   }
 }
 
-#endif // end verification code
+// == Reduced spilling optimization ==
 
-// If a call is the control, we actually want its control projection
-static Node* normalize_ctrl(Node* node) {
- if (node->is_Call()) {
-   node = node->as_Call()->proj_out(TypeFunc::Control);
- }
- return node;
-}
+void ZBarrierSetC2::compute_liveness_at_stubs() const {
+  ResourceMark rm;
+  Compile* const C = Compile::current();
+  Arena* const A = Thread::current()->resource_area();
+  PhaseCFG* const cfg = C->cfg();
+  PhaseRegAlloc* const regalloc = C->regalloc();
+  RegMask* const live = NEW_ARENA_ARRAY(A, RegMask, cfg->number_of_blocks() * sizeof(RegMask));
+  ZBarrierSetAssembler* const bs = ZBarrierSet::assembler();
+  Block_List worklist;
 
-static Node* get_ctrl_normalized(PhaseIdealLoop *phase, Node* node) {
-  return normalize_ctrl(phase->get_ctrl(node));
-}
-
-static void call_catch_cleanup_one(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl);
-
-// This code is cloning all uses of a load that is between a call and the catch blocks,
-// to each use.
-
-static bool fixup_uses_in_catch(PhaseIdealLoop *phase, Node *start_ctrl, Node *node) {
-
-  if (!phase->has_ctrl(node)) {
-    // This node is floating - doesn't need to be cloned.
-    assert(node != start_ctrl, "check");
-    return false;
+  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
+    new ((void*)(live + i)) RegMask();
+    worklist.push(cfg->get_block(i));
   }
 
-  Node* ctrl = get_ctrl_normalized(phase, node);
-  if (ctrl != start_ctrl) {
-    // We are in a successor block - the node is ok.
-    return false; // Unwind
-  }
+  while (worklist.size() > 0) {
+    const Block* const block = worklist.pop();
+    RegMask& old_live = live[block->_pre_order];
+    RegMask new_live;
 
-  // Process successor nodes
-  int outcnt = node->outcnt();
-  for (int i = 0; i < outcnt; i++) {
-    Node* n = node->raw_out(0);
-    assert(!n->is_LoadBarrier(), "Sanity");
-    // Calling recursively, visiting leafs first
-    fixup_uses_in_catch(phase, start_ctrl, n);
-  }
+    // Initialize to union of successors
+    for (uint i = 0; i < block->_num_succs; i++) {
+      const uint succ_id = block->_succs[i]->_pre_order;
+      new_live.OR(live[succ_id]);
+    }
 
-  // Now all successors are outside
-  // - Clone this node to both successors
-  assert(!node->is_Store(), "Stores not expected here");
+    // Walk block backwards, computing liveness
+    for (int i = block->number_of_nodes() - 1; i >= 0; --i) {
+      const Node* const node = block->get_node(i);
 
-  // In some very rare cases a load that doesn't need a barrier will end up here
-  // Treat it as a LoadP and the insertion of phis will be done correctly.
-  if (node->is_Load()) {
-    call_catch_cleanup_one(phase, node->as_Load(), phase->get_ctrl(node));
-  } else {
-    for (DUIterator_Fast jmax, i = node->fast_outs(jmax); i < jmax; i++) {
-      Node* use = node->fast_out(i);
-      Node* clone = node->clone();
-      assert(clone->outcnt() == 0, "");
-
-      assert(use->find_edge(node) != -1, "check");
-      phase->igvn().rehash_node_delayed(use);
-      use->replace_edge(node, clone);
-
-      Node* new_ctrl;
-      if (use->is_block_start()) {
-        new_ctrl = use;
-      } else if (use->is_CFG()) {
-        new_ctrl = use->in(0);
-        assert (new_ctrl != NULL, "");
-      } else {
-        new_ctrl = get_ctrl_normalized(phase, use);
+      // Remove def bits
+      const OptoReg::Name first = bs->refine_register(node, regalloc->get_reg_first(node));
+      const OptoReg::Name second = bs->refine_register(node, regalloc->get_reg_second(node));
+      if (first != OptoReg::Bad) {
+        new_live.Remove(first);
+      }
+      if (second != OptoReg::Bad) {
+        new_live.Remove(second);
       }
 
-      phase->set_ctrl(clone, new_ctrl);
-
-      if (phase->C->directive()->ZTraceLoadBarriersOption) tty->print_cr("  Clone op %i as %i to control %i", node->_idx, clone->_idx, new_ctrl->_idx);
-      phase->igvn().register_new_node_with_optimizer(clone);
-      --i, --jmax;
-    }
-    assert(node->outcnt() == 0, "must be empty now");
-
-    // Node node is dead.
-    phase->igvn().remove_dead_node(node);
-  }
-  return true; // unwind - return if a use was processed
-}
-
-// Clone a load to a specific catch_proj
-static Node* clone_load_to_catchproj(PhaseIdealLoop* phase, Node* load, Node* catch_proj) {
-  Node* cloned_load = load->clone();
-  cloned_load->set_req(0, catch_proj);      // set explicit control
-  phase->set_ctrl(cloned_load, catch_proj); // update
-  if (phase->C->directive()->ZTraceLoadBarriersOption) tty->print_cr("  Clone LOAD %i as %i to control %i", load->_idx, cloned_load->_idx, catch_proj->_idx);
-  phase->igvn().register_new_node_with_optimizer(cloned_load);
-  return cloned_load;
-}
-
-static Node* get_dominating_region(PhaseIdealLoop* phase, Node* node, Node* stop) {
-  Node* region = node;
-  while (!region->isa_Region()) {
-    Node *up = phase->idom(region);
-    assert(up != region, "Must not loop");
-    assert(up != stop,   "Must not find original control");
-    region = up;
-  }
-  return region;
-}
-
-// Clone this load to each catch block
-static void call_catch_cleanup_one(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl) {
-  bool trace = phase->C->directive()->ZTraceLoadBarriersOption;
-  phase->igvn().set_delay_transform(true);
-
-  // Verify pre conditions
-  assert(ctrl->isa_Proj() && ctrl->in(0)->isa_Call(), "Must be a call proj");
-  assert(ctrl->raw_out(0)->isa_Catch(), "Must be a catch");
-
-  if (ctrl->raw_out(0)->isa_Catch()->outcnt() == 1) {
-    if (trace) tty->print_cr("Cleaning up catch: Skipping load %i, call with single catch", load->_idx);
-    return;
-  }
-
-  // Process the loads successor nodes - if any is between
-  // the call and the catch blocks, they need to be cloned to.
-  // This is done recursively
-  for (uint i = 0; i < load->outcnt();) {
-    Node *n = load->raw_out(i);
-    assert(!n->is_LoadBarrier(), "Sanity");
-    if (!fixup_uses_in_catch(phase, ctrl, n)) {
-      // if no successor was cloned, progress to next out.
-      i++;
-    }
-  }
-
-  // Now all the loads uses has been cloned down
-  // Only thing left is to clone the loads, but they must end up
-  // first in the catch blocks.
-
-  // We clone the loads oo the catch blocks only when needed.
-  // An array is used to map the catch blocks to each lazily cloned load.
-  // In that way no extra unnecessary loads are cloned.
-
-  // Any use dominated by original block must have an phi and a region added
-
-  Node* catch_node = ctrl->raw_out(0);
-  int number_of_catch_projs = catch_node->outcnt();
-  Node** proj_to_load_mapping = NEW_RESOURCE_ARRAY(Node*, number_of_catch_projs);
-  Copy::zero_to_bytes(proj_to_load_mapping, sizeof(Node*) * number_of_catch_projs);
-
-  // The phi_map is used to keep track of where phis have already been inserted
-  int phi_map_len = phase->C->unique();
-  Node** phi_map = NEW_RESOURCE_ARRAY(Node*, phi_map_len);
-  Copy::zero_to_bytes(phi_map, sizeof(Node*) * phi_map_len);
-
-  for (unsigned int i = 0; i  < load->outcnt(); i++) {
-    Node* load_use_control = NULL;
-    Node* load_use = load->raw_out(i);
-
-    if (phase->has_ctrl(load_use)) {
-      load_use_control = get_ctrl_normalized(phase, load_use);
-      assert(load_use_control != ctrl, "sanity");
-    } else {
-      load_use_control = load_use->in(0);
-    }
-    assert(load_use_control != NULL, "sanity");
-    if (trace) tty->print_cr("  Handling use: %i, with control: %i", load_use->_idx, load_use_control->_idx);
-
-    // Some times the loads use is a phi. For them we need to determine from which catch block
-    // the use is defined.
-    bool load_use_is_phi = false;
-    unsigned int load_use_phi_index = 0;
-    Node* phi_ctrl = NULL;
-    if (load_use->is_Phi()) {
-      // Find phi input that matches load
-      for (unsigned int u = 1; u < load_use->req(); u++) {
-        if (load_use->in(u) == load) {
-          load_use_is_phi = true;
-          load_use_phi_index = u;
-          assert(load_use->in(0)->is_Region(), "Region or broken");
-          phi_ctrl = load_use->in(0)->in(u);
-          assert(phi_ctrl->is_CFG(), "check");
-          assert(phi_ctrl != load,   "check");
-          break;
+      // Add use bits
+      for (uint j = 1; j < node->req(); ++j) {
+        const Node* const use = node->in(j);
+        const OptoReg::Name first = bs->refine_register(use, regalloc->get_reg_first(use));
+        const OptoReg::Name second = bs->refine_register(use, regalloc->get_reg_second(use));
+        if (first != OptoReg::Bad) {
+          new_live.Insert(first);
         }
-      }
-      assert(load_use_is_phi,        "must find");
-      assert(load_use_phi_index > 0, "sanity");
-    }
-
-    // For each load use, see which catch projs dominates, create load clone lazily and reconnect
-    bool found_dominating_catchproj = false;
-    for (int c = 0; c < number_of_catch_projs; c++) {
-      Node* catchproj = catch_node->raw_out(c);
-      assert(catchproj != NULL && catchproj->isa_CatchProj(), "Sanity");
-
-      if (!phase->is_dominator(catchproj, load_use_control)) {
-        if (load_use_is_phi && phase->is_dominator(catchproj, phi_ctrl)) {
-          // The loads use is local to the catchproj.
-          // fall out and replace load with catch-local load clone.
-        } else {
-          continue;
-        }
-      }
-      assert(!found_dominating_catchproj, "Max one should match");
-
-      // Clone loads to catch projs
-      Node* load_clone = proj_to_load_mapping[c];
-      if (load_clone == NULL) {
-        load_clone = clone_load_to_catchproj(phase, load, catchproj);
-        proj_to_load_mapping[c] = load_clone;
-      }
-      phase->igvn().rehash_node_delayed(load_use);
-
-      if (load_use_is_phi) {
-        // phis are special - the load is defined from a specific control flow
-        load_use->set_req(load_use_phi_index, load_clone);
-      } else {
-        // Multipe edges can be replaced at once - on calls for example
-        load_use->replace_edge(load, load_clone);
-      }
-      --i; // more than one edge can have been removed, but the next is in later iterations
-
-      // We could break the for-loop after finding a dominating match.
-      // But keep iterating to catch any bad idom early.
-      found_dominating_catchproj = true;
-    }
-
-    // We found no single catchproj that dominated the use - The use is at a point after
-    // where control flow from multiple catch projs have merged. We will have to create
-    // phi nodes before the use and tie the output from the cloned loads together. It
-    // can be a single phi or a number of chained phis, depending on control flow
-    if (!found_dominating_catchproj) {
-
-      // Use phi-control if use is a phi
-      if (load_use_is_phi) {
-        load_use_control = phi_ctrl;
-      }
-      assert(phase->is_dominator(ctrl, load_use_control), "Common use but no dominator");
-
-      // Clone a load on all paths
-      for (int c = 0; c < number_of_catch_projs; c++) {
-        Node* catchproj = catch_node->raw_out(c);
-        Node* load_clone = proj_to_load_mapping[c];
-        if (load_clone == NULL) {
-          load_clone = clone_load_to_catchproj(phase, load, catchproj);
-          proj_to_load_mapping[c] = load_clone;
+        if (second != OptoReg::Bad) {
+          new_live.Insert(second);
         }
       }
 
-      // Move up dominator tree from use until dom front is reached
-      Node* next_region = get_dominating_region(phase, load_use_control, ctrl);
-      while (phase->idom(next_region) != catch_node) {
-        next_region = phase->idom(next_region);
-        if (trace) tty->print_cr("Moving up idom to region ctrl %i", next_region->_idx);
-      }
-      assert(phase->is_dominator(catch_node, next_region), "Sanity");
-
-      // Create or reuse phi node that collect all cloned loads and feed it to the use.
-      Node* test_phi = phi_map[next_region->_idx];
-      if ((test_phi != NULL) && test_phi->is_Phi()) {
-        // Reuse an already created phi
-        if (trace) tty->print_cr("    Using cached Phi %i on load_use %i", test_phi->_idx, load_use->_idx);
-        phase->igvn().rehash_node_delayed(load_use);
-        load_use->replace_edge(load, test_phi);
-        // Now this use is done
-      } else {
-        // Otherwise we need to create one or more phis
-        PhiNode* next_phi = new PhiNode(next_region, load->type());
-        phi_map[next_region->_idx] = next_phi; // cache new phi
-        phase->igvn().rehash_node_delayed(load_use);
-        load_use->replace_edge(load, next_phi);
-
-        int dominators_of_region = 0;
-        do {
-          // New phi, connect to region and add all loads as in.
-          Node* region = next_region;
-          assert(region->isa_Region() && region->req() > 2, "Catch dead region nodes");
-          PhiNode* new_phi = next_phi;
-
-          if (trace) tty->print_cr("Created Phi %i on load %i with control %i", new_phi->_idx, load->_idx, region->_idx);
-
-          // Need to add all cloned loads to the phi, taking care that the right path is matched
-          dominators_of_region = 0; // reset for new region
-          for (unsigned int reg_i = 1; reg_i < region->req(); reg_i++) {
-            Node* region_pred = region->in(reg_i);
-            assert(region_pred->is_CFG(), "check");
-            bool pred_has_dominator = false;
-            for (int c = 0; c < number_of_catch_projs; c++) {
-              Node* catchproj = catch_node->raw_out(c);
-              if (phase->is_dominator(catchproj, region_pred)) {
-                new_phi->set_req(reg_i, proj_to_load_mapping[c]);
-                if (trace) tty->print_cr(" - Phi in(%i) set to load %i", reg_i, proj_to_load_mapping[c]->_idx);
-                pred_has_dominator = true;
-                dominators_of_region++;
-                break;
-              }
-            }
-
-            // Sometimes we need to chain several phis.
-            if (!pred_has_dominator) {
-              assert(dominators_of_region <= 1, "More than one region can't require extra phi");
-              if (trace) tty->print_cr(" - Region %i pred %i not dominated by catch proj", region->_idx, region_pred->_idx);
-              // Continue search on on this region_pred
-              // - walk up to next region
-              // - create a new phi and connect to first new_phi
-              next_region = get_dominating_region(phase, region_pred, ctrl);
-
-              // Lookup if there already is a phi, create a new otherwise
-              Node* test_phi = phi_map[next_region->_idx];
-              if ((test_phi != NULL) && test_phi->is_Phi()) {
-                next_phi = test_phi->isa_Phi();
-                dominators_of_region++; // record that a match was found and that we are done
-                if (trace) tty->print_cr("    Using cached phi Phi %i on control %i", next_phi->_idx, next_region->_idx);
-              } else {
-                next_phi = new PhiNode(next_region, load->type());
-                phi_map[next_region->_idx] = next_phi;
-              }
-              new_phi->set_req(reg_i, next_phi);
-            }
-          }
-
-          new_phi->set_req(0, region);
-          phase->igvn().register_new_node_with_optimizer(new_phi);
-          phase->set_ctrl(new_phi, region);
-
-          assert(dominators_of_region != 0, "Must have found one this iteration");
-        } while (dominators_of_region == 1);
-      }
-      --i;
-    }
-  } // end of loop over uses
-
-  assert(load->outcnt() == 0, "All uses should be handled");
-  phase->igvn().remove_dead_node(load);
-  phase->C->print_method(PHASE_CALL_CATCH_CLEANUP, 4, load->_idx);
-
-  // Now we should be home
-  phase->igvn().set_delay_transform(false);
-}
-
-// Sort out the loads that are between a call ant its catch blocks
-static void process_catch_cleanup_candidate(PhaseIdealLoop* phase, LoadNode* load, bool verify) {
-  bool trace = phase->C->directive()->ZTraceLoadBarriersOption;
-
-  Node* ctrl = get_ctrl_normalized(phase, load);
-  if (!ctrl->is_Proj() || (ctrl->in(0) == NULL) || !ctrl->in(0)->isa_Call()) {
-    return;
-  }
-
-  Node* catch_node = ctrl->isa_Proj()->raw_out(0);
-  if (catch_node->is_Catch()) {
-    if (catch_node->outcnt() > 1) {
-      assert(!verify, "All loads should already have been moved");
-      call_catch_cleanup_one(phase, load, ctrl);
-    } else {
-      if (trace) tty->print_cr("Call catch cleanup with only one catch: load %i ", load->_idx);
-    }
-  }
-}
-
-void ZBarrierSetC2::barrier_insertion_phase(Compile* C, PhaseIterGVN& igvn) const {
-  PhaseIdealLoop::optimize(igvn, LoopOptsZBarrierInsertion);
-  if (C->failing())  return;
-}
-
-bool ZBarrierSetC2::optimize_loops(PhaseIdealLoop* phase, LoopOptsMode mode, VectorSet& visited, Node_Stack& nstack, Node_List& worklist) const {
-
-  if (mode == LoopOptsZBarrierInsertion) {
-    // First make sure all loads between call and catch are moved to the catch block
-    clean_catch_blocks(phase);
-    DEBUG_ONLY(clean_catch_blocks(phase, true /* verify */);)
-
-    // Then expand barriers on all loads
-    insert_load_barriers(phase);
-
-    // Handle all Unsafe that need barriers.
-    insert_barriers_on_unsafe(phase);
-
-    phase->C->clear_major_progress();
-    return true;
-  } else {
-    return false;
-  }
-}
-
-static bool can_simplify_cas(LoadStoreNode* node) {
-  if (node->isa_LoadStoreConditional()) {
-    Node *expected_in = node->as_LoadStoreConditional()->in(LoadStoreConditionalNode::ExpectedIn);
-    return (expected_in->get_ptr_type() == TypePtr::NULL_PTR);
-  } else {
-    return false;
-  }
-}
-
-static void insert_barrier_before_unsafe(PhaseIdealLoop* phase, LoadStoreNode* old_node) {
-
-  Compile *C = phase->C;
-  PhaseIterGVN &igvn = phase->igvn();
-  LoadStoreNode* zclone = NULL;
-
-  Node *in_ctrl = old_node->in(MemNode::Control);
-  Node *in_mem  = old_node->in(MemNode::Memory);
-  Node *in_adr  = old_node->in(MemNode::Address);
-  Node *in_val  = old_node->in(MemNode::ValueIn);
-  const TypePtr *adr_type = old_node->adr_type();
-  const TypePtr* load_type = TypeOopPtr::BOTTOM; // The type for the load we are adding
-
-  switch (old_node->Opcode()) {
-    case Op_CompareAndExchangeP: {
-      zclone = new ZCompareAndExchangePNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn),
-              adr_type, old_node->get_ptr_type(), ((CompareAndExchangeNode*)old_node)->order());
-      load_type = old_node->bottom_type()->is_ptr();
-      break;
-    }
-    case Op_WeakCompareAndSwapP: {
-      if (can_simplify_cas(old_node)) {
-        break;
-      }
-      zclone = new ZWeakCompareAndSwapPNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn),
-              ((CompareAndSwapNode*)old_node)->order());
-      adr_type = TypePtr::BOTTOM;
-      break;
-    }
-    case Op_CompareAndSwapP: {
-      if (can_simplify_cas(old_node)) {
-        break;
-      }
-      zclone = new ZCompareAndSwapPNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn),
-              ((CompareAndSwapNode*)old_node)->order());
-      adr_type = TypePtr::BOTTOM;
-      break;
-    }
-    case Op_GetAndSetP: {
-      zclone = new ZGetAndSetPNode(in_ctrl, in_mem, in_adr, in_val, old_node->adr_type(), old_node->get_ptr_type());
-      load_type = old_node->bottom_type()->is_ptr();
-      break;
-    }
-  }
-  if (zclone != NULL) {
-    igvn.register_new_node_with_optimizer(zclone, old_node);
-
-    // Make load
-    LoadPNode *load = new LoadPNode(NULL, in_mem, in_adr, adr_type, load_type, MemNode::unordered,
-                                    LoadNode::DependsOnlyOnTest);
-    load_set_expanded_barrier(load);
-    igvn.register_new_node_with_optimizer(load);
-    igvn.replace_node(old_node, zclone);
-
-    Node *barrier = new LoadBarrierNode(C, NULL, in_mem, load, in_adr, false /* weak */);
-    Node *barrier_val = new ProjNode(barrier, LoadBarrierNode::Oop);
-    Node *barrier_ctrl = new ProjNode(barrier, LoadBarrierNode::Control);
-
-    igvn.register_new_node_with_optimizer(barrier);
-    igvn.register_new_node_with_optimizer(barrier_val);
-    igvn.register_new_node_with_optimizer(barrier_ctrl);
-
-    // loop over all of in_ctrl usages and move to barrier_ctrl
-    for (DUIterator_Last imin, i = in_ctrl->last_outs(imin); i >= imin; --i) {
-      Node *use = in_ctrl->last_out(i);
-      uint l;
-      for (l = 0; use->in(l) != in_ctrl; l++) {}
-      igvn.replace_input_of(use, l, barrier_ctrl);
-    }
-
-    load->set_req(MemNode::Control, in_ctrl);
-    barrier->set_req(LoadBarrierNode::Control, in_ctrl);
-    zclone->add_req(barrier_val); // add req as keep alive.
-
-    C->print_method(PHASE_ADD_UNSAFE_BARRIER, 4, zclone->_idx);
-  }
-}
-
-void ZBarrierSetC2::insert_barriers_on_unsafe(PhaseIdealLoop* phase) const {
-  Compile *C = phase->C;
-  PhaseIterGVN &igvn = phase->igvn();
-  uint new_ids = C->unique();
-  VectorSet visited(Thread::current()->resource_area());
-  GrowableArray<Node *> nodeStack(Thread::current()->resource_area(), 0, 0, NULL);
-  nodeStack.push(C->root());
-  visited.test_set(C->root()->_idx);
-
-  // Traverse all nodes, visit all unsafe ops that require a barrier
-  while (nodeStack.length() > 0) {
-    Node *n = nodeStack.pop();
-
-    bool is_old_node = (n->_idx < new_ids); // don't process nodes that were created during cleanup
-    if (is_old_node) {
-      if (n->is_LoadStore()) {
-        LoadStoreNode* lsn = n->as_LoadStore();
-        if (lsn->has_barrier()) {
-          BasicType bt = lsn->in(MemNode::Address)->bottom_type()->basic_type();
-          assert (is_reference_type(bt), "Sanity test");
-          insert_barrier_before_unsafe(phase, lsn);
-        }
-      }
-    }
-    for (uint i = 0; i < n->len(); i++) {
-      if (n->in(i)) {
-        if (!visited.test_set(n->in(i)->_idx)) {
-          nodeStack.push(n->in(i));
-        }
-      }
-    }
-  }
-
-  igvn.optimize();
-  C->print_method(PHASE_ADD_UNSAFE_BARRIER, 2);
-}
-
-// The purpose of ZBarrierSetC2::clean_catch_blocks is to prepare the IR for
-// splicing in load barrier nodes.
-//
-// The problem is that we might have instructions between a call and its catch nodes.
-// (This is usually handled in PhaseCFG:call_catch_cleanup, which clones mach nodes in
-// already scheduled blocks.) We can't have loads that require barriers there,
-// because we need to splice in new control flow, and that would violate the IR.
-//
-// clean_catch_blocks find all Loads that require a barrier and clone them and any
-// dependent instructions to each use. The loads must be in the beginning of the catch block
-// before any store.
-//
-// Sometimes the loads use will be at a place dominated by all catch blocks, then we need
-// a load in each catch block, and a Phi at the dominated use.
-
-void ZBarrierSetC2::clean_catch_blocks(PhaseIdealLoop* phase, bool verify) const {
-
-  Compile *C = phase->C;
-  uint new_ids = C->unique();
-  PhaseIterGVN &igvn = phase->igvn();
-  VectorSet visited(Thread::current()->resource_area());
-  GrowableArray<Node *> nodeStack(Thread::current()->resource_area(), 0, 0, NULL);
-  nodeStack.push(C->root());
-  visited.test_set(C->root()->_idx);
-
-  // Traverse all nodes, visit all loads that require a barrier
-  while(nodeStack.length() > 0) {
-    Node *n = nodeStack.pop();
-
-    for (uint i = 0; i < n->len(); i++) {
-      if (n->in(i)) {
-        if (!visited.test_set(n->in(i)->_idx)) {
-          nodeStack.push(n->in(i));
-        }
+      // If this node tracks liveness, update it
+      RegMask* const regs = barrier_set_state()->live(node);
+      if (regs != NULL) {
+        regs->OR(new_live);
       }
     }
 
-    bool is_old_node = (n->_idx < new_ids); // don't process nodes that were created during cleanup
-    if (n->is_Load() && is_old_node) {
-      LoadNode* load = n->isa_Load();
-      // only care about loads that will have a barrier
-      if (load_require_barrier(load)) {
-        process_catch_cleanup_candidate(phase, load, verify);
-      }
-    }
-  }
-
-  C->print_method(PHASE_CALL_CATCH_CLEANUP, 2);
-}
-
-class DomDepthCompareClosure : public CompareClosure<LoadNode*> {
-  PhaseIdealLoop* _phase;
-
-public:
-  DomDepthCompareClosure(PhaseIdealLoop* phase) : _phase(phase) { }
-
-  int do_compare(LoadNode* const &n1, LoadNode* const &n2) {
-    int d1 = _phase->dom_depth(_phase->get_ctrl(n1));
-    int d2 = _phase->dom_depth(_phase->get_ctrl(n2));
-    if (d1 == d2) {
-      // Compare index if the depth is the same, ensures all entries are unique.
-      return n1->_idx - n2->_idx;
-    } else {
-      return d2 - d1;
-    }
-  }
-};
-
-// Traverse graph and add all loadPs to list, sorted by dom depth
-void gather_loadnodes_sorted(PhaseIdealLoop* phase, GrowableArray<LoadNode*>* loadList) {
-
-  VectorSet visited(Thread::current()->resource_area());
-  GrowableArray<Node *> nodeStack(Thread::current()->resource_area(), 0, 0, NULL);
-  DomDepthCompareClosure ddcc(phase);
-
-  nodeStack.push(phase->C->root());
-  while(nodeStack.length() > 0) {
-    Node *n = nodeStack.pop();
-    if (visited.test(n->_idx)) {
-      continue;
-    }
-
-    if (n->isa_Load()) {
-      LoadNode *load = n->as_Load();
-      if (load_require_barrier(load)) {
-        assert(phase->get_ctrl(load) != NULL, "sanity");
-        assert(phase->dom_depth(phase->get_ctrl(load)) != 0, "sanity");
-        loadList->insert_sorted(&ddcc, load);
-      }
-    }
-
-    visited.set(n->_idx);
-    for (uint i = 0; i < n->req(); i++) {
-      if (n->in(i)) {
-        if (!visited.test(n->in(i)->_idx)) {
-          nodeStack.push(n->in(i));
-        }
+    // Now at block top, see if we have any changes
+    new_live.SUBTRACT(old_live);
+    if (new_live.is_NotEmpty()) {
+      // Liveness has refined, update and propagate to prior blocks
+      old_live.OR(new_live);
+      for (uint i = 1; i < block->num_preds(); ++i) {
+        Block* const pred = cfg->get_block_for_node(block->pred(i));
+        worklist.push(pred);
       }
     }
   }
 }
-
-// Add LoadBarriers to all LoadPs
-void ZBarrierSetC2::insert_load_barriers(PhaseIdealLoop* phase) const {
-
-  bool trace = phase->C->directive()->ZTraceLoadBarriersOption;
-  GrowableArray<LoadNode *> loadList(Thread::current()->resource_area(), 0, 0, NULL);
-  gather_loadnodes_sorted(phase, &loadList);
-
-  PhaseIterGVN &igvn = phase->igvn();
-  int count = 0;
-
-  for (GrowableArrayIterator<LoadNode *> loadIter = loadList.begin(); loadIter != loadList.end(); ++loadIter) {
-    LoadNode *load = *loadIter;
-
-    if (load_has_expanded_barrier(load)) {
-      continue;
-    }
-
-    do {
-      // Insert a barrier on a loadP
-      // if another load is found that needs to be expanded first, retry on that one
-      LoadNode* result = insert_one_loadbarrier(phase, load, phase->get_ctrl(load));
-      while (result != NULL) {
-        result = insert_one_loadbarrier(phase, result, phase->get_ctrl(result));
-      }
-    } while (!load_has_expanded_barrier(load));
-  }
-
-  phase->C->print_method(PHASE_INSERT_BARRIER, 2);
-}
-
-void push_antidependent_stores(PhaseIdealLoop* phase, Node_Stack& nodestack, LoadNode* start_load) {
-  // push all stores on the same mem, that can_alias
-  // Any load found must be handled first
-  PhaseIterGVN &igvn = phase->igvn();
-  int load_alias_idx = igvn.C->get_alias_index(start_load->adr_type());
-
-  Node *mem = start_load->in(1);
-  for (DUIterator_Fast imax, u = mem->fast_outs(imax); u < imax; u++) {
-    Node *mem_use = mem->fast_out(u);
-
-    if (mem_use == start_load) continue;
-    if (!mem_use->is_Store()) continue;
-    if (!phase->has_ctrl(mem_use)) continue;
-    if (phase->get_ctrl(mem_use) != phase->get_ctrl(start_load)) continue;
-
-    // add any aliasing store in this block
-    StoreNode *store = mem_use->isa_Store();
-    const TypePtr *adr_type = store->adr_type();
-    if (igvn.C->can_alias(adr_type, load_alias_idx)) {
-      nodestack.push(store, 0);
-    }
-  }
-}
-
-LoadNode* ZBarrierSetC2::insert_one_loadbarrier(PhaseIdealLoop* phase, LoadNode* start_load, Node* ctrl) const {
-  bool trace = phase->C->directive()->ZTraceLoadBarriersOption;
-  PhaseIterGVN &igvn = phase->igvn();
-
-  // Check for other loadPs at the same loop depth that is reachable by a DFS
-  // - if found - return it. It needs to be inserted first
-  // - otherwise proceed and insert barrier
-
-  VectorSet visited(Thread::current()->resource_area());
-  Node_Stack nodestack(100);
-
-  nodestack.push(start_load, 0);
-  push_antidependent_stores(phase, nodestack, start_load);
-
-  while(!nodestack.is_empty()) {
-    Node* n = nodestack.node(); // peek
-    nodestack.pop();
-    if (visited.test(n->_idx)) {
-      continue;
-    }
-
-    if (n->is_Load() && n != start_load && load_require_barrier(n->as_Load()) && !load_has_expanded_barrier(n->as_Load())) {
-      // Found another load that needs a barrier in the same block. Must expand later loads first.
-      if (trace) tty->print_cr(" * Found LoadP %i on DFS", n->_idx);
-      return n->as_Load(); // return node that should be expanded first
-    }
-
-    if (!phase->has_ctrl(n)) continue;
-    if (phase->get_ctrl(n) != phase->get_ctrl(start_load)) continue;
-    if (n->is_Phi()) continue;
-
-    visited.set(n->_idx);
-    // push all children
-    for (DUIterator_Fast imax, ii = n->fast_outs(imax); ii < imax; ii++) {
-      Node* c = n->fast_out(ii);
-      if (c != NULL) {
-        nodestack.push(c, 0);
-      }
-    }
-  }
-
-  insert_one_loadbarrier_inner(phase, start_load, ctrl, visited);
-  return NULL;
-}
-
-void ZBarrierSetC2::insert_one_loadbarrier_inner(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl, VectorSet visited2) const {
-  PhaseIterGVN &igvn = phase->igvn();
-  Compile* C = igvn.C;
-  bool trace = C->directive()->ZTraceLoadBarriersOption;
-
-  // create barrier
-  Node* barrier = new LoadBarrierNode(C, NULL, load->in(LoadNode::Memory), NULL, load->in(LoadNode::Address), load_has_weak_barrier(load));
-  Node* barrier_val = new ProjNode(barrier, LoadBarrierNode::Oop);
-  Node* barrier_ctrl = new ProjNode(barrier, LoadBarrierNode::Control);
-  ctrl = normalize_ctrl(ctrl);
-
-  if (trace) tty->print_cr("Insert load %i with barrier: %i and ctrl : %i", load->_idx, barrier->_idx, ctrl->_idx);
-
-  // Splice control
-  // - insert barrier control diamond between loads ctrl and ctrl successor on path to block end.
-  // - If control successor is a catch, step over to next.
-  Node* ctrl_succ = NULL;
-  for (DUIterator_Fast imax, j = ctrl->fast_outs(imax); j < imax; j++) {
-    Node* tmp = ctrl->fast_out(j);
-
-    // - CFG nodes is the ones we are going to splice (1 only!)
-    // - Phi nodes will continue to hang from the region node!
-    // - self loops should be skipped
-    if (tmp->is_Phi() || tmp == ctrl) {
-      continue;
-    }
-
-    if (tmp->is_CFG()) {
-      assert(ctrl_succ == NULL, "There can be only one");
-      ctrl_succ = tmp;
-      continue;
-    }
-  }
-
-  // Now splice control
-  assert(ctrl_succ != load, "sanity");
-  assert(ctrl_succ != NULL, "Broken IR");
-  bool found = false;
-  for(uint k = 0; k < ctrl_succ->req(); k++) {
-    if (ctrl_succ->in(k) == ctrl) {
-      assert(!found, "sanity");
-      if (trace) tty->print_cr(" Move CFG ctrl_succ %i to barrier_ctrl", ctrl_succ->_idx);
-      igvn.replace_input_of(ctrl_succ, k, barrier_ctrl);
-      found = true;
-      k--;
-    }
-  }
-
-  // For all successors of ctrl - move all visited to become successors of barrier_ctrl instead
-  for (DUIterator_Fast imax, r = ctrl->fast_outs(imax); r < imax; r++) {
-    Node* tmp = ctrl->fast_out(r);
-    if (tmp->is_SafePoint() || (visited2.test(tmp->_idx) && (tmp != load))) {
-      if (trace) tty->print_cr(" Move ctrl_succ %i to barrier_ctrl", tmp->_idx);
-      igvn.replace_input_of(tmp, 0, barrier_ctrl);
-      --r; --imax;
-    }
-  }
-
-  // Move the loads user to the barrier
-  for (DUIterator_Fast imax, i = load->fast_outs(imax); i < imax; i++) {
-    Node* u = load->fast_out(i);
-    if (u->isa_LoadBarrier()) {
-      continue;
-    }
-
-    // find correct input  - replace with iterator?
-    for(uint j = 0; j < u->req(); j++) {
-      if (u->in(j) == load) {
-        igvn.replace_input_of(u, j, barrier_val);
-        --i; --imax; // Adjust the iterator of the *outer* loop
-        break; // some nodes (calls) might have several uses from the same node
-      }
-    }
-  }
-
-  // Connect barrier to load and control
-  barrier->set_req(LoadBarrierNode::Oop, load);
-  barrier->set_req(LoadBarrierNode::Control, ctrl);
-
-  igvn.replace_input_of(load, MemNode::Control, ctrl);
-  load->pin();
-
-  igvn.rehash_node_delayed(load);
-  igvn.register_new_node_with_optimizer(barrier);
-  igvn.register_new_node_with_optimizer(barrier_val);
-  igvn.register_new_node_with_optimizer(barrier_ctrl);
-  load_set_expanded_barrier(load);
-
-  C->print_method(PHASE_INSERT_BARRIER, 3, load->_idx);
-}
-
-// The bad_mask in the ThreadLocalData shouldn't have an anti-dep-check.
-// The bad_mask address if of type TypeRawPtr, but that will alias
-// InitializeNodes until the type system is expanded.
-bool ZBarrierSetC2::needs_anti_dependence_check(const Node* node) const {
-  MachNode* mnode = node->as_Mach();
-  if (mnode != NULL) {
-    intptr_t offset = 0;
-    const TypePtr *adr_type2 = NULL;
-    const Node* base = mnode->get_base_and_disp(offset, adr_type2);
-    if ((base != NULL) &&
-        (base->is_Mach() && base->as_Mach()->ideal_Opcode() == Op_ThreadLocal) &&
-        (offset == in_bytes(ZThreadLocalData::address_bad_mask_offset()))) {
-      return false;
-    }
-  }
-  return true;
-}
--- a/src/hotspot/share/gc/z/c2/zBarrierSetC2.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/z/c2/zBarrierSetC2.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -29,134 +29,38 @@
 #include "opto/node.hpp"
 #include "utilities/growableArray.hpp"
 
-class ZCompareAndSwapPNode : public CompareAndSwapPNode {
-public:
-    ZCompareAndSwapPNode(Node* c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : CompareAndSwapPNode(c, mem, adr, val, ex, mem_ord) { }
-    virtual int Opcode() const;
-};
+const uint8_t ZLoadBarrierStrong = 1;
+const uint8_t ZLoadBarrierWeak   = 2;
+const uint8_t ZLoadBarrierElided = 3;
 
-class ZWeakCompareAndSwapPNode : public WeakCompareAndSwapPNode {
-public:
-    ZWeakCompareAndSwapPNode(Node* c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : WeakCompareAndSwapPNode(c, mem, adr, val, ex, mem_ord) { }
-    virtual int Opcode() const;
-};
+class ZLoadBarrierStubC2 : public ResourceObj {
+private:
+  const MachNode* _node;
+  const Address   _ref_addr;
+  const Register  _ref;
+  const Register  _tmp;
+  const bool      _weak;
+  Label           _entry;
+  Label           _continuation;
 
-class ZCompareAndExchangePNode : public CompareAndExchangePNode {
-public:
-    ZCompareAndExchangePNode(Node* c, Node *mem, Node *adr, Node *val, Node *ex, const TypePtr* at, const Type* t, MemNode::MemOrd mem_ord) : CompareAndExchangePNode(c, mem, adr, val, ex, at, t, mem_ord) { }
-    virtual int Opcode() const;
-};
-
-class ZGetAndSetPNode : public GetAndSetPNode {
-public:
-    ZGetAndSetPNode(Node* c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* t) : GetAndSetPNode(c, mem, adr, val, at, t) { }
-    virtual int Opcode() const;
-};
-
-class LoadBarrierNode : public MultiNode {
-private:
-  bool _weak;               // On strong or weak oop reference
-  static bool is_dominator(PhaseIdealLoop* phase, bool linear_only, Node *d, Node *n);
-  void push_dominated_barriers(PhaseIterGVN* igvn) const;
+  ZLoadBarrierStubC2(const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak);
 
 public:
-  enum {
-    Control,
-    Memory,
-    Oop,
-    Address,
-    Number_of_Outputs = Address,
-    Similar,
-    Number_of_Inputs
-  };
+  static ZLoadBarrierStubC2* create(const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak);
 
-  LoadBarrierNode(Compile* C,
-                  Node* c,
-                  Node* mem,
-                  Node* val,
-                  Node* adr,
-                  bool weak);
-
-  virtual int Opcode() const;
-  virtual uint size_of() const;
-  virtual bool cmp(const Node& n) const;
-  virtual const Type *bottom_type() const;
-  virtual const TypePtr* adr_type() const;
-  virtual const Type *Value(PhaseGVN *phase) const;
-  virtual Node *Identity(PhaseGVN *phase);
-  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
-  virtual uint match_edge(uint idx) const;
-
-  LoadBarrierNode* has_dominating_barrier(PhaseIdealLoop* phase,
-                                          bool linear_only,
-                                          bool look_for_similar);
-
-  void fix_similar_in_uses(PhaseIterGVN* igvn);
-
-  bool has_true_uses() const;
-
-  bool can_be_eliminated() const {
-    return !in(Similar)->is_top();
-  }
-
-  bool is_weak() const {
-    return _weak;
-  }
-};
-
-class LoadBarrierSlowRegNode : public TypeNode {
-private:
-  bool _is_weak;
-public:
-  LoadBarrierSlowRegNode(Node *c,
-                         Node *adr,
-                         Node *src,
-                         const TypePtr* t,
-                         bool weak) :
-      TypeNode(t, 3), _is_weak(weak) {
-    init_req(1, adr);
-    init_req(2, src);
-    init_class_id(Class_LoadBarrierSlowReg);
-  }
-
-  virtual uint size_of() const {
-    return sizeof(*this);
-  }
-
-  virtual const char * name() {
-    return "LoadBarrierSlowRegNode";
-  }
-
-  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape) {
-    return NULL;
-  }
-
-  virtual int Opcode() const;
-
-  bool is_weak() { return _is_weak; }
-};
-
-class ZBarrierSetC2State : public ResourceObj {
-private:
-  // List of load barrier nodes which need to be expanded before matching
-  GrowableArray<LoadBarrierNode*>* _load_barrier_nodes;
-
-public:
-  ZBarrierSetC2State(Arena* comp_arena);
-  int load_barrier_count() const;
-  void add_load_barrier_node(LoadBarrierNode* n);
-  void remove_load_barrier_node(LoadBarrierNode* n);
-  LoadBarrierNode* load_barrier_node(int idx) const;
+  Address ref_addr() const;
+  Register ref() const;
+  Register tmp() const;
+  address slow_path() const;
+  RegMask& live() const;
+  Label* entry();
+  Label* continuation();
 };
 
 class ZBarrierSetC2 : public BarrierSetC2 {
 private:
-  ZBarrierSetC2State* state() const;
-  void expand_loadbarrier_node(PhaseMacroExpand* phase, LoadBarrierNode* barrier) const;
-
-#ifdef ASSERT
-  void verify_gc_barriers(bool post_parse) const;
-#endif
+  void compute_liveness_at_stubs() const;
+  void analyze_dominating_barriers() const;
 
 protected:
   virtual Node* load_at_resolved(C2Access& access, const Type* val_type) const;
@@ -174,43 +78,14 @@
 
 public:
   virtual void* create_barrier_state(Arena* comp_arena) const;
+  virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc,
+                                               BasicType type,
+                                               bool is_clone,
+                                               ArrayCopyPhase phase) const;
 
-  virtual bool has_load_barriers() const { return true; }
-  virtual bool is_gc_barrier_node(Node* node) const;
-  virtual Node* step_over_gc_barrier(Node* c) const;
-  virtual Node* step_over_gc_barrier_ctrl(Node* c) const;
-
-  virtual void register_potential_barrier_node(Node* node) const;
-  virtual void unregister_potential_barrier_node(Node* node) const;
-  virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { }
-  virtual void enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const;
-  virtual void eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const;
-
-  virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, ArrayCopyPhase phase) const;
-
-  virtual bool expand_barriers(Compile* C, PhaseIterGVN& igvn) const;
-  virtual bool final_graph_reshaping(Compile* compile, Node* n, uint opcode) const;
-  virtual bool matcher_find_shared_visit(Matcher* matcher, Matcher::MStack& mstack, Node* n, uint opcode, bool& mem_op, int& mem_addr_idx) const;
-  virtual bool matcher_find_shared_post_visit(Matcher* matcher, Node* n, uint opcode) const;
-  virtual bool needs_anti_dependence_check(const Node* node) const;
-
-#ifdef ASSERT
-  virtual void verify_gc_barriers(Compile* compile, CompilePhase phase) const;
-#endif
-
-  // Load barrier insertion and expansion external
-  virtual void barrier_insertion_phase(Compile* C, PhaseIterGVN &igvn) const;
-  virtual bool optimize_loops(PhaseIdealLoop* phase, LoopOptsMode mode, VectorSet& visited, Node_Stack& nstack, Node_List& worklist) const;
-  virtual bool is_gc_specific_loop_opts_pass(LoopOptsMode mode) const { return (mode == LoopOptsZBarrierInsertion); }
-  virtual bool strip_mined_loops_expanded(LoopOptsMode mode) const { return mode == LoopOptsZBarrierInsertion; }
-
-private:
-  // Load barrier insertion and expansion internal
-  void insert_barriers_on_unsafe(PhaseIdealLoop* phase) const;
-  void clean_catch_blocks(PhaseIdealLoop* phase, bool verify = false) const;
-  void insert_load_barriers(PhaseIdealLoop* phase) const;
-  LoadNode* insert_one_loadbarrier(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl) const;
-  void insert_one_loadbarrier_inner(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl, VectorSet visited) const;
+  virtual void late_barrier_analysis() const;
+  virtual int estimate_stub_size() const;
+  virtual void emit_stubs(CodeBuffer& cb) const;
 };
 
 #endif // SHARE_GC_Z_C2_ZBARRIERSETC2_HPP
--- a/src/hotspot/share/gc/z/zArguments.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/z/zArguments.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -37,11 +37,6 @@
 void ZArguments::initialize() {
   GCArguments::initialize();
 
-  // Check max heap size
-  if (MaxHeapSize > ZMaxHeapSize) {
-    vm_exit_during_initialization("Java heap too large");
-  }
-
   // Enable NUMA by default
   if (FLAG_IS_DEFAULT(UseNUMA)) {
     FLAG_SET_DEFAULT(UseNUMA, true);
--- a/src/hotspot/share/gc/z/zBarrierSetAssembler.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/z/zBarrierSetAssembler.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,10 +24,7 @@
 #ifndef SHARE_GC_Z_ZBARRIERSETASSEMBLER_HPP
 #define SHARE_GC_Z_ZBARRIERSETASSEMBLER_HPP
 
-#include "asm/macroAssembler.hpp"
 #include "gc/shared/barrierSetAssembler.hpp"
-#include "oops/accessDecorators.hpp"
-#include "utilities/globalDefinitions.hpp"
 #include "utilities/macros.hpp"
 
 class ZBarrierSetAssemblerBase : public BarrierSetAssembler {
--- a/src/hotspot/share/gc/z/zGlobals.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/z/zGlobals.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -44,10 +44,6 @@
 const size_t      ZGranuleSizeShift             = ZPlatformGranuleSizeShift;
 const size_t      ZGranuleSize                  = (size_t)1 << ZGranuleSizeShift;
 
-// Max heap size shift/size
-const size_t      ZMaxHeapSizeShift             = ZPlatformMaxHeapSizeShift;
-const size_t      ZMaxHeapSize                  = (size_t)1 << ZMaxHeapSizeShift;
-
 // Page types
 const uint8_t     ZPageTypeSmall                = 0;
 const uint8_t     ZPageTypeMedium               = 1;
--- a/src/hotspot/share/gc/z/zVirtualMemory.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/gc/z/zVirtualMemory.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -31,6 +31,13 @@
     _manager(),
     _initialized(false) {
 
+  // Check max supported heap size
+  if (max_capacity > ZAddressOffsetMax) {
+    log_error(gc)("Java heap too large (max supported heap size is " SIZE_FORMAT "G)",
+                  ZAddressOffsetMax / G);
+    return;
+  }
+
   log_info(gc, init)("Address Space: " SIZE_FORMAT "T", ZAddressOffsetMax / K / G);
 
   // Reserve address space
--- a/src/hotspot/share/include/jvm.h	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/include/jvm.h	Fri Oct 11 10:39:58 2019 +0200
@@ -1044,19 +1044,6 @@
 #include "classfile_constants.h"
 
 /*
- * A function defined by the byte-code verifier and called by the VM.
- * This is not a function implemented in the VM.
- *
- * Returns JNI_FALSE if verification fails. A detailed error message
- * will be places in msg_buf, whose length is specified by buf_len.
- */
-typedef jboolean (*verifier_fn_t)(JNIEnv *env,
-                                  jclass cb,
-                                  char * msg_buf,
-                                  jint buf_len);
-
-
-/*
  * Support for a VM-independent class format checker.
  */
 typedef struct {
@@ -1086,28 +1073,6 @@
 
 typedef jstring (*to_java_string_fn_t)(JNIEnv *env, char *str);
 
-typedef char *(*to_c_string_fn_t)(JNIEnv *env, jstring s, jboolean *b);
-
-/* This is the function defined in libjava.so that performs class
- * format checks. This functions fills in size information about
- * the class file and returns:
- *
- *   0: good
- *  -1: out of memory
- *  -2: bad format
- *  -3: unsupported version
- *  -4: bad class name
- */
-
-typedef jint (*check_format_fn_t)(char *class_name,
-                                  unsigned char *data,
-                                  unsigned int data_size,
-                                  class_size_info *class_size,
-                                  char *message_buffer,
-                                  jint buffer_length,
-                                  jboolean measure_only,
-                                  jboolean check_relaxed);
-
 #define JVM_RECOGNIZED_CLASS_MODIFIERS (JVM_ACC_PUBLIC | \
                                         JVM_ACC_FINAL | \
                                         JVM_ACC_SUPER | \
--- a/src/hotspot/share/jfr/recorder/jfrRecorder.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/jfr/recorder/jfrRecorder.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -168,7 +168,7 @@
 
 static bool is_cds_dump_requested() {
   // we will not be able to launch recordings if a cds dump is being requested
-  if ((DumpSharedSpaces || DynamicDumpSharedSpaces) && (JfrOptionSet::startup_recording_options() != NULL)) {
+  if (Arguments::is_dumping_archive() && (JfrOptionSet::startup_recording_options() != NULL)) {
     warning("JFR will be disabled during CDS dumping");
     teardown_startup_support();
     return true;
--- a/src/hotspot/share/jfr/recorder/repository/jfrEmergencyDump.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/jfr/recorder/repository/jfrEmergencyDump.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -393,6 +393,10 @@
     Service_lock->unlock();
   }
 
+  if (UseNotificationThread && Notification_lock->owned_by_self()) {
+    Notification_lock->unlock();
+  }
+
   if (CodeCache_lock->owned_by_self()) {
     CodeCache_lock->unlock();
   }
--- a/src/hotspot/share/memory/filemap.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/memory/filemap.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -263,7 +263,7 @@
 
 void SharedClassPathEntry::init(bool is_modules_image,
                                 ClassPathEntry* cpe, TRAPS) {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "dump time only");
+  Arguments::assert_is_dumping_archive();
   _timestamp = 0;
   _filesize  = 0;
   _from_class_path_attr = false;
@@ -397,7 +397,7 @@
 }
 
 void FileMapInfo::allocate_shared_path_table() {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "Sanity");
+  Arguments::assert_is_dumping_archive();
 
   EXCEPTION_MARK; // The following calls should never throw, but would exit VM on error.
   ClassLoaderData* loader_data = ClassLoaderData::the_null_class_loader_data();
@@ -444,7 +444,7 @@
 }
 
 void FileMapInfo::check_nonempty_dir_in_shared_path_table() {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "dump time only");
+  Arguments::assert_is_dumping_archive();
 
   bool has_nonempty_dir = false;
 
@@ -471,7 +471,7 @@
 }
 
 void FileMapInfo::record_non_existent_class_path_entry(const char* path) {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "dump time only");
+  Arguments::assert_is_dumping_archive();
   log_info(class, path)("non-existent Class-Path entry %s", path);
   if (_non_existent_class_paths == NULL) {
     _non_existent_class_paths = new (ResourceObj::C_HEAP, mtInternal)GrowableArray<const char*>(10, true);
@@ -480,7 +480,7 @@
 }
 
 int FileMapInfo::num_non_existent_class_paths() {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "dump time only");
+  Arguments::assert_is_dumping_archive();
   if (_non_existent_class_paths != NULL) {
     return _non_existent_class_paths->length();
   } else {
@@ -1150,7 +1150,7 @@
 
 void FileMapInfo::write_region(int region, char* base, size_t size,
                                bool read_only, bool allow_exec) {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "Dump time only");
+  Arguments::assert_is_dumping_archive();
 
   FileMapRegion* si = space_at(region);
   char* target_base = base;
--- a/src/hotspot/share/memory/metaspaceShared.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/memory/metaspaceShared.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -425,7 +425,7 @@
 }
 
 void MetaspaceShared::commit_shared_space_to(char* newtop) {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "dump-time only");
+  Arguments::assert_is_dumping_archive();
   char* base = _shared_rs.base();
   size_t need_committed_size = newtop - base;
   size_t has_committed_size = _shared_vs.committed_size();
@@ -509,8 +509,7 @@
 }
 
 uintx MetaspaceShared::object_delta_uintx(void* obj) {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces,
-         "supported only for dumping");
+  Arguments::assert_is_dumping_archive();
   if (DumpSharedSpaces) {
     assert(shared_rs()->contains(obj), "must be");
   } else {
--- a/src/hotspot/share/memory/universe.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/memory/universe.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -713,7 +713,7 @@
   }
 
 #if INCLUDE_CDS
-  if (DumpSharedSpaces || DynamicDumpSharedSpaces) {
+  if (Arguments::is_dumping_archive()) {
     MetaspaceShared::prepare_for_dumping();
   }
 #endif
--- a/src/hotspot/share/oops/constMethod.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/oops/constMethod.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -26,6 +26,7 @@
 #define SHARE_OOPS_CONSTMETHOD_HPP
 
 #include "oops/oop.hpp"
+#include "runtime/arguments.hpp"
 #include "utilities/align.hpp"
 
 // An ConstMethod represents portions of a Java method which are not written to after
@@ -293,7 +294,7 @@
     _adapter = adapter;
   }
   void set_adapter_trampoline(AdapterHandlerEntry** trampoline) {
-    assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "must be");
+    Arguments::assert_is_dumping_archive();
     if (DumpSharedSpaces) {
       assert(*trampoline == NULL,
              "must be NULL during dump time, to be initialized at run time");
--- a/src/hotspot/share/oops/cpCache.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/oops/cpCache.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -714,7 +714,7 @@
 }
 
 void ConstantPoolCache::walk_entries_for_initialization(bool check_only) {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "sanity");
+  Arguments::assert_is_dumping_archive();
   // When dumping the archive, we want to clean up the ConstantPoolCache
   // to remove any effect of linking due to the execution of Java code --
   // each ConstantPoolCacheEntry will have the same contents as if
--- a/src/hotspot/share/oops/instanceKlass.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/oops/instanceKlass.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -494,7 +494,7 @@
     assert(is_instance_klass(), "is layout incorrect?");
     assert(size_helper() == parser.layout_size(), "incorrect size_helper?");
 
-  if (DumpSharedSpaces || DynamicDumpSharedSpaces) {
+  if (Arguments::is_dumping_archive()) {
       SystemDictionaryShared::init_dumptime_info(this);
     }
 }
@@ -644,7 +644,7 @@
   }
   set_annotations(NULL);
 
-  if (DumpSharedSpaces || DynamicDumpSharedSpaces) {
+  if (Arguments::is_dumping_archive()) {
     SystemDictionaryShared::remove_dumptime_info(this);
   }
 }
@@ -2361,7 +2361,7 @@
     // (1) We are running AOT to generate a shared library.
     return true;
   }
-  if (DumpSharedSpaces || DynamicDumpSharedSpaces) {
+  if (Arguments::is_dumping_archive()) {
     // (2) We are running -Xshare:dump or -XX:ArchiveClassesAtExit to create a shared archive
     return true;
   }
@@ -2609,7 +2609,7 @@
   // notify ClassLoadingService of class unload
   ClassLoadingService::notify_class_unloaded(ik);
 
-  if (DumpSharedSpaces || DynamicDumpSharedSpaces) {
+  if (Arguments::is_dumping_archive()) {
     SystemDictionaryShared::remove_dumptime_info(ik);
   }
 
--- a/src/hotspot/share/oops/klass.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/oops/klass.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -525,7 +525,7 @@
 }
 
 void Klass::remove_unshareable_info() {
-  assert (DumpSharedSpaces || DynamicDumpSharedSpaces,
+  assert (Arguments::is_dumping_archive(),
           "only called during CDS dump time");
   JFR_ONLY(REMOVE_ID(this);)
   if (log_is_enabled(Trace, cds, unshareable)) {
@@ -543,7 +543,7 @@
 }
 
 void Klass::remove_java_mirror() {
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "only called during CDS dump time");
+  Arguments::assert_is_dumping_archive();
   if (log_is_enabled(Trace, cds, unshareable)) {
     ResourceMark rm;
     log_trace(cds, unshareable)("remove java_mirror: %s", external_name());
--- a/src/hotspot/share/oops/klassVtable.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/oops/klassVtable.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -128,11 +128,6 @@
   *vtable_length_ret = vtable_length;
 }
 
-int klassVtable::index_of(Method* m, int len) const {
-  assert(m->has_vtable_index(), "do not ask this of non-vtable methods");
-  return m->vtable_index();
-}
-
 // Copy super class's vtable to the first part (prefix) of this class's vtable,
 // and return the number of entries copied.  Expects that 'super' is the Java
 // super class (arrays can have "array" super classes that must be skipped).
@@ -169,7 +164,6 @@
 
   // Note:  Arrays can have intermediate array supers.  Use java_super to skip them.
   InstanceKlass* super = _klass->java_super();
-  int nofNewEntries = 0;
 
   bool is_shared = _klass->is_shared();
 
@@ -1029,15 +1023,6 @@
 }
 #endif // INCLUDE_JVMTI
 
-// CDS/RedefineClasses support - clear vtables so they can be reinitialized
-void klassVtable::clear_vtable() {
-  for (int i = 0; i < _length; i++) table()[i].clear();
-}
-
-bool klassVtable::is_initialized() {
-  return _length == 0 || table()[0].method() != NULL;
-}
-
 //-----------------------------------------------------------------------------------------
 // Itable code
 
@@ -1481,31 +1466,6 @@
 #endif
 }
 
-
-// inverse to itable_index
-Method* klassItable::method_for_itable_index(InstanceKlass* intf, int itable_index) {
-  assert(intf->is_interface(), "sanity check");
-  assert(intf->verify_itable_index(itable_index), "");
-  Array<Method*>* methods = InstanceKlass::cast(intf)->methods();
-
-  if (itable_index < 0 || itable_index >= method_count_for_interface(intf))
-    return NULL;                // help caller defend against bad indices
-
-  int index = itable_index;
-  Method* m = methods->at(index);
-  int index2 = -1;
-  while (!m->has_itable_index() ||
-         (index2 = m->itable_index()) != itable_index) {
-    assert(index2 < itable_index, "monotonic");
-    if (++index == methods->length())
-      return NULL;
-    m = methods->at(index);
-  }
-  assert(m->itable_index() == itable_index, "correct inverse");
-
-  return m;
-}
-
 void klassVtable::verify(outputStream* st, bool forced) {
   // make sure table is initialized
   if (!Universe::is_fully_initialized()) return;
--- a/src/hotspot/share/oops/klassVtable.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/oops/klassVtable.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -48,13 +48,6 @@
   int          _verify_count;     // to make verify faster
 #endif
 
-  // Ordering important, so greater_than (>) can be used as an merge operator.
-  enum AccessType {
-    acc_private         = 0,
-    acc_package_private = 1,
-    acc_publicprotected = 2
-  };
-
  public:
   klassVtable(Klass* klass, void* base, int length) : _klass(klass) {
     _tableOffset = (address)base - (address)klass; _length = length;
@@ -66,22 +59,12 @@
   int length() const              { return _length; }
   inline Method* method_at(int i) const;
   inline Method* unchecked_method_at(int i) const;
-  inline Method** adr_method_at(int i) const;
 
   // searching; all methods return -1 if not found
-  int index_of(Method* m) const                         { return index_of(m, _length); }
   int index_of_miranda(Symbol* name, Symbol* signature);
 
   void initialize_vtable(bool checkconstraints, TRAPS);   // initialize vtable of a new klass
 
-  // CDS/RedefineClasses support - clear vtables so they can be reinitialized
-  // at dump time.  Clearing gives us an easy way to tell if the vtable has
-  // already been reinitialized at dump time (see dump.cpp).  Vtables can
-  // be initialized at run time by RedefineClasses so dumping the right order
-  // is necessary.
-  void clear_vtable();
-  bool is_initialized();
-
   // computes vtable length (in words) and the number of miranda methods
   static void compute_vtable_size_and_num_mirandas(int* vtable_length,
                                                    int* num_new_mirandas,
@@ -125,7 +108,6 @@
  private:
   void copy_vtable_to(vtableEntry* start);
   int  initialize_from_super(Klass* super);
-  int  index_of(Method* m, int len) const; // same as index_of, but search only up to len
   void put_method_at(Method* m, int index);
   static bool needs_new_vtable_entry(const methodHandle& m,
                                      const Klass* super,
@@ -223,12 +205,6 @@
   return table()[i].method();
 }
 
-inline Method** klassVtable::adr_method_at(int i) const {
-  // Allow one past the last entry to be referenced; useful for loop bounds.
-  assert(i >= 0 && i <= _length, "index out of bounds");
-  return (Method**)(address(table() + i) + vtableEntry::method_offset_in_bytes());
-}
-
 // --------------------------------------------------------------------------------
 class klassItable;
 class itableMethodEntry;
@@ -336,9 +312,6 @@
   static int compute_itable_size(Array<InstanceKlass*>* transitive_interfaces);
   static void setup_itable_offset_table(InstanceKlass* klass);
 
-  // Resolving of method to index
-  static Method* method_for_itable_index(InstanceKlass* klass, int itable_index);
-
   // Debugging/Statistics
   static void print_statistics() PRODUCT_RETURN;
  private:
--- a/src/hotspot/share/oops/method.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/oops/method.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -1015,7 +1015,7 @@
 void Method::unlink_method() {
   _code = NULL;
 
-  assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "dump time only");
+  Arguments::assert_is_dumping_archive();
   // Set the values to what they should be at run time. Note that
   // this Method can no longer be executed during dump time.
   _i2i_entry = Interpreter::entry_for_cds_method(this);
--- a/src/hotspot/share/opto/c2compiler.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/opto/c2compiler.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -102,7 +102,8 @@
   assert(is_initialized(), "Compiler thread must be initialized");
 
   bool subsume_loads = SubsumeLoads;
-  bool do_escape_analysis = DoEscapeAnalysis && !env->should_retain_local_variables();
+  bool do_escape_analysis = DoEscapeAnalysis && !env->should_retain_local_variables()
+                                             && !env->jvmti_can_get_owned_monitor_info();
   bool eliminate_boxing = EliminateAutoBox;
 
   while (!env->failing()) {
--- a/src/hotspot/share/opto/classes.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/opto/classes.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -49,9 +49,6 @@
 #include "opto/valuetypenode.hpp"
 #include "opto/vectornode.hpp"
 #include "utilities/macros.hpp"
-#if INCLUDE_ZGC
-#include "gc/z/c2/zBarrierSetC2.hpp"
-#endif
 #if INCLUDE_SHENANDOAHGC
 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
 #endif
--- a/src/hotspot/share/opto/classes.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/opto/classes.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -195,17 +195,6 @@
 macro(LoadN)
 macro(LoadRange)
 macro(LoadS)
-#if INCLUDE_ZGC
-#define zgcmacro(x) macro(x)
-#else
-#define zgcmacro(x) optionalmacro(x)
-#endif
-zgcmacro(LoadBarrier)
-zgcmacro(LoadBarrierSlowReg)
-zgcmacro(ZCompareAndSwapP)
-zgcmacro(ZWeakCompareAndSwapP)
-zgcmacro(ZCompareAndExchangeP)
-zgcmacro(ZGetAndSetP)
 macro(Lock)
 macro(Loop)
 macro(LoopLimit)
--- a/src/hotspot/share/opto/compile.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/opto/compile.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -77,9 +77,6 @@
 #include "utilities/align.hpp"
 #include "utilities/copy.hpp"
 #include "utilities/macros.hpp"
-#if INCLUDE_ZGC
-#include "gc/z/c2/zBarrierSetC2.hpp"
-#endif
 
 
 // -------------------- Compile::mach_constant_base_node -----------------------
@@ -1026,6 +1023,7 @@
     _has_method_handle_invokes(false),
     _clinit_barrier_on_entry(false),
     _comp_arena(mtCompiler),
+    _barrier_set_state(BarrierSet::barrier_set()->barrier_set_c2()->create_barrier_state(comp_arena())),
     _env(ci_env),
     _directive(directive),
     _log(ci_env->log()),
@@ -2845,13 +2843,6 @@
     print_method(PHASE_MACRO_EXPANSION, 2);
   }
 
-#ifdef ASSERT
-  bs->verify_gc_barriers(this, BarrierSetC2::BeforeLateInsertion);
-#endif
-
-  bs->barrier_insertion_phase(C, igvn);
-  if (failing())  return;
-
   {
     TracePhase tp("barrierExpand", &timers[_t_barrierExpand]);
     if (bs->expand_barriers(this, igvn)) {
--- a/src/hotspot/share/opto/compile.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/opto/compile.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -56,7 +56,6 @@
 class IdealGraphPrinter;
 class InlineTree;
 class Int_Array;
-class LoadBarrierNode;
 class Matcher;
 class MachConstantNode;
 class MachConstantBaseNode;
@@ -98,7 +97,6 @@
   LoopOptsNone,
   LoopOptsShenandoahExpand,
   LoopOptsShenandoahPostExpand,
-  LoopOptsZBarrierInsertion,
   LoopOptsSkipSplitIf,
   LoopOptsVerify
 };
@@ -1211,11 +1209,7 @@
   bool           in_scratch_emit_size() const   { return _in_scratch_emit_size;     }
 
   enum ScratchBufferBlob {
-#if defined(PPC64)
     MAX_inst_size       = 2048,
-#else
-    MAX_inst_size       = 1024,
-#endif
     MAX_locs_size       = 128, // number of relocInfo elements
     MAX_const_size      = 128,
     MAX_stubs_size      = 128
@@ -1290,14 +1284,30 @@
   // Process an OopMap Element while emitting nodes
   void Process_OopMap_Node(MachNode *mach, int code_offset);
 
+  class BufferSizingData {
+  public:
+    int _stub;
+    int _code;
+    int _const;
+    int _reloc;
+
+      BufferSizingData() :
+      _stub(0),
+      _code(0),
+      _const(0),
+      _reloc(0)
+      { };
+  };
+
   // Initialize code buffer
-  CodeBuffer* init_buffer(uint* blk_starts);
+  void        estimate_buffer_size(int& const_req);
+  CodeBuffer* init_buffer(BufferSizingData& buf_sizes);
 
   // Write out basic block data to code buffer
   void fill_buffer(CodeBuffer* cb, uint* blk_starts);
 
   // Determine which variable sized branches can be shortened
-  void shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size);
+  void shorten_branches(uint* blk_starts, BufferSizingData& buf_sizes);
 
   // Compute the size of first NumberOfLoopInstrToAlign instructions
   // at the head of a loop.
--- a/src/hotspot/share/opto/loopnode.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/opto/loopnode.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -993,18 +993,6 @@
       }
     }
 
-    if (UseZGC && !inner_out->in(0)->is_CountedLoopEnd()) {
-      // In some very special cases there can be a load that has no other uses than the
-      // counted loop safepoint. Then its loadbarrier will be placed between the inner
-      // loop exit and the safepoint. This is very rare
-
-      Node* ifnode = inner_out->in(1)->in(0);
-      // Region->IfTrue->If == Region->Iffalse->If
-      if (ifnode == inner_out->in(2)->in(0)) {
-        inner_out = ifnode->in(0);
-      }
-    }
-
     CountedLoopEndNode* cle = inner_out->in(0)->as_CountedLoopEnd();
     assert(cle == inner->loopexit_or_null(), "mismatch");
     bool has_skeleton = outer_le->in(1)->bottom_type()->singleton() && outer_le->in(1)->bottom_type()->is_int()->get_con() == 0;
@@ -4049,28 +4037,32 @@
   // dominated by early is considered a potentially interfering store.
   // This can produce false positives.
   if (n->is_Load() && LCA != early) {
-    Node_List worklist;
-
-    Node *mem = n->in(MemNode::Memory);
-    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
-      Node* s = mem->fast_out(i);
-      worklist.push(s);
-    }
-    while(worklist.size() != 0 && LCA != early) {
-      Node* s = worklist.pop();
-      if (s->is_Load() || s->Opcode() == Op_SafePoint ||
-          (s->is_CallStaticJava() && s->as_CallStaticJava()->uncommon_trap_request() != 0)) {
-        continue;
-      } else if (s->is_MergeMem()) {
-        for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
-          Node* s1 = s->fast_out(i);
-          worklist.push(s1);
-        }
-      } else {
-        Node *sctrl = has_ctrl(s) ? get_ctrl(s) : s->in(0);
-        assert(sctrl != NULL || s->outcnt() == 0, "must have control");
-        if (sctrl != NULL && !sctrl->is_top() && is_dominator(early, sctrl)) {
-          LCA = dom_lca_for_get_late_ctrl(LCA, sctrl, n);
+    int load_alias_idx = C->get_alias_index(n->adr_type());
+    if (C->alias_type(load_alias_idx)->is_rewritable()) {
+
+      Node_List worklist;
+
+      Node *mem = n->in(MemNode::Memory);
+      for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
+        Node* s = mem->fast_out(i);
+        worklist.push(s);
+      }
+      while(worklist.size() != 0 && LCA != early) {
+        Node* s = worklist.pop();
+        if (s->is_Load() || s->Opcode() == Op_SafePoint ||
+            (s->is_CallStaticJava() && s->as_CallStaticJava()->uncommon_trap_request() != 0)) {
+          continue;
+        } else if (s->is_MergeMem()) {
+          for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
+            Node* s1 = s->fast_out(i);
+            worklist.push(s1);
+          }
+        } else {
+          Node *sctrl = has_ctrl(s) ? get_ctrl(s) : s->in(0);
+          assert(sctrl != NULL || s->outcnt() == 0, "must have control");
+          if (sctrl != NULL && !sctrl->is_top() && C->can_alias(s->adr_type(), load_alias_idx) && is_dominator(early, sctrl)) {
+            LCA = dom_lca_for_get_late_ctrl(LCA, sctrl, n);
+          }
         }
       }
     }
--- a/src/hotspot/share/opto/loopopts.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/opto/loopopts.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -42,9 +42,6 @@
 #include "opto/subnode.hpp"
 #include "opto/valuetypenode.hpp"
 #include "utilities/macros.hpp"
-#if INCLUDE_ZGC
-#include "gc/z/c2/zBarrierSetC2.hpp"
-#endif
 
 //=============================================================================
 //------------------------------split_thru_phi---------------------------------
@@ -1082,26 +1079,21 @@
   // uses.
   // A better fix for this problem can be found in the BugTraq entry, but
   // expediency for Mantis demands this hack.
-  // 6855164: If the merge point has a FastLockNode with a PhiNode input, we stop
-  // split_if_with_blocks from splitting a block because we could not move around
-  // the FastLockNode.
+#ifdef _LP64
   for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
     Node* n = region->fast_out(i);
     if (n->is_Phi()) {
       for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
         Node* m = n->fast_out(j);
-        if (m->is_FastLock())
-          return false;
-#ifdef _LP64
         if (m->Opcode() == Op_ConvI2L)
           return false;
         if (m->is_CastII() && m->isa_CastII()->has_range_check()) {
           return false;
         }
-#endif
       }
     }
   }
+#endif
   return true;
 }
 
--- a/src/hotspot/share/opto/machnode.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/opto/machnode.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -197,7 +197,7 @@
 // ADLC inherit from this class.
 class MachNode : public Node {
 public:
-  MachNode() : Node((uint)0), _num_opnds(0), _opnds(NULL) {
+  MachNode() : Node((uint)0), _barrier(0), _num_opnds(0), _opnds(NULL) {
     init_class_id(Class_Mach);
   }
   // Required boilerplate
@@ -211,6 +211,9 @@
   // no constant base node input.
   virtual uint mach_constant_base_node_input() const { return (uint)-1; }
 
+  uint8_t barrier_data() const { return _barrier; }
+  void set_barrier_data(uint data) { _barrier = data; }
+
   // Copy inputs and operands to new node of instruction.
   // Called from cisc_version() and short_branch_version().
   // !!!! The method's body is defined in ad_<arch>.cpp file.
@@ -255,6 +258,9 @@
   // output have choices - but they must use the same choice.
   virtual uint two_adr( ) const { return 0; }
 
+  // The GC might require some barrier metadata for machine code emission.
+  uint8_t _barrier;
+
   // Array of complex operand pointers.  Each corresponds to zero or
   // more leafs.  Must be set by MachNode constructor to point to an
   // internal array of MachOpers.  The MachOper array is sized by
--- a/src/hotspot/share/opto/matcher.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/opto/matcher.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -1813,6 +1813,13 @@
     _shared_nodes.map(leaf->_idx, ex);
   }
 
+  // Have mach nodes inherit GC barrier data
+  if (leaf->is_LoadStore()) {
+    mach->set_barrier_data(leaf->as_LoadStore()->barrier_data());
+  } else if (leaf->is_Mem()) {
+    mach->set_barrier_data(leaf->as_Mem()->barrier_data());
+  }
+
   return ex;
 }
 
--- a/src/hotspot/share/opto/memnode.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/opto/memnode.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -50,9 +50,6 @@
 #include "utilities/copy.hpp"
 #include "utilities/macros.hpp"
 #include "utilities/vmError.hpp"
-#if INCLUDE_ZGC
-#include "gc/z/c2/zBarrierSetC2.hpp"
-#endif
 
 // Portions of code courtesy of Clifford Click
 
@@ -3001,7 +2998,7 @@
   : Node(required),
     _type(rt),
     _adr_type(at),
-    _has_barrier(false)
+    _barrier(0)
 {
   init_req(MemNode::Control, c  );
   init_req(MemNode::Memory , mem);
--- a/src/hotspot/share/opto/memnode.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/opto/memnode.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -43,6 +43,8 @@
   bool _unaligned_access; // Unaligned access from unsafe
   bool _mismatched_access; // Mismatched access from unsafe: byte read in integer array for instance
   bool _unsafe_access;     // Access of unsafe origin.
+  uint8_t _barrier; // Bit field with barrier information
+
 protected:
 #ifdef ASSERT
   const TypePtr* _adr_type;     // What kind of memory is being addressed?
@@ -62,18 +64,30 @@
                  unset          // The memory ordering is not set (used for testing)
   } MemOrd;
 protected:
-  MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at )
-    : Node(c0,c1,c2   ), _unaligned_access(false), _mismatched_access(false), _unsafe_access(false) {
+  MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at ) :
+      Node(c0,c1,c2),
+      _unaligned_access(false),
+      _mismatched_access(false),
+      _unsafe_access(false),
+      _barrier(0) {
     init_class_id(Class_Mem);
     debug_only(_adr_type=at; adr_type();)
   }
-  MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3 )
-    : Node(c0,c1,c2,c3), _unaligned_access(false), _mismatched_access(false), _unsafe_access(false) {
+  MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3 ) :
+      Node(c0,c1,c2,c3),
+      _unaligned_access(false),
+      _mismatched_access(false),
+      _unsafe_access(false),
+      _barrier(0) {
     init_class_id(Class_Mem);
     debug_only(_adr_type=at; adr_type();)
   }
-  MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3, Node *c4)
-    : Node(c0,c1,c2,c3,c4), _unaligned_access(false), _mismatched_access(false), _unsafe_access(false) {
+  MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3, Node *c4) :
+      Node(c0,c1,c2,c3,c4),
+      _unaligned_access(false),
+      _mismatched_access(false),
+      _unsafe_access(false),
+      _barrier(0) {
     init_class_id(Class_Mem);
     debug_only(_adr_type=at; adr_type();)
   }
@@ -129,6 +143,9 @@
 #endif
   }
 
+  uint8_t barrier_data() { return _barrier; }
+  void set_barrier_data(uint8_t barrier_data) { _barrier = barrier_data; }
+
   // Search through memory states which precede this node (load or store).
   // Look for an exact match for the address, with no intervening
   // aliased stores.
@@ -185,8 +202,6 @@
   // this field.
   const MemOrd _mo;
 
-  uint _barrier; // Bit field with barrier information
-
   AllocateNode* is_new_object_mark_load(PhaseGVN *phase) const;
 
 protected:
@@ -200,7 +215,7 @@
 public:
 
   LoadNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *rt, MemOrd mo, ControlDependency control_dependency)
-    : MemNode(c,mem,adr,at), _control_dependency(control_dependency), _mo(mo), _barrier(0), _type(rt) {
+    : MemNode(c,mem,adr,at), _control_dependency(control_dependency), _mo(mo), _type(rt) {
     init_class_id(Class_Load);
   }
   inline bool is_unordered() const { return !is_acquire(); }
@@ -269,10 +284,6 @@
   Node* convert_to_unsigned_load(PhaseGVN& gvn);
   Node* convert_to_signed_load(PhaseGVN& gvn);
 
-  void copy_barrier_info(const Node* src) { _barrier = src->as_Load()->_barrier; }
-  uint barrier_data() { return _barrier; }
-  void set_barrier_data(uint barrier_data) { _barrier |= barrier_data; }
-
   void pin() { _control_dependency = Pinned; }
   bool has_unknown_control_dependency() const { return _control_dependency == UnknownControl; }
 
@@ -864,7 +875,7 @@
 private:
   const Type* const _type;      // What kind of value is loaded?
   const TypePtr* _adr_type;     // What kind of memory is being addressed?
-  bool _has_barrier;
+  uint8_t _barrier; // Bit field with barrier information
   virtual uint size_of() const; // Size is bigger
 public:
   LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* rt, uint required );
@@ -877,8 +888,9 @@
 
   bool result_not_used() const;
   MemBarNode* trailing_membar() const;
-  void set_has_barrier() { _has_barrier = true; };
-  bool has_barrier() const { return _has_barrier; };
+
+  uint8_t barrier_data() { return _barrier; }
+  void set_barrier_data(uint8_t barrier_data) { _barrier = barrier_data; }
 };
 
 class LoadStoreConditionalNode : public LoadStoreNode {
@@ -930,6 +942,7 @@
   MemNode::MemOrd order() const {
     return _mem_ord;
   }
+  virtual uint size_of() const { return sizeof(*this); }
 };
 
 class CompareAndExchangeNode : public LoadStoreNode {
@@ -947,6 +960,7 @@
   MemNode::MemOrd order() const {
     return _mem_ord;
   }
+  virtual uint size_of() const { return sizeof(*this); }
 };
 
 //------------------------------CompareAndSwapBNode---------------------------
--- a/src/hotspot/share/opto/node.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/opto/node.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -546,9 +546,6 @@
   if (n->is_SafePoint()) {
     n->as_SafePoint()->clone_replaced_nodes();
   }
-  if (n->is_Load()) {
-    n->as_Load()->copy_barrier_info(this);
-  }
   if (n->is_ValueTypeBase()) {
     C->add_value_type(n);
   }
@@ -1482,10 +1479,6 @@
   if (req() < 2 || (_flags & Flag_needs_anti_dependence_check) == 0) {
     return false;
   }
-  BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
-  if (!bs->needs_anti_dependence_check(this)) {
-    return false;
-  }
   return in(1)->bottom_type()->has_memory();
 }
 
--- a/src/hotspot/share/opto/node.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/opto/node.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -83,8 +83,6 @@
 class JumpNode;
 class JumpProjNode;
 class LoadNode;
-class LoadBarrierNode;
-class LoadBarrierSlowRegNode;
 class LoadStoreNode;
 class LoadStoreConditionalNode;
 class LockNode;
@@ -646,7 +644,6 @@
       DEFINE_CLASS_ID(MemBar,      Multi, 3)
         DEFINE_CLASS_ID(Initialize,       MemBar, 0)
         DEFINE_CLASS_ID(MemBarStoreStore, MemBar, 1)
-      DEFINE_CLASS_ID(LoadBarrier, Multi, 4)
 
     DEFINE_CLASS_ID(Mach,  Node, 1)
       DEFINE_CLASS_ID(MachReturn, Mach, 0)
@@ -684,7 +681,6 @@
       DEFINE_CLASS_ID(EncodeNarrowPtr, Type, 6)
         DEFINE_CLASS_ID(EncodeP, EncodeNarrowPtr, 0)
         DEFINE_CLASS_ID(EncodePKlass, EncodeNarrowPtr, 1)
-      DEFINE_CLASS_ID(LoadBarrierSlowReg, Type, 7)
       DEFINE_CLASS_ID(ValueTypeBase, Type, 8)
         DEFINE_CLASS_ID(ValueType, ValueTypeBase, 0)
         DEFINE_CLASS_ID(ValueTypePtr, ValueTypeBase, 1)
@@ -844,8 +840,6 @@
   DEFINE_CLASS_QUERY(Load)
   DEFINE_CLASS_QUERY(LoadStore)
   DEFINE_CLASS_QUERY(LoadStoreConditional)
-  DEFINE_CLASS_QUERY(LoadBarrier)
-  DEFINE_CLASS_QUERY(LoadBarrierSlowReg)
   DEFINE_CLASS_QUERY(Lock)
   DEFINE_CLASS_QUERY(Loop)
   DEFINE_CLASS_QUERY(Mach)
--- a/src/hotspot/share/opto/output.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/opto/output.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -31,6 +31,8 @@
 #include "compiler/compileBroker.hpp"
 #include "compiler/compilerDirectives.hpp"
 #include "compiler/oopMap.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/c2/barrierSetC2.hpp"
 #include "memory/allocation.inline.hpp"
 #include "opto/ad.hpp"
 #include "opto/callnode.hpp"
@@ -124,15 +126,19 @@
     }
   }
 
+  // Keeper of sizing aspects
+  BufferSizingData buf_sizes = BufferSizingData();
+
+  // Initialize code buffer
+  estimate_buffer_size(buf_sizes._const);
+  if (failing()) return;
+
+  // Pre-compute the length of blocks and replace
+  // long branches with short if machine supports it.
+  // Must be done before ScheduleAndBundle due to SPARC delay slots
   uint* blk_starts = NEW_RESOURCE_ARRAY(uint, _cfg->number_of_blocks() + 1);
   blk_starts[0] = 0;
-
-  // Initialize code buffer and process short branches.
-  CodeBuffer* cb = init_buffer(blk_starts);
-
-  if (cb == NULL || failing()) {
-    return;
-  }
+  shorten_branches(blk_starts, buf_sizes);
 
   if (!is_osr_compilation() && _method && _method->has_scalarized_args()) {
     // Compute the offsets of the entry points required by the value type calling convention
@@ -160,24 +166,18 @@
   }
 
   ScheduleAndBundle();
-
-#ifndef PRODUCT
-  if (trace_opto_output()) {
-    tty->print("\n---- After ScheduleAndBundle ----\n");
-    for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
-      tty->print("\nBB#%03d:\n", i);
-      Block* block = _cfg->get_block(i);
-      for (uint j = 0; j < block->number_of_nodes(); j++) {
-        Node* n = block->get_node(j);
-        OptoReg::Name reg = _regalloc->get_reg_first(n);
-        tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : "");
-        n->dump();
-      }
-    }
+  if (failing()) {
+    return;
   }
-#endif
-
-  if (failing()) {
+
+  // Late barrier analysis must be done after schedule and bundle
+  // Otherwise liveness based spilling will fail
+  BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
+  bs->late_barrier_analysis();
+
+  // Complete sizing of codebuffer
+  CodeBuffer* cb = init_buffer(buf_sizes);
+  if (cb == NULL || failing()) {
     return;
   }
 
@@ -258,7 +258,7 @@
 
 // The architecture description provides short branch variants for some long
 // branch instructions. Replace eligible long branches with short branches.
-void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) {
+void Compile::shorten_branches(uint* blk_starts, BufferSizingData& buf_sizes) {
   // Compute size of each block, method size, and relocation information size
   uint nblocks  = _cfg->number_of_blocks();
 
@@ -276,11 +276,11 @@
   bool has_short_branch_candidate = false;
 
   // Initialize the sizes to 0
-  code_size  = 0;          // Size in bytes of generated code
-  stub_size  = 0;          // Size in bytes of all stub entries
+  int code_size  = 0;          // Size in bytes of generated code
+  int stub_size  = 0;          // Size in bytes of all stub entries
   // Size in bytes of all relocation entries, including those in local stubs.
   // Start with 2-bytes of reloc info for the unvalidated entry point
-  reloc_size = 1;          // Number of relocation entries
+  int reloc_size = 1;          // Number of relocation entries
 
   // Make three passes.  The first computes pessimistic blk_starts,
   // relative jmp_offset and reloc_size information.  The second performs
@@ -516,6 +516,10 @@
   // a relocation index.
   // The CodeBuffer will expand the locs array if this estimate is too low.
   reloc_size *= 10 / sizeof(relocInfo);
+
+  buf_sizes._reloc = reloc_size;
+  buf_sizes._code  = code_size;
+  buf_sizes._stub  = stub_size;
 }
 
 //------------------------------FillLocArray-----------------------------------
@@ -527,8 +531,8 @@
   // This should never have accepted Bad before
   assert(OptoReg::is_valid(regnum), "location must be valid");
   return (OptoReg::is_reg(regnum))
-    ? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) )
-    : new LocationValue(Location::new_stk_loc(l_type,  ra->reg2offset(regnum)));
+         ? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) )
+         : new LocationValue(Location::new_stk_loc(l_type,  ra->reg2offset(regnum)));
 }
 
 
@@ -647,12 +651,12 @@
     }
 #endif //_LP64
     else if( (t->base() == Type::FloatBot || t->base() == Type::FloatCon) &&
-               OptoReg::is_reg(regnum) ) {
+             OptoReg::is_reg(regnum) ) {
       array->append(new_loc_value( _regalloc, regnum, Matcher::float_in_double()
-                                   ? Location::float_in_dbl : Location::normal ));
+                                                      ? Location::float_in_dbl : Location::normal ));
     } else if( t->base() == Type::Int && OptoReg::is_reg(regnum) ) {
       array->append(new_loc_value( _regalloc, regnum, Matcher::int_in_long
-                                   ? Location::int_in_long : Location::normal ));
+                                                      ? Location::int_in_long : Location::normal ));
     } else if( t->base() == Type::NarrowOop ) {
       array->append(new_loc_value( _regalloc, regnum, Location::narrowoop ));
     } else {
@@ -663,48 +667,48 @@
 
   // No register.  It must be constant data.
   switch (t->base()) {
-  case Type::Half:              // Second half of a double
-    ShouldNotReachHere();       // Caller should skip 2nd halves
-    break;
-  case Type::AnyPtr:
-    array->append(new ConstantOopWriteValue(NULL));
-    break;
-  case Type::AryPtr:
-  case Type::InstPtr:          // fall through
-    array->append(new ConstantOopWriteValue(t->isa_oopptr()->const_oop()->constant_encoding()));
-    break;
-  case Type::NarrowOop:
-    if (t == TypeNarrowOop::NULL_PTR) {
+    case Type::Half:              // Second half of a double
+      ShouldNotReachHere();       // Caller should skip 2nd halves
+      break;
+    case Type::AnyPtr:
       array->append(new ConstantOopWriteValue(NULL));
-    } else {
-      array->append(new ConstantOopWriteValue(t->make_ptr()->isa_oopptr()->const_oop()->constant_encoding()));
+      break;
+    case Type::AryPtr:
+    case Type::InstPtr:          // fall through
+      array->append(new ConstantOopWriteValue(t->isa_oopptr()->const_oop()->constant_encoding()));
+      break;
+    case Type::NarrowOop:
+      if (t == TypeNarrowOop::NULL_PTR) {
+        array->append(new ConstantOopWriteValue(NULL));
+      } else {
+        array->append(new ConstantOopWriteValue(t->make_ptr()->isa_oopptr()->const_oop()->constant_encoding()));
+      }
+      break;
+    case Type::Int:
+      array->append(new ConstantIntValue(t->is_int()->get_con()));
+      break;
+    case Type::RawPtr:
+      // A return address (T_ADDRESS).
+      assert((intptr_t)t->is_ptr()->get_con() < (intptr_t)0x10000, "must be a valid BCI");
+#ifdef _LP64
+      // Must be restored to the full-width 64-bit stack slot.
+      array->append(new ConstantLongValue(t->is_ptr()->get_con()));
+#else
+      array->append(new ConstantIntValue(t->is_ptr()->get_con()));
+#endif
+      break;
+    case Type::FloatCon: {
+      float f = t->is_float_constant()->getf();
+      array->append(new ConstantIntValue(jint_cast(f)));
+      break;
     }
-    break;
-  case Type::Int:
-    array->append(new ConstantIntValue(t->is_int()->get_con()));
-    break;
-  case Type::RawPtr:
-    // A return address (T_ADDRESS).
-    assert((intptr_t)t->is_ptr()->get_con() < (intptr_t)0x10000, "must be a valid BCI");
+    case Type::DoubleCon: {
+      jdouble d = t->is_double_constant()->getd();
 #ifdef _LP64
-    // Must be restored to the full-width 64-bit stack slot.
-    array->append(new ConstantLongValue(t->is_ptr()->get_con()));
+      array->append(new ConstantIntValue((jint)0));
+      array->append(new ConstantDoubleValue(d));
 #else
-    array->append(new ConstantIntValue(t->is_ptr()->get_con()));
-#endif
-    break;
-  case Type::FloatCon: {
-    float f = t->is_float_constant()->getf();
-    array->append(new ConstantIntValue(jint_cast(f)));
-    break;
-  }
-  case Type::DoubleCon: {
-    jdouble d = t->is_double_constant()->getd();
-#ifdef _LP64
-    array->append(new ConstantIntValue((jint)0));
-    array->append(new ConstantDoubleValue(d));
-#else
-    // Repack the double as two jints.
+      // Repack the double as two jints.
     // The convention the interpreter uses is that the second local
     // holds the first raw word of the native double representation.
     // This is actually reasonable, since locals and stack arrays
@@ -716,15 +720,15 @@
     array->append(new ConstantIntValue(acc.words[1]));
     array->append(new ConstantIntValue(acc.words[0]));
 #endif
-    break;
-  }
-  case Type::Long: {
-    jlong d = t->is_long()->get_con();
+      break;
+    }
+    case Type::Long: {
+      jlong d = t->is_long()->get_con();
 #ifdef _LP64
-    array->append(new ConstantIntValue((jint)0));
-    array->append(new ConstantLongValue(d));
+      array->append(new ConstantIntValue((jint)0));
+      array->append(new ConstantLongValue(d));
 #else
-    // Repack the long as two jints.
+      // Repack the long as two jints.
     // The convention the interpreter uses is that the second local
     // holds the first raw word of the native double representation.
     // This is actually reasonable, since locals and stack arrays
@@ -736,14 +740,14 @@
     array->append(new ConstantIntValue(acc.words[1]));
     array->append(new ConstantIntValue(acc.words[0]));
 #endif
-    break;
-  }
-  case Type::Top:               // Add an illegal value here
-    array->append(new LocationValue(Location()));
-    break;
-  default:
-    ShouldNotReachHere();
-    break;
+      break;
+    }
+    case Type::Top:               // Add an illegal value here
+      array->append(new LocationValue(Location()));
+      break;
+    default:
+      ShouldNotReachHere();
+      break;
   }
 }
 
@@ -912,58 +916,58 @@
 
 // A simplified version of Process_OopMap_Node, to handle non-safepoints.
 class NonSafepointEmitter {
-  Compile*  C;
-  JVMState* _pending_jvms;
-  int       _pending_offset;
-
-  void emit_non_safepoint();
+    Compile*  C;
+    JVMState* _pending_jvms;
+    int       _pending_offset;
+
+    void emit_non_safepoint();
 
  public:
-  NonSafepointEmitter(Compile* compile) {
-    this->C = compile;
-    _pending_jvms = NULL;
-    _pending_offset = 0;
-  }
-
-  void observe_instruction(Node* n, int pc_offset) {
-    if (!C->debug_info()->recording_non_safepoints())  return;
-
-    Node_Notes* nn = C->node_notes_at(n->_idx);
-    if (nn == NULL || nn->jvms() == NULL)  return;
-    if (_pending_jvms != NULL &&
-        _pending_jvms->same_calls_as(nn->jvms())) {
-      // Repeated JVMS?  Stretch it up here.
-      _pending_offset = pc_offset;
-    } else {
+    NonSafepointEmitter(Compile* compile) {
+      this->C = compile;
+      _pending_jvms = NULL;
+      _pending_offset = 0;
+    }
+
+    void observe_instruction(Node* n, int pc_offset) {
+      if (!C->debug_info()->recording_non_safepoints())  return;
+
+      Node_Notes* nn = C->node_notes_at(n->_idx);
+      if (nn == NULL || nn->jvms() == NULL)  return;
       if (_pending_jvms != NULL &&
+          _pending_jvms->same_calls_as(nn->jvms())) {
+        // Repeated JVMS?  Stretch it up here.
+        _pending_offset = pc_offset;
+      } else {
+        if (_pending_jvms != NULL &&
+            _pending_offset < pc_offset) {
+          emit_non_safepoint();
+        }
+        _pending_jvms = NULL;
+        if (pc_offset > C->debug_info()->last_pc_offset()) {
+          // This is the only way _pending_jvms can become non-NULL:
+          _pending_jvms = nn->jvms();
+          _pending_offset = pc_offset;
+        }
+      }
+    }
+
+    // Stay out of the way of real safepoints:
+    void observe_safepoint(JVMState* jvms, int pc_offset) {
+      if (_pending_jvms != NULL &&
+          !_pending_jvms->same_calls_as(jvms) &&
           _pending_offset < pc_offset) {
         emit_non_safepoint();
       }
       _pending_jvms = NULL;
-      if (pc_offset > C->debug_info()->last_pc_offset()) {
-        // This is the only way _pending_jvms can become non-NULL:
-        _pending_jvms = nn->jvms();
-        _pending_offset = pc_offset;
+    }
+
+    void flush_at_end() {
+      if (_pending_jvms != NULL) {
+        emit_non_safepoint();
       }
+      _pending_jvms = NULL;
     }
-  }
-
-  // Stay out of the way of real safepoints:
-  void observe_safepoint(JVMState* jvms, int pc_offset) {
-    if (_pending_jvms != NULL &&
-        !_pending_jvms->same_calls_as(jvms) &&
-        _pending_offset < pc_offset) {
-      emit_non_safepoint();
-    }
-    _pending_jvms = NULL;
-  }
-
-  void flush_at_end() {
-    if (_pending_jvms != NULL) {
-      emit_non_safepoint();
-    }
-    _pending_jvms = NULL;
-  }
 };
 
 void NonSafepointEmitter::emit_non_safepoint() {
@@ -993,15 +997,11 @@
 }
 
 //------------------------------init_buffer------------------------------------
-CodeBuffer* Compile::init_buffer(uint* blk_starts) {
+void Compile::estimate_buffer_size(int& const_req) {
 
   // Set the initially allocated size
-  int  code_req   = initial_code_capacity;
-  int  locs_req   = initial_locs_capacity;
-  int  stub_req   = initial_stub_capacity;
-  int  const_req  = initial_const_capacity;
-
-  int  pad_req    = NativeCall::instruction_size;
+  const_req = initial_const_capacity;
+
   // The extra spacing after the code is necessary on some platforms.
   // Sometimes we need to patch in a jump after the last instruction,
   // if the nmethod has been deoptimized.  (See 4932387, 4894843.)
@@ -1017,7 +1017,7 @@
 
   // Compute prolog code size
   _method_size = 0;
-  _frame_slots = OptoReg::reg2stack(_matcher->_old_SP)+_regalloc->_framesize;
+  _frame_slots = OptoReg::reg2stack(_matcher->_old_SP) + _regalloc->_framesize;
 #if defined(IA64) && !defined(AIX)
   if (save_argument_registers()) {
     // 4815101: this is a stub with implicit and unknown precision fp args.
@@ -1066,11 +1066,18 @@
   // Initialize the space for the BufferBlob used to find and verify
   // instruction size in MachNode::emit_size()
   init_scratch_buffer_blob(const_req);
-  if (failing())  return NULL; // Out of memory
-
-  // Pre-compute the length of blocks and replace
-  // long branches with short if machine supports it.
-  shorten_branches(blk_starts, code_req, locs_req, stub_req);
+}
+
+CodeBuffer* Compile::init_buffer(BufferSizingData& buf_sizes) {
+
+  int stub_req  = buf_sizes._stub;
+  int code_req  = buf_sizes._code;
+  int const_req = buf_sizes._const;
+
+  int pad_req   = NativeCall::instruction_size;
+
+  BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
+  stub_req += bs->estimate_stub_size();
 
   // nmethod and CodeBuffer count stubs & constants as part of method's code.
   // class HandlerImpl is platform-specific and defined in the *.ad files.
@@ -1083,18 +1090,18 @@
     code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10;  // force expansion
 
   int total_req =
-    const_req +
-    code_req +
-    pad_req +
-    stub_req +
-    exception_handler_req +
-    deopt_handler_req;               // deopt handler
+          const_req +
+          code_req +
+          pad_req +
+          stub_req +
+          exception_handler_req +
+          deopt_handler_req;               // deopt handler
 
   if (has_method_handle_invokes())
     total_req += deopt_handler_req;  // deopt MH handler
 
   CodeBuffer* cb = code_buffer();
-  cb->initialize(total_req, locs_req);
+  cb->initialize(total_req, buf_sizes._reloc);
 
   // Have we run out of code space?
   if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
@@ -1315,12 +1322,12 @@
           Process_OopMap_Node(mach, current_offset);
         } // End if safepoint
 
-        // If this is a null check, then add the start of the previous instruction to the list
+          // If this is a null check, then add the start of the previous instruction to the list
         else if( mach->is_MachNullCheck() ) {
           inct_starts[inct_cnt++] = previous_offset;
         }
 
-        // If this is a branch, then fill in the label with the target BB's label
+          // If this is a branch, then fill in the label with the target BB's label
         else if (mach->is_MachBranch()) {
           // This requires the TRUE branch target be in succs[0]
           uint block_num = block->non_connector_successor(0)->_pre_order;
@@ -1331,8 +1338,8 @@
           bool delay_slot_is_used = valid_bundle_info(n) &&
                                     node_bundling(n)->use_unconditional_delay();
           if (!delay_slot_is_used && mach->may_be_short_branch()) {
-           assert(delay_slot == NULL, "not expecting delay slot node");
-           int br_size = n->size(_regalloc);
+            assert(delay_slot == NULL, "not expecting delay slot node");
+            int br_size = n->size(_regalloc);
             int offset = blk_starts[block_num] - current_offset;
             if (block_num >= i) {
               // Current and following block's offset are not
@@ -1390,7 +1397,7 @@
           }
         }
 #ifdef ASSERT
-        // Check that oop-store precedes the card-mark
+          // Check that oop-store precedes the card-mark
         else if (mach->ideal_Opcode() == Op_StoreCM) {
           uint storeCM_idx = j;
           int count = 0;
@@ -1561,6 +1568,10 @@
   }
 #endif
 
+  BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
+  bs->emit_stubs(*cb);
+  if (failing())  return;
+
 #ifndef PRODUCT
   // Information on the size of the method, without the extraneous code
   Scheduling::increment_method_size(cb->insts_size());
@@ -1735,20 +1746,20 @@
 // Initializer for class Scheduling
 
 Scheduling::Scheduling(Arena *arena, Compile &compile)
-  : _arena(arena),
-    _cfg(compile.cfg()),
-    _regalloc(compile.regalloc()),
-    _scheduled(arena),
-    _available(arena),
-    _reg_node(arena),
-    _pinch_free_list(arena),
-    _next_node(NULL),
-    _bundle_instr_count(0),
-    _bundle_cycle_number(0),
-    _bundle_use(0, 0, resource_count, &_bundle_use_elements[0])
+        : _arena(arena),
+          _cfg(compile.cfg()),
+          _regalloc(compile.regalloc()),
+          _scheduled(arena),
+          _available(arena),
+          _reg_node(arena),
+          _pinch_free_list(arena),
+          _next_node(NULL),
+          _bundle_instr_count(0),
+          _bundle_cycle_number(0),
+          _bundle_use(0, 0, resource_count, &_bundle_use_elements[0])
 #ifndef PRODUCT
-  , _branches(0)
-  , _unconditional_delays(0)
+        , _branches(0)
+        , _unconditional_delays(0)
 #endif
 {
   // Create a MachNopNode
@@ -1829,8 +1840,8 @@
   _bundle_use.reset();
 
   memcpy(_bundle_use_elements,
-    Pipeline_Use::elaborated_elements,
-    sizeof(Pipeline_Use::elaborated_elements));
+         Pipeline_Use::elaborated_elements,
+         sizeof(Pipeline_Use::elaborated_elements));
 }
 
 // Perform instruction scheduling and bundling over the sequence of
@@ -1857,6 +1868,22 @@
   // Walk backwards over each basic block, computing the needed alignment
   // Walk over all the basic blocks
   scheduling.DoScheduling();
+
+#ifndef PRODUCT
+  if (trace_opto_output()) {
+    tty->print("\n---- After ScheduleAndBundle ----\n");
+    for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
+      tty->print("\nBB#%03d:\n", i);
+      Block* block = _cfg->get_block(i);
+      for (uint j = 0; j < block->number_of_nodes(); j++) {
+        Node* n = block->get_node(j);
+        OptoReg::Name reg = _regalloc->get_reg_first(n);
+        tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : "");
+        n->dump();
+      }
+    }
+  }
+#endif
 }
 
 // Compute the latency of all the instructions.  This is fairly simple,
@@ -1925,7 +1952,7 @@
 #ifndef PRODUCT
     if (_cfg->C->trace_opto_output())
       tty->print("#     NodeFitsInBundle [%4d]: FALSE; latency %4d > %d\n",
-        n->_idx, _current_latency[n_idx], _bundle_cycle_number);
+                 n->_idx, _current_latency[n_idx], _bundle_cycle_number);
 #endif
     return (false);
   }
@@ -1942,7 +1969,7 @@
 #ifndef PRODUCT
     if (_cfg->C->trace_opto_output())
       tty->print("#     NodeFitsInBundle [%4d]: FALSE; too many instructions: %d > %d\n",
-        n->_idx, _bundle_instr_count + instruction_count, Pipeline::_max_instrs_per_cycle);
+                 n->_idx, _bundle_instr_count + instruction_count, Pipeline::_max_instrs_per_cycle);
 #endif
     return (false);
   }
@@ -2150,12 +2177,12 @@
         // Don't allow safepoints in the branch shadow, that will
         // cause a number of difficulties
         if ( avail_pipeline->instructionCount() == 1 &&
-            !avail_pipeline->hasMultipleBundles() &&
-            !avail_pipeline->hasBranchDelay() &&
-            Pipeline::instr_has_unit_size() &&
-            d->size(_regalloc) == Pipeline::instr_unit_size() &&
-            NodeFitsInBundle(d) &&
-            !node_bundling(d)->used_in_delay()) {
+             !avail_pipeline->hasMultipleBundles() &&
+             !avail_pipeline->hasBranchDelay() &&
+             Pipeline::instr_has_unit_size() &&
+             d->size(_regalloc) == Pipeline::instr_unit_size() &&
+             NodeFitsInBundle(d) &&
+             !node_bundling(d)->used_in_delay()) {
 
           if (d->is_Mach() && !d->is_MachSafePoint()) {
             // A node that fits in the delay slot was found, so we need to
@@ -2200,13 +2227,13 @@
     // step of the bundles
     if (!NodeFitsInBundle(n)) {
 #ifndef PRODUCT
-        if (_cfg->C->trace_opto_output())
-          tty->print("#  *** STEP(branch won't fit) ***\n");
+      if (_cfg->C->trace_opto_output())
+        tty->print("#  *** STEP(branch won't fit) ***\n");
 #endif
-        // Update the state information
-        _bundle_instr_count = 0;
-        _bundle_cycle_number += 1;
-        _bundle_use.step(1);
+      // Update the state information
+      _bundle_instr_count = 0;
+      _bundle_cycle_number += 1;
+      _bundle_use.step(1);
     }
   }
 
@@ -2252,8 +2279,8 @@
 #ifndef PRODUCT
         if (_cfg->C->trace_opto_output())
           tty->print("#  *** STEP(%d >= %d instructions) ***\n",
-            instruction_count + _bundle_instr_count,
-            Pipeline::_max_instrs_per_cycle);
+                     instruction_count + _bundle_instr_count,
+                     Pipeline::_max_instrs_per_cycle);
 #endif
         step(1);
       }
@@ -2459,7 +2486,7 @@
     }
     assert(!last->is_Mach() || last->as_Mach()->ideal_Opcode() != Op_Con, "");
     if( last->is_Catch() ||
-       (last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) {
+        (last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) {
       // There might be a prior call.  Skip it.
       while (_bb_start < _bb_end && bb->get_node(--_bb_end)->is_MachProj());
     } else if( last->is_MachNullCheck() ) {
@@ -2529,7 +2556,7 @@
     }
 #endif
 #ifdef ASSERT
-  verify_good_schedule(bb,"after block local scheduling");
+    verify_good_schedule(bb,"after block local scheduling");
 #endif
   }
 
@@ -2877,31 +2904,31 @@
 //
 void Scheduling::garbage_collect_pinch_nodes() {
 #ifndef PRODUCT
-    if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:");
+  if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:");
 #endif
-    int trace_cnt = 0;
-    for (uint k = 0; k < _reg_node.Size(); k++) {
-      Node* pinch = _reg_node[k];
-      if ((pinch != NULL) && pinch->Opcode() == Op_Node &&
-          // no predecence input edges
-          (pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) {
-        cleanup_pinch(pinch);
-        _pinch_free_list.push(pinch);
-        _reg_node.map(k, NULL);
+  int trace_cnt = 0;
+  for (uint k = 0; k < _reg_node.Size(); k++) {
+    Node* pinch = _reg_node[k];
+    if ((pinch != NULL) && pinch->Opcode() == Op_Node &&
+        // no predecence input edges
+        (pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) {
+      cleanup_pinch(pinch);
+      _pinch_free_list.push(pinch);
+      _reg_node.map(k, NULL);
 #ifndef PRODUCT
-        if (_cfg->C->trace_opto_output()) {
-          trace_cnt++;
-          if (trace_cnt > 40) {
-            tty->print("\n");
-            trace_cnt = 0;
-          }
-          tty->print(" %d", pinch->_idx);
+      if (_cfg->C->trace_opto_output()) {
+        trace_cnt++;
+        if (trace_cnt > 40) {
+          tty->print("\n");
+          trace_cnt = 0;
         }
+        tty->print(" %d", pinch->_idx);
+      }
 #endif
-      }
     }
+  }
 #ifndef PRODUCT
-    if (_cfg->C->trace_opto_output()) tty->print("\n");
+  if (_cfg->C->trace_opto_output()) tty->print("\n");
 #endif
 }
 
@@ -2938,19 +2965,19 @@
 void Scheduling::print_statistics() {
   // Print the size added by nops for bundling
   tty->print("Nops added %d bytes to total of %d bytes",
-    _total_nop_size, _total_method_size);
+             _total_nop_size, _total_method_size);
   if (_total_method_size > 0)
     tty->print(", for %.2f%%",
-      ((double)_total_nop_size) / ((double) _total_method_size) * 100.0);
+               ((double)_total_nop_size) / ((double) _total_method_size) * 100.0);
   tty->print("\n");
 
   // Print the number of branch shadows filled
   if (Pipeline::_branch_has_delay_slot) {
     tty->print("Of %d branches, %d had unconditional delay slots filled",
-      _total_branches, _total_unconditional_delays);
+               _total_branches, _total_unconditional_delays);
     if (_total_branches > 0)
       tty->print(", for %.2f%%",
-        ((double)_total_unconditional_delays) / ((double)_total_branches) * 100.0);
+                 ((double)_total_unconditional_delays) / ((double)_total_branches) * 100.0);
     tty->print("\n");
   }
 
@@ -2964,6 +2991,6 @@
 
   if (total_bundles > 0)
     tty->print("Average ILP (excluding nops) is %.2f\n",
-      ((double)total_instructions) / ((double)total_bundles));
+               ((double)total_instructions) / ((double)total_bundles));
 }
 #endif
--- a/src/hotspot/share/opto/output.hpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/opto/output.hpp	Fri Oct 11 10:39:58 2019 +0200
@@ -40,7 +40,6 @@
 class PhaseChaitin;
 class Pipeline_Use_Element;
 class Pipeline_Use;
-
 #ifndef PRODUCT
 #define DEBUG_ARG(x) , x
 #else
@@ -49,10 +48,7 @@
 
 // Define the initial sizes for allocation of the resizable code buffer
 enum {
-  initial_code_capacity  =  16 * 1024,
-  initial_stub_capacity  =   4 * 1024,
-  initial_const_capacity =   4 * 1024,
-  initial_locs_capacity  =   3 * 1024
+  initial_const_capacity =   4 * 1024
 };
 
 //------------------------------Scheduling----------------------------------
--- a/src/hotspot/share/opto/phaseX.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/opto/phaseX.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -1670,14 +1670,14 @@
     // of the mirror load depends on the type of 'n'. See LoadNode::Value().
     //   LoadBarrier?(LoadP(LoadP(AddP(foo:Klass, #java_mirror))))
     BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
-    bool has_load_barriers = bs->has_load_barriers();
+    bool has_load_barrier_nodes = bs->has_load_barrier_nodes();
 
     if (use_op == Op_LoadP && use->bottom_type()->isa_rawptr()) {
       for (DUIterator_Fast i2max, i2 = use->fast_outs(i2max); i2 < i2max; i2++) {
         Node* u = use->fast_out(i2);
         const Type* ut = u->bottom_type();
         if (u->Opcode() == Op_LoadP && ut->isa_instptr()) {
-          if (has_load_barriers) {
+          if (has_load_barrier_nodes) {
             // Search for load barriers behind the load
             for (DUIterator_Fast i3max, i3 = u->fast_outs(i3max); i3 < i3max; i3++) {
               Node* b = u->fast_out(i3);
@@ -1848,14 +1848,14 @@
         // Loading the java mirror from a Klass requires two loads and the type
         // of the mirror load depends on the type of 'n'. See LoadNode::Value().
         BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
-        bool has_load_barriers = bs->has_load_barriers();
+        bool has_load_barrier_nodes = bs->has_load_barrier_nodes();
 
         if (m_op == Op_LoadP && m->bottom_type()->isa_rawptr()) {
           for (DUIterator_Fast i2max, i2 = m->fast_outs(i2max); i2 < i2max; i2++) {
             Node* u = m->fast_out(i2);
             const Type* ut = u->bottom_type();
             if (u->Opcode() == Op_LoadP && ut->isa_instptr() && ut != type(u)) {
-              if (has_load_barriers) {
+              if (has_load_barrier_nodes) {
                 // Search for load barriers behind the load
                 for (DUIterator_Fast i3max, i3 = u->fast_outs(i3max); i3 < i3max; i3++) {
                   Node* b = u->fast_out(i3);
--- a/src/hotspot/share/opto/split_if.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/opto/split_if.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -111,83 +111,90 @@
         n->dump();
       }
 #endif
-      // Clone down any block-local BoolNode uses of this CmpNode
-      for (DUIterator i = n->outs(); n->has_out(i); i++) {
-        Node* bol = n->out(i);
-        assert( bol->is_Bool(), "" );
-        if (bol->outcnt() == 1) {
-          Node* use = bol->unique_out();
-          if (use->Opcode() == Op_Opaque4) {
-            if (use->outcnt() == 1) {
-              Node* iff = use->unique_out();
-              assert(iff->is_If(), "unexpected node type");
-              Node *use_c = iff->in(0);
+      if (!n->is_FastLock()) {
+        // Clone down any block-local BoolNode uses of this CmpNode
+        for (DUIterator i = n->outs(); n->has_out(i); i++) {
+          Node* bol = n->out(i);
+          assert( bol->is_Bool(), "" );
+          if (bol->outcnt() == 1) {
+            Node* use = bol->unique_out();
+            if (use->Opcode() == Op_Opaque4) {
+              if (use->outcnt() == 1) {
+                Node* iff = use->unique_out();
+                assert(iff->is_If(), "unexpected node type");
+                Node *use_c = iff->in(0);
+                if (use_c == blk1 || use_c == blk2) {
+                  continue;
+                }
+              }
+            } else {
+              // We might see an Opaque1 from a loop limit check here
+              assert(use->is_If() || use->is_CMove() || use->Opcode() == Op_Opaque1, "unexpected node type");
+              Node *use_c = use->is_If() ? use->in(0) : get_ctrl(use);
               if (use_c == blk1 || use_c == blk2) {
+                assert(use->is_CMove(), "unexpected node type");
                 continue;
               }
             }
-          } else {
-            // We might see an Opaque1 from a loop limit check here
-            assert(use->is_If() || use->is_CMove() || use->Opcode() == Op_Opaque1, "unexpected node type");
-            Node *use_c = use->is_If() ? use->in(0) : get_ctrl(use);
-            if (use_c == blk1 || use_c == blk2) {
-              assert(use->is_CMove(), "unexpected node type");
-              continue;
+          }
+          if (get_ctrl(bol) == blk1 || get_ctrl(bol) == blk2) {
+            // Recursively sink any BoolNode
+#ifndef PRODUCT
+            if( PrintOpto && VerifyLoopOptimizations ) {
+              tty->print("Cloning down: ");
+              bol->dump();
             }
+#endif
+            for (DUIterator j = bol->outs(); bol->has_out(j); j++) {
+              Node* u = bol->out(j);
+              // Uses are either IfNodes, CMoves or Opaque4
+              if (u->Opcode() == Op_Opaque4) {
+                assert(u->in(1) == bol, "bad input");
+                for (DUIterator_Last kmin, k = u->last_outs(kmin); k >= kmin; --k) {
+                  Node* iff = u->last_out(k);
+                  assert(iff->is_If() || iff->is_CMove(), "unexpected node type");
+                  assert( iff->in(1) == u, "" );
+                  // Get control block of either the CMove or the If input
+                  Node *iff_ctrl = iff->is_If() ? iff->in(0) : get_ctrl(iff);
+                  Node *x1 = bol->clone();
+                  Node *x2 = u->clone();
+                  register_new_node(x1, iff_ctrl);
+                  register_new_node(x2, iff_ctrl);
+                  _igvn.replace_input_of(x2, 1, x1);
+                  _igvn.replace_input_of(iff, 1, x2);
+                }
+                _igvn.remove_dead_node(u);
+                --j;
+              } else {
+                // We might see an Opaque1 from a loop limit check here
+                assert(u->is_If() || u->is_CMove() || u->Opcode() == Op_Opaque1, "unexpected node type");
+                assert(u->in(1) == bol, "");
+                // Get control block of either the CMove or the If input
+                Node *u_ctrl = u->is_If() ? u->in(0) : get_ctrl(u);
+                assert((u_ctrl != blk1 && u_ctrl != blk2) || u->is_CMove(), "won't converge");
+                Node *x = bol->clone();
+                register_new_node(x, u_ctrl);
+                _igvn.replace_input_of(u, 1, x);
+                --j;
+              }
+            }
+            _igvn.remove_dead_node(bol);
+            --i;
           }
         }
-        if (get_ctrl(bol) == blk1 || get_ctrl(bol) == blk2) {
-          // Recursively sink any BoolNode
-#ifndef PRODUCT
-          if( PrintOpto && VerifyLoopOptimizations ) {
-            tty->print("Cloning down: ");
-            bol->dump();
-          }
-#endif
-          for (DUIterator j = bol->outs(); bol->has_out(j); j++) {
-            Node* u = bol->out(j);
-            // Uses are either IfNodes, CMoves or Opaque4
-            if (u->Opcode() == Op_Opaque4) {
-              assert(u->in(1) == bol, "bad input");
-              for (DUIterator_Last kmin, k = u->last_outs(kmin); k >= kmin; --k) {
-                Node* iff = u->last_out(k);
-                assert(iff->is_If() || iff->is_CMove(), "unexpected node type");
-                assert( iff->in(1) == u, "" );
-                // Get control block of either the CMove or the If input
-                Node *iff_ctrl = iff->is_If() ? iff->in(0) : get_ctrl(iff);
-                Node *x1 = bol->clone();
-                Node *x2 = u->clone();
-                register_new_node(x1, iff_ctrl);
-                register_new_node(x2, iff_ctrl);
-                _igvn.replace_input_of(x2, 1, x1);
-                _igvn.replace_input_of(iff, 1, x2);
-              }
-              _igvn.remove_dead_node(u);
-              --j;
-            } else {
-              // We might see an Opaque1 from a loop limit check here
-              assert(u->is_If() || u->is_CMove() || u->Opcode() == Op_Opaque1, "unexpected node type");
-              assert(u->in(1) == bol, "");
-              // Get control block of either the CMove or the If input
-              Node *u_ctrl = u->is_If() ? u->in(0) : get_ctrl(u);
-              assert((u_ctrl != blk1 && u_ctrl != blk2) || u->is_CMove(), "won't converge");
-              Node *x = bol->clone();
-              register_new_node(x, u_ctrl);
-              _igvn.replace_input_of(u, 1, x);
-              --j;
-            }
-          }
-          _igvn.remove_dead_node(bol);
-          --i;
-        }
       }
       // Clone down this CmpNode
       for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; --j) {
-        Node* bol = n->last_out(j);
-        assert( bol->in(1) == n, "" );
+        Node* use = n->last_out(j);
+        uint pos = 1;
+        if (n->is_FastLock()) {
+          pos = TypeFunc::Parms + 2;
+          assert(use->is_Lock(), "FastLock only used by LockNode");
+        }
+        assert(use->in(pos) == n, "" );
         Node *x = n->clone();
-        register_new_node(x, get_ctrl(bol));
-        _igvn.replace_input_of(bol, 1, x);
+        register_new_node(x, ctrl_or_self(use));
+        _igvn.replace_input_of(use, pos, x);
       }
       _igvn.remove_dead_node( n );
 
--- a/src/hotspot/share/prims/jniCheck.cpp	Thu Oct 10 14:11:18 2019 +0200
+++ b/src/hotspot/share/prims/jniCheck.cpp	Fri Oct 11 10:39:58 2019 +0200
@@ -450,16 +450,16 @@
 Method* jniCheck::validate_jmethod_id(JavaThread* thr, jmethodID method_id) {
   ASSERT_OOPS_ALLOWED;
   // do the fast jmethodID check first
-  Method* moop = Method::checked_resolve_jmethod_id(method_id);
-  if (moop == NULL) {
+  Method* m = Method::checked_resolve_jmethod_id(method_id);
+  if (m == NULL) {
     ReportJNIFatalError(thr, fatal_wrong_class_or_method);
   }
-  // jmethodIDs are supposed to be weak handles in the class loader data,
+  // jmethodIDs are handles in the class loader data,
   // but that can be expensive so check it last
   else if (!Method::is_method_id(method_id)) {
     ReportJNIFatalError(thr, fatal_non_weak_method);
   }
-  return moop;
+  return m;
 }
 
 
@@ -520,18 +520,29 @@
   }
 }
 
-void jniCheck::validate_call_object(JavaThread* thr, jobject obj, jmethodID method_id) {
-  /* validate the object being passed */
+void jniCheck::validate_call(JavaThread* thr, jclass clazz, jmethodID method_id, jobject obj) {
   ASSERT_OOPS_ALLOWED;
-  jniCheck::validate_jmethod_id(thr, method_id);
-  jniCheck::validate_object(thr, obj);
-}
+  Method* m = jniCheck::validate_jmethod_id(thr, method_id);
+  InstanceKlass* holder = m->method_holder();
 
-void jniCheck::validate_call_class(JavaThread* thr, jclass clazz, jmethodID method_id) {
-  /* validate the class being passed */
-  ASSERT_OOPS_ALLOWED;
-  jniCheck::validate_jmethod_id(thr, method_id);
-  jniCheck::validate_class(thr, clazz, false);
+  if (clazz != NULL) {
+    Klass* k = jniCheck::validate_class(thr, clazz, false);
+    // Check that method is in the class, must be InstanceKlass
+    if (!InstanceKlass::cast(k)->is_subtype_of(holder)) {
+      ReportJNIFatalError(thr, fatal_wrong_class_or_method);
+    }
+  }
+
+  if (obj != NULL) {
+    oop recv = jniCheck::validate_object(thr, obj);
+    assert(recv != NULL, "validate_object checks that");
+    Klass* ik = recv->klass();
+
+    // Check that the object is a subtype of method holder too.
+    if (!InstanceKlass::cast(ik)->is_subtype_of(holder)) {
+      ReportJNIFatalError(thr, fatal_wrong_class_or_method);
+    }
+  }
 }
 
 
@@ -597,8 +608,7 @@
                                 jboolean isStatic))
     functionEnter(thr);
     IN_VM(
-      jniCheck::validate_class(thr, cls, false);
-      jniCheck::validate_jmethod_id(thr, methodID);
+      jniCheck::validate_call(thr, cls, methodID);
     )
     jobject result = UNCHECKED()->ToReflectedMethod(env, cls, methodID,
                                                     isStatic);
@@ -854,8 +864,7 @@
     functionEnter(thr);
     va_list args;
     IN_VM(
-      jniCheck::validate_class(thr, clazz, false);
-      jniCheck::validate_jmethod_id(thr, methodID);
+      jniCheck::validate_call(thr, clazz, methodID);
     )
     va_start(args, methodID);
     jobject result = UNCHECKED()->NewObjectV(env,clazz,methodID,args);
@@ -871,8 +880,7 @@
                          va_list args))
     functionEnter(thr);
     IN_VM(
-      jniCheck::validate_class(thr, clazz, false);
-      jniCheck::validate_jmethod_id(thr, methodID);
+      jniCheck::validate_call(thr, clazz, methodID);
     )
     jobject result = UNCHECKED()->NewObjectV(env,clazz,methodID,args);
     functionExit(thr);
@@ -886,8 +894,7 @@
                          const jvalue *args))
     functionEnter(thr);
     IN_VM(
-      jniCheck::validate_class(thr, clazz, false);
-      jniCheck::validate_jmethod_id(thr, methodID);
+      jniCheck::validate_call(thr, clazz, methodID);
     )
     jobject result = UNCHECKED()->NewObjectA(env,clazz,methodID,args);
     functionExit(thr);
@@ -943,7 +950,7 @@
     functionEnter(thr); \
     va_list args; \
     IN_VM( \
-      jniCheck::validate_call_object(thr, obj, methodID); \
+      jniCheck::validate_call(thr, NULL, methodID, obj); \
     ) \
     va_start(args,methodID); \
     ResultType result =UNCHECKED()->Call##Result##MethodV(env, obj, methodID, \
@@ -961,7 +968,7 @@
                                     va_list args)) \
     functionEnter(thr); \
     IN_VM(\
-      jniCheck::validate_call_object(thr, obj, methodID); \
+      jniCheck::validate_call(thr, NULL, methodID, obj); \
     ) \
     ResultType result = UNCHECKED()->Call##Result##MethodV(env, obj, methodID,\
                                                            args); \
@@ -977,7 +984,7 @@
                                     const jvalue * args)) \
     functionEnter(thr); \
     IN_VM( \
-      jniCheck::validate_call_object(thr, obj, methodID); \
+      jniCheck::validate_call(thr, NULL, methodID, obj); \
     ) \
     ResultType result = UNCHECKED()->Call##Result##MethodA(env, obj, methodID,\
                                                            args); \
@@ -1004,7 +1011,7 @@
     functionEnter(thr);
     va_list args;
     IN_VM(
-      jniCheck::validate_call_object(thr, obj, methodID);
+      jniCheck::validate_call(thr, NULL, methodID, obj);
     )
     va_start(args,methodID);
     UNCHECKED()->CallVoidMethodV(env,obj,methodID,args);
@@ -1020,7 +1027,7 @@
                               va_list args))
     functionEnter(thr);
     IN_VM(
-      jniCheck::validate_call_object(thr, obj, methodID);
+      jniCheck::validate_call(thr, NULL, methodID, obj);
     )
     UNCHECKED()->CallVoidMethodV(env,obj,methodID,args);
     thr->set_pending_jni_exception_check("CallVoidMethodV");
@@ -1034,7 +1041,7 @@
                               const jvalue * args))
     functionEnter(thr);
     IN_VM(
-      jniCheck::validate_call_object(thr, obj, methodID);
+      jniCheck::validate_call(thr, NULL, methodID, obj);
     )
     UNCHECKED()->CallVoidMethodA(env,obj,methodID,args);
     thr->set_pending_jni_exception_check("CallVoidMethodA");
@@ -1051,8 +1058,7 @@
     functionEnter(thr); \
     va_list args; \
     IN_VM( \
-      jniCheck::validate_call_object(thr, obj, methodID); \
-      jniCheck::validate_call_class(thr, clazz, methodID); \
+      jniCheck::validate_call(thr, clazz, methodID, obj); \
     ) \
     va_start(args,methodID); \
     ResultType result = UNCHECKED()->CallNonvirtual##Result##MethodV(env, \
@@ -1074,8 +1080,7 @@
                                               va_list args)) \
     functionEnter(thr); \
     IN_VM( \
-      jniCheck::validate_call_object(thr, obj, methodID); \
-      jniCheck::validate_call_class(thr, clazz, methodID); \
+      jniCheck::validate_call(thr, clazz, methodID, obj); \
     ) \
     ResultType result = UNCHECKED()->CallNonvirtual##Result##MethodV(env, \
                                                                      obj, \
@@ -1095,8 +1100,7 @@
                                               const jvalue * args)) \
     functionEnter(thr); \
     IN_VM( \
-      jniCheck::validate_call_object(thr, obj, methodID); \
-      jniCheck::validate_call_class(th