changeset 49761:46dc568d6804

Merge
author jwilhelm
date Fri, 13 Apr 2018 14:06:39 +0200
parents 4beba2c2a329 69bae761600b
children b07d069b189a
files make/hotspot/lib/JvmFlags.gmk src/hotspot/share/gc/cms/vmStructs_parNew.hpp src/hotspot/share/gc/g1/g1_globals.cpp src/hotspot/share/memory/binaryTreeDictionary.cpp src/hotspot/share/memory/freeList.cpp src/hotspot/share/runtime/commandLineFlagConstraintsGC.cpp src/hotspot/share/runtime/commandLineFlagConstraintsGC.hpp test/hotspot/jtreg/runtime/6626217/Test6626217.sh test/hotspot/jtreg/runtime/6626217/many_loader1.java.foo test/hotspot/jtreg/runtime/6626217/many_loader2.java.foo test/jdk/ProblemList.txt
diffstat 391 files changed, 13008 insertions(+), 9122 deletions(-)
--- a/make/autoconf/flags-cflags.m4	Fri Apr 13 03:05:19 2018 +0200
+++ b/make/autoconf/flags-cflags.m4	Fri Apr 13 14:06:39 2018 +0200
@@ -453,7 +453,7 @@
   elif test "x$TOOLCHAIN_TYPE" = xmicrosoft; then
     ALWAYS_DEFINES_JDK="-DWIN32_LEAN_AND_MEAN -D_CRT_SECURE_NO_DEPRECATE \
         -D_CRT_NONSTDC_NO_DEPRECATE -DWIN32 -DIAL"
-    ALWAYS_DEFINES_JVM="-DNOMINMAX"
+    ALWAYS_DEFINES_JVM="-DNOMINMAX -DWIN32_LEAN_AND_MEAN"
   fi
 
   ###############################################################################
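Defining WIN32_LEAN_AND_MEAN for the JVM build trims what <windows.h> drags in. A minimal illustration of the macro's effect (plain Windows C++, not JDK code):

    // Both macros must be defined before <windows.h> is included.
    // WIN32_LEAN_AND_MEAN excludes rarely used APIs (e.g. winsock.h),
    // which speeds up compilation and avoids header clashes; NOMINMAX
    // keeps <windows.h> from defining min()/max() macros that break
    // std::min/std::max.
    #define WIN32_LEAN_AND_MEAN
    #define NOMINMAX
    #include <windows.h>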
--- a/make/hotspot/lib/JvmOverrideFiles.gmk	Fri Apr 13 03:05:19 2018 +0200
+++ b/make/hotspot/lib/JvmOverrideFiles.gmk	Fri Apr 13 14:06:39 2018 +0200
@@ -34,6 +34,7 @@
   BUILD_LIBJVM_jvmciCompilerToVM.cpp_CXXFLAGS := -fno-var-tracking-assignments
   BUILD_LIBJVM_jvmciCompilerToVMInit.cpp_CXXFLAGS := -fno-var-tracking-assignments
   BUILD_LIBJVM_assembler_x86.cpp_CXXFLAGS := -Wno-maybe-uninitialized
+  BUILD_LIBJVM_cardTableBarrierSetAssembler_x86.cpp_CXXFLAGS := -Wno-maybe-uninitialized
   BUILD_LIBJVM_interp_masm_x86.cpp_CXXFLAGS := -Wno-uninitialized
 endif
 
--- a/src/hotspot/cpu/aarch64/aarch64.ad	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/aarch64.ad	Fri Apr 13 14:06:39 2018 +0200
@@ -5847,8 +5847,8 @@
 operand immByteMapBase()
 %{
   // Get base of card map
-  predicate(Universe::heap()->barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
-            (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(Universe::heap()->barrier_set()))->card_table()->byte_map_base());
+  predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
+            (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base());
   match(ConP);
 
   op_cost(0);
@@ -16167,9 +16167,8 @@
   format %{ "String Equals $str1,$str2,$cnt -> $result" %}
   ins_encode %{
     // Count is in 8-bit bytes; non-Compact chars are 16 bits.
-    __ arrays_equals($str1$$Register, $str2$$Register,
-                     $result$$Register, $cnt$$Register,
-                     1, /*is_string*/true);
+    __ string_equals($str1$$Register, $str2$$Register,
+                     $result$$Register, $cnt$$Register, 1);
   %}
   ins_pipe(pipe_class_memory);
 %}
@@ -16184,42 +16183,42 @@
   format %{ "String Equals $str1,$str2,$cnt -> $result" %}
   ins_encode %{
     // Count is in 8-bit bytes; non-Compact chars are 16 bits.
-    __ asrw($cnt$$Register, $cnt$$Register, 1);
-    __ arrays_equals($str1$$Register, $str2$$Register,
-                     $result$$Register, $cnt$$Register,
-                     2, /*is_string*/true);
+    __ string_equals($str1$$Register, $str2$$Register,
+                     $result$$Register, $cnt$$Register, 2);
   %}
   ins_pipe(pipe_class_memory);
 %}
 
 instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
-                      iRegP_R10 tmp, rFlagsReg cr)
+                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
+                       iRegP_R10 tmp, rFlagsReg cr)
 %{
   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
   match(Set result (AryEq ary1 ary2));
-  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);
+  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
 
   format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
   ins_encode %{
     __ arrays_equals($ary1$$Register, $ary2$$Register,
-                     $result$$Register, $tmp$$Register,
-                     1, /*is_string*/false);
+                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
+                     $result$$Register, $tmp$$Register, 1);
     %}
   ins_pipe(pipe_class_memory);
 %}
 
 instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
-                      iRegP_R10 tmp, rFlagsReg cr)
+                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
+                       iRegP_R10 tmp, rFlagsReg cr)
 %{
   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
   match(Set result (AryEq ary1 ary2));
-  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);
+  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
 
   format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
   ins_encode %{
     __ arrays_equals($ary1$$Register, $ary2$$Register,
-                     $result$$Register, $tmp$$Register,
-                     2, /*is_string*/false);
+                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
+                     $result$$Register, $tmp$$Register, 2);
   %}
   ins_pipe(pipe_class_memory);
 %}
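The rewritten intrinsics wired up above compare data one machine word at a time. A rough, portable C++ sketch of that technique (assuming plain byte buffers; this is not the generated code):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Compare len bytes in 8-byte words. The final word may overlap the
    // previous one, which avoids a byte-by-byte tail loop once len >= 8.
    bool bytes_equal(const unsigned char* a, const unsigned char* b,
                     std::size_t len) {
      if (len >= sizeof(std::uint64_t)) {
        std::size_t i = 0;
        for (; i + sizeof(std::uint64_t) <= len; i += sizeof(std::uint64_t)) {
          std::uint64_t wa, wb;
          std::memcpy(&wa, a + i, sizeof wa);
          std::memcpy(&wb, b + i, sizeof wb);
          if (wa != wb) return false;
        }
        std::uint64_t wa, wb;  // overlapping final word covers the tail
        std::memcpy(&wa, a + len - sizeof wa, sizeof wa);
        std::memcpy(&wb, b + len - sizeof wb, sizeof wb);
        return wa == wb;
      }
      for (std::size_t i = 0; i < len; i++) {  // short inputs: plain loop
        if (a[i] != b[i]) return false;
      }
      return true;
    }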
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -819,7 +819,7 @@
   void NAME(Register Rd, Register Rn, unsigned immr, unsigned imms) {   \
     starti;                                                             \
     f(opcode, 31, 22), f(immr, 21, 16), f(imms, 15, 10);                \
-    rf(Rn, 5), rf(Rd, 0);                                               \
+    zrf(Rn, 5), rf(Rd, 0);                                              \
   }
 
   INSN(sbfmw, 0b0001001100);
--- a/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -46,6 +46,7 @@
 #if INCLUDE_ALL_GCS
 #include "gc/g1/g1BarrierSet.hpp"
 #include "gc/g1/g1CardTable.hpp"
+#include "gc/g1/g1ThreadLocalData.hpp"
 #endif
 
 
@@ -1106,7 +1107,7 @@
         StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments);
         // arg0 : previous value of memory
 
-        BarrierSet* bs = Universe::heap()->barrier_set();
+        BarrierSet* bs = BarrierSet::barrier_set();
         if (bs->kind() != BarrierSet::G1BarrierSet) {
           __ mov(r0, (int)id);
           __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), r0);
@@ -1118,13 +1119,9 @@
         const Register thread = rthread;
         const Register tmp = rscratch1;
 
-        Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                             SATBMarkQueue::byte_offset_of_active()));
-
-        Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                             SATBMarkQueue::byte_offset_of_index()));
-        Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                        SATBMarkQueue::byte_offset_of_buf()));
+        Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+        Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
+        Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
 
         Label done;
         Label runtime;
@@ -1162,7 +1159,7 @@
       {
         StubFrame f(sasm, "g1_post_barrier", dont_gc_arguments);
 
-        BarrierSet* bs = Universe::heap()->barrier_set();
+        BarrierSet* bs = BarrierSet::barrier_set();
         if (bs->kind() != BarrierSet::G1BarrierSet) {
           __ mov(r0, (int)id);
           __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), r0);
@@ -1181,10 +1178,8 @@
 
         const Register thread = rthread;
 
-        Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
-                                             DirtyCardQueue::byte_offset_of_index()));
-        Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
-                                        DirtyCardQueue::byte_offset_of_buf()));
+        Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
+        Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
 
         const Register card_offset = rscratch2;
         // LR is free here, so we can use it to hold the byte_map_base.
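Throughout this changeset the SATB and dirty-card queue addresses stop being computed as JavaThread offset sums and instead come from flat G1ThreadLocalData accessors. A minimal sketch of the pattern, with illustrative names and layout (the real class lives in gc/g1/g1ThreadLocalData.hpp):

    #include <cstddef>

    // Per-thread GC state kept as a plain struct embedded in the thread, so
    // generated code can address each field as thread-register + constant
    // offset.
    struct ExampleG1ThreadLocalData {
      unsigned char satb_active;  // is SATB marking active for this thread?
      std::size_t   satb_index;   // byte index into the SATB buffer; 0 == full
      void**        satb_buffer;  // current SATB enqueue buffer
    };

    // One accessor per field replaces the old two-step
    // JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_*().
    constexpr std::size_t satb_index_offset() {
      return offsetof(ExampleG1ThreadLocalData, satb_index);
    }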
--- a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -25,12 +25,12 @@
 #include "precompiled.hpp"
 #include "asm/macroAssembler.inline.hpp"
 #include "gc/g1/g1BarrierSet.hpp"
+#include "gc/g1/g1BarrierSetAssembler.hpp"
 #include "gc/g1/g1CardTable.hpp"
-#include "gc/g1/g1BarrierSetAssembler.hpp"
+#include "gc/g1/g1ThreadLocalData.hpp"
 #include "gc/g1/heapRegion.hpp"
-#include "gc/shared/collectedHeap.hpp"
-#include "runtime/thread.hpp"
 #include "interpreter/interp_masm.hpp"
+#include "runtime/sharedRuntime.hpp"
 
 #define __ masm->
 
@@ -75,3 +75,233 @@
   __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_post_entry), 2);
   __ pop(saved_regs, sp);
 }
+
+void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
+                                                 Register obj,
+                                                 Register pre_val,
+                                                 Register thread,
+                                                 Register tmp,
+                                                 bool tosca_live,
+                                                 bool expand_call) {
+  // If expand_call is true then we expand the call_VM_leaf macro
+  // directly to skip generating the check by
+  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
+
+  assert(thread == rthread, "must be");
+
+  Label done;
+  Label runtime;
+
+  assert_different_registers(obj, pre_val, tmp, rscratch1);
+  assert(pre_val != noreg &&  tmp != noreg, "expecting a register");
+
+  Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+  Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
+  Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
+
+  // Is marking active?
+  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+    __ ldrw(tmp, in_progress);
+  } else {
+    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+    __ ldrb(tmp, in_progress);
+  }
+  __ cbzw(tmp, done);
+
+  // Do we need to load the previous value?
+  if (obj != noreg) {
+    __ load_heap_oop(pre_val, Address(obj, 0));
+  }
+
+  // Is the previous value null?
+  __ cbz(pre_val, done);
+
+  // Can we store original value in the thread's buffer?
+  // Is index == 0?
+  // (The index field is typed as size_t.)
+
+  __ ldr(tmp, index);                      // tmp := *index_adr
+  __ cbz(tmp, runtime);                    // tmp == 0?
+                                           // If yes, goto runtime
+
+  __ sub(tmp, tmp, wordSize);              // tmp := tmp - wordSize
+  __ str(tmp, index);                      // *index_adr := tmp
+  __ ldr(rscratch1, buffer);
+  __ add(tmp, tmp, rscratch1);             // tmp := tmp + *buffer_adr
+
+  // Record the previous value
+  __ str(pre_val, Address(tmp, 0));
+  __ b(done);
+
+  __ bind(runtime);
+  // save the live input values
+  RegSet saved = RegSet::of(pre_val);
+  if (tosca_live) saved += RegSet::of(r0);
+  if (obj != noreg) saved += RegSet::of(obj);
+
+  __ push(saved, sp);
+
+  // Calling the runtime using the regular call_VM_leaf mechanism generates
+  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
+  // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL.
+  //
+  // If we are generating the pre-barrier without a frame (e.g. in the
+  // intrinsified Reference.get() routine) then rfp might be pointing to
+  // the caller frame and so this check will most likely fail at runtime.
+  //
+  // Expanding the call directly bypasses the generation of the check.
+  // So when we do not have a full interpreter frame on the stack
+  // expand_call should be passed true.
+
+  if (expand_call) {
+    assert(pre_val != c_rarg1, "smashed arg");
+    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
+  } else {
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
+  }
+
+  __ pop(saved, sp);
+
+  __ bind(done);
+
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
+                                                  Register store_addr,
+                                                  Register new_val,
+                                                  Register thread,
+                                                  Register tmp,
+                                                  Register tmp2) {
+  assert(thread == rthread, "must be");
+  assert_different_registers(store_addr, new_val, thread, tmp, tmp2,
+                             rscratch1);
+  assert(store_addr != noreg && new_val != noreg && tmp != noreg
+         && tmp2 != noreg, "expecting a register");
+
+  Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
+  Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
+
+  BarrierSet* bs = BarrierSet::barrier_set();
+  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
+  CardTable* ct = ctbs->card_table();
+  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
+
+  Label done;
+  Label runtime;
+
+  // Does store cross heap regions?
+
+  __ eor(tmp, store_addr, new_val);
+  __ lsr(tmp, tmp, HeapRegion::LogOfHRGrainBytes);
+  __ cbz(tmp, done);
+
+  // crosses regions, storing NULL?
+
+  __ cbz(new_val, done);
+
+  // storing region crossing non-NULL, is card already dirty?
+
+  ExternalAddress cardtable((address) ct->byte_map_base());
+  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
+  const Register card_addr = tmp;
+
+  __ lsr(card_addr, store_addr, CardTable::card_shift);
+
+  // get the address of the card
+  __ load_byte_map_base(tmp2);
+  __ add(card_addr, card_addr, tmp2);
+  __ ldrb(tmp2, Address(card_addr));
+  __ cmpw(tmp2, (int)G1CardTable::g1_young_card_val());
+  __ br(Assembler::EQ, done);
+
+  assert((int)CardTable::dirty_card_val() == 0, "must be 0");
+
+  __ membar(Assembler::StoreLoad);
+
+  __ ldrb(tmp2, Address(card_addr));
+  __ cbzw(tmp2, done);
+
+  // storing a region crossing, non-NULL oop, card is clean.
+  // dirty card and log.
+
+  __ strb(zr, Address(card_addr));
+
+  __ ldr(rscratch1, queue_index);
+  __ cbz(rscratch1, runtime);
+  __ sub(rscratch1, rscratch1, wordSize);
+  __ str(rscratch1, queue_index);
+
+  __ ldr(tmp2, buffer);
+  __ str(card_addr, Address(tmp2, rscratch1));
+  __ b(done);
+
+  __ bind(runtime);
+  // save the live input values
+  RegSet saved = RegSet::of(store_addr, new_val);
+  __ push(saved, sp);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
+  __ pop(saved, sp);
+
+  __ bind(done);
+}
+
+void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                    Register dst, Address src, Register tmp1, Register tmp_thread) {
+  bool on_oop = type == T_OBJECT || type == T_ARRAY;
+  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
+  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
+  bool on_reference = on_weak || on_phantom;
+  ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
+  if (on_oop && on_reference) {
+    // Generate the G1 pre-barrier code to log the value of
+    // the referent field in an SATB buffer.
+    g1_write_barrier_pre(masm /* masm */,
+                         noreg /* obj */,
+                         dst /* pre_val */,
+                         rthread /* thread */,
+                         tmp1 /* tmp */,
+                         true /* tosca_live */,
+                         true /* expand_call */);
+  }
+}
+
+void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                         Address dst, Register val, Register tmp1, Register tmp2) {
+  // flatten object address if needed
+  if (dst.index() == noreg && dst.offset() == 0) {
+    if (dst.base() != r3) {
+      __ mov(r3, dst.base());
+    }
+  } else {
+    __ lea(r3, dst);
+  }
+
+  g1_write_barrier_pre(masm,
+                       r3 /* obj */,
+                       tmp2 /* pre_val */,
+                       rthread /* thread */,
+                       tmp1  /* tmp */,
+                       val != noreg /* tosca_live */,
+                       false /* expand_call */);
+
+  if (val == noreg) {
+    __ store_heap_oop_null(Address(r3, 0));
+  } else {
+    // G1 barrier needs uncompressed oop for region cross check.
+    Register new_val = val;
+    if (UseCompressedOops) {
+      new_val = rscratch2;
+      __ mov(new_val, val);
+    }
+    __ store_heap_oop(Address(r3, 0), val);
+    g1_write_barrier_post(masm,
+                          r3 /* store_adr */,
+                          new_val /* new_val */,
+                          rthread /* thread */,
+                          tmp1 /* tmp */,
+                          tmp2 /* tmp2 */);
+  }
+
+}
+
+#undef __
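Restated as a hedged C++ sketch (illustrative types, not HotSpot code), the pre-barrier generated above follows this control flow:

    #include <cstddef>

    // Thread-local SATB queue state (illustrative layout).
    struct SatbQueue {
      bool        active;  // is SATB marking active for this thread?
      std::size_t index;   // byte index into the buffer; 0 means it is full
      void**      buffer;  // current enqueue buffer
    };

    // Stand-in for the g1_wb_pre runtime call taken on the slow path.
    inline void satb_runtime_enqueue(SatbQueue*, void* /*pre_val*/) {}

    inline void satb_pre_barrier(SatbQueue* q, void* pre_val) {
      if (!q->active) return;           // marking off: nothing to log
      if (pre_val == nullptr) return;   // previous value null: nothing to record
      if (q->index == 0) {              // buffer full: take the runtime slow path
        satb_runtime_enqueue(q, pre_val);
        return;
      }
      q->index -= sizeof(void*);        // bump-down allocation within the buffer
      *reinterpret_cast<void**>(
          reinterpret_cast<char*>(q->buffer) + q->index) = pre_val;
    }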
--- a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -34,6 +34,28 @@
                                        Register addr, Register count, RegSet saved_regs);
   void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                         Register start, Register end, Register tmp, RegSet saved_regs);
+
+  void g1_write_barrier_pre(MacroAssembler* masm,
+                            Register obj,
+                            Register pre_val,
+                            Register thread,
+                            Register tmp,
+                            bool tosca_live,
+                            bool expand_call);
+
+  void g1_write_barrier_post(MacroAssembler* masm,
+                             Register store_addr,
+                             Register new_val,
+                             Register thread,
+                             Register tmp,
+                             Register tmp2);
+
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Address dst, Register val, Register tmp1, Register tmp2);
+
+public:
+  void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+               Register dst, Address src, Register tmp1, Register tmp_thread);
 };
 
 #endif // CPU_AARCH64_GC_G1_G1BARRIERSETASSEMBLER_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+
+#define __ masm->
+
+void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                  Register dst, Address src, Register tmp1, Register tmp_thread) {
+  bool on_heap = (decorators & IN_HEAP) != 0;
+  bool on_root = (decorators & IN_ROOT) != 0;
+  switch (type) {
+  case T_OBJECT:
+  case T_ARRAY: {
+    if (on_heap) {
+      __ load_heap_oop(dst, src);
+    } else {
+      assert(on_root, "why else?");
+      __ ldr(dst, src);
+    }
+    break;
+  }
+  default: Unimplemented();
+  }
+}
+
+void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                   Address dst, Register val, Register tmp1, Register tmp2) {
+  bool on_heap = (decorators & IN_HEAP) != 0;
+  bool on_root = (decorators & IN_ROOT) != 0;
+  switch (type) {
+  case T_OBJECT:
+  case T_ARRAY: {
+    if (on_heap) {
+      __ store_heap_oop(dst, val);
+    } else {
+      assert(on_root, "why else?");
+      __ str(val, dst);
+    }
+    break;
+  }
+  default: Unimplemented();
+  }
+}
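load_at/store_at pick a code path from DecoratorSet bits such as IN_HEAP and IN_ROOT. A compact sketch of the decorator idea (the bit values here are assumptions; only the names come from the diff):

    #include <cstdint>

    using DecoratorSet = std::uint64_t;  // bitmask describing an access
    constexpr DecoratorSet IN_HEAP = UINT64_C(1) << 0;  // access into the Java heap
    constexpr DecoratorSet IN_ROOT = UINT64_C(1) << 1;  // access to a VM root

    // The assembler tests bits rather than dispatching per access kind.
    inline bool is_heap_access(DecoratorSet d) { return (d & IN_HEAP) != 0; }
    inline bool is_root_access(DecoratorSet d) { return (d & IN_ROOT) != 0; }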
--- a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -35,6 +35,12 @@
                                   Register addr, Register count, RegSet saved_regs) {}
   virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
                                   Register start, Register end, Register tmp, RegSet saved_regs) {}
+  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                       Register dst, Address src, Register tmp1, Register tmp_thread);
+  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                        Address dst, Register val, Register tmp1, Register tmp2);
+
+  virtual void barrier_stubs_init() {}
 };
 
 #endif // CPU_AARCH64_GC_SHARED_BARRIERSETASSEMBLER_AARCH64_HPP
--- a/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -28,15 +28,44 @@
 #include "gc/shared/cardTable.hpp"
 #include "gc/shared/cardTableBarrierSet.hpp"
 #include "gc/shared/cardTableBarrierSetAssembler.hpp"
-#include "gc/shared/collectedHeap.hpp"
 #include "interpreter/interp_masm.hpp"
 
 #define __ masm->
 
+
+void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) {
+
+  BarrierSet* bs = BarrierSet::barrier_set();
+  assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind");
+
+  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
+  CardTable* ct = ctbs->card_table();
+  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
+
+  __ lsr(obj, obj, CardTable::card_shift);
+
+  assert(CardTable::dirty_card_val() == 0, "must be");
+
+  __ load_byte_map_base(rscratch1);
+
+  if (UseCondCardMark) {
+    Label L_already_dirty;
+    __ membar(Assembler::StoreLoad);
+    __ ldrb(rscratch2,  Address(obj, rscratch1));
+    __ cbz(rscratch2, L_already_dirty);
+    __ strb(zr, Address(obj, rscratch1));
+    __ bind(L_already_dirty);
+  } else {
+    if (UseConcMarkSweepGC && CMSPrecleaningEnabled) {
+      __ membar(Assembler::StoreStore);
+    }
+    __ strb(zr, Address(obj, rscratch1));
+  }
+}
+
 void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                                     Register start, Register end, Register scratch, RegSet saved_regs) {
-
-  BarrierSet* bs = Universe::heap()->barrier_set();
+  BarrierSet* bs = BarrierSet::barrier_set();
   CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
   CardTable* ct = ctbs->card_table();
   assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
@@ -58,3 +87,22 @@
   __ subs(count, count, 1);
   __ br(Assembler::GE, L_loop);
 }
+
+void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                                Address dst, Register val, Register tmp1, Register tmp2) {
+  bool on_array = (decorators & IN_HEAP_ARRAY) != 0;
+  bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
+  bool precise = on_array || on_anonymous;
+  if (val == noreg) {
+    __ store_heap_oop_null(dst);
+  } else {
+    __ store_heap_oop(dst, val);
+    // flatten object address if needed
+    if (!precise || (dst.index() == noreg && dst.offset() == 0)) {
+      store_check(masm, dst.base(), dst);
+    } else {
+      __ lea(r3, dst);
+      store_check(masm, r3, dst);
+    }
+  }
+}
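store_check above is plain card marking: shift the store address down to a card index and write the dirty value into the card byte map. Roughly, in C++ (the 512-byte card size and dirty == 0 match the asserts above, but treat the constants as assumptions):

    #include <cstdint>

    constexpr int          kCardShift    = 9;  // assumed 2^9 = 512-byte cards
    constexpr std::uint8_t kDirtyCardVal = 0;  // matches assert(dirty_card_val() == 0)

    // byte_map_base is pre-biased so that indexing with (addr >> kCardShift)
    // lands on the byte covering addr's card.
    inline void store_check(std::uint8_t* byte_map_base,
                            std::uintptr_t store_addr) {
      byte_map_base[store_addr >> kCardShift] = kDirtyCardVal;
    }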
--- a/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -30,8 +30,13 @@
 
 class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler {
 protected:
+  void store_check(MacroAssembler* masm, Register obj, Address dst);
+
   virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                 Register start, Register end, Register tmp, RegSet saved_regs);
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Address dst, Register val, Register tmp1, Register tmp2);
+
 };
 
 #endif // #ifndef CPU_AARCH64_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_AARCH64_HPP
--- a/src/hotspot/cpu/aarch64/gc/shared/modRefBarrierSetAssembler_aarch64.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/gc/shared/modRefBarrierSetAssembler_aarch64.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -43,3 +43,18 @@
     gen_write_ref_array_post_barrier(masm, decorators, start, end, tmp, saved_regs);
   }
 }
+
+
+void ModRefBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                             Address dst, Register val, Register tmp1, Register tmp2) {
+  BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
+}
+
+void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                         Address dst, Register val, Register tmp1, Register tmp2) {
+  if (type == T_OBJECT || type == T_ARRAY) {
+    oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2);
+  } else {
+    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
+  }
+}
--- a/src/hotspot/cpu/aarch64/gc/shared/modRefBarrierSetAssembler_aarch64.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/gc/shared/modRefBarrierSetAssembler_aarch64.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -28,6 +28,10 @@
 #include "asm/macroAssembler.hpp"
 #include "gc/shared/barrierSetAssembler.hpp"
 
+// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other
+// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected
+// accesses, which are overridden in the concrete BarrierSetAssembler.
+
 class ModRefBarrierSetAssembler: public BarrierSetAssembler {
 protected:
   virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
@@ -35,11 +39,16 @@
   virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                 Register start, Register end, Register tmp, RegSet saved_regs) {}
 
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Address dst, Register val, Register tmp1, Register tmp2);
+
 public:
   virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
                                   Register addr, Register count, RegSet saved_regs);
   virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
                                   Register start, Register end, Register tmp, RegSet saved_regs);
+  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                        Address dst, Register val, Register tmp1, Register tmp2);
 };
 
 #endif // CPU_AARCH64_GC_SHARED_MODREFBARRIERSETASSEMBLER_AARCH64_HPP
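The filtering described in the new header comment is a small template-method pattern. A compilable sketch with illustrative types:

    enum BasicType { T_INT, T_LONG, T_OBJECT, T_ARRAY };

    struct BarrierSetAsm {
      virtual ~BarrierSetAsm() = default;
      virtual void store_at(BasicType type) { /* raw store, no barrier */ }
    };

    struct ModRefBarrierSetAsm : BarrierSetAsm {
      // Filter: only oop stores reach the barrier-specific hook.
      void store_at(BasicType type) override {
        if (type == T_OBJECT || type == T_ARRAY) {
          oop_store_at(type);             // overridden by e.g. card table / G1
        } else {
          BarrierSetAsm::store_at(type);  // primitives bypass the barrier
        }
      }
    protected:
      virtual void oop_store_at(BasicType type) = 0;
    };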
--- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -147,6 +147,10 @@
           "Use CRC32 instructions for CRC32 computation")               \
   product(bool, UseSIMDForMemoryOps, false,                             \
           "Use SIMD instructions in generated memory move code")        \
+  product(bool, UseSIMDForArrayEquals, true,                            \
+          "Use SIMD instructions in generated array equals code")       \
+  product(bool, UseSimpleArrayEquals, false,                            \
+          "Use simpliest and shortest implementation for array equals") \
   product(bool, AvoidUnalignedAccesses, false,                          \
           "Avoid generating unaligned memory accesses")                 \
   product(bool, UseLSE, false,                                          \
--- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -24,6 +24,8 @@
  */
 
 #include "precompiled.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "interp_masm_aarch64.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterRuntime.hpp"
@@ -277,9 +279,8 @@
   resolve_oop_handle(result);
   // Add in the index
   add(result, result, tmp);
-  load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
-  // The resulting oop is null if the reference is not yet resolved.
-  // It is Universe::the_null_sentinel() if the reference resolved to NULL via condy.
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->load_at(this, IN_HEAP, T_OBJECT, result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), /*tmp1*/ noreg, /*tmp_thread*/ noreg);
 }
 
 void InterpreterMacroAssembler::load_resolved_klass_at_offset(
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -29,7 +29,9 @@
 #include "jvm.h"
 #include "asm/assembler.hpp"
 #include "asm/assembler.inline.hpp"
+#include "gc/shared/barrierSet.hpp"
 #include "gc/shared/cardTable.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "gc/shared/cardTableBarrierSet.hpp"
 #include "interpreter/interpreter.hpp"
 #include "compiler/disassembler.hpp"
@@ -50,7 +52,6 @@
 #if INCLUDE_ALL_GCS
 #include "gc/g1/g1BarrierSet.hpp"
 #include "gc/g1/g1CardTable.hpp"
-#include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/heapRegion.hpp"
 #endif
 
@@ -2091,6 +2092,28 @@
 }
 #endif
 
+void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) {
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  Label done, not_weak;
+  cbz(value, done);           // Use NULL as-is.
+
+  STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u);
+  tbz(value, 0, not_weak); // Test for jweak tag.
+
+  // Resolve jweak.
+  bs->load_at(this, IN_ROOT | ON_PHANTOM_OOP_REF, T_OBJECT,
+                    value, Address(value, -JNIHandles::weak_tag_value), tmp, thread);
+  verify_oop(value);
+  b(done);
+
+  bind(not_weak);
+  // Resolve (untagged) jobject.
+  bs->load_at(this, IN_ROOT | ON_STRONG_OOP_REF, T_OBJECT,
+                    value, Address(value, 0), tmp, thread);
+  verify_oop(value);
+  bind(done);
+}
+
 void MacroAssembler::stop(const char* msg) {
   address ip = pc();
   pusha();
@@ -3609,43 +3632,6 @@
   cmp(src1, rscratch1);
 }
 
-void MacroAssembler::store_check(Register obj, Address dst) {
-  store_check(obj);
-}
-
-void MacroAssembler::store_check(Register obj) {
-  // Does a store check for the oop in register obj. The content of
-  // register obj is destroyed afterwards.
-
-  BarrierSet* bs = Universe::heap()->barrier_set();
-  assert(bs->kind() == BarrierSet::CardTableBarrierSet,
-         "Wrong barrier set kind");
-
-  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
-  CardTable* ct = ctbs->card_table();
-  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
-
-  lsr(obj, obj, CardTable::card_shift);
-
-  assert(CardTable::dirty_card_val() == 0, "must be");
-
-  load_byte_map_base(rscratch1);
-
-  if (UseCondCardMark) {
-    Label L_already_dirty;
-    membar(StoreLoad);
-    ldrb(rscratch2,  Address(obj, rscratch1));
-    cbz(rscratch2, L_already_dirty);
-    strb(zr, Address(obj, rscratch1));
-    bind(L_already_dirty);
-  } else {
-    if (UseConcMarkSweepGC && CMSPrecleaningEnabled) {
-      membar(StoreStore);
-    }
-    strb(zr, Address(obj, rscratch1));
-  }
-}
-
 void MacroAssembler::load_klass(Register dst, Register src) {
   if (UseCompressedClassPointers) {
     ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
@@ -4009,190 +3995,6 @@
     str(zr, dst);
 }
 
-#if INCLUDE_ALL_GCS
-/*
- * g1_write_barrier_pre -- G1GC pre-write barrier for store of new_val at
- * store_addr.
- *
- * Allocates rscratch1
- */
-void MacroAssembler::g1_write_barrier_pre(Register obj,
-                                          Register pre_val,
-                                          Register thread,
-                                          Register tmp,
-                                          bool tosca_live,
-                                          bool expand_call) {
-  // If expand_call is true then we expand the call_VM_leaf macro
-  // directly to skip generating the check by
-  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
-
-  assert(thread == rthread, "must be");
-
-  Label done;
-  Label runtime;
-
-  assert_different_registers(obj, pre_val, tmp, rscratch1);
-  assert(pre_val != noreg &&  tmp != noreg, "expecting a register");
-
-  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                       SATBMarkQueue::byte_offset_of_active()));
-  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                       SATBMarkQueue::byte_offset_of_index()));
-  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                       SATBMarkQueue::byte_offset_of_buf()));
-
-
-  // Is marking active?
-  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
-    ldrw(tmp, in_progress);
-  } else {
-    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
-    ldrb(tmp, in_progress);
-  }
-  cbzw(tmp, done);
-
-  // Do we need to load the previous value?
-  if (obj != noreg) {
-    load_heap_oop(pre_val, Address(obj, 0));
-  }
-
-  // Is the previous value null?
-  cbz(pre_val, done);
-
-  // Can we store original value in the thread's buffer?
-  // Is index == 0?
-  // (The index field is typed as size_t.)
-
-  ldr(tmp, index);                      // tmp := *index_adr
-  cbz(tmp, runtime);                    // tmp == 0?
-                                        // If yes, goto runtime
-
-  sub(tmp, tmp, wordSize);              // tmp := tmp - wordSize
-  str(tmp, index);                      // *index_adr := tmp
-  ldr(rscratch1, buffer);
-  add(tmp, tmp, rscratch1);             // tmp := tmp + *buffer_adr
-
-  // Record the previous value
-  str(pre_val, Address(tmp, 0));
-  b(done);
-
-  bind(runtime);
-  // save the live input values
-  push(r0->bit(tosca_live) | obj->bit(obj != noreg) | pre_val->bit(true), sp);
-
-  // Calling the runtime using the regular call_VM_leaf mechanism generates
-  // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
-  // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL.
-  //
-  // If we care generating the pre-barrier without a frame (e.g. in the
-  // intrinsified Reference.get() routine) then ebp might be pointing to
-  // the caller frame and so this check will most likely fail at runtime.
-  //
-  // Expanding the call directly bypasses the generation of the check.
-  // So when we do not have have a full interpreter frame on the stack
-  // expand_call should be passed true.
-
-  if (expand_call) {
-    assert(pre_val != c_rarg1, "smashed arg");
-    pass_arg1(this, thread);
-    pass_arg0(this, pre_val);
-    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
-  } else {
-    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
-  }
-
-  pop(r0->bit(tosca_live) | obj->bit(obj != noreg) | pre_val->bit(true), sp);
-
-  bind(done);
-}
-
-/*
- * g1_write_barrier_post -- G1GC post-write barrier for store of new_val at
- * store_addr
- *
- * Allocates rscratch1
- */
-void MacroAssembler::g1_write_barrier_post(Register store_addr,
-                                           Register new_val,
-                                           Register thread,
-                                           Register tmp,
-                                           Register tmp2) {
-  assert(thread == rthread, "must be");
-  assert_different_registers(store_addr, new_val, thread, tmp, tmp2,
-                             rscratch1);
-  assert(store_addr != noreg && new_val != noreg && tmp != noreg
-         && tmp2 != noreg, "expecting a register");
-
-  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
-                                       DirtyCardQueue::byte_offset_of_index()));
-  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
-                                       DirtyCardQueue::byte_offset_of_buf()));
-
-  BarrierSet* bs = Universe::heap()->barrier_set();
-  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
-  CardTable* ct = ctbs->card_table();
-  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
-
-  Label done;
-  Label runtime;
-
-  // Does store cross heap regions?
-
-  eor(tmp, store_addr, new_val);
-  lsr(tmp, tmp, HeapRegion::LogOfHRGrainBytes);
-  cbz(tmp, done);
-
-  // crosses regions, storing NULL?
-
-  cbz(new_val, done);
-
-  // storing region crossing non-NULL, is card already dirty?
-
-  ExternalAddress cardtable((address) ct->byte_map_base());
-  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
-  const Register card_addr = tmp;
-
-  lsr(card_addr, store_addr, CardTable::card_shift);
-
-  // get the address of the card
-  load_byte_map_base(tmp2);
-  add(card_addr, card_addr, tmp2);
-  ldrb(tmp2, Address(card_addr));
-  cmpw(tmp2, (int)G1CardTable::g1_young_card_val());
-  br(Assembler::EQ, done);
-
-  assert((int)CardTable::dirty_card_val() == 0, "must be 0");
-
-  membar(Assembler::StoreLoad);
-
-  ldrb(tmp2, Address(card_addr));
-  cbzw(tmp2, done);
-
-  // storing a region crossing, non-NULL oop, card is clean.
-  // dirty card and log.
-
-  strb(zr, Address(card_addr));
-
-  ldr(rscratch1, queue_index);
-  cbz(rscratch1, runtime);
-  sub(rscratch1, rscratch1, wordSize);
-  str(rscratch1, queue_index);
-
-  ldr(tmp2, buffer);
-  str(card_addr, Address(tmp2, rscratch1));
-  b(done);
-
-  bind(runtime);
-  // save the live input values
-  push(store_addr->bit(true) | new_val->bit(true), sp);
-  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
-  pop(store_addr->bit(true) | new_val->bit(true), sp);
-
-  bind(done);
-}
-
-#endif // INCLUDE_ALL_GCS
-
 Address MacroAssembler::allocate_metadata_address(Metadata* obj) {
   assert(oop_recorder() != NULL, "this assembler needs a Recorder");
   int index = oop_recorder()->allocate_metadata_index(obj);
@@ -4515,7 +4317,7 @@
 
 void MacroAssembler::load_byte_map_base(Register reg) {
   jbyte *byte_map_base =
-    ((CardTableBarrierSet*)(Universe::heap()->barrier_set()))->card_table()->byte_map_base();
+    ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base();
 
   if (is_valid_AArch64_address((address)byte_map_base)) {
     // Strictly speaking the byte_map_base isn't an address at all,
@@ -5182,28 +4984,11 @@
   BIND(DONE);
 }
 
-// Compare Strings or char/byte arrays.
-
-// is_string is true iff this is a string comparison.
-
-// For Strings we're passed the address of the first characters in a1
-// and a2 and the length in cnt1.
-
-// For byte and char arrays we're passed the arrays themselves and we
-// have to extract length fields and do null checks here.
-
-// elem_size is the element size in bytes: either 1 or 2.
-
-// There are two implementations.  For arrays >= 8 bytes, all
-// comparisons (including the final one, which may overlap) are
-// performed 8 bytes at a time.  For arrays < 8 bytes, we compare a
-// halfword, then a short, and then a byte.
-
-void MacroAssembler::arrays_equals(Register a1, Register a2,
-                                   Register result, Register cnt1,
-                                   int elem_size, bool is_string)
+void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
+                                   Register tmp4, Register tmp5, Register result,
+                                   Register cnt1, int elem_size)
 {
-  Label SAME, DONE, SHORT, NEXT_WORD, ONE;
+  Label DONE;
   Register tmp1 = rscratch1;
   Register tmp2 = rscratch2;
   Register cnt2 = tmp2;  // cnt2 only used in array length compare
@@ -5212,6 +4997,7 @@
   int length_offset = arrayOopDesc::length_offset_in_bytes();
   int base_offset
     = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
+  int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
 
   assert(elem_size == 1 || elem_size == 2, "must be char or byte");
   assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
@@ -5220,43 +5006,229 @@
   {
     const char kind = (elem_size == 2) ? 'U' : 'L';
     char comment[64];
-    snprintf(comment, sizeof comment, "%s%c%s {",
-             is_string ? "string_equals" : "array_equals",
-             kind, "{");
+    snprintf(comment, sizeof comment, "array_equals%c{", kind);
     BLOCK_COMMENT(comment);
   }
 #endif
-
-  mov(result, false);
-
-  if (!is_string) {
-    // if (a==a2)
+  if (UseSimpleArrayEquals) {
+    Label NEXT_WORD, SHORT, SAME, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
+    // if (a1==a2)
     //     return true;
-    eor(rscratch1, a1, a2);
-    cbz(rscratch1, SAME);
     // if (a==null || a2==null)
     //     return false;
+    // a1 & a2 == 0 means that some pointer is null (or, very rarely, that
+    // two valid pointer values happen to mask to zero), so testing it first
+    // saves one branch in most cases
+    eor(rscratch1, a1, a2);
+    tst(a1, a2);
+    mov(result, false);
+    cbz(rscratch1, SAME);
+    br(EQ, A_MIGHT_BE_NULL);
+    // if (a1.length != a2.length)
+    //      return false;
+    bind(A_IS_NOT_NULL);
+    ldrw(cnt1, Address(a1, length_offset));
+    ldrw(cnt2, Address(a2, length_offset));
+    eorw(tmp5, cnt1, cnt2);
+    cbnzw(tmp5, DONE);
+    lea(a1, Address(a1, base_offset));
+    lea(a2, Address(a2, base_offset));
+    // Check for short strings, i.e. smaller than wordSize.
+    subs(cnt1, cnt1, elem_per_word);
+    br(Assembler::LT, SHORT);
+    // Main 8 byte comparison loop.
+    bind(NEXT_WORD); {
+      ldr(tmp1, Address(post(a1, wordSize)));
+      ldr(tmp2, Address(post(a2, wordSize)));
+      subs(cnt1, cnt1, elem_per_word);
+      eor(tmp5, tmp1, tmp2);
+      cbnz(tmp5, DONE);
+    } br(GT, NEXT_WORD);
+    // Last longword.  In the case where length == 4 we compare the
+    // same longword twice, but that's still faster than another
+    // conditional branch.
+    // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
+    // length == 4.
+    if (log_elem_size > 0)
+      lsl(cnt1, cnt1, log_elem_size);
+    ldr(tmp3, Address(a1, cnt1));
+    ldr(tmp4, Address(a2, cnt1));
+    eor(tmp5, tmp3, tmp4);
+    cbnz(tmp5, DONE);
+    b(SAME);
+    bind(A_MIGHT_BE_NULL);
+    // if both a1 and a2 turn out to be non-null, proceed with the loads
     cbz(a1, DONE);
     cbz(a2, DONE);
-    // if (a1.length != a2.length)
-    //      return false;
+    b(A_IS_NOT_NULL);
+    bind(SHORT);
+
+    tbz(cnt1, 2 - log_elem_size, TAIL03); // 0-7 bytes left.
+    {
+      ldrw(tmp1, Address(post(a1, 4)));
+      ldrw(tmp2, Address(post(a2, 4)));
+      eorw(tmp5, tmp1, tmp2);
+      cbnzw(tmp5, DONE);
+    }
+    bind(TAIL03);
+    tbz(cnt1, 1 - log_elem_size, TAIL01); // 0-3 bytes left.
+    {
+      ldrh(tmp3, Address(post(a1, 2)));
+      ldrh(tmp4, Address(post(a2, 2)));
+      eorw(tmp5, tmp3, tmp4);
+      cbnzw(tmp5, DONE);
+    }
+    bind(TAIL01);
+    if (elem_size == 1) { // Only needed when comparing byte arrays.
+      tbz(cnt1, 0, SAME); // 0-1 bytes left.
+      {
+        ldrb(tmp1, a1);
+        ldrb(tmp2, a2);
+        eorw(tmp5, tmp1, tmp2);
+        cbnzw(tmp5, DONE);
+      }
+    }
+    bind(SAME);
+    mov(result, true);
+  } else {
+    Label NEXT_DWORD, A_IS_NULL, SHORT, TAIL, TAIL2, STUB, EARLY_OUT,
+        CSET_EQ, LAST_CHECK, LEN_IS_ZERO, SAME;
+    cbz(a1, A_IS_NULL);
     ldrw(cnt1, Address(a1, length_offset));
+    cbz(a2, A_IS_NULL);
     ldrw(cnt2, Address(a2, length_offset));
-    eorw(tmp1, cnt1, cnt2);
-    cbnzw(tmp1, DONE);
-
-    lea(a1, Address(a1, base_offset));
-    lea(a2, Address(a2, base_offset));
+    mov(result, false);
+    // on most CPUs a2 is still "locked" (surprisingly) by the ldrw above, so
+    // it is faster to perform another branch before comparing a1 and a2
+    cmp(cnt1, elem_per_word);
+    br(LE, SHORT); // short or same
+    cmp(a1, a2);
+    br(EQ, SAME);
+    ldr(tmp3, Address(pre(a1, base_offset)));
+    cmp(cnt1, stubBytesThreshold);
+    br(GE, STUB);
+    ldr(tmp4, Address(pre(a2, base_offset)));
+    sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
+    cmp(cnt2, cnt1);
+    br(NE, DONE);
+
+    // Main 16 byte comparison loop with 2 exits
+    bind(NEXT_DWORD); {
+      ldr(tmp1, Address(pre(a1, wordSize)));
+      ldr(tmp2, Address(pre(a2, wordSize)));
+      subs(cnt1, cnt1, 2 * elem_per_word);
+      br(LE, TAIL);
+      eor(tmp4, tmp3, tmp4);
+      cbnz(tmp4, DONE);
+      ldr(tmp3, Address(pre(a1, wordSize)));
+      ldr(tmp4, Address(pre(a2, wordSize)));
+      cmp(cnt1, elem_per_word);
+      br(LE, TAIL2);
+      cmp(tmp1, tmp2);
+    } br(EQ, NEXT_DWORD);
+    b(DONE);
+
+    bind(TAIL);
+    eor(tmp4, tmp3, tmp4);
+    eor(tmp2, tmp1, tmp2);
+    lslv(tmp2, tmp2, tmp5);
+    orr(tmp5, tmp4, tmp2);
+    cmp(tmp5, zr);
+    b(CSET_EQ);
+
+    bind(TAIL2);
+    eor(tmp2, tmp1, tmp2);
+    cbnz(tmp2, DONE);
+    b(LAST_CHECK);
+
+    bind(STUB);
+    ldr(tmp4, Address(pre(a2, base_offset)));
+    cmp(cnt2, cnt1);
+    br(NE, DONE);
+    if (elem_size == 2) { // convert to byte counter
+      lsl(cnt1, cnt1, 1);
+    }
+    eor(tmp5, tmp3, tmp4);
+    cbnz(tmp5, DONE);
+    RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
+    assert(stub.target() != NULL, "array_equals_long stub has not been generated");
+    trampoline_call(stub);
+    b(DONE);
+
+    bind(SAME);
+    mov(result, true);
+    b(DONE);
+    bind(A_IS_NULL);
+    // a1 or a2 is null. if a1 == a2 then return true. else return false
+    cmp(a1, a2);
+    b(CSET_EQ);
+    bind(EARLY_OUT);
+    // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
+    // so, if a2 == null we return false (0); otherwise true, which means we can just return a2
+    mov(result, a2);
+    b(DONE);
+    bind(LEN_IS_ZERO);
+    cmp(cnt2, zr);
+    b(CSET_EQ);
+    bind(SHORT);
+    cbz(cnt1, LEN_IS_ZERO);
+    sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
+    ldr(tmp3, Address(a1, base_offset));
+    ldr(tmp4, Address(a2, base_offset));
+    bind(LAST_CHECK);
+    eor(tmp4, tmp3, tmp4);
+    lslv(tmp5, tmp4, tmp5);
+    cmp(tmp5, zr);
+    bind(CSET_EQ);
+    cset(result, EQ);
   }
 
+  // That's it.
+  bind(DONE);
+
+  BLOCK_COMMENT("} array_equals");
+}
+
+// Compare Strings
+
+// For Strings we're passed the address of the first characters in a1
+// and a2 and the length in cnt1.
+// elem_size is the element size in bytes: either 1 or 2.
+// There are two implementations.  For arrays >= 8 bytes, all
+// comparisons (including the final one, which may overlap) are
+// performed 8 bytes at a time.  For strings < 8 bytes, we compare a
+// halfword, then a short, and then a byte.
+
+void MacroAssembler::string_equals(Register a1, Register a2,
+                                   Register result, Register cnt1, int elem_size)
+{
+  Label SAME, DONE, SHORT, NEXT_WORD;
+  Register tmp1 = rscratch1;
+  Register tmp2 = rscratch2;
+  Register cnt2 = tmp2;  // cnt2 only used in array length compare
+
+  assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte");
+  assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
+
+#ifndef PRODUCT
+  {
+    const char kind = (elem_size == 2) ? 'U' : 'L';
+    char comment[64];
+    snprintf(comment, sizeof comment, "{string_equals%c", kind);
+    BLOCK_COMMENT(comment);
+  }
+#endif
+
+  mov(result, false);
+
   // Check for short strings, i.e. smaller than wordSize.
-  subs(cnt1, cnt1, elem_per_word);
+  subs(cnt1, cnt1, wordSize);
   br(Assembler::LT, SHORT);
   // Main 8 byte comparison loop.
   bind(NEXT_WORD); {
     ldr(tmp1, Address(post(a1, wordSize)));
     ldr(tmp2, Address(post(a2, wordSize)));
-    subs(cnt1, cnt1, elem_per_word);
+    subs(cnt1, cnt1, wordSize);
     eor(tmp1, tmp1, tmp2);
     cbnz(tmp1, DONE);
   } br(GT, NEXT_WORD);
@@ -5265,18 +5237,16 @@
   // conditional branch.
   // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
   // length == 4.
-  if (log_elem_size > 0)
-    lsl(cnt1, cnt1, log_elem_size);
   ldr(tmp1, Address(a1, cnt1));
   ldr(tmp2, Address(a2, cnt1));
-  eor(tmp1, tmp1, tmp2);
-  cbnz(tmp1, DONE);
+  eor(tmp2, tmp1, tmp2);
+  cbnz(tmp2, DONE);
   b(SAME);
 
   bind(SHORT);
   Label TAIL03, TAIL01;
 
-  tbz(cnt1, 2 - log_elem_size, TAIL03); // 0-7 bytes left.
+  tbz(cnt1, 2, TAIL03); // 0-7 bytes left.
   {
     ldrw(tmp1, Address(post(a1, 4)));
     ldrw(tmp2, Address(post(a2, 4)));
@@ -5284,7 +5254,7 @@
     cbnzw(tmp1, DONE);
   }
   bind(TAIL03);
-  tbz(cnt1, 1 - log_elem_size, TAIL01); // 0-3 bytes left.
+  tbz(cnt1, 1, TAIL01); // 0-3 bytes left.
   {
     ldrh(tmp1, Address(post(a1, 2)));
     ldrh(tmp2, Address(post(a2, 2)));
@@ -5292,7 +5262,7 @@
     cbnzw(tmp1, DONE);
   }
   bind(TAIL01);
-  if (elem_size == 1) { // Only needed when comparing byte arrays.
+  if (elem_size == 1) { // Only needed when comparing 1-byte elements
     tbz(cnt1, 0, SAME); // 0-1 bytes left.
     {
       ldrb(tmp1, a1);
@@ -5307,7 +5277,7 @@
 
   // That's it.
   bind(DONE);
-  BLOCK_COMMENT(is_string ? "} string_equals" : "} array_equals");
+  BLOCK_COMMENT("} string_equals");
 }
 
 
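The tst(a1, a2) test in the UseSimpleArrayEquals path relies on the observation that for two non-null, aligned pointers a1 & a2 is essentially never zero. The idea in ordinary C++ (a sketch, not the generated code):

    #include <cstdint>

    // One AND covers the common "both non-null" case; only the rare zero
    // result falls back to exact per-pointer null checks, saving a branch
    // most of the time.
    inline bool needs_exact_null_check(const void* a1, const void* a2) {
      return (reinterpret_cast<std::uintptr_t>(a1) &
              reinterpret_cast<std::uintptr_t>(a2)) == 0;
    }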
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -779,6 +779,8 @@
   void store_check(Register obj);                // store check for obj - register is destroyed afterwards
   void store_check(Register obj, Address dst);   // same as above, dst is exact store location (reg. is destroyed)
 
+  void resolve_jobject(Register value, Register thread, Register tmp);
+
 #if INCLUDE_ALL_GCS
 
   void g1_write_barrier_pre(Register obj,
@@ -1225,9 +1227,11 @@
 
   void has_negatives(Register ary1, Register len, Register result);
 
-  void arrays_equals(Register a1, Register a2,
-                     Register result, Register cnt1,
-                     int elem_size, bool is_string);
+  void arrays_equals(Register a1, Register a2, Register result, Register cnt1,
+                     Register tmp1, Register tmp2, Register tmp3, int elem_size);
+
+  void string_equals(Register a1, Register a2, Register result, Register cnt1,
+                     int elem_size);
 
   void fill_words(Register base, Register cnt, Register value);
   void zero_words(Register base, u_int64_t cnt);
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -2050,29 +2050,7 @@
 
   // Unbox oop result, e.g. JNIHandles::resolve result.
   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
-    Label done, not_weak;
-    __ cbz(r0, done);           // Use NULL as-is.
-    STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u);
-    __ tbz(r0, 0, not_weak);    // Test for jweak tag.
-    // Resolve jweak.
-    __ ldr(r0, Address(r0, -JNIHandles::weak_tag_value));
-    __ verify_oop(r0);
-#if INCLUDE_ALL_GCS
-    if (UseG1GC) {
-      __ g1_write_barrier_pre(noreg /* obj */,
-                              r0 /* pre_val */,
-                              rthread /* thread */,
-                              rscratch2 /* tmp */,
-                              true /* tosca_live */,
-                              true /* expand_call */);
-    }
-#endif // INCLUDE_ALL_GCS
-    __ b(done);
-    __ bind(not_weak);
-    // Resolve (untagged) jobject.
-    __ ldr(r0, Address(r0, 0));
-    __ verify_oop(r0);
-    __ bind(done);
+    __ resolve_jobject(r0, rthread, rscratch2);
   }
 
   if (CheckJNICalls) {
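resolve_jobject (added in macroAssembler_aarch64.cpp above) replaces this open-coded handle unboxing. The tagging scheme it relies on, restated as a hedged C++ sketch (bit 0 as the weak tag matches the STATIC_ASSERT on weak_tag_mask; the GC barrier taken on the weak path is elided here):

    #include <cstdint>

    using oop = void*;

    inline oop resolve_jobject_sketch(std::uintptr_t handle) {
      if (handle == 0) {
        return nullptr;                              // NULL handle used as-is
      }
      if (handle & 1u) {                             // jweak tag set?
        return *reinterpret_cast<oop*>(handle - 1);  // untag, then load
      }
      return *reinterpret_cast<oop*>(handle);        // strong (untagged) handle
    }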
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -1359,7 +1359,7 @@
       decorators |= ARRAYCOPY_ALIGNED;
     }
 
-    BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, is_oop, d, count, saved_reg);
 
     if (is_oop) {
@@ -1433,7 +1433,7 @@
       decorators |= ARRAYCOPY_ALIGNED;
     }
 
-    BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, is_oop, d, count, saved_regs);
 
     if (is_oop) {
@@ -1795,7 +1795,7 @@
       decorators |= AS_DEST_NOT_INITIALIZED;
     }
 
-    BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, is_oop, to, count, wb_pre_saved_regs);
 
     // save the original count
@@ -3813,6 +3813,182 @@
     __ ret(lr);
     return entry;
   }
+
+  void generate_large_array_equals_loop_nonsimd(int loopThreshold,
+        bool usePrefetch, Label &NOT_EQUAL) {
+    Register a1 = r1, a2 = r2, result = r0, cnt1 = r10, tmp1 = rscratch1,
+        tmp2 = rscratch2, tmp3 = r3, tmp4 = r4, tmp5 = r5, tmp6 = r11,
+        tmp7 = r12, tmp8 = r13;
+    Label LOOP;
+
+    __ ldp(tmp1, tmp3, Address(__ post(a1, 2 * wordSize)));
+    __ ldp(tmp2, tmp4, Address(__ post(a2, 2 * wordSize)));
+    __ bind(LOOP);
+    if (usePrefetch) {
+      __ prfm(Address(a1, SoftwarePrefetchHintDistance));
+      __ prfm(Address(a2, SoftwarePrefetchHintDistance));
+    }
+    __ ldp(tmp5, tmp7, Address(__ post(a1, 2 * wordSize)));
+    __ eor(tmp1, tmp1, tmp2);
+    __ eor(tmp3, tmp3, tmp4);
+    __ ldp(tmp6, tmp8, Address(__ post(a2, 2 * wordSize)));
+    __ orr(tmp1, tmp1, tmp3);
+    __ cbnz(tmp1, NOT_EQUAL);
+    __ ldp(tmp1, tmp3, Address(__ post(a1, 2 * wordSize)));
+    __ eor(tmp5, tmp5, tmp6);
+    __ eor(tmp7, tmp7, tmp8);
+    __ ldp(tmp2, tmp4, Address(__ post(a2, 2 * wordSize)));
+    __ orr(tmp5, tmp5, tmp7);
+    __ cbnz(tmp5, NOT_EQUAL);
+    __ ldp(tmp5, tmp7, Address(__ post(a1, 2 * wordSize)));
+    __ eor(tmp1, tmp1, tmp2);
+    __ eor(tmp3, tmp3, tmp4);
+    __ ldp(tmp6, tmp8, Address(__ post(a2, 2 * wordSize)));
+    __ orr(tmp1, tmp1, tmp3);
+    __ cbnz(tmp1, NOT_EQUAL);
+    __ ldp(tmp1, tmp3, Address(__ post(a1, 2 * wordSize)));
+    __ eor(tmp5, tmp5, tmp6);
+    __ sub(cnt1, cnt1, 8 * wordSize);
+    __ eor(tmp7, tmp7, tmp8);
+    __ ldp(tmp2, tmp4, Address(__ post(a2, 2 * wordSize)));
+    __ cmp(cnt1, loopThreshold);
+    __ orr(tmp5, tmp5, tmp7);
+    __ cbnz(tmp5, NOT_EQUAL);
+    __ br(__ GE, LOOP);
+    // post-loop
+    __ eor(tmp1, tmp1, tmp2);
+    __ eor(tmp3, tmp3, tmp4);
+    __ orr(tmp1, tmp1, tmp3);
+    __ sub(cnt1, cnt1, 2 * wordSize);
+    __ cbnz(tmp1, NOT_EQUAL);
+  }
+
+  void generate_large_array_equals_loop_simd(int loopThreshold,
+        bool usePrefetch, Label &NOT_EQUAL) {
+    Register a1 = r1, a2 = r2, result = r0, cnt1 = r10, tmp1 = rscratch1,
+        tmp2 = rscratch2;
+    Label LOOP;
+
+    __ bind(LOOP);
+    if (usePrefetch) {
+      __ prfm(Address(a1, SoftwarePrefetchHintDistance));
+      __ prfm(Address(a2, SoftwarePrefetchHintDistance));
+    }
+    __ ld1(v0, v1, v2, v3, __ T2D, Address(__ post(a1, 4 * 2 * wordSize)));
+    __ sub(cnt1, cnt1, 8 * wordSize);
+    __ ld1(v4, v5, v6, v7, __ T2D, Address(__ post(a2, 4 * 2 * wordSize)));
+    __ cmp(cnt1, loopThreshold);
+    __ eor(v0, __ T16B, v0, v4);
+    __ eor(v1, __ T16B, v1, v5);
+    __ eor(v2, __ T16B, v2, v6);
+    __ eor(v3, __ T16B, v3, v7);
+    __ orr(v0, __ T16B, v0, v1);
+    __ orr(v1, __ T16B, v2, v3);
+    __ orr(v0, __ T16B, v0, v1);
+    __ umov(tmp1, v0, __ D, 0);
+    __ umov(tmp2, v0, __ D, 1);
+    __ orr(tmp1, tmp1, tmp2);
+    __ cbnz(tmp1, NOT_EQUAL);
+    __ br(__ GE, LOOP);
+  }
+
+  // a1 = r1 - array1 address
+  // a2 = r2 - array2 address
+  // result = r0 - return value. Already contains "false"
+  // cnt1 = r10 - number of elements left to check, reduced by wordSize
+  // r3-r5 are reserved temporary registers
+  address generate_large_array_equals() {
+    StubCodeMark mark(this, "StubRoutines", "large_array_equals");
+    Register a1 = r1, a2 = r2, result = r0, cnt1 = r10, tmp1 = rscratch1,
+        tmp2 = rscratch2, tmp3 = r3, tmp4 = r4, tmp5 = r5, tmp6 = r11,
+        tmp7 = r12, tmp8 = r13;
+    Label TAIL, NOT_EQUAL, EQUAL, NOT_EQUAL_NO_POP, NO_PREFETCH_LARGE_LOOP,
+        SMALL_LOOP, POST_LOOP;
+    const int PRE_LOOP_SIZE = UseSIMDForArrayEquals ? 0 : 16;
+    // threshold chosen so that at least 32 prefetched bytes are actually used
+    int prefetchLoopThreshold = SoftwarePrefetchHintDistance + 32;
+    int nonPrefetchLoopThreshold = (64 + PRE_LOOP_SIZE);
+    RegSet spilled_regs = RegSet::range(tmp6, tmp8);
+    assert_different_registers(a1, a2, result, cnt1, tmp1, tmp2, tmp3, tmp4,
+        tmp5, tmp6, tmp7, tmp8);
+
+    __ align(CodeEntryAlignment);
+    address entry = __ pc();
+    __ enter();
+    __ sub(cnt1, cnt1, wordSize);  // first 8 bytes were loaded outside of stub
+    // also advance pointers to use post-increment instead of pre-increment
+    __ add(a1, a1, wordSize);
+    __ add(a2, a2, wordSize);
+    if (AvoidUnalignedAccesses) {
+      // Both implementations (SIMD/non-SIMD) use relatively large load
+      // instructions (ld1/ldp), which incur a heavy penalty (up to 2x
+      // execution time) on some CPUs when the address is not at least
+      // 16-byte aligned. Arrays are currently 8-byte aligned, so an
+      // additional 8-byte load can be issued, if needed, to make at least
+      // the first address 16-byte aligned.
+      Label ALIGNED16;
+      __ tbz(a1, 3, ALIGNED16);
+      __ ldr(tmp1, Address(__ post(a1, wordSize)));
+      __ ldr(tmp2, Address(__ post(a2, wordSize)));
+      __ sub(cnt1, cnt1, wordSize);
+      __ eor(tmp1, tmp1, tmp2);
+      __ cbnz(tmp1, NOT_EQUAL_NO_POP);
+      __ bind(ALIGNED16);
+    }
+    if (UseSIMDForArrayEquals) {
+      if (SoftwarePrefetchHintDistance >= 0) {
+        __ cmp(cnt1, prefetchLoopThreshold);
+        __ br(__ LE, NO_PREFETCH_LARGE_LOOP);
+        generate_large_array_equals_loop_simd(prefetchLoopThreshold,
+            /* prfm = */ true, NOT_EQUAL);
+        __ cmp(cnt1, nonPrefetchLoopThreshold);
+        __ br(__ LT, TAIL);
+      }
+      __ bind(NO_PREFETCH_LARGE_LOOP);
+      generate_large_array_equals_loop_simd(nonPrefetchLoopThreshold,
+          /* prfm = */ false, NOT_EQUAL);
+    } else {
+      __ push(spilled_regs, sp);
+      if (SoftwarePrefetchHintDistance >= 0) {
+        __ cmp(cnt1, prefetchLoopThreshold);
+        __ br(__ LE, NO_PREFETCH_LARGE_LOOP);
+        generate_large_array_equals_loop_nonsimd(prefetchLoopThreshold,
+            /* prfm = */ true, NOT_EQUAL);
+        __ cmp(cnt1, nonPrefetchLoopThreshold);
+        __ br(__ LT, TAIL);
+      }
+      __ bind(NO_PREFETCH_LARGE_LOOP);
+      generate_large_array_equals_loop_nonsimd(nonPrefetchLoopThreshold,
+          /* prfm = */ false, NOT_EQUAL);
+    }
+    __ bind(TAIL);
+      __ cbz(cnt1, EQUAL);
+      __ subs(cnt1, cnt1, wordSize);
+      __ br(__ LE, POST_LOOP);
+    __ bind(SMALL_LOOP);
+      __ ldr(tmp1, Address(__ post(a1, wordSize)));
+      __ ldr(tmp2, Address(__ post(a2, wordSize)));
+      __ subs(cnt1, cnt1, wordSize);
+      __ eor(tmp1, tmp1, tmp2);
+      __ cbnz(tmp1, NOT_EQUAL);
+      __ br(__ GT, SMALL_LOOP);
+    __ bind(POST_LOOP);
+      __ ldr(tmp1, Address(a1, cnt1));
+      __ ldr(tmp2, Address(a2, cnt1));
+      __ eor(tmp1, tmp1, tmp2);
+      __ cbnz(tmp1, NOT_EQUAL);
+    __ bind(EQUAL);
+      __ mov(result, true);
+    __ bind(NOT_EQUAL);
+      if (!UseSIMDForArrayEquals) {
+        __ pop(spilled_regs, sp);
+      }
+    __ bind(NOT_EQUAL_NO_POP);
+    __ leave();
+    __ ret(lr);
+    return entry;
+  }
+
   /**
    *  Arguments:
    *
@@ -4895,6 +5071,11 @@
     // has negatives stub for large arrays.
     StubRoutines::aarch64::_has_negatives = generate_has_negatives(StubRoutines::aarch64::_has_negatives_long);
 
+    // array equals stub for large arrays.
+    if (!UseSimpleArrayEquals) {
+      StubRoutines::aarch64::_large_array_equals = generate_large_array_equals();
+    }
+
     if (UseMultiplyToLenIntrinsic) {
       StubRoutines::_multiplyToLen = generate_multiplyToLen();
     }
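The core idea of the new stub — XOR word pairs, OR the differences together, and branch only once per pair — reads more clearly in scalar C++. A minimal sketch, assuming 8-byte-aligned inputs and a word count the caller has already adjusted as the register comments above describe (names are illustrative, not HotSpot API):

    #include <cstdint>
    #include <cstddef>

    bool large_equals(const uint64_t* a, const uint64_t* b, size_t words) {
      // Main loop: 8 words per iteration, testing the accumulated XOR once
      // per pair, mirroring the ldp/eor/orr/cbnz pattern of the non-SIMD loop.
      while (words >= 8) {
        for (int i = 0; i < 8; i += 2) {
          if (((a[i] ^ b[i]) | (a[i + 1] ^ b[i + 1])) != 0) return false;
        }
        a += 8; b += 8; words -= 8;
      }
      // Tail: one word at a time (SMALL_LOOP/POST_LOOP in the stub).
      for (size_t i = 0; i < words; i++) {
        if (a[i] != b[i]) return false;
      }
      return true;
    }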
--- a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -46,6 +46,7 @@
 address StubRoutines::aarch64::_zero_blocks = NULL;
 address StubRoutines::aarch64::_has_negatives = NULL;
 address StubRoutines::aarch64::_has_negatives_long = NULL;
+address StubRoutines::aarch64::_large_array_equals = NULL;
 bool StubRoutines::aarch64::_completed = false;
 
 /**
--- a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -65,6 +65,7 @@
 
   static address _has_negatives;
   static address _has_negatives_long;
+  static address _large_array_equals;
   static bool _completed;
 
  public:
@@ -131,6 +132,10 @@
       return _has_negatives_long;
   }
 
+  static address large_array_equals() {
+      return _large_array_equals;
+  }
+
   static bool complete() {
     return _completed;
   }
--- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2014, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -25,6 +25,7 @@
 
 #include "precompiled.hpp"
 #include "asm/macroAssembler.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "interpreter/bytecodeHistogram.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterRuntime.hpp"
@@ -889,7 +890,6 @@
 
 // Method entry for java.lang.ref.Reference.get.
 address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {
-#if INCLUDE_ALL_GCS
   // Code: _aload_0, _getfield, _areturn
   // parameter size = 1
   //
@@ -923,43 +923,29 @@
   const int referent_offset = java_lang_ref_Reference::referent_offset;
   guarantee(referent_offset > 0, "referent offset not initialized");
 
-  if (UseG1GC) {
-    Label slow_path;
-    const Register local_0 = c_rarg0;
-    // Check if local 0 != NULL
-    // If the receiver is null then it is OK to jump to the slow path.
-    __ ldr(local_0, Address(esp, 0));
-    __ cbz(local_0, slow_path);
+  Label slow_path;
+  const Register local_0 = c_rarg0;
+  // Check if local 0 != NULL
+  // If the receiver is null then it is OK to jump to the slow path.
+  __ ldr(local_0, Address(esp, 0));
+  __ cbz(local_0, slow_path);
 
-    // Load the value of the referent field.
-    const Address field_address(local_0, referent_offset);
-    __ load_heap_oop(local_0, field_address);
+  __ mov(r19, r13);   // Move senderSP to a callee-saved register
 
-    __ mov(r19, r13);   // Move senderSP to a callee-saved register
-    // Generate the G1 pre-barrier code to log the value of
-    // the referent field in an SATB buffer.
-    __ enter(); // g1_write may call runtime
-    __ g1_write_barrier_pre(noreg /* obj */,
-                            local_0 /* pre_val */,
-                            rthread /* thread */,
-                            rscratch2 /* tmp */,
-                            true /* tosca_live */,
-                            true /* expand_call */);
-    __ leave();
-    // areturn
-    __ andr(sp, r19, -16);  // done with stack
-    __ ret(lr);
+  // Load the value of the referent field.
+  const Address field_address(local_0, referent_offset);
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ rscratch2, /*tmp2*/ rscratch1);
 
-    // generate a vanilla interpreter entry as the slow path
-    __ bind(slow_path);
-    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
-    return entry;
-  }
-#endif // INCLUDE_ALL_GCS
+  // areturn
+  __ andr(sp, r19, -16);  // done with stack
+  __ ret(lr);
 
-  // If G1 is not enabled then attempt to go through the accessor entry point
-  // Reference.get is an accessor
-  return NULL;
+  // generate a vanilla interpreter entry as the slow path
+  __ bind(slow_path);
+  __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
+  return entry;
 }
 
 /**
@@ -1434,28 +1420,7 @@
     __ br(Assembler::NE, no_oop);
     // Unbox oop result, e.g. JNIHandles::resolve result.
     __ pop(ltos);
-    __ cbz(r0, store_result);   // Use NULL as-is.
-    STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u);
-    __ tbz(r0, 0, not_weak);    // Test for jweak tag.
-    // Resolve jweak.
-    __ ldr(r0, Address(r0, -JNIHandles::weak_tag_value));
-#if INCLUDE_ALL_GCS
-    if (UseG1GC) {
-      __ enter();                   // Barrier may call runtime.
-      __ g1_write_barrier_pre(noreg /* obj */,
-                              r0 /* pre_val */,
-                              rthread /* thread */,
-                              t /* tmp */,
-                              true /* tosca_live */,
-                              true /* expand_call */);
-      __ leave();
-    }
-#endif // INCLUDE_ALL_GCS
-    __ b(store_result);
-    __ bind(not_weak);
-    // Resolve (untagged) jobject.
-    __ ldr(r0, Address(r0, 0));
-    __ bind(store_result);
+    __ resolve_jobject(r0, rthread, t);
     __ str(r0, Address(rfp, frame::interpreter_frame_oop_temp_offset*wordSize));
     // keep stack depth as expected by pushing oop which will eventually be discarded
     __ push(ltos);
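Here too the hand-rolled jweak handling becomes resolve_jobject, and the Reference.get entry above now asks the BarrierSetAssembler for a decorated load. At the C++ level the same decorators drive HotSpot's Access API; a hedged analogue (the Access spelling is from the shared runtime, not from this diff):

    // Runtime-side analogue of the interpreter entry's decorated load:
    // ON_WEAK_OOP_REF makes G1 log the referent for concurrent marking.
    oop referent = HeapAccess<ON_WEAK_OOP_REF>::oop_load_at(reference_oop,
                                                            referent_offset);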
--- a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -25,6 +25,7 @@
 
 #include "precompiled.hpp"
 #include "asm/macroAssembler.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterRuntime.hpp"
 #include "interpreter/interp_masm.hpp"
@@ -142,76 +143,20 @@
 // Store an oop (or NULL) at the Address described by obj.
 // If val == noreg this means store a NULL
 static void do_oop_store(InterpreterMacroAssembler* _masm,
-                         Address obj,
+                         Address dst,
                          Register val,
-                         BarrierSet::Name barrier,
-                         bool precise) {
+                         DecoratorSet decorators) {
   assert(val == noreg || val == r0, "parameter is just for looks");
-  switch (barrier) {
-#if INCLUDE_ALL_GCS
-    case BarrierSet::G1BarrierSet:
-      {
-        // flatten object address if needed
-        if (obj.index() == noreg && obj.offset() == 0) {
-          if (obj.base() != r3) {
-            __ mov(r3, obj.base());
-          }
-        } else {
-          __ lea(r3, obj);
-        }
-        __ g1_write_barrier_pre(r3 /* obj */,
-                                r1 /* pre_val */,
-                                rthread /* thread */,
-                                r10  /* tmp */,
-                                val != noreg /* tosca_live */,
-                                false /* expand_call */);
-        if (val == noreg) {
-          __ store_heap_oop_null(Address(r3, 0));
-        } else {
-          // G1 barrier needs uncompressed oop for region cross check.
-          Register new_val = val;
-          if (UseCompressedOops) {
-            new_val = rscratch2;
-            __ mov(new_val, val);
-          }
-          __ store_heap_oop(Address(r3, 0), val);
-          __ g1_write_barrier_post(r3 /* store_adr */,
-                                   new_val /* new_val */,
-                                   rthread /* thread */,
-                                   r10 /* tmp */,
-                                   r1 /* tmp2 */);
-        }
-
-      }
-      break;
-#endif // INCLUDE_ALL_GCS
-    case BarrierSet::CardTableBarrierSet:
-      {
-        if (val == noreg) {
-          __ store_heap_oop_null(obj);
-        } else {
-          __ store_heap_oop(obj, val);
-          // flatten object address if needed
-          if (!precise || (obj.index() == noreg && obj.offset() == 0)) {
-            __ store_check(obj.base());
-          } else {
-            __ lea(r3, obj);
-            __ store_check(r3);
-          }
-        }
-      }
-      break;
-    case BarrierSet::ModRef:
-      if (val == noreg) {
-        __ store_heap_oop_null(obj);
-      } else {
-        __ store_heap_oop(obj, val);
-      }
-      break;
-    default      :
-      ShouldNotReachHere();
-
-  }
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->store_at(_masm, decorators, T_OBJECT, dst, val, /*tmp1*/ r10, /*tmp2*/ r1);
+}
+
+static void do_oop_load(InterpreterMacroAssembler* _masm,
+                        Address src,
+                        Register dst,
+                        DecoratorSet decorators) {
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->load_at(_masm, decorators, T_OBJECT, dst, src, /*tmp1*/ r10, /*tmp_thread*/ r1);
 }
 
 Address TemplateTable::at_bcp(int offset) {
@@ -865,7 +810,10 @@
   index_check(r0, r1); // leaves index in r1, kills rscratch1
   int s = (UseCompressedOops ? 2 : 3);
   __ lea(r1, Address(r0, r1, Address::uxtw(s)));
-  __ load_heap_oop(r0, Address(r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+  do_oop_load(_masm,
+              Address(r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT)),
+              r0,
+              IN_HEAP | IN_HEAP_ARRAY);
 }
 
 void TemplateTable::baload()
@@ -1193,7 +1141,7 @@
   // Get the value we will store
   __ ldr(r0, at_tos());
   // Now store using the appropriate barrier
-  do_oop_store(_masm, element_address, r0, _bs->kind(), true);
+  do_oop_store(_masm, element_address, r0, IN_HEAP | IN_HEAP_ARRAY);
   __ b(done);
 
   // Have a NULL in r0, r3=array, r2=index.  Store NULL at ary[idx]
@@ -1201,7 +1149,7 @@
   __ profile_null_seen(r2);
 
   // Store a NULL
-  do_oop_store(_masm, element_address, noreg, _bs->kind(), true);
+  do_oop_store(_masm, element_address, noreg, IN_HEAP | IN_HEAP_ARRAY);
 
   // Pop stack arguments
   __ bind(done);
@@ -2591,7 +2539,7 @@
   __ cmp(flags, atos);
   __ br(Assembler::NE, notObj);
   // atos
-  __ load_heap_oop(r0, field);
+  do_oop_load(_masm, field, r0, IN_HEAP);
   __ push(atos);
   if (rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_agetfield, bc, r1);
@@ -2834,7 +2782,7 @@
     __ pop(atos);
     if (!is_static) pop_and_check_object(obj);
     // Store into the field
-    do_oop_store(_masm, field, r0, _bs->kind(), false);
+    do_oop_store(_masm, field, r0, IN_HEAP);
     if (rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no);
     }
@@ -3054,7 +3002,7 @@
   // access field
   switch (bytecode()) {
   case Bytecodes::_fast_aputfield:
-    do_oop_store(_masm, field, r0, _bs->kind(), false);
+    do_oop_store(_masm, field, r0, IN_HEAP);
     break;
   case Bytecodes::_fast_lputfield:
     __ str(r0, field);
@@ -3146,7 +3094,7 @@
   // access field
   switch (bytecode()) {
   case Bytecodes::_fast_agetfield:
-    __ load_heap_oop(r0, field);
+    do_oop_load(_masm, field, r0, IN_HEAP);
     __ verify_oop(r0);
     break;
   case Bytecodes::_fast_lgetfield:
@@ -3216,7 +3164,7 @@
     __ ldrw(r0, Address(r0, r1, Address::lsl(0)));
     break;
   case atos:
-    __ load_heap_oop(r0, Address(r0, r1, Address::lsl(0)));
+    do_oop_load(_masm, Address(r0, r1, Address::lsl(0)), r0, IN_HEAP);
     __ verify_oop(r0);
     break;
   case ftos:
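The precise flag that do_oop_store used to take is now implied by the decorators: IN_HEAP_ARRAY requests precise card marking, plain IN_HEAP does not. The distinction the removed switch encoded, sketched in C++ (dirty_card_for is a hypothetical helper):

    // Arrays must dirty the card of the element actually written ("precise");
    // for fields, dirtying the card of the object header is sufficient.
    void post_barrier(oop obj, HeapWord* slot, bool precise) {
      dirty_card_for(precise ? slot : (HeapWord*)obj);
    }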
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -203,7 +203,11 @@
     if (FLAG_IS_DEFAULT(UseSIMDForMemoryOps)) {
       FLAG_SET_DEFAULT(UseSIMDForMemoryOps, (_variant > 0));
     }
+    if (FLAG_IS_DEFAULT(UseSIMDForArrayEquals)) {
+      FLAG_SET_DEFAULT(UseSIMDForArrayEquals, false);
+    }
   }
+
   // ThunderX2
   if ((_cpu == CPU_CAVIUM && (_model == 0xAF)) ||
       (_cpu == CPU_BROADCOM && (_model == 0x516))) {
@@ -218,7 +222,25 @@
     }
   }
 
-  if (_cpu == CPU_ARM && (_model == 0xd03 || _model2 == 0xd03)) _features |= CPU_A53MAC;
+  // Cortex A53
+  if (_cpu == CPU_ARM && (_model == 0xd03 || _model2 == 0xd03)) {
+    _features |= CPU_A53MAC;
+    if (FLAG_IS_DEFAULT(UseSIMDForArrayEquals)) {
+      FLAG_SET_DEFAULT(UseSIMDForArrayEquals, false);
+    }
+  }
+
+  // Cortex A73
+  if (_cpu == CPU_ARM && (_model == 0xd09 || _model2 == 0xd09)) {
+    if (FLAG_IS_DEFAULT(SoftwarePrefetchHintDistance)) {
+      FLAG_SET_DEFAULT(SoftwarePrefetchHintDistance, -1);
+    }
+    // A73 is faster with a short loop that is easy for speculative execution
+    if (FLAG_IS_DEFAULT(UseSimpleArrayEquals)) {
+      FLAG_SET_DEFAULT(UseSimpleArrayEquals, true);
+    }
+  }
+
   if (_cpu == CPU_ARM && (_model == 0xd07 || _model2 == 0xd07)) _features |= CPU_STXR_PREFETCH;
   // If an olde style /proc/cpuinfo (cpu_lines == 1) then if _model is an A57 (0xd07)
   // we assume the worst and assume we could be on a big little system and have
--- a/src/hotspot/cpu/arm/assembler_arm.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/arm/assembler_arm.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -44,7 +44,6 @@
 #include "utilities/macros.hpp"
 #if INCLUDE_ALL_GCS
 #include "gc/g1/g1BarrierSet.hpp"
-#include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/heapRegion.hpp"
 #endif // INCLUDE_ALL_GCS
 
--- a/src/hotspot/cpu/arm/assembler_arm_32.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/arm/assembler_arm_32.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -44,7 +44,6 @@
 #include "utilities/macros.hpp"
 #if INCLUDE_ALL_GCS
 #include "gc/g1/g1BarrierSet.hpp"
-#include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/heapRegion.hpp"
 #endif // INCLUDE_ALL_GCS
 
--- a/src/hotspot/cpu/arm/assembler_arm_64.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/arm/assembler_arm_64.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -44,7 +44,6 @@
 #include "utilities/macros.hpp"
 #if INCLUDE_ALL_GCS
 #include "gc/g1/g1BarrierSet.hpp"
-#include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/heapRegion.hpp"
 #endif // INCLUDE_ALL_GCS
 
--- a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -533,9 +533,11 @@
     set_card(tmp, card_addr);
     __ branch_destination(L_already_dirty->label());
   } else {
+#if INCLUDE_ALL_GCS
     if (UseConcMarkSweepGC && CMSPrecleaningEnabled) {
       __ membar_storestore();
     }
+#endif
     set_card(tmp, card_addr);
   }
 }
--- a/src/hotspot/cpu/arm/c1_Runtime1_arm.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/arm/c1_Runtime1_arm.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -45,6 +45,7 @@
 #if INCLUDE_ALL_GCS
 #include "gc/g1/g1BarrierSet.hpp"
 #include "gc/g1/g1CardTable.hpp"
+#include "gc/g1/g1ThreadLocalData.hpp"
 #endif
 
 // Note: Rtemp usage is this file should not impact C2 and should be
@@ -540,7 +541,7 @@
 
         __ set_info("g1_pre_barrier_slow_id", dont_gc_arguments);
 
-        BarrierSet* bs = Universe::heap()->barrier_set();
+        BarrierSet* bs = BarrierSet::barrier_set();
         if (bs->kind() != BarrierSet::G1BarrierSet) {
           __ mov(R0, (int)id);
           __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), R0);
@@ -564,12 +565,9 @@
         const Register r_index_1    = R1;
         const Register r_buffer_2   = R2;
 
-        Address queue_active(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                               SATBMarkQueue::byte_offset_of_active()));
-        Address queue_index(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                              SATBMarkQueue::byte_offset_of_index()));
-        Address buffer(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                         SATBMarkQueue::byte_offset_of_buf()));
+        Address queue_active(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+        Address queue_index(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
+        Address buffer(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
 
         Label done;
         Label runtime;
@@ -620,7 +618,7 @@
 
         __ set_info("g1_post_barrier_slow_id", dont_gc_arguments);
 
-        BarrierSet* bs = Universe::heap()->barrier_set();
+        BarrierSet* bs = BarrierSet::barrier_set();
         if (bs->kind() != BarrierSet::G1BarrierSet) {
           __ mov(R0, (int)id);
           __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), R0);
@@ -632,10 +630,8 @@
         Label recheck;
         Label runtime;
 
-        Address queue_index(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
-                                              DirtyCardQueue::byte_offset_of_index()));
-        Address buffer(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
-                                         DirtyCardQueue::byte_offset_of_buf()));
+        Address queue_index(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
+        Address buffer(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
 
         AddressLiteral cardtable(ci_card_table_address_as<address>(), relocInfo::none);
 
--- a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -28,7 +28,6 @@
 #include "gc/g1/g1BarrierSetAssembler.hpp"
 #include "gc/g1/g1CardTable.hpp"
 #include "gc/g1/heapRegion.hpp"
-#include "gc/shared/collectedHeap.hpp"
 #include "interpreter/interp_masm.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/thread.hpp"
--- a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -35,6 +35,8 @@
                                   Register addr, Register count, int callee_saved_regs) {}
   virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
                                   Register addr, Register count, Register tmp) {}
+
+  virtual void barrier_stubs_init() {}
 };
 
 #endif // CPU_ARM_GC_SHARED_BARRIERSETASSEMBLER_ARM_HPP
--- a/src/hotspot/cpu/arm/gc/shared/cardTableBarrierSetAssembler_arm.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/arm/gc/shared/cardTableBarrierSetAssembler_arm.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -44,7 +44,7 @@
 void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                                     Register addr, Register count, Register tmp) {
   BLOCK_COMMENT("CardTablePostBarrier");
-  BarrierSet* bs = Universe::heap()->barrier_set();
+  BarrierSet* bs = BarrierSet::barrier_set();
   CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
   CardTable* ct = ctbs->card_table();
   assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
--- a/src/hotspot/cpu/arm/interp_masm_arm.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/arm/interp_masm_arm.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -45,7 +45,6 @@
 
 #if INCLUDE_ALL_GCS
 #include "gc/g1/g1BarrierSet.hpp"
-#include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/heapRegion.hpp"
 #endif // INCLUDE_ALL_GCS
 
@@ -411,7 +410,7 @@
 // Sets card_table_base register.
 void InterpreterMacroAssembler::store_check_part1(Register card_table_base) {
   // Check barrier set type (should be card table) and element size
-  BarrierSet* bs = Universe::heap()->barrier_set();
+  BarrierSet* bs = BarrierSet::barrier_set();
   assert(bs->kind() == BarrierSet::CardTableBarrierSet,
          "Wrong barrier set kind");
 
@@ -451,9 +450,11 @@
 #endif
 
   if (UseCondCardMark) {
+#if INCLUDE_ALL_GCS
     if (UseConcMarkSweepGC) {
       membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), noreg);
     }
+#endif
     Label already_dirty;
 
     ldrb(tmp, card_table_addr);
@@ -463,9 +464,11 @@
     bind(already_dirty);
 
   } else {
+#if INCLUDE_ALL_GCS
     if (UseConcMarkSweepGC && CMSPrecleaningEnabled) {
       membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore), noreg);
     }
+#endif
     set_card(card_table_base, card_table_addr, tmp);
   }
 }
@@ -474,7 +477,7 @@
 #ifdef AARCH64
   strb(ZR, card_table_addr);
 #else
-  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(Universe::heap()->barrier_set());
+  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
   CardTable* ct = ctbs->card_table();
   if ((((uintptr_t)ct->byte_map_base() & 0xff) == 0)) {
     // Card table is aligned so the lowest byte of the table address base is zero.
--- a/src/hotspot/cpu/arm/macroAssembler_arm.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -29,6 +29,7 @@
 #include "ci/ciEnv.hpp"
 #include "code/nativeInst.hpp"
 #include "compiler/disassembler.hpp"
+#include "gc/shared/barrierSet.hpp"
 #include "gc/shared/cardTable.hpp"
 #include "gc/shared/cardTableBarrierSet.hpp"
 #include "gc/shared/collectedHeap.inline.hpp"
@@ -46,7 +47,7 @@
 #if INCLUDE_ALL_GCS
 #include "gc/g1/g1BarrierSet.hpp"
 #include "gc/g1/g1CardTable.hpp"
-#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1ThreadLocalData.hpp"
 #include "gc/g1/heapRegion.hpp"
 #endif
 
@@ -2175,12 +2176,9 @@
     assert_different_registers(pre_val, tmp1, tmp2, noreg);
   }
 
-  Address in_progress(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                        SATBMarkQueue::byte_offset_of_active()));
-  Address index(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                  SATBMarkQueue::byte_offset_of_index()));
-  Address buffer(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                   SATBMarkQueue::byte_offset_of_buf()));
+  Address in_progress(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+  Address index(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
+  Address buffer(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
 
   // Is marking active?
   assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code");
@@ -2261,12 +2259,10 @@
                                            Register tmp2,
                                            Register tmp3) {
 
-  Address queue_index(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
-                                        DirtyCardQueue::byte_offset_of_index()));
-  Address buffer(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
-                                   DirtyCardQueue::byte_offset_of_buf()));
-
-  BarrierSet* bs = Universe::heap()->barrier_set();
+  Address queue_index(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
+  Address buffer(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
+
+  BarrierSet* bs = BarrierSet::barrier_set();
   CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
   CardTable* ct = ctbs->card_table();
   Label done;
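G1ThreadLocalData centralizes the per-thread queue offsets that used to be computed in two steps at every use site. A sketch of the accessor shape these hunks rely on (the class itself is introduced elsewhere in the changeset; the layout and the Thread::gc_data_offset() composition shown here are assumptions):

    // Hypothetical shape; the real class lives in g1ThreadLocalData.hpp.
    class G1ThreadLocalData {
      SATBMarkQueue  _satb_mark_queue;    // pre-barrier (marking) queue
      DirtyCardQueue _dirty_card_queue;   // post-barrier (refinement) queue
     public:
      static ByteSize satb_mark_queue_index_offset() {
        return Thread::gc_data_offset()
             + byte_offset_of(G1ThreadLocalData, _satb_mark_queue)
             + SATBMarkQueue::byte_offset_of_index();
      }
      // ...analogous accessors for active/buffer and the dirty card queue.
    };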
--- a/src/hotspot/cpu/ppc/assembler_ppc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/ppc/assembler_ppc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -39,7 +39,6 @@
 #include "utilities/macros.hpp"
 #if INCLUDE_ALL_GCS
 #include "gc/g1/g1BarrierSet.hpp"
-#include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/heapRegion.hpp"
 #endif // INCLUDE_ALL_GCS
 
--- a/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -620,7 +620,7 @@
 inline void Assembler::stqcx_(Register s, Register a, Register b)                             { emit_int32( STQCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
 
 // Instructions for adjusting thread priority
-// for simultaneous multithreading (SMT) on POWER5.
+// for simultaneous multithreading (SMT) on >= POWER5.
 inline void Assembler::smt_prio_very_low()    { Assembler::or_unchecked(R31, R31, R31); }
 inline void Assembler::smt_prio_low()         { Assembler::or_unchecked(R1,  R1,  R1); }
 inline void Assembler::smt_prio_medium_low()  { Assembler::or_unchecked(R6,  R6,  R6); }
@@ -628,11 +628,11 @@
 inline void Assembler::smt_prio_medium_high() { Assembler::or_unchecked(R5,  R5,  R5); }
 inline void Assembler::smt_prio_high()        { Assembler::or_unchecked(R3,  R3,  R3); }
 // >= Power7
-inline void Assembler::smt_yield()            { Assembler::or_unchecked(R27, R27, R27); }
-inline void Assembler::smt_mdoio()            { Assembler::or_unchecked(R29, R29, R29); }
-inline void Assembler::smt_mdoom()            { Assembler::or_unchecked(R30, R30, R30); }
-// >= Power8
-inline void Assembler::smt_miso()             { Assembler::or_unchecked(R26, R26, R26); }
+inline void Assembler::smt_yield()            { Assembler::or_unchecked(R27, R27, R27); } // never actually implemented
+inline void Assembler::smt_mdoio()            { Assembler::or_unchecked(R29, R29, R29); } // never actually implemented
+inline void Assembler::smt_mdoom()            { Assembler::or_unchecked(R30, R30, R30); } // never actually implemented
+// Power8
+inline void Assembler::smt_miso()             { Assembler::or_unchecked(R26, R26, R26); } // never actually implemented
 
 inline void Assembler::twi_0(Register a)      { twi_unchecked(0, a, 0);}
 
--- a/src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -45,6 +45,7 @@
 #if INCLUDE_ALL_GCS
 #include "gc/g1/g1BarrierSet.hpp"
 #include "gc/g1/g1CardTable.hpp"
+#include "gc/g1/g1ThreadLocalData.hpp"
 #endif
 
 // Implementation of StubAssembler
@@ -710,7 +711,7 @@
 #if INCLUDE_ALL_GCS
     case g1_pre_barrier_slow_id:
       {
-        BarrierSet* bs = Universe::heap()->barrier_set();
+        BarrierSet* bs = BarrierSet::barrier_set();
         if (bs->kind() != BarrierSet::G1BarrierSet) {
           goto unimplemented_entry;
         }
@@ -724,15 +725,9 @@
         Register tmp2 = R15;
 
         Label refill, restart, marking_not_active;
-        int satb_q_active_byte_offset =
-          in_bytes(JavaThread::satb_mark_queue_offset() +
-                   SATBMarkQueue::byte_offset_of_active());
-        int satb_q_index_byte_offset =
-          in_bytes(JavaThread::satb_mark_queue_offset() +
-                   SATBMarkQueue::byte_offset_of_index());
-        int satb_q_buf_byte_offset =
-          in_bytes(JavaThread::satb_mark_queue_offset() +
-                   SATBMarkQueue::byte_offset_of_buf());
+        int satb_q_active_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
+        int satb_q_index_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
+        int satb_q_buf_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
 
         // Spill
         __ std(tmp, -16, R1_SP);
@@ -787,7 +782,7 @@
 
   case g1_post_barrier_slow_id:
     {
-        BarrierSet* bs = Universe::heap()->barrier_set();
+        BarrierSet* bs = BarrierSet::barrier_set();
         if (bs->kind() != BarrierSet::G1BarrierSet) {
           goto unimplemented_entry;
         }
@@ -829,12 +824,8 @@
         __ li(tmp, G1CardTable::dirty_card_val());
         __ stb(tmp, 0, addr);
 
-        int dirty_card_q_index_byte_offset =
-          in_bytes(JavaThread::dirty_card_queue_offset() +
-                   DirtyCardQueue::byte_offset_of_index());
-        int dirty_card_q_buf_byte_offset =
-          in_bytes(JavaThread::dirty_card_queue_offset() +
-                   DirtyCardQueue::byte_offset_of_buf());
+        int dirty_card_q_index_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
+        int dirty_card_q_buf_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
 
         __ bind(restart);
 
--- a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -28,10 +28,10 @@
 #include "gc/g1/g1BarrierSet.hpp"
 #include "gc/g1/g1CardTable.hpp"
 #include "gc/g1/g1BarrierSetAssembler.hpp"
+#include "gc/g1/g1ThreadLocalData.hpp"
 #include "gc/g1/heapRegion.hpp"
-#include "gc/shared/collectedHeap.hpp"
-#include "runtime/thread.hpp"
 #include "interpreter/interp_masm.hpp"
+#include "runtime/sharedRuntime.hpp"
 
 #define __ masm->
 
@@ -49,10 +49,10 @@
 
     // Is marking active?
     if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
-      __ lwz(R0, in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_active()), R16_thread);
+      __ lwz(R0, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
     } else {
       guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
-      __ lbz(R0, in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_active()), R16_thread);
+      __ lbz(R0, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
     }
     __ cmpdi(CCR0, R0, 0);
     __ beq(CCR0, filtered);
@@ -98,3 +98,245 @@
   __ addi(R1_SP, R1_SP, frame_size); // pop_frame();
   __ restore_LR_CR(R0);
 }
+
+void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, DecoratorSet decorators, Register obj, RegisterOrConstant ind_or_offs, Register pre_val,
+                                                 Register tmp1, Register tmp2, bool needs_frame) {
+  bool not_null  = (decorators & OOP_NOT_NULL) != 0,
+       preloaded = obj == noreg;
+  Register nv_save = noreg;
+
+  if (preloaded) {
+    // We are not loading the previous value so make
+    // sure that we don't trash the value in pre_val
+    // with the code below.
+    assert_different_registers(pre_val, tmp1, tmp2);
+    if (pre_val->is_volatile()) {
+      nv_save = !tmp1->is_volatile() ? tmp1 : tmp2;
+      assert(!nv_save->is_volatile(), "need one nv temp register if pre_val lives in volatile register");
+    }
+  }
+
+  Label runtime, filtered;
+
+  // Is marking active?
+  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+    __ lwz(tmp1, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
+  } else {
+    guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+    __ lbz(tmp1, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
+  }
+  __ cmpdi(CCR0, tmp1, 0);
+  __ beq(CCR0, filtered);
+
+  // Do we need to load the previous value?
+  if (!preloaded) {
+    // Load the previous value...
+    if (UseCompressedOops) {
+      __ lwz(pre_val, ind_or_offs, obj);
+    } else {
+      __ ld(pre_val, ind_or_offs, obj);
+    }
+    // Previous value has been loaded into pre_val.
+  }
+  assert(pre_val != noreg, "must have a real register");
+
+  // Is the previous value null?
+  if (preloaded && not_null) {
+#ifdef ASSERT
+    __ cmpdi(CCR0, pre_val, 0);
+    __ asm_assert_ne("null oop not allowed (G1 pre)", 0x321); // Checked by caller.
+#endif
+  } else {
+    __ cmpdi(CCR0, pre_val, 0);
+    __ beq(CCR0, filtered);
+  }
+
+  if (!preloaded && UseCompressedOops) {
+    __ decode_heap_oop_not_null(pre_val);
+  }
+
+  // OK, it's not filtered, so we'll need to call enqueue. If pre_val lives
+  // in a volatile register, it is saved across the C call below (nv_save);
+  // otherwise a plain call suffices.
+
+  // Can we store original value in the thread's buffer?
+  // Is index == 0?
+  // (The index field is typed as size_t.)
+  const Register Rbuffer = tmp1, Rindex = tmp2;
+
+  __ ld(Rindex, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()), R16_thread);
+  __ cmpdi(CCR0, Rindex, 0);
+  __ beq(CCR0, runtime); // If index == 0, goto runtime.
+  __ ld(Rbuffer, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()), R16_thread);
+
+  __ addi(Rindex, Rindex, -wordSize); // Decrement index.
+  __ std(Rindex, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()), R16_thread);
+
+  // Record the previous value.
+  __ stdx(pre_val, Rbuffer, Rindex);
+  __ b(filtered);
+
+  __ bind(runtime);
+
+  // May need to preserve LR. Also needed if current frame is not compatible with C calling convention.
+  if (needs_frame) {
+    __ save_LR_CR(tmp1);
+    __ push_frame_reg_args(0, tmp2);
+  }
+
+  if (pre_val->is_volatile() && preloaded) { __ mr(nv_save, pre_val); } // Save pre_val across C call if it was preloaded.
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, R16_thread);
+  if (pre_val->is_volatile() && preloaded) { __ mr(pre_val, nv_save); } // restore
+
+  if (needs_frame) {
+    __ pop_frame();
+    __ restore_LR_CR(tmp1);
+  }
+
+  __ bind(filtered);
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, DecoratorSet decorators, Register store_addr, Register new_val,
+                                                  Register tmp1, Register tmp2, Register tmp3) {
+  bool not_null = (decorators & OOP_NOT_NULL) != 0;
+
+  Label runtime, filtered;
+  assert_different_registers(store_addr, new_val, tmp1, tmp2);
+
+  CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
+  assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code");
+
+  // Does store cross heap regions?
+  if (G1RSBarrierRegionFilter) {
+    __ xorr(tmp1, store_addr, new_val);
+    __ srdi_(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes);
+    __ beq(CCR0, filtered);
+  }
+
+  // Crosses regions, storing NULL?
+  if (not_null) {
+#ifdef ASSERT
+    __ cmpdi(CCR0, new_val, 0);
+    __ asm_assert_ne("null oop not allowed (G1 post)", 0x322); // Checked by caller.
+#endif
+  } else {
+    __ cmpdi(CCR0, new_val, 0);
+    __ beq(CCR0, filtered);
+  }
+
+  // Storing region crossing non-NULL, is card already dirty?
+  const Register Rcard_addr = tmp1;
+  Register Rbase = tmp2;
+  __ load_const_optimized(Rbase, (address)(ct->card_table()->byte_map_base()), /*temp*/ tmp3);
+
+  __ srdi(Rcard_addr, store_addr, CardTable::card_shift);
+
+  // Get the address of the card.
+  __ lbzx(/*card value*/ tmp3, Rbase, Rcard_addr);
+  __ cmpwi(CCR0, tmp3, (int)G1CardTable::g1_young_card_val());
+  __ beq(CCR0, filtered);
+
+  __ membar(Assembler::StoreLoad);
+  __ lbzx(/*card value*/ tmp3, Rbase, Rcard_addr);  // Reload after membar.
+  __ cmpwi(CCR0, tmp3 /* card value */, (int)G1CardTable::dirty_card_val());
+  __ beq(CCR0, filtered);
+
+  // Storing a region crossing, non-NULL oop, card is clean.
+  // Dirty card and log.
+  __ li(tmp3, (int)G1CardTable::dirty_card_val());
+  //release(); // G1: oops are allowed to get visible after dirty marking.
+  __ stbx(tmp3, Rbase, Rcard_addr);
+
+  __ add(Rcard_addr, Rbase, Rcard_addr); // This is the address which needs to get enqueued.
+  Rbase = noreg; // end of lifetime
+
+  const Register Rqueue_index = tmp2,
+                 Rqueue_buf   = tmp3;
+  __ ld(Rqueue_index, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()), R16_thread);
+  __ cmpdi(CCR0, Rqueue_index, 0);
+  __ beq(CCR0, runtime); // If index == 0, goto runtime.
+  __ ld(Rqueue_buf, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()), R16_thread);
+
+  __ addi(Rqueue_index, Rqueue_index, -wordSize); // decrement index
+  __ std(Rqueue_index, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()), R16_thread);
+
+  __ stdx(Rcard_addr, Rqueue_buf, Rqueue_index); // store card
+  __ b(filtered);
+
+  __ bind(runtime);
+
+  // Save the live input values.
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), Rcard_addr, R16_thread);
+
+  __ bind(filtered);
+}
+
+void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                       Register base, RegisterOrConstant ind_or_offs, Register val,
+                                       Register tmp1, Register tmp2, Register tmp3, bool needs_frame) {
+  bool on_array = (decorators & IN_HEAP_ARRAY) != 0;
+  bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
+  bool precise = on_array || on_anonymous;
+  // Load and record the previous value.
+  g1_write_barrier_pre(masm, decorators, base, ind_or_offs,
+                       tmp1, tmp2, tmp3, needs_frame);
+
+  BarrierSetAssembler::store_at(masm, decorators, type, base, ind_or_offs, val, tmp1, tmp2, tmp3, needs_frame);
+
+  // No need for post barrier if storing NULL
+  if (val != noreg) {
+    if (precise) {
+      if (ind_or_offs.is_constant()) {
+        __ add_const_optimized(base, base, ind_or_offs.as_constant(), tmp1);
+      } else {
+        __ add(base, ind_or_offs.as_register(), base);
+      }
+    }
+    g1_write_barrier_post(masm, decorators, base, val, tmp1, tmp2, tmp3);
+  }
+}
+
+void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                    Register base, RegisterOrConstant ind_or_offs, Register dst,
+                                    Register tmp1, Register tmp2, bool needs_frame, Label *is_null) {
+  bool on_oop = type == T_OBJECT || type == T_ARRAY;
+  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
+  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
+  bool on_reference = on_weak || on_phantom;
+  Label done;
+  if (on_oop && on_reference && is_null == NULL) { is_null = &done; }
+  // Load the value of the referent field.
+  ModRefBarrierSetAssembler::load_at(masm, decorators, type, base, ind_or_offs, dst, tmp1, tmp2, needs_frame, is_null);
+  if (on_oop && on_reference) {
+    // Generate the G1 pre-barrier code to log the value of
+    // the referent field in an SATB buffer. Note that with
+    // these parameters the pre-barrier does not generate
+    // the load of the previous value.
+    // We only reach here if the value is not null.
+    g1_write_barrier_pre(masm, decorators | OOP_NOT_NULL, noreg /* obj */, (intptr_t)0, dst /* pre_val */,
+                         tmp1, tmp2, needs_frame);
+  }
+  __ bind(done);
+}
+
+void G1BarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2, bool needs_frame) {
+  Label done, not_weak;
+  __ cmpdi(CCR0, value, 0);
+  __ beq(CCR0, done);         // Use NULL as-is.
+
+  __ clrrdi(tmp1, value, JNIHandles::weak_tag_size);
+  __ andi_(tmp2, value, JNIHandles::weak_tag_mask);
+  __ ld(value, 0, tmp1);      // Resolve (untagged) jobject.
+
+  __ beq(CCR0, not_weak);     // Test for jweak tag.
+  __ verify_oop(value);
+  g1_write_barrier_pre(masm, IN_ROOT | ON_PHANTOM_OOP_REF,
+                       noreg, noreg, value,
+                       tmp1, tmp2, needs_frame);
+  __ bind(not_weak);
+  __ verify_oop(value);
+  __ bind(done);
+}
+
+#undef __
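Taken together, the two barriers above implement G1's remembered-set protocol. Stripped of register allocation, the logic is roughly the following C++ sketch (struct layouts, helper names, and constants are illustrative, not HotSpot API):

    #include <cstdint>
    #include <cstddef>
    struct SATBQueue { bool active; size_t index; void** buf; };
    struct CardQueue { size_t index; void** buf; };
    void runtime_enqueue(void* p);                    // hypothetical slow path
    void storeload_fence();                           // hypothetical membar
    extern const int LogOfHRGrainBytes, card_shift;
    extern const signed char g1_young_card_val, dirty_card_val;

    // Pre-barrier (SATB): log the value being overwritten while marking is on.
    void g1_pre_barrier(void* pre_val, SATBQueue* q) {
      if (!q->active || pre_val == nullptr) return;   // filtered
      if (q->index == 0) { runtime_enqueue(pre_val); return; }  // buffer full
      q->buf[--q->index] = pre_val;                   // fast path: push value
    }

    // Post-barrier: dirty and enqueue the card for region-crossing stores.
    void g1_post_barrier(char* store_addr, char* new_val,
                         volatile signed char* card_base, CardQueue* q) {
      uintptr_t x = (uintptr_t)store_addr ^ (uintptr_t)new_val;
      if ((x >> LogOfHRGrainBytes) == 0) return;      // same region: filtered
      if (new_val == nullptr) return;                 // NULL store: filtered
      volatile signed char* card =
          card_base + ((uintptr_t)store_addr >> card_shift);
      if (*card == g1_young_card_val) return;         // young card: filtered
      storeload_fence();                              // then re-check the card
      if (*card == dirty_card_val) return;            // already dirty
      *card = dirty_card_val;                         // dirty the card
      if (q->index == 0) { runtime_enqueue((void*)card); return; }
      q->buf[--q->index] = (void*)card;               // enqueue card address
    }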
--- a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -34,6 +34,22 @@
   virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register from, Register to, Register count,
                                                Register preserve1, Register preserve2);
   virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register preserve);
+
+  void g1_write_barrier_pre(MacroAssembler* masm, DecoratorSet decorators, Register obj, RegisterOrConstant ind_or_offs, Register pre_val,
+                            Register tmp1, Register tmp2, bool needs_frame);
+  void g1_write_barrier_post(MacroAssembler* masm, DecoratorSet decorators, Register store_addr, Register new_val,
+                             Register tmp1, Register tmp2, Register tmp3);
+
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Register base, RegisterOrConstant ind_or_offs, Register val,
+                            Register tmp1, Register tmp2, Register tmp3, bool needs_frame);
+
+public:
+  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                       Register base, RegisterOrConstant ind_or_offs, Register dst,
+                       Register tmp1, Register tmp2, bool needs_frame, Label *is_null = NULL);
+
+  virtual void resolve_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2, bool needs_frame);
 };
 
 #endif // CPU_PPC_GC_G1_G1BARRIERSETASSEMBLER_PPC_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "interpreter/interp_masm.hpp"
+
+#define __ masm->
+
+void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                   Register base, RegisterOrConstant ind_or_offs, Register val,
+                                   Register tmp1, Register tmp2, Register tmp3, bool needs_frame) {
+  bool on_heap  = (decorators & IN_HEAP) != 0;
+  bool on_root  = (decorators & IN_ROOT) != 0;
+  bool not_null = (decorators & OOP_NOT_NULL) != 0;
+  assert(on_heap || on_root, "where?");
+  assert_different_registers(base, val, tmp1, tmp2, R0);
+
+  switch (type) {
+  case T_ARRAY:
+  case T_OBJECT: {
+    if (UseCompressedOops && on_heap) {
+      Register co = tmp1;
+      if (val == noreg) {
+        __ li(co, 0);
+      } else {
+        co = not_null ? __ encode_heap_oop_not_null(tmp1, val) : __ encode_heap_oop(tmp1, val);
+      }
+      __ stw(co, ind_or_offs, base, tmp2);
+    } else {
+      if (val == noreg) {
+        val = tmp1;
+        __ li(val, 0);
+      }
+      __ std(val, ind_or_offs, base, tmp2);
+    }
+    break;
+  }
+  default: Unimplemented();
+  }
+}
+
+void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                  Register base, RegisterOrConstant ind_or_offs, Register dst,
+                                  Register tmp1, Register tmp2, bool needs_frame, Label *is_null) {
+  bool on_heap = (decorators & IN_HEAP) != 0;
+  bool on_root = (decorators & IN_ROOT) != 0;
+  assert(on_heap || on_root, "where?");
+  assert_different_registers(ind_or_offs.register_or_noreg(), dst, R0);
+
+  switch (type) {
+  case T_ARRAY:
+  case T_OBJECT: {
+    if (UseCompressedOops && on_heap) {
+      __ lwz(dst, ind_or_offs, base);
+      if (is_null) {
+        __ cmpwi(CCR0, dst, 0);
+        __ beq(CCR0, *is_null);
+        __ decode_heap_oop_not_null(dst);
+      } else {
+        __ decode_heap_oop(dst);
+      }
+    } else {
+      __ ld(dst, ind_or_offs, base);
+      if (is_null) {
+        __ cmpdi(CCR0, dst, 0);
+        __ beq(CCR0, *is_null);
+      }
+    }
+    break;
+  }
+  default: Unimplemented();
+  }
+}
+
+void BarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Register value,
+                                          Register tmp1, Register tmp2, bool needs_frame) {
+  Label done;
+  __ cmpdi(CCR0, value, 0);
+  __ beq(CCR0, done);         // Use NULL as-is.
+
+  __ clrrdi(tmp1, value, JNIHandles::weak_tag_size);
+  __ ld(value, 0, tmp1);      // Resolve (untagged) jobject.
+
+  __ verify_oop(value);
+  __ bind(done);
+}
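The stw/std split in store_at (and lwz/ld in load_at) is the compressed-oops distinction: on-heap oops may be stored as 32-bit values. The arithmetic the encode/decode calls assume, as a sketch in base+shift mode only (HotSpot also selects zero-base and unscaled modes at VM startup, not shown):

    #include <cstdint>
    uint32_t encode_oop(uintptr_t addr, uintptr_t heap_base, int shift) {
      return (uint32_t)((addr - heap_base) >> shift);   // compress to 32 bits
    }
    uintptr_t decode_oop(uint32_t narrow, uintptr_t heap_base, int shift) {
      return heap_base + ((uintptr_t)narrow << shift);  // expand back to 64
    }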
--- a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -38,6 +38,18 @@
                                   Register src, Register dst, Register count, Register preserve1, Register preserve2) {}
   virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Register dst, Register count, Register preserve) {}
+
+  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                        Register base, RegisterOrConstant ind_or_offs, Register val,
+                        Register tmp1, Register tmp2, Register tmp3, bool needs_frame);
+
+  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                       Register base, RegisterOrConstant ind_or_offs, Register dst,
+                       Register tmp1, Register tmp2, bool needs_frame, Label *is_null = NULL);
+
+  virtual void resolve_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2, bool needs_frame);
+
+  virtual void barrier_stubs_init() {}
 };
 
 #endif // CPU_PPC_GC_SHARED_BARRIERSETASSEMBLER_PPC_HPP
--- a/src/hotspot/cpu/ppc/gc/shared/cardTableBarrierSetAssembler_ppc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/ppc/gc/shared/cardTableBarrierSetAssembler_ppc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -29,7 +29,6 @@
 #include "gc/shared/cardTable.hpp"
 #include "gc/shared/cardTableBarrierSet.hpp"
 #include "gc/shared/cardTableBarrierSetAssembler.hpp"
-#include "gc/shared/collectedHeap.hpp"
 #include "interpreter/interp_masm.hpp"
 
 #define __ masm->
@@ -44,7 +43,7 @@
 
 void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr,
                                                                     Register count, Register preserve) {
-  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(Universe::heap()->barrier_set());
+  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
   CardTable* ct = ctbs->card_table();
   assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
   assert_different_registers(addr, count, R0);
@@ -72,3 +71,41 @@
   __ bdnz(Lstore_loop);
   __ bind(Lskip_loop);
 }
+
+void CardTableBarrierSetAssembler::card_table_write(MacroAssembler* masm,
+                                                    jbyte* byte_map_base,
+                                                    Register tmp, Register obj) {
+  assert_different_registers(obj, tmp, R0);
+  __ load_const_optimized(tmp, (address)byte_map_base, R0);
+  __ srdi(obj, obj, CardTable::card_shift);
+  __ li(R0, CardTable::dirty_card_val());
+  if (UseConcMarkSweepGC) { __ membar(Assembler::StoreStore); }
+  __ stbx(R0, tmp, obj);
+}
+
+void CardTableBarrierSetAssembler::card_write_barrier_post(MacroAssembler* masm, Register store_addr, Register tmp) {
+  CardTableBarrierSet* bs = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
+  card_table_write(masm, bs->card_table()->byte_map_base(), tmp, store_addr);
+}
+
+void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                                Register base, RegisterOrConstant ind_or_offs, Register val,
+                                                Register tmp1, Register tmp2, Register tmp3, bool needs_frame) {
+  bool on_array = (decorators & IN_HEAP_ARRAY) != 0;
+  bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
+  bool precise = on_array || on_anonymous;
+
+  BarrierSetAssembler::store_at(masm, decorators, type, base, ind_or_offs, val, tmp1, tmp2, tmp3, needs_frame);
+
+  // No need for post barrier if storing NULL
+  if (val != noreg) {
+    if (precise) {
+      if (ind_or_offs.is_constant()) {
+        __ add_const_optimized(base, base, ind_or_offs.as_constant(), tmp1);
+      } else {
+        __ add(base, ind_or_offs.as_register(), base);
+      }
+    }
+    card_write_barrier_post(masm, base, tmp1);
+  }
+}
--- a/src/hotspot/cpu/ppc/gc/shared/cardTableBarrierSetAssembler_ppc.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/ppc/gc/shared/cardTableBarrierSetAssembler_ppc.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -31,8 +31,16 @@
 
 class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler {
 protected:
-  virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr,
-                                                Register count, Register preserve);
+  virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
+                                                Register addr, Register count, Register preserve);
+
+  void card_table_write(MacroAssembler* masm, jbyte* byte_map_base, Register tmp, Register obj);
+
+  void card_write_barrier_post(MacroAssembler* masm, Register store_addr, Register tmp);
+
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Register base, RegisterOrConstant ind_or_offs, Register val,
+                            Register tmp1, Register tmp2, Register tmp3, bool needs_frame);
 };
 
 #endif // CPU_PPC_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_PPC_HPP
--- a/src/hotspot/cpu/ppc/gc/shared/modRefBarrierSetAssembler_ppc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/ppc/gc/shared/modRefBarrierSetAssembler_ppc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -55,3 +55,19 @@
     }
   }
 }
+
+void ModRefBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                             Register base, RegisterOrConstant ind_or_offs, Register val,
+                                             Register tmp1, Register tmp2, Register tmp3, bool needs_frame) {
+  BarrierSetAssembler::store_at(masm, decorators, type, base, ind_or_offs, val, tmp1, tmp2, tmp3, needs_frame);
+}
+
+void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                         Register base, RegisterOrConstant ind_or_offs, Register val,
+                                         Register tmp1, Register tmp2, Register tmp3, bool needs_frame) {
+  if (type == T_OBJECT || type == T_ARRAY) {
+    oop_store_at(masm, decorators, type, base, ind_or_offs, val, tmp1, tmp2, tmp3, needs_frame);
+  } else {
+    BarrierSetAssembler::store_at(masm, decorators, type, base, ind_or_offs, val, tmp1, tmp2, tmp3, needs_frame);
+  }
+}
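
The two functions above are the template-method core of ModRefBarrierSetAssembler: store_at filters by BasicType, and only oop stores reach oop_store_at, which GC-specific subclasses override. A stripped-down sketch of that shape (stand-in types and empty bodies, not HotSpot code):

    enum BasicType { T_INT, T_OBJECT, T_ARRAY };   // stand-in subset

    struct BSA {                                   // BarrierSetAssembler role
      virtual void store_at(BasicType t) { /* plain machine store */ }
      virtual ~BSA() {}
    };

    struct ModRefBSA : BSA {                       // ModRefBarrierSetAssembler role
      virtual void oop_store_at(BasicType t) { /* pre/post barrier + store */ }
      virtual void store_at(BasicType t) {
        if (t == T_OBJECT || t == T_ARRAY) {
          oop_store_at(t);                         // oops get barriers
        } else {
          BSA::store_at(t);                        // primitives do not
        }
      }
    };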
--- a/src/hotspot/cpu/ppc/gc/shared/modRefBarrierSetAssembler_ppc.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/ppc/gc/shared/modRefBarrierSetAssembler_ppc.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -29,17 +29,28 @@
 #include "asm/macroAssembler.hpp"
 #include "gc/shared/barrierSetAssembler.hpp"
 
+// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other
+// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected
+// accessors, which are overridden in the concrete BarrierSetAssembler.
+
 class ModRefBarrierSetAssembler: public BarrierSetAssembler {
 protected:
   virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register from, Register to, Register count,
                                                Register preserve1, Register preserve2) {}
   virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register preserve) {}
 
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Register base, RegisterOrConstant ind_or_offs, Register val,
+                            Register tmp1, Register tmp2, Register tmp3, bool needs_frame);
 public:
   virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Register src, Register dst, Register count, Register preserve1, Register preserve2);
   virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Register dst, Register count, Register preserve);
+
+  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                        Register base, RegisterOrConstant ind_or_offs, Register val,
+                        Register tmp1, Register tmp2, Register tmp3, bool needs_frame);
 };
 
 #endif // CPU_PPC_GC_SHARED_MODREFBARRIERSETASSEMBLER_PPC_HPP
--- a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -26,6 +26,8 @@
 
 #include "precompiled.hpp"
 #include "asm/macroAssembler.inline.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "interp_masm_ppc.hpp"
 #include "interpreter/interpreterRuntime.hpp"
 #include "prims/jvmtiThreadState.hpp"
@@ -492,9 +494,8 @@
 #endif
   // Add in the index.
   add(result, tmp, result);
-  load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result, is_null);
-  // The resulting oop is null if the reference is not yet resolved.
-  // It is Universe::the_null_sentinel() if the reference resolved to NULL via condy.
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->load_at(this, IN_HEAP, T_OBJECT, result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result, tmp, R0, false, is_null);
 }
 
 // load cpool->resolved_klass_at(index)
@@ -2446,4 +2447,3 @@
 
   // Dtrace support not implemented.
 }
-
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -26,9 +26,9 @@
 #include "precompiled.hpp"
 #include "asm/macroAssembler.inline.hpp"
 #include "compiler/disassembler.hpp"
-#include "gc/shared/cardTable.hpp"
-#include "gc/shared/cardTableBarrierSet.hpp"
 #include "gc/shared/collectedHeap.inline.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "interpreter/interpreter.hpp"
 #include "memory/resourceArea.hpp"
 #include "nativeInst_ppc.hpp"
@@ -43,12 +43,6 @@
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "utilities/macros.hpp"
-#if INCLUDE_ALL_GCS
-#include "gc/g1/g1BarrierSet.hpp"
-#include "gc/g1/g1CardTable.hpp"
-#include "gc/g1/g1CollectedHeap.inline.hpp"
-#include "gc/g1/heapRegion.hpp"
-#endif // INCLUDE_ALL_GCS
 #ifdef COMPILER2
 #include "opto/intrinsicnode.hpp"
 #endif
@@ -2579,7 +2573,6 @@
   if (checkRetry) { bind(*checkRetry); }
   addic_(retry_count_Reg, retry_count_Reg, -1);
   blt(CCR0, doneRetry);
-  smt_yield(); // Can't use wait(). No permission (SIGILL).
   b(retryLabel);
   bind(doneRetry);
 }
@@ -2590,7 +2583,7 @@
 // output: retry_count_Reg decremented by 1
 // CTR is killed
 void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register owner_addr_Reg, Label& retryLabel) {
-  Label SpinLoop, doneRetry;
+  Label SpinLoop, doneRetry, doRetry;
   addic_(retry_count_Reg, retry_count_Reg, -1);
   blt(CCR0, doneRetry);
 
@@ -2599,16 +2592,26 @@
     mtctr(R0);
   }
 
+  // Low thread priority.
+  smt_prio_low();
   bind(SpinLoop);
-  smt_yield(); // Can't use waitrsv(). No permission (SIGILL).
 
   if (RTMSpinLoopCount > 1) {
-    bdz(retryLabel);
+    bdz(doRetry);
     ld(R0, 0, owner_addr_Reg);
     cmpdi(CCR0, R0, 0);
     bne(CCR0, SpinLoop);
   }
 
+  bind(doRetry);
+
+  // Restore thread priority to default in userspace.
+#ifdef LINUX
+  smt_prio_medium_low();
+#else
+  smt_prio_medium();
+#endif
+
   b(retryLabel);
 
   bind(doneRetry);
@@ -3031,213 +3034,11 @@
   bne(CCR0, slow_path);
 }
 
-
-// GC barrier helper macros
-
-// Write the card table byte if needed.
-void MacroAssembler::card_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp) {
-  CardTableBarrierSet* bs =
-    barrier_set_cast<CardTableBarrierSet>(Universe::heap()->barrier_set());
-  assert(bs->kind() == BarrierSet::CardTableBarrierSet, "wrong barrier");
-  CardTable* ct = bs->card_table();
-#ifdef ASSERT
-  cmpdi(CCR0, Rnew_val, 0);
-  asm_assert_ne("null oop not allowed", 0x321);
-#endif
-  card_table_write(ct->byte_map_base(), Rtmp, Rstore_addr);
+void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2, bool needs_frame) {
+  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->resolve_jobject(this, value, tmp1, tmp2, needs_frame);
 }
 
-// Write the card table byte.
-void MacroAssembler::card_table_write(jbyte* byte_map_base, Register Rtmp, Register Robj) {
-  assert_different_registers(Robj, Rtmp, R0);
-  load_const_optimized(Rtmp, (address)byte_map_base, R0);
-  srdi(Robj, Robj, CardTable::card_shift);
-  li(R0, 0); // dirty
-  if (UseConcMarkSweepGC) membar(Assembler::StoreStore);
-  stbx(R0, Rtmp, Robj);
-}
-
-// Kills R31 if value is a volatile register.
-void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2, bool needs_frame) {
-  Label done;
-  cmpdi(CCR0, value, 0);
-  beq(CCR0, done);         // Use NULL as-is.
-
-  clrrdi(tmp1, value, JNIHandles::weak_tag_size);
-#if INCLUDE_ALL_GCS
-  if (UseG1GC) { andi_(tmp2, value, JNIHandles::weak_tag_mask); }
-#endif
-  ld(value, 0, tmp1);      // Resolve (untagged) jobject.
-
-#if INCLUDE_ALL_GCS
-  if (UseG1GC) {
-    Label not_weak;
-    beq(CCR0, not_weak);   // Test for jweak tag.
-    verify_oop(value);
-    g1_write_barrier_pre(noreg, // obj
-                         noreg, // offset
-                         value, // pre_val
-                         tmp1, tmp2, needs_frame);
-    bind(not_weak);
-  }
-#endif // INCLUDE_ALL_GCS
-  verify_oop(value);
-  bind(done);
-}
-
-#if INCLUDE_ALL_GCS
-// General G1 pre-barrier generator.
-// Goal: record the previous value if it is not null.
-void MacroAssembler::g1_write_barrier_pre(Register Robj, RegisterOrConstant offset, Register Rpre_val,
-                                          Register Rtmp1, Register Rtmp2, bool needs_frame) {
-  Label runtime, filtered;
-
-  // Is marking active?
-  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
-    lwz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_active()), R16_thread);
-  } else {
-    guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
-    lbz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_active()), R16_thread);
-  }
-  cmpdi(CCR0, Rtmp1, 0);
-  beq(CCR0, filtered);
-
-  // Do we need to load the previous value?
-  if (Robj != noreg) {
-    // Load the previous value...
-    if (UseCompressedOops) {
-      lwz(Rpre_val, offset, Robj);
-    } else {
-      ld(Rpre_val, offset, Robj);
-    }
-    // Previous value has been loaded into Rpre_val.
-  }
-  assert(Rpre_val != noreg, "must have a real register");
-
-  // Is the previous value null?
-  cmpdi(CCR0, Rpre_val, 0);
-  beq(CCR0, filtered);
-
-  if (Robj != noreg && UseCompressedOops) {
-    decode_heap_oop_not_null(Rpre_val);
-  }
-
-  // OK, it's not filtered, so we'll need to call enqueue. In the normal
-  // case, pre_val will be a scratch G-reg, but there are some cases in
-  // which it's an O-reg. In the first case, do a normal call. In the
-  // latter, do a save here and call the frameless version.
-
-  // Can we store original value in the thread's buffer?
-  // Is index == 0?
-  // (The index field is typed as size_t.)
-  const Register Rbuffer = Rtmp1, Rindex = Rtmp2;
-
-  ld(Rindex, in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_index()), R16_thread);
-  cmpdi(CCR0, Rindex, 0);
-  beq(CCR0, runtime); // If index == 0, goto runtime.
-  ld(Rbuffer, in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_buf()), R16_thread);
-
-  addi(Rindex, Rindex, -wordSize); // Decrement index.
-  std(Rindex, in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_index()), R16_thread);
-
-  // Record the previous value.
-  stdx(Rpre_val, Rbuffer, Rindex);
-  b(filtered);
-
-  bind(runtime);
-
-  // May need to preserve LR. Also needed if current frame is not compatible with C calling convention.
-  if (needs_frame) {
-    save_LR_CR(Rtmp1);
-    push_frame_reg_args(0, Rtmp2);
-  }
-
-  if (Rpre_val->is_volatile() && Robj == noreg) mr(R31, Rpre_val); // Save pre_val across C call if it was preloaded.
-  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), Rpre_val, R16_thread);
-  if (Rpre_val->is_volatile() && Robj == noreg) mr(Rpre_val, R31); // restore
-
-  if (needs_frame) {
-    pop_frame();
-    restore_LR_CR(Rtmp1);
-  }
-
-  bind(filtered);
-}
-
-// General G1 post-barrier generator
-// Store cross-region card.
-void MacroAssembler::g1_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp1, Register Rtmp2, Register Rtmp3, Label *filtered_ext) {
-  Label runtime, filtered_int;
-  Label& filtered = (filtered_ext != NULL) ? *filtered_ext : filtered_int;
-  assert_different_registers(Rstore_addr, Rnew_val, Rtmp1, Rtmp2);
-
-  G1BarrierSet* bs =
-    barrier_set_cast<G1BarrierSet>(Universe::heap()->barrier_set());
-  CardTable* ct = bs->card_table();
-
-  // Does store cross heap regions?
-  if (G1RSBarrierRegionFilter) {
-    xorr(Rtmp1, Rstore_addr, Rnew_val);
-    srdi_(Rtmp1, Rtmp1, HeapRegion::LogOfHRGrainBytes);
-    beq(CCR0, filtered);
-  }
-
-  // Crosses regions, storing NULL?
-#ifdef ASSERT
-  cmpdi(CCR0, Rnew_val, 0);
-  asm_assert_ne("null oop not allowed (G1)", 0x322); // Checked by caller on PPC64, so following branch is obsolete:
-  //beq(CCR0, filtered);
-#endif
-
-  // Storing region crossing non-NULL, is card already dirty?
-  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
-  const Register Rcard_addr = Rtmp1;
-  Register Rbase = Rtmp2;
-  load_const_optimized(Rbase, (address)ct->byte_map_base(), /*temp*/ Rtmp3);
-
-  srdi(Rcard_addr, Rstore_addr, CardTable::card_shift);
-
-  // Get the address of the card.
-  lbzx(/*card value*/ Rtmp3, Rbase, Rcard_addr);
-  cmpwi(CCR0, Rtmp3, (int)G1CardTable::g1_young_card_val());
-  beq(CCR0, filtered);
-
-  membar(Assembler::StoreLoad);
-  lbzx(/*card value*/ Rtmp3, Rbase, Rcard_addr);  // Reload after membar.
-  cmpwi(CCR0, Rtmp3 /* card value */, CardTable::dirty_card_val());
-  beq(CCR0, filtered);
-
-  // Storing a region crossing, non-NULL oop, card is clean.
-  // Dirty card and log.
-  li(Rtmp3, CardTable::dirty_card_val());
-  //release(); // G1: oops are allowed to get visible after dirty marking.
-  stbx(Rtmp3, Rbase, Rcard_addr);
-
-  add(Rcard_addr, Rbase, Rcard_addr); // This is the address which needs to get enqueued.
-  Rbase = noreg; // end of lifetime
-
-  const Register Rqueue_index = Rtmp2,
-                 Rqueue_buf   = Rtmp3;
-  ld(Rqueue_index, in_bytes(JavaThread::dirty_card_queue_offset() + DirtyCardQueue::byte_offset_of_index()), R16_thread);
-  cmpdi(CCR0, Rqueue_index, 0);
-  beq(CCR0, runtime); // index == 0 then jump to runtime
-  ld(Rqueue_buf, in_bytes(JavaThread::dirty_card_queue_offset() + DirtyCardQueue::byte_offset_of_buf()), R16_thread);
-
-  addi(Rqueue_index, Rqueue_index, -wordSize); // decrement index
-  std(Rqueue_index, in_bytes(JavaThread::dirty_card_queue_offset() + DirtyCardQueue::byte_offset_of_index()), R16_thread);
-
-  stdx(Rcard_addr, Rqueue_buf, Rqueue_index); // store card
-  b(filtered);
-
-  bind(runtime);
-
-  // Save the live input values.
-  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), Rcard_addr, R16_thread);
-
-  bind(filtered_int);
-}
-#endif // INCLUDE_ALL_GCS
-
 // Values for last_Java_pc, and last_Java_sp must comply to the rules
 // in frame_ppc.hpp.
 void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc) {
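
The resolve_jobject and G1 barrier bodies deleted above live on behind the BarrierSetAssembler interface. For reference, the jobject convention the removed code implemented: bit 0 of a jobject (the weak tag) marks a weak JNI handle, and a weak referent loaded this way must be reported to a concurrent collector. A plain C++ model of that control flow (the typedef and mask are assumptions for the sketch):

    #include <cstdint>

    typedef uintptr_t oop;                     // stand-in for HotSpot's oop
    static const uintptr_t kWeakTagMask = 1;   // JNIHandles::weak_tag_mask analogue

    inline oop resolve_jobject_model(uintptr_t handle) {
      if (handle == 0) return 0;               // NULL jobject is used as-is
      bool is_weak = (handle & kWeakTagMask) != 0;
      oop value = *reinterpret_cast<oop*>(handle & ~kWeakTagMask);
      if (is_weak && value != 0) {
        // A concurrent GC must keep the loaded referent alive (G1: SATB
        // pre-barrier); the refactor delegates this to the GC's assembler.
      }
      return value;
    }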
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -650,21 +650,8 @@
   // Check if safepoint requested and if so branch
   void safepoint_poll(Label& slow_path, Register temp_reg);
 
-  // GC barrier support.
-  void card_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp);
-  void card_table_write(jbyte* byte_map_base, Register Rtmp, Register Robj);
-
   void resolve_jobject(Register value, Register tmp1, Register tmp2, bool needs_frame);
 
-#if INCLUDE_ALL_GCS
-  // General G1 pre-barrier generator.
-  void g1_write_barrier_pre(Register Robj, RegisterOrConstant offset, Register Rpre_val,
-                            Register Rtmp1, Register Rtmp2, bool needs_frame = false);
-  // General G1 post-barrier generator
-  void g1_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp1,
-                             Register Rtmp2, Register Rtmp3, Label *filtered_ext = NULL);
-#endif
-
   // Support for managing the JavaThread pointer (i.e.; the reference to
   // thread-local information).
 
--- a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -2495,7 +2495,7 @@
   // --------------------------------------------------------------------------
 
   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
-    __ resolve_jobject(R3_RET, r_temp_1, r_temp_2, /* needs_frame */ false); // kills R31
+    __ resolve_jobject(R3_RET, r_temp_1, r_temp_2, /* needs_frame */ false);
   }
 
   if (CheckJNICalls) {
--- a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -2032,7 +2032,7 @@
       decorators |= ARRAYCOPY_ALIGNED;
     }
 
-    BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, T_OBJECT, R3_ARG1, R4_ARG2, R5_ARG3, noreg, noreg);
 
     if (UseCompressedOops) {
@@ -2071,7 +2071,7 @@
       decorators |= ARRAYCOPY_ALIGNED;
     }
 
-    BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, T_OBJECT, R3_ARG1, R4_ARG2, R5_ARG3, noreg, noreg);
 
     if (UseCompressedOops) {
@@ -2164,7 +2164,7 @@
       decorators |= AS_DEST_NOT_INITIALIZED;
     }
 
-    BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, T_OBJECT, R3_from, R4_to, R5_count, /* preserve: */ R6_ckoff, R7_ckval);
 
     //inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, R12_tmp, R3_RET);
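
All three stub hunks above make the same substitution: BarrierSet::barrier_set() instead of Universe::heap()->barrier_set(), so barrier lookup no longer detours through the heap object. The accessor presumably amounts to a process-wide pointer; a sketch of that assumed shape, not the HotSpot declaration:

    class BarrierSet {
      static BarrierSet* _barrier_set;   // installed once during VM init
     public:
      static BarrierSet* barrier_set()           { return _barrier_set; }
      static void set_barrier_set(BarrierSet* b) { _barrier_set = b; }
    };
    BarrierSet* BarrierSet::_barrier_set = 0;    // unset until GC setup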
--- a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2015, 2017, SAP SE. All rights reserved.
+ * Copyright (c) 2015, 2018, SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 
 #include "precompiled.hpp"
 #include "asm/macroAssembler.inline.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "interpreter/bytecodeHistogram.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterRuntime.hpp"
@@ -402,7 +403,7 @@
      break;
   case T_OBJECT:
     // JNIHandles::resolve result.
-    __ resolve_jobject(R3_RET, R11_scratch1, R12_scratch2, /* needs_frame */ true); // kills R31
+    __ resolve_jobject(R3_RET, R11_scratch1, R31, /* needs_frame */ true); // kills R31
     break;
   case T_FLOAT:
      break;
@@ -504,59 +505,50 @@
   //   regular method entry code to generate the NPE.
   //
 
-  if (UseG1GC) {
-    address entry = __ pc();
+  address entry = __ pc();
 
-    const int referent_offset = java_lang_ref_Reference::referent_offset;
-    guarantee(referent_offset > 0, "referent offset not initialized");
+  const int referent_offset = java_lang_ref_Reference::referent_offset;
+  guarantee(referent_offset > 0, "referent offset not initialized");
 
-    Label slow_path;
+  Label slow_path;
 
-    // Debugging not possible, so can't use __ skip_if_jvmti_mode(slow_path, GR31_SCRATCH);
+  // Debugging not possible, so can't use __ skip_if_jvmti_mode(slow_path, GR31_SCRATCH);
 
-    // In the G1 code we don't check if we need to reach a safepoint. We
-    // continue and the thread will safepoint at the next bytecode dispatch.
+  // We don't check here if we need to reach a safepoint. We continue
+  // and the thread will safepoint at the next bytecode dispatch.
 
-    // If the receiver is null then it is OK to jump to the slow path.
-    __ ld(R3_RET, Interpreter::stackElementSize, R15_esp); // get receiver
+  // If the receiver is null then it is OK to jump to the slow path.
+  __ ld(R3_RET, Interpreter::stackElementSize, R15_esp); // get receiver
 
-    // Check if receiver == NULL and go the slow path.
-    __ cmpdi(CCR0, R3_RET, 0);
-    __ beq(CCR0, slow_path);
+  // Check if receiver == NULL and go the slow path.
+  __ cmpdi(CCR0, R3_RET, 0);
+  __ beq(CCR0, slow_path);
 
-    // Load the value of the referent field.
-    __ load_heap_oop(R3_RET, referent_offset, R3_RET);
+  // Load the value of the referent field.
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT,
+                    R3_RET, referent_offset, R3_RET,
+                    /* non-volatile temp */ R31, R11_scratch1, true);
 
-    // Generate the G1 pre-barrier code to log the value of
-    // the referent field in an SATB buffer. Note with
-    // these parameters the pre-barrier does not generate
-    // the load of the previous value.
+  // Any SATB pre-barrier needed to log the referent has already been
+  // emitted by load_at above: the ON_WEAK_OOP_REF decorator tells the
+  // GC's BarrierSetAssembler that this is a weak referent load, so it
+  // can keep the loaded value alive without an explicit barrier call.
 
-    // Restore caller sp for c2i case.
+  // Restore caller sp for c2i case.
 #ifdef ASSERT
-      __ ld(R9_ARG7, 0, R1_SP);
-      __ ld(R10_ARG8, 0, R21_sender_SP);
-      __ cmpd(CCR0, R9_ARG7, R10_ARG8);
-      __ asm_assert_eq("backlink", 0x544);
+  __ ld(R9_ARG7, 0, R1_SP);
+  __ ld(R10_ARG8, 0, R21_sender_SP);
+  __ cmpd(CCR0, R9_ARG7, R10_ARG8);
+  __ asm_assert_eq("backlink", 0x544);
 #endif // ASSERT
-    __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
+  __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
 
-    __ g1_write_barrier_pre(noreg,         // obj
-                            noreg,         // offset
-                            R3_RET,        // pre_val
-                            R11_scratch1,  // tmp
-                            R12_scratch2,  // tmp
-                            true);         // needs_frame
+  __ blr();
 
-    __ blr();
-
-    // Generate regular method entry.
-    __ bind(slow_path);
-    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R11_scratch1);
-    return entry;
-  }
-
-  return NULL;
+  __ bind(slow_path);
+  __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R11_scratch1);
+  return entry;
 }
 
 address TemplateInterpreterGenerator::generate_StackOverflowError_handler() {
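
The rewritten Reference.get entry above no longer branches on UseG1GC: it states the semantics of the load (IN_HEAP | ON_WEAK_OOP_REF) and lets the installed BarrierSetAssembler decide what extra code to emit. A sketch of that decision with illustrative flag values (the real DecoratorSet constants are defined by HotSpot's access API):

    typedef unsigned DecoratorSet;                        // semantic bit set
    static const DecoratorSet IN_HEAP         = 1u << 0;  // illustrative value
    static const DecoratorSet ON_WEAK_OOP_REF = 1u << 1;  // illustrative value

    // Roughly what a G1-style assembler checks before emitting the SATB
    // keep-alive (pre) barrier after a load.
    inline bool needs_keep_alive_barrier(DecoratorSet d) {
      return (d & ON_WEAK_OOP_REF) != 0;
    }

Collectors without concurrent marking can simply emit nothing for the same decorators, which is why the entry can now be generated unconditionally.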
--- a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -25,6 +25,7 @@
 
 #include "precompiled.hpp"
 #include "asm/macroAssembler.inline.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterRuntime.hpp"
 #include "interpreter/interp_masm.hpp"
@@ -53,99 +54,29 @@
 // Kills:
 //   Rbase, Rtmp
 static void do_oop_store(InterpreterMacroAssembler* _masm,
-                         Register           Rbase,
+                         Register           base,
                          RegisterOrConstant offset,
-                         Register           Rval,         // Noreg means always null.
-                         Register           Rtmp1,
-                         Register           Rtmp2,
-                         Register           Rtmp3,
-                         BarrierSet::Name   barrier,
-                         bool               precise,
-                         bool               check_null) {
-  assert_different_registers(Rtmp1, Rtmp2, Rtmp3, Rval, Rbase);
-
-  switch (barrier) {
-#if INCLUDE_ALL_GCS
-    case BarrierSet::G1BarrierSet:
-      {
-        // Load and record the previous value.
-        __ g1_write_barrier_pre(Rbase, offset,
-                                Rtmp3, /* holder of pre_val ? */
-                                Rtmp1, Rtmp2, false /* frame */);
-
-        Label Lnull, Ldone;
-        if (Rval != noreg) {
-          if (check_null) {
-            __ cmpdi(CCR0, Rval, 0);
-            __ beq(CCR0, Lnull);
-          }
-          __ store_heap_oop_not_null(Rval, offset, Rbase, /*Rval must stay uncompressed.*/ Rtmp1);
-          // Mark the card.
-          if (!(offset.is_constant() && offset.as_constant() == 0) && precise) {
-            __ add(Rbase, offset, Rbase);
-          }
-          __ g1_write_barrier_post(Rbase, Rval, Rtmp1, Rtmp2, Rtmp3, /*filtered (fast path)*/ &Ldone);
-          if (check_null) { __ b(Ldone); }
-        }
-
-        if (Rval == noreg || check_null) { // Store null oop.
-          Register Rnull = Rval;
-          __ bind(Lnull);
-          if (Rval == noreg) {
-            Rnull = Rtmp1;
-            __ li(Rnull, 0);
-          }
-          if (UseCompressedOops) {
-            __ stw(Rnull, offset, Rbase);
-          } else {
-            __ std(Rnull, offset, Rbase);
-          }
-        }
-        __ bind(Ldone);
-      }
-      break;
-#endif // INCLUDE_ALL_GCS
-    case BarrierSet::CardTableBarrierSet:
-      {
-        Label Lnull, Ldone;
-        if (Rval != noreg) {
-          if (check_null) {
-            __ cmpdi(CCR0, Rval, 0);
-            __ beq(CCR0, Lnull);
-          }
-          __ store_heap_oop_not_null(Rval, offset, Rbase, /*Rval should better stay uncompressed.*/ Rtmp1);
-          // Mark the card.
-          if (!(offset.is_constant() && offset.as_constant() == 0) && precise) {
-            __ add(Rbase, offset, Rbase);
-          }
-          __ card_write_barrier_post(Rbase, Rval, Rtmp1);
-          if (check_null) {
-            __ b(Ldone);
-          }
-        }
-
-        if (Rval == noreg || check_null) { // Store null oop.
-          Register Rnull = Rval;
-          __ bind(Lnull);
-          if (Rval == noreg) {
-            Rnull = Rtmp1;
-            __ li(Rnull, 0);
-          }
-          if (UseCompressedOops) {
-            __ stw(Rnull, offset, Rbase);
-          } else {
-            __ std(Rnull, offset, Rbase);
-          }
-        }
-        __ bind(Ldone);
-      }
-      break;
-    case BarrierSet::ModRef:
-      ShouldNotReachHere();
-      break;
-    default:
-      ShouldNotReachHere();
-  }
+                         Register           val,         // Noreg means always null.
+                         Register           tmp1,
+                         Register           tmp2,
+                         Register           tmp3,
+                         DecoratorSet       decorators) {
+  assert_different_registers(tmp1, tmp2, tmp3, val, base);
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->store_at(_masm, decorators, T_OBJECT, base, offset, val, tmp1, tmp2, tmp3, false);
+}
+
+static void do_oop_load(InterpreterMacroAssembler* _masm,
+                        Register base,
+                        RegisterOrConstant offset,
+                        Register dst,
+                        Register tmp1,
+                        Register tmp2,
+                        DecoratorSet decorators) {
+  assert_different_registers(base, tmp1, tmp2);
+  assert_different_registers(dst, tmp1, tmp2);
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->load_at(_masm, decorators, T_OBJECT, base, offset, dst, tmp1, tmp2, false);
 }
 
 // ============================================================================
@@ -755,9 +686,11 @@
   // result tos: array
   const Register Rload_addr = R3_ARG1,
                  Rarray     = R4_ARG2,
-                 Rtemp      = R5_ARG3;
+                 Rtemp      = R5_ARG3,
+                 Rtemp2     = R31;
   __ index_check(Rarray, R17_tos /* index */, UseCompressedOops ? 2 : LogBytesPerWord, Rtemp, Rload_addr);
-  __ load_heap_oop(R17_tos, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Rload_addr);
+  do_oop_load(_masm, Rload_addr, arrayOopDesc::base_offset_in_bytes(T_OBJECT), R17_tos, Rtemp, Rtemp2,
+              IN_HEAP | IN_HEAP_ARRAY);
   __ verify_oop(R17_tos);
   //__ dcbt(R17_tos); // prefetch
 }
@@ -1084,14 +1017,14 @@
 
   __ bind(Lis_null);
   do_oop_store(_masm, Rstore_addr, arrayOopDesc::base_offset_in_bytes(T_OBJECT), noreg /* 0 */,
-               Rscratch, Rscratch2, Rscratch3, _bs->kind(), true /* precise */, false /* check_null */);
+               Rscratch, Rscratch2, Rscratch3, IN_HEAP | IN_HEAP_ARRAY);
   __ profile_null_seen(Rscratch, Rscratch2);
   __ b(Ldone);
 
   // Store is OK.
   __ bind(Lstore_ok);
   do_oop_store(_masm, Rstore_addr, arrayOopDesc::base_offset_in_bytes(T_OBJECT), R17_tos /* value */,
-               Rscratch, Rscratch2, Rscratch3, _bs->kind(), true /* precise */, false /* check_null */);
+               Rscratch, Rscratch2, Rscratch3, IN_HEAP | IN_HEAP_ARRAY | OOP_NOT_NULL);
 
   __ bind(Ldone);
   // Adjust sp (pops array, index and value).
@@ -2714,7 +2647,7 @@
   __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
   assert(branch_table[atos] == 0, "can't compute twice");
   branch_table[atos] = __ pc(); // non-volatile_entry point
-  __ load_heap_oop(R17_tos, (RegisterOrConstant)Roffset, Rclass_or_obj);
+  do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP);
   __ verify_oop(R17_tos);
   __ push(atos);
   //__ dcbt(R17_tos); // prefetch
@@ -3047,7 +2980,7 @@
   branch_table[atos] = __ pc(); // non-volatile_entry point
   __ pop(atos);
   if (!is_static) { pop_and_check_object(Rclass_or_obj); } // kills R11_scratch1
-  do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, _bs->kind(), false /* precise */, true /* check null */);
+  do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, IN_HEAP);
   if (!is_static && rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_aputfield, Rbc, Rscratch, true, byte_no);
   }
@@ -3122,7 +3055,7 @@
   switch(bytecode()) {
     case Bytecodes::_fast_aputfield:
       // Store into the field.
-      do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, _bs->kind(), false /* precise */, true /* check null */);
+      do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, IN_HEAP);
       break;
 
     case Bytecodes::_fast_iputfield:
@@ -3196,13 +3129,13 @@
   switch(bytecode()) {
     case Bytecodes::_fast_agetfield:
     {
-      __ load_heap_oop(R17_tos, (RegisterOrConstant)Roffset, Rclass_or_obj);
+      do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP);
       __ verify_oop(R17_tos);
       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
 
       __ bind(LisVolatile);
       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
-      __ load_heap_oop(R17_tos, (RegisterOrConstant)Roffset, Rclass_or_obj);
+      do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP);
       __ verify_oop(R17_tos);
       __ twi_0(R17_tos);
       __ isync();
@@ -3336,13 +3269,13 @@
   switch(state) {
   case atos:
     {
-      __ load_heap_oop(R17_tos, (RegisterOrConstant)Roffset, Rclass_or_obj);
+      do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP);
       __ verify_oop(R17_tos);
       __ dispatch_epilog(state, Bytecodes::length_for(bytecode()) - 1); // Undo bcp increment.
 
       __ bind(LisVolatile);
       if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
-      __ load_heap_oop(R17_tos, (RegisterOrConstant)Roffset, Rclass_or_obj);
+      do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP);
       __ verify_oop(R17_tos);
       __ twi_0(R17_tos);
       __ isync();
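
Across the templateTable hunks, the old do_oop_store arguments (barrier kind, precise, check_null) collapse into a DecoratorSet per call site. As a rough mapping, under the same illustrative flag values as in the earlier sketch (the real constants are HotSpot's):

    typedef unsigned DecoratorSet;
    static const DecoratorSet IN_HEAP       = 1u << 0;  // every heap access
    static const DecoratorSet IN_HEAP_ARRAY = 1u << 1;  // replaces precise = true
    static const DecoratorSet OOP_NOT_NULL  = 1u << 2;  // value known non-null

    // aastore of a value already null-checked (the Lstore_ok path above):
    static const DecoratorSet aastore_decorators =
        IN_HEAP | IN_HEAP_ARRAY | OOP_NOT_NULL;
    // putfield/fast_aputfield of a possibly-null value:
    static const DecoratorSet putfield_decorators = IN_HEAP;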
--- a/src/hotspot/cpu/s390/assembler_s390.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/s390/assembler_s390.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -40,7 +40,6 @@
 #include "utilities/macros.hpp"
 #if INCLUDE_ALL_GCS
 #include "gc/g1/g1BarrierSet.hpp"
-#include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/heapRegion.hpp"
 #endif
 
--- a/src/hotspot/cpu/s390/c1_Runtime1_s390.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/s390/c1_Runtime1_s390.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -45,6 +45,7 @@
 #if INCLUDE_ALL_GCS
 #include "gc/g1/g1BarrierSet.hpp"
 #include "gc/g1/g1CardTable.hpp"
+#include "gc/g1/g1ThreadLocalData.hpp"
 #endif
 
 // Implementation of StubAssembler
@@ -767,7 +768,7 @@
     case g1_pre_barrier_slow_id:
       { // Z_R1_scratch: previous value of memory
 
-        BarrierSet* bs = Universe::heap()->barrier_set();
+        BarrierSet* bs = BarrierSet::barrier_set();
         if (bs->kind() != BarrierSet::G1BarrierSet) {
           __ should_not_reach_here(FILE_AND_LINE);
           break;
@@ -780,15 +781,9 @@
         Register tmp2 = Z_R7;
 
         Label refill, restart, marking_not_active;
-        int satb_q_active_byte_offset =
-          in_bytes(JavaThread::satb_mark_queue_offset() +
-                   SATBMarkQueue::byte_offset_of_active());
-        int satb_q_index_byte_offset =
-          in_bytes(JavaThread::satb_mark_queue_offset() +
-                   SATBMarkQueue::byte_offset_of_index());
-        int satb_q_buf_byte_offset =
-          in_bytes(JavaThread::satb_mark_queue_offset() +
-                   SATBMarkQueue::byte_offset_of_buf());
+        int satb_q_active_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
+        int satb_q_index_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
+        int satb_q_buf_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
 
         // Save tmp registers (see assertion in G1PreBarrierStub::emit_code()).
         __ z_stg(tmp,  0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
@@ -836,7 +831,7 @@
 
     case g1_post_barrier_slow_id:
       { // Z_R1_scratch: oop address, address of updated memory slot
-        BarrierSet* bs = Universe::heap()->barrier_set();
+        BarrierSet* bs = BarrierSet::barrier_set();
         if (bs->kind() != BarrierSet::G1BarrierSet) {
           __ should_not_reach_here(FILE_AND_LINE);
           break;
@@ -890,10 +885,8 @@
         // Save registers used below (see assertion in G1PreBarrierStub::emit_code()).
         __ z_stg(r2, 1*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
 
-        ByteSize dirty_card_q_index_byte_offset =
-          JavaThread::dirty_card_queue_offset() + DirtyCardQueue::byte_offset_of_index();
-        ByteSize dirty_card_q_buf_byte_offset =
-          JavaThread::dirty_card_queue_offset() + DirtyCardQueue::byte_offset_of_buf();
+        ByteSize dirty_card_q_index_byte_offset = G1ThreadLocalData::dirty_card_queue_index_offset();
+        ByteSize dirty_card_q_buf_byte_offset = G1ThreadLocalData::dirty_card_queue_buffer_offset();
 
         __ bind(restart);
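
The pattern in this file repeats in the s390 barrier assembler below: queue offsets come from G1ThreadLocalData accessors instead of summing JavaThread and SATBMarkQueue/DirtyCardQueue offsets at each site. A small model of why one flat offset per field suffices (layout and names invented for illustration):

    #include <cstddef>

    struct SATBQueueModel  { int active; size_t index; void** buffer; };
    struct DirtyCardQModel { size_t index; void** buffer; };
    struct G1TLDModel      { SATBQueueModel satb; DirtyCardQModel dcq; };

    // One compile-time constant per field replaces the two-level sum.
    static const size_t satb_index_off =
        offsetof(G1TLDModel, satb) + offsetof(SATBQueueModel, index);
    static const size_t dcq_buffer_off =
        offsetof(G1TLDModel, dcq) + offsetof(DirtyCardQModel, buffer);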
 
--- a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -29,10 +29,10 @@
 #include "gc/g1/g1CardTable.hpp"
 #include "gc/g1/g1BarrierSet.hpp"
 #include "gc/g1/g1BarrierSetAssembler.hpp"
+#include "gc/g1/g1ThreadLocalData.hpp"
 #include "gc/g1/heapRegion.hpp"
-#include "gc/shared/collectedHeap.hpp"
-#include "runtime/thread.hpp"
 #include "interpreter/interp_masm.hpp"
+#include "runtime/sharedRuntime.hpp"
 
 #define __ masm->
 
@@ -49,8 +49,7 @@
     assert_different_registers(addr,  Z_R0_scratch);  // would be destroyed by push_frame()
     assert_different_registers(count, Z_R0_scratch);  // would be destroyed by push_frame()
     Register Rtmp1 = Z_R0_scratch;
-    const int active_offset = in_bytes(JavaThread::satb_mark_queue_offset() +
-                                       SATBMarkQueue::byte_offset_of_active());
+    const int active_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
     if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
       __ load_and_test_int(Rtmp1, Address(Z_thread, active_offset));
     } else {
@@ -90,3 +89,321 @@
     __ z_br(Z_R1); // Branch without linking, callee will return to stub caller.
   }
 }
+
+void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                    const Address& src, Register dst, Register tmp1, Register tmp2, Label *is_null) {
+  bool on_oop = type == T_OBJECT || type == T_ARRAY;
+  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
+  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
+  bool on_reference = on_weak || on_phantom;
+  Label done;
+  if (on_oop && on_reference && is_null == NULL) { is_null = &done; }
+  ModRefBarrierSetAssembler::load_at(masm, decorators, type, src, dst, tmp1, tmp2, is_null);
+  if (on_oop && on_reference) {
+    // Generate the G1 pre-barrier code to log the value of
+    // the referent field in an SATB buffer.
+    g1_write_barrier_pre(masm, decorators | OOP_NOT_NULL,
+                         NULL /* obj */,
+                         dst  /* pre_val */,
+                         noreg/* preserve */ ,
+                         tmp1, tmp2 /* tmp */,
+                         true /* pre_val_needed */);
+  }
+  __ bind(done);
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, DecoratorSet decorators,
+                                                 const Address*  obj,
+                                                 Register        Rpre_val,      // Ideally, this is a non-volatile register.
+                                                 Register        Rval,          // Will be preserved.
+                                                 Register        Rtmp1,         // If Rpre_val is volatile, either Rtmp1
+                                                 Register        Rtmp2,         // or Rtmp2 has to be non-volatile.
+                                                 bool            pre_val_needed // Save Rpre_val across runtime call, caller uses it.
+                                                 ) {
+
+  bool not_null  = (decorators & OOP_NOT_NULL) != 0,
+       preloaded = obj == NULL;
+
+  const Register Robj = obj ? obj->base() : noreg,
+                 Roff = obj ? obj->index() : noreg;
+  const int active_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
+  const int buffer_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
+  const int index_offset  = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
+  assert_different_registers(Rtmp1, Rtmp2, Z_R0_scratch); // None of the Rtmp<i> may be Z_R0!
+  assert_different_registers(Robj, Z_R0_scratch);         // Used for addressing. Furthermore, push_frame destroys Z_R0!!
+  assert_different_registers(Rval, Z_R0_scratch);         // push_frame destroys Z_R0!!
+
+  Label callRuntime, filtered;
+
+  BLOCK_COMMENT("g1_write_barrier_pre {");
+
+  // Is marking active?
+  // Note: value is loaded for test purposes only. No further use here.
+  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+    __ load_and_test_int(Rtmp1, Address(Z_thread, active_offset));
+  } else {
+    guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+    __ load_and_test_byte(Rtmp1, Address(Z_thread, active_offset));
+  }
+  __ z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently.
+
+  assert(Rpre_val != noreg, "must have a real register");
+
+
+  // If an object is given, we need to load the previous value into Rpre_val.
+  if (obj) {
+    // Load the previous value...
+    if (UseCompressedOops) {
+      __ z_llgf(Rpre_val, *obj);
+    } else {
+      __ z_lg(Rpre_val, *obj);
+    }
+  }
+
+  // Is the previous value NULL?
+  // If so, we don't need to record it and we're done.
+  // Note: pre_val is loaded, decompressed and stored (directly or via runtime call).
+  //       Register contents are preserved across the runtime call if the caller requests it.
+  if (preloaded && not_null) {
+#ifdef ASSERT
+    __ z_ltgr(Rpre_val, Rpre_val);
+    __ asm_assert_ne("null oop not allowed (G1 pre)", 0x321); // Checked by caller.
+#endif
+  } else {
+    __ z_ltgr(Rpre_val, Rpre_val);
+    __ z_bre(filtered); // previous value is NULL, so we don't need to record it.
+  }
+
+  // Decode the oop now. We know it's not NULL.
+  if (Robj != noreg && UseCompressedOops) {
+    __ oop_decoder(Rpre_val, Rpre_val, /*maybeNULL=*/false);
+  }
+
+  // OK, it's not filtered, so we'll need to call enqueue.
+
+  // We can store the original value in the thread's buffer
+  // only if index > 0. Otherwise, the runtime has to handle it.
+  // (The index field is typed as size_t.)
+  Register Rbuffer = Rtmp1, Rindex = Rtmp2;
+  assert_different_registers(Rbuffer, Rindex, Rpre_val);
+
+  __ z_lg(Rbuffer, buffer_offset, Z_thread);
+
+  __ load_and_test_long(Rindex, Address(Z_thread, index_offset));
+  __ z_bre(callRuntime); // If index == 0, goto runtime.
+
+  __ add2reg(Rindex, -wordSize); // Decrement index.
+  __ z_stg(Rindex, index_offset, Z_thread);
+
+  // Record the previous value.
+  __ z_stg(Rpre_val, 0, Rbuffer, Rindex);
+  __ z_bru(filtered);  // We are done.
+
+  Rbuffer = noreg;  // end of life
+  Rindex  = noreg;  // end of life
+
+  __ bind(callRuntime);
+
+  // Save some registers (inputs and result) over runtime call
+  // by spilling them into the top frame.
+  if (Robj != noreg && Robj->is_volatile()) {
+    __ z_stg(Robj, Robj->encoding()*BytesPerWord, Z_SP);
+  }
+  if (Roff != noreg && Roff->is_volatile()) {
+    __ z_stg(Roff, Roff->encoding()*BytesPerWord, Z_SP);
+  }
+  if (Rval != noreg && Rval->is_volatile()) {
+    __ z_stg(Rval, Rval->encoding()*BytesPerWord, Z_SP);
+  }
+
+  // Save Rpre_val (result) over runtime call.
+  Register Rpre_save = Rpre_val;
+  if ((Rpre_val == Z_R0_scratch) || (pre_val_needed && Rpre_val->is_volatile())) {
+    guarantee(!Rtmp1->is_volatile() || !Rtmp2->is_volatile(), "oops!");
+    Rpre_save = !Rtmp1->is_volatile() ? Rtmp1 : Rtmp2;
+  }
+  __ lgr_if_needed(Rpre_save, Rpre_val);
+
+  // Push frame to protect top frame with return pc and spilled register values.
+  __ save_return_pc();
+  __ push_frame_abi160(0); // Will use Z_R0 as tmp.
+
+  // Rpre_val may be destroyed by push_frame().
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), Rpre_save, Z_thread);
+
+  __ pop_frame();
+  __ restore_return_pc();
+
+  // Restore spilled values.
+  if (Robj != noreg && Robj->is_volatile()) {
+    __ z_lg(Robj, Robj->encoding()*BytesPerWord, Z_SP);
+  }
+  if (Roff != noreg && Roff->is_volatile()) {
+    __ z_lg(Roff, Roff->encoding()*BytesPerWord, Z_SP);
+  }
+  if (Rval != noreg && Rval->is_volatile()) {
+    __ z_lg(Rval, Rval->encoding()*BytesPerWord, Z_SP);
+  }
+  if (pre_val_needed && Rpre_val->is_volatile()) {
+    __ lgr_if_needed(Rpre_val, Rpre_save);
+  }
+
+  __ bind(filtered);
+  BLOCK_COMMENT("} g1_write_barrier_pre");
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, DecoratorSet decorators, Register Rstore_addr, Register Rnew_val,
+                                                  Register Rtmp1, Register Rtmp2, Register Rtmp3) {
+  bool not_null = (decorators & OOP_NOT_NULL) != 0;
+
+  assert_different_registers(Rstore_addr, Rnew_val, Rtmp1, Rtmp2); // Most probably, Rnew_val == Rtmp3.
+
+  Label callRuntime, filtered;
+
+  CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
+  assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code");
+
+  BLOCK_COMMENT("g1_write_barrier_post {");
+
+  // Does store cross heap regions?
+  // It does if the two addresses specify different grain addresses.
+  if (G1RSBarrierRegionFilter) {
+    if (VM_Version::has_DistinctOpnds()) {
+      __ z_xgrk(Rtmp1, Rstore_addr, Rnew_val);
+    } else {
+      __ z_lgr(Rtmp1, Rstore_addr);
+      __ z_xgr(Rtmp1, Rnew_val);
+    }
+    __ z_srag(Rtmp1, Rtmp1, HeapRegion::LogOfHRGrainBytes);
+    __ z_bre(filtered);
+  }
+
+  // Crosses regions, storing NULL?
+  if (not_null) {
+#ifdef ASSERT
+    __ z_ltgr(Rnew_val, Rnew_val);
+    __ asm_assert_ne("null oop not allowed (G1 post)", 0x322); // Checked by caller.
+#endif
+  } else {
+    __ z_ltgr(Rnew_val, Rnew_val);
+    __ z_bre(filtered);
+  }
+
+  Rnew_val = noreg; // end of lifetime
+
+  // Storing region crossing non-NULL, is card already dirty?
+  assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code");
+  assert_different_registers(Rtmp1, Rtmp2, Rtmp3);
+  // Make sure not to use Z_R0 for any of these registers.
+  Register Rcard_addr = (Rtmp1 != Z_R0_scratch) ? Rtmp1 : Rtmp3;
+  Register Rbase      = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp3;
+
+  // Calculate the address of the card.
+  __ load_const_optimized(Rbase, (address)ct->card_table()->byte_map_base());      // Card table base.
+  __ z_srlg(Rcard_addr, Rstore_addr, CardTable::card_shift);         // Index into card table.
+  __ z_algr(Rcard_addr, Rbase);                                      // Explicit calculation needed for cli.
+  Rbase = noreg; // end of lifetime
+
+  // Filter young.
+  assert((unsigned int)G1CardTable::g1_young_card_val() <= 255, "otherwise check this code");
+  __ z_cli(0, Rcard_addr, G1CardTable::g1_young_card_val());
+  __ z_bre(filtered);
+
+  // Check the card value. If dirty, we're done.
+  // This also avoids false sharing of the (already dirty) card.
+  __ z_sync(); // Required to support concurrent cleaning.
+  assert((unsigned int)G1CardTable::dirty_card_val() <= 255, "otherwise check this code");
+  __ z_cli(0, Rcard_addr, G1CardTable::dirty_card_val()); // Reload after membar.
+  __ z_bre(filtered);
+
+  // Storing a region crossing, non-NULL oop, card is clean.
+  // Dirty card and log.
+  __ z_mvi(0, Rcard_addr, G1CardTable::dirty_card_val());
+
+  Register Rcard_addr_x = Rcard_addr;
+  Register Rqueue_index = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp1;
+  Register Rqueue_buf   = (Rtmp3 != Z_R0_scratch) ? Rtmp3 : Rtmp1;
+  const int qidx_off    = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
+  const int qbuf_off    = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
+  if ((Rcard_addr == Rqueue_buf) || (Rcard_addr == Rqueue_index)) {
+    Rcard_addr_x = Z_R0_scratch;  // Register shortage. We have to use Z_R0.
+  }
+  __ lgr_if_needed(Rcard_addr_x, Rcard_addr);
+
+  __ load_and_test_long(Rqueue_index, Address(Z_thread, qidx_off));
+  __ z_bre(callRuntime); // If index == 0, jump to runtime.
+
+  __ z_lg(Rqueue_buf, qbuf_off, Z_thread);
+
+  __ add2reg(Rqueue_index, -wordSize); // Decrement index.
+  __ z_stg(Rqueue_index, qidx_off, Z_thread);
+
+  __ z_stg(Rcard_addr_x, 0, Rqueue_index, Rqueue_buf); // Store card.
+  __ z_bru(filtered);
+
+  __ bind(callRuntime);
+
+  // TODO: do we need a frame? Introduced to be on the safe side.
+  bool needs_frame = true;
+  __ lgr_if_needed(Rcard_addr, Rcard_addr_x); // Copy back ASAP; push_frame will destroy Z_R0_scratch!
+
+  // The VM call needs a frame to access (write) registers.
+  if (needs_frame) {
+    __ save_return_pc();
+    __ push_frame_abi160(0); // Will use Z_R0 as tmp on old CPUs.
+  }
+
+  // Save the live input values.
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), Rcard_addr, Z_thread);
+
+  if (needs_frame) {
+    __ pop_frame();
+    __ restore_return_pc();
+  }
+
+  __ bind(filtered);
+
+  BLOCK_COMMENT("} g1_write_barrier_post");
+}
+
+void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                         const Address& dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
+  bool on_array = (decorators & IN_HEAP_ARRAY) != 0;
+  bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
+  bool precise = on_array || on_anonymous;
+  // Load and record the previous value.
+  g1_write_barrier_pre(masm, decorators, &dst, tmp3, val, tmp1, tmp2, false);
+
+  BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
+
+  // No need for post barrier if storing NULL
+  if (val != noreg) {
+    const Register base = dst.base(),
+                   idx  = dst.index();
+    const intptr_t disp = dst.disp();
+    if (precise && (disp != 0 || idx != noreg)) {
+      __ add2reg_with_index(base, disp, idx, base);
+    }
+    g1_write_barrier_post(masm, decorators, base, val, tmp1, tmp2, tmp3);
+  }
+}
+
+void G1BarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2) {
+  NearLabel Ldone, Lnot_weak;
+  __ z_ltgr(tmp1, value);
+  __ z_bre(Ldone);          // Use NULL result as-is.
+
+  __ z_nill(value, ~JNIHandles::weak_tag_mask);
+  __ z_lg(value, 0, value); // Resolve (untagged) jobject.
+
+  __ z_tmll(tmp1, JNIHandles::weak_tag_mask); // Test for jweak tag.
+  __ z_braz(Lnot_weak);
+  __ verify_oop(value);
+  DecoratorSet decorators = IN_ROOT | ON_PHANTOM_OOP_REF;
+  g1_write_barrier_pre(masm, decorators, (const Address*)NULL, value, noreg, tmp1, tmp2, true);
+  __ bind(Lnot_weak);
+  __ verify_oop(value);
+  __ bind(Ldone);
+}
+
+#undef __
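
The g1_write_barrier_post body above is a chain of cheap filters in front of an expensive enqueue. A straight-line C++ model of that chain (region size, card shift, and card values are assumptions; HotSpot derives them from the heap configuration):

    #include <cstdint>

    static const int     kLogRegionBytes = 20;  // assume 1 MB heap regions
    static const int     kCardShift      = 9;   // assume 512-byte cards
    static const uint8_t kYoungCard      = 2;   // assumed marker values
    static const uint8_t kDirtyCard      = 0;

    inline bool g1_post_barrier_enqueues(uintptr_t store_addr, uintptr_t new_val,
                                         volatile uint8_t* byte_map_base) {
      if (((store_addr ^ new_val) >> kLogRegionBytes) == 0)
        return false;                               // same region: filtered
      if (new_val == 0) return false;               // storing NULL: filtered
      volatile uint8_t* card = byte_map_base + (store_addr >> kCardShift);
      if (*card == kYoungCard) return false;        // young card: filtered
      // The real code issues a StoreLoad barrier (z_sync) and re-reads here.
      if (*card == kDirtyCard) return false;        // already dirty: filtered
      *card = kDirtyCard;                           // dirty the card, then
      return true;                                  // enqueue its address
    }

Only when all four filters fail does the emitted code touch the dirty card queue, and only on queue overflow does it call into the runtime (SharedRuntime::g1_wb_post).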
--- a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -31,10 +31,29 @@
 
 class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
  protected:
-  virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
-                                               Register addr, Register count);
-  virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
-                                                Register addr, Register count, bool do_return);
+  virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count);
+  virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count,
+                                                bool do_return);
+
+  void g1_write_barrier_pre(MacroAssembler*    masm, DecoratorSet decorators,
+                            const Address*     obj,           // Address of oop or NULL if pre-loaded.
+                            Register           Rpre_val,      // Ideally, this is a non-volatile register.
+                            Register           Rval,          // Will be preserved.
+                            Register           Rtmp1,         // If Rpre_val is volatile, either Rtmp1
+                            Register           Rtmp2,         // or Rtmp2 has to be non-volatile.
+                            bool               pre_val_needed); // Save Rpre_val across runtime call, caller uses it.
+
+  void g1_write_barrier_post(MacroAssembler* masm, DecoratorSet decorators, Register Rstore_addr, Register Rnew_val,
+                             Register Rtmp1, Register Rtmp2, Register Rtmp3);
+
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            const Address& dst, Register val, Register tmp1, Register tmp2, Register tmp3);
+
+ public:
+  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                       const Address& src, Register dst, Register tmp1, Register tmp2, Label *is_null = NULL);
+
+  virtual void resolve_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2);
 };
 
 #endif // CPU_S390_GC_G1_G1BARRIERSETASSEMBLER_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "interpreter/interp_masm.hpp"
+
+#define __ masm->
+
+void BarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                             Register dst, Register count, bool do_return) {
+  if (do_return) { __ z_br(Z_R14); }
+}
+
+void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                  const Address& addr, Register dst, Register tmp1, Register tmp2, Label *is_null) {
+  bool on_heap = (decorators & IN_HEAP) != 0;
+  bool on_root = (decorators & IN_ROOT) != 0;
+  assert(on_heap || on_root, "where?");
+
+  switch (type) {
+  case T_ARRAY:
+  case T_OBJECT: {
+    if (UseCompressedOops && on_heap) {
+      __ z_llgf(dst, addr);
+      if (is_null) {
+        __ compareU32_and_branch(dst, (intptr_t)0, Assembler::bcondEqual, *is_null);
+        __ oop_decoder(dst, dst, false);
+      } else {
+        __ oop_decoder(dst, dst, true);
+      }
+    } else {
+      __ z_lg(dst, addr);
+      if (is_null) {
+        __ compareU64_and_branch(dst, (intptr_t)0, Assembler::bcondEqual, *is_null);
+      }
+    }
+    break;
+  }
+  default: Unimplemented();
+  }
+}
+
+void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                   const Address& addr, Register val, Register tmp1, Register tmp2, Register tmp3) {
+  bool on_heap  = (decorators & IN_HEAP) != 0;
+  bool on_root  = (decorators & IN_ROOT) != 0;
+  bool not_null = (decorators & OOP_NOT_NULL) != 0;
+  assert(on_heap || on_root, "where?");
+  assert_different_registers(val, tmp1, tmp2);
+
+  switch (type) {
+  case T_ARRAY:
+  case T_OBJECT: {
+    if (UseCompressedOops && on_heap) {
+      if (val == noreg) {
+        __ clear_mem(addr, 4);
+      } else if (Universe::narrow_oop_mode() == Universe::UnscaledNarrowOop) {
+        __ z_st(val, addr);
+      } else {
+        Register tmp = (tmp1 != Z_R1) ? tmp1 : tmp2; // Avoid tmp == Z_R1 (see oop_encoder).
+        __ oop_encoder(tmp, val, !not_null);
+        __ z_st(tmp, addr);
+      }
+    } else {
+      if (val == noreg) {
+        __ clear_mem(addr, 8);
+      } else {
+        __ z_stg(val, addr);
+      }
+    }
+    break;
+  }
+  default: Unimplemented();
+  }
+}
+
+void BarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2) {
+  NearLabel Ldone;
+  __ z_ltgr(tmp1, value);
+  __ z_bre(Ldone);          // Use NULL result as-is.
+
+  __ z_nill(value, ~JNIHandles::weak_tag_mask);
+  __ z_lg(value, 0, value); // Resolve (untagged) jobject.
+
+  __ verify_oop(value);
+  __ bind(Ldone);
+}
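
The load_at/store_at paths above switch between 32-bit and 64-bit accesses depending on UseCompressedOops, decoding and encoding oops around the raw memory access. As a minimal standalone sketch of what such an encode/decode pair computes — the base, shift, and alignment here are assumed values for illustration, not HotSpot's actual Universe parameters:

    #include <cassert>
    #include <cstdint>

    // Toy model of compressed-oop encoding: a 64-bit address stored as a
    // 32-bit, shift-scaled offset from a heap base. Base and shift are
    // assumed values for illustration, not HotSpot's.
    static const uint64_t kHeapBase = 0x800000000ULL;
    static const unsigned kShift    = 3; // objects assumed 8-byte aligned

    static uint32_t encode_oop(uint64_t addr) {
      if (addr == 0) return 0;                          // NULL stays NULL
      return (uint32_t)((addr - kHeapBase) >> kShift);
    }

    static uint64_t decode_oop(uint32_t narrow) {
      if (narrow == 0) return 0;                        // NULL stays NULL
      return kHeapBase + ((uint64_t)narrow << kShift);
    }

    int main() {
      uint64_t oop = kHeapBase + 0x1234560;             // some aligned address
      assert(decode_oop(encode_oop(oop)) == oop);       // round-trips
      assert(decode_oop(encode_oop(0)) == 0);           // NULL-transparent
      return 0;
    }

NULL transparency is why load_at can branch to is_null before decoding: a narrow value of zero decodes to NULL without consulting the heap base.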
--- a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -23,8 +23,8 @@
  *
  */
 
-#ifndef CPU_S390_GC_G1_BARRIERSETASSEMBLER_S390_HPP
-#define CPU_S390_GC_G1_BARRIERSETASSEMBLER_S390_HPP
+#ifndef CPU_S390_GC_SHARED_BARRIERSETASSEMBLER_S390_HPP
+#define CPU_S390_GC_SHARED_BARRIERSETASSEMBLER_S390_HPP
 
 #include "asm/macroAssembler.hpp"
 #include "memory/allocation.hpp"
@@ -38,6 +38,15 @@
                                   Register src, Register dst, Register count) {}
   virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Register dst, Register count, bool do_return = false);
+
+  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                       const Address& addr, Register dst, Register tmp1, Register tmp2, Label *is_null = NULL);
+  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                        const Address& addr, Register val, Register tmp1, Register tmp2, Register tmp3);
+
+  virtual void resolve_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2);
+
+  virtual void barrier_stubs_init() {}
 };
 
-#endif // CPU_S390_GC_G1_BARRIERSETASSEMBLER_S390_HPP
+#endif // CPU_S390_GC_SHARED_BARRIERSETASSEMBLER_S390_HPP
--- a/src/hotspot/cpu/s390/gc/shared/cardTableBarrierSetAssembler_s390.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/s390/gc/shared/cardTableBarrierSetAssembler_s390.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -29,7 +29,6 @@
 #include "gc/shared/cardTable.hpp"
 #include "gc/shared/cardTableBarrierSet.hpp"
 #include "gc/shared/cardTableBarrierSetAssembler.hpp"
-#include "gc/shared/collectedHeap.hpp"
 #include "interpreter/interp_masm.hpp"
 
 #define __ masm->
@@ -46,7 +45,7 @@
 
 void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count,
                                                                     bool do_return) {
-  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(Universe::heap()->barrier_set());
+  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
   CardTable* ct = ctbs->card_table();
   assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
 
@@ -139,3 +138,38 @@
 
   __ bind(done);
 }
+
+void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register store_addr, Register tmp) {
+  // Does a store check for the store address in register store_addr.
+  // The content of register store_addr is destroyed afterwards.
+  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
+  CardTable* ct = ctbs->card_table();
+  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
+
+  assert_different_registers(store_addr, tmp);
+
+  __ z_srlg(store_addr, store_addr, CardTable::card_shift);
+  __ load_absolute_address(tmp, (address)ct->byte_map_base());
+  __ z_agr(store_addr, tmp);
+  __ z_mvi(0, store_addr, CardTable::dirty_card_val());
+}
+
+void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                                const Address& dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
+  bool on_array = (decorators & IN_HEAP_ARRAY) != 0;
+  bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
+  bool precise = on_array || on_anonymous;
+
+  BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
+
+  // No need for post barrier if storing NULL
+  if (val != noreg) {
+    const Register base = dst.base(),
+                   idx  = dst.index();
+    const intptr_t disp = dst.disp();
+    if (precise && (disp != 0 || idx != noreg)) {
+      __ add2reg_with_index(base, disp, idx, base);
+    }
+    store_check(masm, base, tmp1);
+  }
+}
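
store_check above shifts the store address right by CardTable::card_shift, adds byte_map_base, and writes dirty_card_val — one byte per card. A toy model of that post-barrier, with card size and marker values assumed to match the usual HotSpot defaults (512-byte cards, dirty == 0):

    #include <cassert>
    #include <cstdint>

    // Toy card table: one byte per 512-byte card of a small fake heap.
    static const int     kCardShift    = 9;    // 512-byte cards
    static const uint8_t kDirtyCardVal = 0;    // dirty marker, assumed 0
    static const uint8_t kCleanCardVal = 0xff;

    static uint8_t  card_table[1 << 11];       // covers a 1 MiB toy heap
    static uint64_t heap_base = 0;             // toy heap starts at address 0

    // Post-barrier: dirty the card covering store_addr.
    static void store_check(uint64_t store_addr) {
      card_table[(store_addr - heap_base) >> kCardShift] = kDirtyCardVal;
    }

    int main() {
      for (int i = 0; i < (1 << 11); i++) card_table[i] = kCleanCardVal;
      store_check(0x1200);                     // a store somewhere in the heap
      assert(card_table[0x1200 >> kCardShift] == kDirtyCardVal);
      assert(card_table[0] == kCleanCardVal);  // neighbouring cards untouched
      return 0;
    }

HotSpot biases byte_map_base so no explicit heap-base subtraction is needed in the emitted code; the heap_base here only keeps the toy self-contained.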
--- a/src/hotspot/cpu/s390/gc/shared/cardTableBarrierSetAssembler_s390.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/s390/gc/shared/cardTableBarrierSetAssembler_s390.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -23,16 +23,21 @@
  *
  */
 
-#ifndef CPU_X86_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_X86_HPP
-#define CPU_X86_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_X86_HPP
+#ifndef CPU_S390_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_S390_HPP
+#define CPU_S390_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_S390_HPP
 
 #include "asm/macroAssembler.hpp"
 #include "gc/shared/modRefBarrierSetAssembler.hpp"
 
 class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler {
 protected:
+  void store_check(MacroAssembler* masm, Register store_addr, Register tmp);
+
   virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count,
                                                 bool do_return);
+
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            const Address& dst, Register val, Register tmp1, Register tmp2, Register tmp3);
 };
 
-#endif // CPU_X86_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_X86_HPP
+#endif // CPU_S390_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_S390_HPP
--- a/src/hotspot/cpu/s390/gc/shared/modRefBarrierSetAssembler_s390.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/s390/gc/shared/modRefBarrierSetAssembler_s390.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -49,3 +49,17 @@
     if (do_return) { __ z_br(Z_R14); }
   }
 }
+
+void ModRefBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                             const Address& dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
+  BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
+}
+
+void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                         const Address& dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
+  if (type == T_OBJECT || type == T_ARRAY) {
+    oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
+  } else {
+    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
+  }
+}
--- a/src/hotspot/cpu/s390/gc/shared/modRefBarrierSetAssembler_s390.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/s390/gc/shared/modRefBarrierSetAssembler_s390.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -23,23 +23,31 @@
  *
  */
 
-#ifndef CPU_X86_GC_SHARED_MODREFBARRIERSETASSEMBLER_X86_HPP
-#define CPU_X86_GC_SHARED_MODREFBARRIERSETASSEMBLER_X86_HPP
+#ifndef CPU_S390_GC_SHARED_MODREFBARRIERSETASSEMBLER_S390_HPP
+#define CPU_S390_GC_SHARED_MODREFBARRIERSETASSEMBLER_S390_HPP
 
 #include "asm/macroAssembler.hpp"
 #include "gc/shared/barrierSetAssembler.hpp"
 
+// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other
+// than T_OBJECT/T_ARRAY (oops). Oop accesses are routed to the protected
+// oop_store_at, which concrete BarrierSetAssemblers override.
+
 class ModRefBarrierSetAssembler: public BarrierSetAssembler {
 protected:
   virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count) {}
   virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count,
                                                 bool do_return);
-
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            const Address& dst, Register val, Register tmp1, Register tmp2, Register tmp3);
 public:
   virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Register src, Register dst, Register count);
   virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Register dst, Register count, bool do_return = false);
+
+  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                        const Address& dst, Register val, Register tmp1, Register tmp2, Register tmp3);
 };
 
-#endif // CPU_X86_GC_SHARED_MODREFBARRIERSETASSEMBLER_X86_HPP
+#endif // CPU_S390_GC_SHARED_MODREFBARRIERSETASSEMBLER_S390_HPP
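
The header comment above describes a template-method pattern: ModRefBarrierSetAssembler::store_at filters by BasicType and funnels oop stores through the protected oop_store_at hook. A standalone C++ sketch of that dispatch shape (class and method names mirror the patch, but everything else is simplified and illustrative):

    #include <iostream>

    enum BasicType { T_INT, T_OBJECT, T_ARRAY };

    struct BarrierSetAssembler {
      virtual ~BarrierSetAssembler() {}
      virtual void store_at(BasicType type) { std::cout << "plain store\n"; }
    };

    struct ModRefBarrierSetAssembler : BarrierSetAssembler {
      // Hook for barrier-decorated oop stores; concrete assemblers override it.
      virtual void oop_store_at(BasicType type) {
        BarrierSetAssembler::store_at(type);
      }
      void store_at(BasicType type) override {
        if (type == T_OBJECT || type == T_ARRAY) {
          oop_store_at(type);                       // oops take the barrier path
        } else {
          BarrierSetAssembler::store_at(type);      // primitives need no barrier
        }
      }
    };

    struct CardTableBarrierSetAssembler : ModRefBarrierSetAssembler {
      void oop_store_at(BasicType type) override {
        ModRefBarrierSetAssembler::oop_store_at(type); // the store itself
        std::cout << "post-barrier: dirty card\n";     // then the card mark
      }
    };

    int main() {
      CardTableBarrierSetAssembler bs;
      bs.store_at(T_INT);     // prints: plain store
      bs.store_at(T_OBJECT);  // prints: plain store / post-barrier: dirty card
      return 0;
    }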
--- a/src/hotspot/cpu/s390/interp_masm_s390.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/s390/interp_masm_s390.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2018 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,8 @@
 
 #include "precompiled.hpp"
 #include "asm/macroAssembler.inline.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "interp_masm_s390.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterRuntime.hpp"
@@ -42,7 +44,7 @@
 #include "runtime/thread.inline.hpp"
 
 // Implementation of InterpreterMacroAssembler.
-// This file specializes the assember with interpreter-specific macros.
+// This file specializes the assembler with interpreter-specific macros.
 
 #ifdef PRODUCT
 #define BLOCK_COMMENT(str)
@@ -389,9 +391,8 @@
   bind(index_ok);
 #endif
   z_agr(result, index);    // Address of indexed array element.
-  load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result);
-  // The resulting oop is null if the reference is not yet resolved.
-  // It is Universe::the_null_sentinel() if the reference resolved to NULL via condy.
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->load_at(this, IN_HEAP, T_OBJECT, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), result, tmp, noreg);
 }
 
 // load cpool->resolved_klass_at(index)
@@ -2197,4 +2198,3 @@
     unimplemented("verfiyFPU");
   }
 }
-
--- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016, 2017, SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2018, SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,7 +27,8 @@
 #include "asm/codeBuffer.hpp"
 #include "asm/macroAssembler.inline.hpp"
 #include "compiler/disassembler.hpp"
-#include "gc/shared/cardTable.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "gc/shared/collectedHeap.inline.hpp"
 #include "interpreter/interpreter.hpp"
 #include "gc/shared/cardTableBarrierSet.hpp"
@@ -51,12 +52,6 @@
 #include "runtime/stubRoutines.hpp"
 #include "utilities/events.hpp"
 #include "utilities/macros.hpp"
-#if INCLUDE_ALL_GCS
-#include "gc/g1/g1BarrierSet.hpp"
-#include "gc/g1/g1CardTable.hpp"
-#include "gc/g1/g1CollectedHeap.inline.hpp"
-#include "gc/g1/heapRegion.hpp"
-#endif
 
 #include <ucontext.h>
 
@@ -3503,315 +3498,10 @@
   // flag == NE indicates failure
 }
 
-// Write to card table for modification at store_addr - register is destroyed afterwards.
-void MacroAssembler::card_write_barrier_post(Register store_addr, Register tmp) {
-  BarrierSet* bs = Universe::heap()->barrier_set();
-  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
-  CardTable* ct = ctbs->card_table();
-  assert(bs->kind() == BarrierSet::CardTableBarrierSet, "wrong barrier");
-  assert_different_registers(store_addr, tmp);
-  z_srlg(store_addr, store_addr, CardTable::card_shift);
-  load_absolute_address(tmp, (address)ct->byte_map_base());
-  z_agr(store_addr, tmp);
-  z_mvi(0, store_addr, 0); // Store byte 0.
-}
-
 void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) {
-  NearLabel Ldone;
-  z_ltgr(tmp1, value);
-  z_bre(Ldone);          // Use NULL result as-is.
-
-  z_nill(value, ~JNIHandles::weak_tag_mask);
-  z_lg(value, 0, value); // Resolve (untagged) jobject.
-
-#if INCLUDE_ALL_GCS
-  if (UseG1GC) {
-    NearLabel Lnot_weak;
-    z_tmll(tmp1, JNIHandles::weak_tag_mask); // Test for jweak tag.
-    z_braz(Lnot_weak);
-    verify_oop(value);
-    g1_write_barrier_pre(noreg /* obj */,
-                         noreg /* offset */,
-                         value /* pre_val */,
-                         noreg /* val */,
-                         tmp1  /* tmp1 */,
-                         tmp2  /* tmp2 */,
-                         true  /* pre_val_needed */);
-    bind(Lnot_weak);
-  }
-#endif // INCLUDE_ALL_GCS
-  verify_oop(value);
-  bind(Ldone);
-}
-
-#if INCLUDE_ALL_GCS
-
-//------------------------------------------------------
-// General G1 pre-barrier generator.
-// Purpose: record the previous value if it is not null.
-// All non-tmps are preserved.
-//------------------------------------------------------
-// Note: Rpre_val needs special attention.
-//   The flag pre_val_needed indicated that the caller of this emitter function
-//   relies on Rpre_val containing the correct value, that is:
-//     either the value it contained on entry to this code segment
-//     or the value that was loaded into the register from (Robj+offset).
-//
-//   Independent from this requirement, the contents of Rpre_val must survive
-//   the push_frame() operation. push_frame() uses Z_R0_scratch by default
-//   to temporarily remember the frame pointer.
-//   If Rpre_val is assigned Z_R0_scratch by the caller, code must be emitted to
-//   save it's value.
-void MacroAssembler::g1_write_barrier_pre(Register           Robj,
-                                          RegisterOrConstant offset,
-                                          Register           Rpre_val,      // Ideally, this is a non-volatile register.
-                                          Register           Rval,          // Will be preserved.
-                                          Register           Rtmp1,         // If Rpre_val is volatile, either Rtmp1
-                                          Register           Rtmp2,         // or Rtmp2 has to be non-volatile..
-                                          bool               pre_val_needed // Save Rpre_val across runtime call, caller uses it.
-                                       ) {
-  Label callRuntime, filtered;
-  const int active_offset = in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_active());
-  const int buffer_offset = in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_buf());
-  const int index_offset  = in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_index());
-  assert_different_registers(Rtmp1, Rtmp2, Z_R0_scratch); // None of the Rtmp<i> must be Z_R0!!
-  assert_different_registers(Robj, Z_R0_scratch);         // Used for addressing. Furthermore, push_frame destroys Z_R0!!
-  assert_different_registers(Rval, Z_R0_scratch);         // push_frame destroys Z_R0!!
-
-#ifdef ASSERT
-  // make sure the register is not Z_R0. Used for addressing. Furthermore, would be destroyed by push_frame.
-  if (offset.is_register() && offset.as_register()->encoding() == 0) {
-    tty->print_cr("Roffset(g1_write_barrier_pre)  = %%r%d", offset.as_register()->encoding());
-    assert(false, "bad register for offset");
-  }
-#endif
-
-  BLOCK_COMMENT("g1_write_barrier_pre {");
-
-  // Is marking active?
-  // Note: value is loaded for test purposes only. No further use here.
-  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
-    load_and_test_int(Rtmp1, Address(Z_thread, active_offset));
-  } else {
-    guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
-    load_and_test_byte(Rtmp1, Address(Z_thread, active_offset));
-  }
-  z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently.
-
-  assert(Rpre_val != noreg, "must have a real register");
-
-
-  // If an object is given, we need to load the previous value into Rpre_val.
-  if (Robj != noreg) {
-    // Load the previous value...
-    Register ixReg = offset.is_register() ? offset.register_or_noreg() : Z_R0;
-    if (UseCompressedOops) {
-      z_llgf(Rpre_val, offset.constant_or_zero(), ixReg, Robj);
-    } else {
-      z_lg(Rpre_val, offset.constant_or_zero(), ixReg, Robj);
-    }
-  }
-
-  // Is the previous value NULL?
-  // If so, we don't need to record it and we're done.
-  // Note: pre_val is loaded, decompressed and stored (directly or via runtime call).
-  //       Register contents is preserved across runtime call if caller requests to do so.
-  z_ltgr(Rpre_val, Rpre_val);
-  z_bre(filtered); // previous value is NULL, so we don't need to record it.
-
-  // Decode the oop now. We know it's not NULL.
-  if (Robj != noreg && UseCompressedOops) {
-    oop_decoder(Rpre_val, Rpre_val, /*maybeNULL=*/false);
-  }
-
-  // OK, it's not filtered, so we'll need to call enqueue.
-
-  // We can store the original value in the thread's buffer
-  // only if index > 0. Otherwise, we need runtime to handle.
-  // (The index field is typed as size_t.)
-  Register Rbuffer = Rtmp1, Rindex = Rtmp2;
-  assert_different_registers(Rbuffer, Rindex, Rpre_val);
-
-  z_lg(Rbuffer, buffer_offset, Z_thread);
-
-  load_and_test_long(Rindex, Address(Z_thread, index_offset));
-  z_bre(callRuntime); // If index == 0, goto runtime.
-
-  add2reg(Rindex, -wordSize); // Decrement index.
-  z_stg(Rindex, index_offset, Z_thread);
-
-  // Record the previous value.
-  z_stg(Rpre_val, 0, Rbuffer, Rindex);
-  z_bru(filtered);  // We are done.
-
-  Rbuffer = noreg;  // end of life
-  Rindex  = noreg;  // end of life
-
-  bind(callRuntime);
-
-  // Save some registers (inputs and result) over runtime call
-  // by spilling them into the top frame.
-  if (Robj != noreg && Robj->is_volatile()) {
-    z_stg(Robj, Robj->encoding()*BytesPerWord, Z_SP);
-  }
-  if (offset.is_register() && offset.as_register()->is_volatile()) {
-    Register Roff = offset.as_register();
-    z_stg(Roff, Roff->encoding()*BytesPerWord, Z_SP);
-  }
-  if (Rval != noreg && Rval->is_volatile()) {
-    z_stg(Rval, Rval->encoding()*BytesPerWord, Z_SP);
-  }
-
-  // Save Rpre_val (result) over runtime call.
-  Register Rpre_save = Rpre_val;
-  if ((Rpre_val == Z_R0_scratch) || (pre_val_needed && Rpre_val->is_volatile())) {
-    guarantee(!Rtmp1->is_volatile() || !Rtmp2->is_volatile(), "oops!");
-    Rpre_save = !Rtmp1->is_volatile() ? Rtmp1 : Rtmp2;
-  }
-  lgr_if_needed(Rpre_save, Rpre_val);
-
-  // Push frame to protect top frame with return pc and spilled register values.
-  save_return_pc();
-  push_frame_abi160(0); // Will use Z_R0 as tmp.
-
-  // Rpre_val may be destroyed by push_frame().
-  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), Rpre_save, Z_thread);
-
-  pop_frame();
-  restore_return_pc();
-
-  // Restore spilled values.
-  if (Robj != noreg && Robj->is_volatile()) {
-    z_lg(Robj, Robj->encoding()*BytesPerWord, Z_SP);
-  }
-  if (offset.is_register() && offset.as_register()->is_volatile()) {
-    Register Roff = offset.as_register();
-    z_lg(Roff, Roff->encoding()*BytesPerWord, Z_SP);
-  }
-  if (Rval != noreg && Rval->is_volatile()) {
-    z_lg(Rval, Rval->encoding()*BytesPerWord, Z_SP);
-  }
-  if (pre_val_needed && Rpre_val->is_volatile()) {
-    lgr_if_needed(Rpre_val, Rpre_save);
-  }
-
-  bind(filtered);
-  BLOCK_COMMENT("} g1_write_barrier_pre");
-}
-
-// General G1 post-barrier generator.
-// Purpose: Store cross-region card.
-void MacroAssembler::g1_write_barrier_post(Register Rstore_addr,
-                                           Register Rnew_val,
-                                           Register Rtmp1,
-                                           Register Rtmp2,
-                                           Register Rtmp3) {
-  Label callRuntime, filtered;
-
-  assert_different_registers(Rstore_addr, Rnew_val, Rtmp1, Rtmp2); // Most probably, Rnew_val == Rtmp3.
-
-  G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(Universe::heap()->barrier_set());
-  CardTable* ct = bs->card_table();
-  assert(bs->kind() == BarrierSet::G1BarrierSet, "wrong barrier");
-
-  BLOCK_COMMENT("g1_write_barrier_post {");
-
-  // Does store cross heap regions?
-  // It does if the two addresses specify different grain addresses.
-  if (G1RSBarrierRegionFilter) {
-    if (VM_Version::has_DistinctOpnds()) {
-      z_xgrk(Rtmp1, Rstore_addr, Rnew_val);
-    } else {
-      z_lgr(Rtmp1, Rstore_addr);
-      z_xgr(Rtmp1, Rnew_val);
-    }
-    z_srag(Rtmp1, Rtmp1, HeapRegion::LogOfHRGrainBytes);
-    z_bre(filtered);
-  }
-
-  // Crosses regions, storing NULL?
-#ifdef ASSERT
-  z_ltgr(Rnew_val, Rnew_val);
-  asm_assert_ne("null oop not allowed (G1)", 0x255); // TODO: also on z? Checked by caller on PPC64, so following branch is obsolete:
-  z_bre(filtered);  // Safety net: don't break if we have a NULL oop.
-#endif
-  Rnew_val = noreg; // end of lifetime
-
-  // Storing region crossing non-NULL, is card already dirty?
-  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
-  assert_different_registers(Rtmp1, Rtmp2, Rtmp3);
-  // Make sure not to use Z_R0 for any of these registers.
-  Register Rcard_addr = (Rtmp1 != Z_R0_scratch) ? Rtmp1 : Rtmp3;
-  Register Rbase      = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp3;
-
-  // calculate address of card
-  load_const_optimized(Rbase, (address)ct->byte_map_base());      // Card table base.
-  z_srlg(Rcard_addr, Rstore_addr, CardTable::card_shift);         // Index into card table.
-  z_algr(Rcard_addr, Rbase);                                      // Explicit calculation needed for cli.
-  Rbase = noreg; // end of lifetime
-
-  // Filter young.
-  assert((unsigned int)G1CardTable::g1_young_card_val() <= 255, "otherwise check this code");
-  z_cli(0, Rcard_addr, (int)G1CardTable::g1_young_card_val());
-  z_bre(filtered);
-
-  // Check the card value. If dirty, we're done.
-  // This also avoids false sharing of the (already dirty) card.
-  z_sync(); // Required to support concurrent cleaning.
-  assert((unsigned int)CardTable::dirty_card_val() <= 255, "otherwise check this code");
-  z_cli(0, Rcard_addr, CardTable::dirty_card_val()); // Reload after membar.
-  z_bre(filtered);
-
-  // Storing a region crossing, non-NULL oop, card is clean.
-  // Dirty card and log.
-  z_mvi(0, Rcard_addr, CardTable::dirty_card_val());
-
-  Register Rcard_addr_x = Rcard_addr;
-  Register Rqueue_index = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp1;
-  Register Rqueue_buf   = (Rtmp3 != Z_R0_scratch) ? Rtmp3 : Rtmp1;
-  const int qidx_off    = in_bytes(JavaThread::dirty_card_queue_offset() + SATBMarkQueue::byte_offset_of_index());
-  const int qbuf_off    = in_bytes(JavaThread::dirty_card_queue_offset() + SATBMarkQueue::byte_offset_of_buf());
-  if ((Rcard_addr == Rqueue_buf) || (Rcard_addr == Rqueue_index)) {
-    Rcard_addr_x = Z_R0_scratch;  // Register shortage. We have to use Z_R0.
-  }
-  lgr_if_needed(Rcard_addr_x, Rcard_addr);
-
-  load_and_test_long(Rqueue_index, Address(Z_thread, qidx_off));
-  z_bre(callRuntime); // Index == 0 then jump to runtime.
-
-  z_lg(Rqueue_buf, qbuf_off, Z_thread);
-
-  add2reg(Rqueue_index, -wordSize); // Decrement index.
-  z_stg(Rqueue_index, qidx_off, Z_thread);
-
-  z_stg(Rcard_addr_x, 0, Rqueue_index, Rqueue_buf); // Store card.
-  z_bru(filtered);
-
-  bind(callRuntime);
-
-  // TODO: do we need a frame? Introduced to be on the safe side.
-  bool needs_frame = true;
-  lgr_if_needed(Rcard_addr, Rcard_addr_x); // copy back asap. push_frame will destroy Z_R0_scratch!
-
-  // VM call need frame to access(write) O register.
-  if (needs_frame) {
-    save_return_pc();
-    push_frame_abi160(0); // Will use Z_R0 as tmp on old CPUs.
-  }
-
-  // Save the live input values.
-  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), Rcard_addr, Z_thread);
-
-  if (needs_frame) {
-    pop_frame();
-    restore_return_pc();
-  }
-
-  bind(filtered);
-
-  BLOCK_COMMENT("} g1_write_barrier_post");
-}
-#endif // INCLUDE_ALL_GCS
+  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->resolve_jobject(this, value, tmp1, tmp2);
+}
 
 // Last_Java_sp must comply to the rules in frame_s390.hpp.
 void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc, bool allow_relocation) {
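
With the pre-barrier logic moved out, MacroAssembler::resolve_jobject reduces to asking the BarrierSetAssembler, whose generic version clears the weak tag and dereferences the handle (the G1 subclass can add its SATB keep-alive step for jweaks). A toy model of that tag-and-dereference scheme — the tag value and handle layout are assumptions, not the JNIHandles implementation:

    #include <cassert>
    #include <cstdint>

    // Toy JNI handle: a pointer to a slot holding the oop. Weak handles carry
    // a tag in the low bit; the mask value is an assumption for this sketch.
    static const uintptr_t kWeakTagMask = 1;

    static uintptr_t resolve_jobject(uintptr_t handle) {
      if (handle == 0) return 0;                       // NULL resolves to NULL
      // A collector like G1 would apply a keep-alive barrier for weak handles.
      handle &= ~kWeakTagMask;                         // strip the tag
      return *reinterpret_cast<uintptr_t*>(handle);    // load oop from the slot
    }

    int main() {
      uintptr_t oop_slot = 0xdeadbeef;                 // slot holding a fake oop
      uintptr_t strong = reinterpret_cast<uintptr_t>(&oop_slot);
      uintptr_t weak   = strong | kWeakTagMask;
      assert(resolve_jobject(strong) == 0xdeadbeef);
      assert(resolve_jobject(weak)   == 0xdeadbeef);
      assert(resolve_jobject(0)      == 0);
      return 0;
    }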
--- a/src/hotspot/cpu/s390/macroAssembler_s390.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016, 2017, SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2018, SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -744,32 +744,8 @@
   void compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias = UseBiasedLocking);
   void compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias = UseBiasedLocking);
 
-  // Write to card table for modification at store_addr - register is destroyed afterwards.
-  void card_write_barrier_post(Register store_addr, Register tmp);
-
   void resolve_jobject(Register value, Register tmp1, Register tmp2);
 
-#if INCLUDE_ALL_GCS
-  // General G1 pre-barrier generator.
-  // Purpose: record the previous value if it is not null.
-  // All non-tmps are preserved.
-  void g1_write_barrier_pre(Register           Robj,
-                            RegisterOrConstant offset,
-                            Register           Rpre_val,        // Ideally, this is a non-volatile register.
-                            Register           Rval,            // Will be preserved.
-                            Register           Rtmp1,           // If Rpre_val is volatile, either Rtmp1
-                            Register           Rtmp2,           // or Rtmp2 has to be non-volatile.
-                            bool               pre_val_needed); // Save Rpre_val across runtime call, caller uses it.
-
-  // General G1 post-barrier generator.
-  // Purpose: Store cross-region card.
-  void g1_write_barrier_post(Register Rstore_addr,
-                             Register Rnew_val,
-                             Register Rtmp1,
-                             Register Rtmp2,
-                             Register Rtmp3);
-#endif // INCLUDE_ALL_GCS
-
   // Support for last Java frame (but use call_VM instead where possible).
  private:
   void set_last_Java_frame(Register last_Java_sp, Register last_Java_pc, bool allow_relocation);
--- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -1308,7 +1308,7 @@
       decorators |= ARRAYCOPY_ALIGNED;
     }
 
-    BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, T_OBJECT, Z_ARG1, Z_ARG2, Z_ARG3);
 
     generate_disjoint_copy(aligned, size, true, true);
@@ -1400,7 +1400,7 @@
       decorators |= ARRAYCOPY_ALIGNED;
     }
 
-    BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, T_OBJECT, Z_ARG1, Z_ARG2, Z_ARG3);
 
     generate_conjoint_copy(aligned, size, true);  // Must preserve ARG2, ARG3.
--- a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016, 2017, SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2018, SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 
 #include "precompiled.hpp"
 #include "asm/macroAssembler.inline.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "interpreter/abstractInterpreter.hpp"
 #include "interpreter/bytecodeHistogram.hpp"
 #include "interpreter/interpreter.hpp"
@@ -479,73 +480,55 @@
 }
 
 address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {
-#if INCLUDE_ALL_GCS
-  if (UseG1GC) {
-    // Inputs:
-    //  Z_ARG1 - receiver
-    //
-    // What we do:
-    //  - Load the referent field address.
-    //  - Load the value in the referent field.
-    //  - Pass that value to the pre-barrier.
-    //
-    // In the case of G1 this will record the value of the
-    // referent in an SATB buffer if marking is active.
-    // This will cause concurrent marking to mark the referent
-    // field as live.
-
-    Register  scratch1 = Z_tmp_2;
-    Register  scratch2 = Z_tmp_3;
-    Register  pre_val  = Z_RET;   // return value
-    // Z_esp is callers operand stack pointer, i.e. it points to the parameters.
-    Register  Rargp    = Z_esp;
-
-    Label     slow_path;
-    address   entry = __ pc();
-
-    const int referent_offset = java_lang_ref_Reference::referent_offset;
-    guarantee(referent_offset > 0, "referent offset not initialized");
-
-    BLOCK_COMMENT("Reference_get {");
-
-    //  If the receiver is null then it is OK to jump to the slow path.
-    __ load_and_test_long(pre_val, Address(Rargp, Interpreter::stackElementSize)); // Get receiver.
-    __ z_bre(slow_path);
-
-    //  Load the value of the referent field.
-    __ load_heap_oop(pre_val, referent_offset, pre_val);
-
-    // Restore caller sp for c2i case.
-    __ resize_frame_absolute(Z_R10, Z_R0, true); // Cut the stack back to where the caller started.
-
-    // Generate the G1 pre-barrier code to log the value of
-    // the referent field in an SATB buffer.
-    // Note:
-    //   With these parameters the write_barrier_pre does not
-    //   generate instructions to load the previous value.
-    __ g1_write_barrier_pre(noreg,      // obj
-                            noreg,      // offset
-                            pre_val,    // pre_val
-                            noreg,      // no new val to preserve
-                            scratch1,   // tmp
-                            scratch2,   // tmp
-                            true);      // pre_val_needed
-
-    __ z_br(Z_R14);
-
-    // Branch to previously generated regular method entry.
-    __ bind(slow_path);
-
-    address meth_entry = Interpreter::entry_for_kind(Interpreter::zerolocals);
-    __ jump_to_entry(meth_entry, Z_R1);
-
-    BLOCK_COMMENT("} Reference_get");
-
-    return entry;
-  }
-#endif // INCLUDE_ALL_GCS
-
-  return NULL;
+  // Inputs:
+  //  Z_ARG1 - receiver
+  //
+  // What we do:
+  //  - Load the referent field address.
+  //  - Load the value in the referent field.
+  //  - Pass that value to the pre-barrier.
+  //
+  // In the case of G1 this will record the value of the
+  // referent in an SATB buffer if marking is active.
+  // This will cause concurrent marking to mark the referent
+  // field as live.
+
+  Register  scratch1 = Z_tmp_2;
+  Register  scratch2 = Z_tmp_3;
+  Register  pre_val  = Z_RET;   // return value
+  // Z_esp is the caller's operand stack pointer, i.e. it points to the parameters.
+  Register  Rargp    = Z_esp;
+
+  Label     slow_path;
+  address   entry = __ pc();
+
+  const int referent_offset = java_lang_ref_Reference::referent_offset;
+  guarantee(referent_offset > 0, "referent offset not initialized");
+
+  BLOCK_COMMENT("Reference_get {");
+
+  //  If the receiver is null then it is OK to jump to the slow path.
+  __ load_and_test_long(pre_val, Address(Rargp, Interpreter::stackElementSize)); // Get receiver.
+  __ z_bre(slow_path);
+
+  //  Load the value of the referent field.
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT,
+              Address(pre_val, referent_offset), pre_val, scratch1, scratch2);
+
+  // Restore caller sp for c2i case.
+  __ resize_frame_absolute(Z_R10, Z_R0, true); // Cut the stack back to where the caller started.
+  __ z_br(Z_R14);
+
+  // Branch to previously generated regular method entry.
+  __ bind(slow_path);
+
+  address meth_entry = Interpreter::entry_for_kind(Interpreter::zerolocals);
+  __ jump_to_entry(meth_entry, Z_R1);
+
+  BLOCK_COMMENT("} Reference_get");
+
+  return entry;
 }
 
 address TemplateInterpreterGenerator::generate_StackOverflowError_handler() {
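
After this rewrite, Reference.get no longer carries a UseG1GC branch: the interpreter always emits bs->load_at(..., IN_HEAP | ON_WEAK_OOP_REF, ...) and each collector's BarrierSetAssembler decides whether a keep-alive barrier is required. A minimal sketch of that decorator-driven dispatch (the decorator bit values and class shapes are invented for illustration):

    #include <cstdint>
    #include <cstdio>

    typedef uint64_t DecoratorSet;
    static const DecoratorSet IN_HEAP         = 1;  // illustrative bit values
    static const DecoratorSet ON_WEAK_OOP_REF = 2;

    struct BarrierSetAssembler {
      virtual ~BarrierSetAssembler() {}
      virtual void load_at(DecoratorSet d) { std::puts("raw load"); }
    };

    struct G1BarrierSetAssembler : BarrierSetAssembler {
      void load_at(DecoratorSet d) override {
        BarrierSetAssembler::load_at(d);
        if (d & ON_WEAK_OOP_REF) {
          // SATB pre-barrier: log the loaded referent so concurrent marking
          // keeps it alive, as the removed G1-only Reference.get path did.
          std::puts("enqueue referent in SATB buffer");
        }
      }
    };

    int main() {
      G1BarrierSetAssembler g1;
      g1.load_at(IN_HEAP);                    // ordinary heap load
      g1.load_at(IN_HEAP | ON_WEAK_OOP_REF);  // Reference.get-style load
      return 0;
    }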
--- a/src/hotspot/cpu/s390/templateTable_s390.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/s390/templateTable_s390.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2018 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 
 #include "precompiled.hpp"
 #include "asm/macroAssembler.inline.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterRuntime.hpp"
 #include "interpreter/interp_masm.hpp"
@@ -192,97 +193,27 @@
 // Do an oop store like *(base + offset) = val
 // offset can be a register or a constant.
 static void do_oop_store(InterpreterMacroAssembler* _masm,
-                         Register base,
-                         RegisterOrConstant offset,
-                         Register val,
-                         bool val_is_null, // == false does not guarantee that val really is not equal NULL.
-                         Register tmp1,    // If tmp3 is volatile, either tmp1 or tmp2 must be
-                         Register tmp2,    // non-volatile to hold a copy of pre_val across runtime calls.
-                         Register tmp3,    // Ideally, this tmp register is non-volatile, as it is used to
-                                           // hold pre_val (must survive runtime calls).
-                         BarrierSet::Name barrier,
-                         bool precise) {
-  BLOCK_COMMENT("do_oop_store {");
-  assert(val != noreg, "val must always be valid, even if it is zero");
-  assert_different_registers(tmp1, tmp2, tmp3, val, base, offset.register_or_noreg());
-  __ verify_oop(val);
-  switch (barrier) {
-#if INCLUDE_ALL_GCS
-    case BarrierSet::G1BarrierSet:
-      {
-#ifdef ASSERT
-        if (val_is_null) { // Check if the flag setting reflects reality.
-          Label OK;
-          __ z_ltgr(val, val);
-          __ z_bre(OK);
-          __ z_illtrap(0x11);
-          __ bind(OK);
-        }
-#endif
-        Register pre_val = tmp3;
-        // Load and record the previous value.
-        __ g1_write_barrier_pre(base, offset, pre_val, val,
-                                tmp1, tmp2,
-                                false);  // Needs to hold pre_val in non_volatile register?
-
-        if (val_is_null) {
-          __ store_heap_oop_null(val, offset, base);
-        } else {
-          Label Done;
-          // val_is_null == false does not guarantee that val really is not equal NULL.
-          // Checking for this case dynamically has some cost, but also some benefit (in GC).
-          // It's hard to say if cost or benefit is greater.
-          { Label OK;
-            __ z_ltgr(val, val);
-            __ z_brne(OK);
-            __ store_heap_oop_null(val, offset, base);
-            __ z_bru(Done);
-            __ bind(OK);
-          }
-          // G1 barrier needs uncompressed oop for region cross check.
-          // Store_heap_oop compresses the oop in the argument register.
-          Register val_work = val;
-          if (UseCompressedOops) {
-            val_work = tmp3;
-            __ z_lgr(val_work, val);
-          }
-          __ store_heap_oop_not_null(val_work, offset, base);
-
-          // We need precise card marks for oop array stores.
-          // Otherwise, cardmarking the object which contains the oop is sufficient.
-          if (precise && !(offset.is_constant() && offset.as_constant() == 0)) {
-            __ add2reg_with_index(base,
-                                  offset.constant_or_zero(),
-                                  offset.register_or_noreg(),
-                                  base);
-          }
-          __ g1_write_barrier_post(base /* store_adr */, val, tmp1, tmp2, tmp3);
-          __ bind(Done);
-        }
-      }
-      break;
-#endif // INCLUDE_ALL_GCS
-    case BarrierSet::CardTableBarrierSet:
-    {
-      if (val_is_null) {
-        __ store_heap_oop_null(val, offset, base);
-      } else {
-        __ store_heap_oop(val, offset, base);
-        // Flatten object address if needed.
-        if (precise && ((offset.register_or_noreg() != noreg) || (offset.constant_or_zero() != 0))) {
-          __ load_address(base, Address(base, offset.register_or_noreg(), offset.constant_or_zero()));
-        }
-        __ card_write_barrier_post(base, tmp1);
-      }
-    }
-    break;
-  case BarrierSet::ModRef:
-    // fall through
-  default:
-    ShouldNotReachHere();
-
-  }
-  BLOCK_COMMENT("} do_oop_store");
+                         const Address&     addr,
+                         Register           val,         // Noreg means always null.
+                         Register           tmp1,
+                         Register           tmp2,
+                         Register           tmp3,
+                         DecoratorSet       decorators) {
+  assert_different_registers(tmp1, tmp2, tmp3, val, addr.base());
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->store_at(_masm, decorators, T_OBJECT, addr, val, tmp1, tmp2, tmp3);
+}
+
+static void do_oop_load(InterpreterMacroAssembler* _masm,
+                        const Address& addr,
+                        Register dst,
+                        Register tmp1,
+                        Register tmp2,
+                        DecoratorSet decorators) {
+  assert_different_registers(addr.base(), tmp1, tmp2);
+  assert_different_registers(dst, tmp1, tmp2);
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->load_at(_masm, decorators, T_OBJECT, addr, dst, tmp1, tmp2);
 }
 
 Address TemplateTable::at_bcp(int offset) {
@@ -923,8 +854,8 @@
   Register index = Z_tos;
   index_check(Z_tmp_1, index, shift);
   // Now load array element.
-  __ load_heap_oop(Z_tos,
-                   Address(Z_tmp_1, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+  do_oop_load(_masm, Address(Z_tmp_1, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), Z_tos,
+              Z_tmp_2, Z_tmp_3, IN_HEAP | IN_HEAP_ARRAY);
   __ verify_oop(Z_tos);
 }
 
@@ -1260,22 +1191,23 @@
   __ load_absolute_address(tmp1, Interpreter::_throw_ArrayStoreException_entry);
   __ z_br(tmp1);
 
-  // Come here on success.
-  __ bind(ok_is_subtype);
-
-  // Now store using the appropriate barrier.
   Register tmp3 = Rsub_klass;
-  do_oop_store(_masm, Rstore_addr, (intptr_t)0/*offset*/, Rvalue, false/*val==null*/,
-               tmp3, tmp2, tmp1, _bs->kind(), true);
-  __ z_bru(done);
 
   // Have a NULL in Rvalue.
   __ bind(is_null);
   __ profile_null_seen(tmp1);
 
   // Store a NULL.
-  do_oop_store(_masm, Rstore_addr, (intptr_t)0/*offset*/, Rvalue, true/*val==null*/,
-               tmp3, tmp2, tmp1, _bs->kind(), true);
+  do_oop_store(_masm, Address(Rstore_addr, (intptr_t)0), noreg,
+               tmp3, tmp2, tmp1, IN_HEAP | IN_HEAP_ARRAY);
+  __ z_bru(done);
+
+  // Come here on success.
+  __ bind(ok_is_subtype);
+
+  // Now store using the appropriate barrier.
+  do_oop_store(_masm, Address(Rstore_addr, (intptr_t)0), Rvalue,
+               tmp3, tmp2, tmp1, IN_HEAP | IN_HEAP_ARRAY | OOP_NOT_NULL);
 
   // Pop stack arguments.
   __ bind(done);
@@ -2831,7 +2763,7 @@
                       // to here is compensated for by the fallthru to "Done".
   {
     unsigned int b_off = __ offset();
-    __ load_heap_oop(Z_tos, field);
+    do_oop_load(_masm, field, Z_tos, Z_tmp_2, Z_tmp_3, IN_HEAP);
     __ verify_oop(Z_tos);
     __ push(atos);
     if (do_rewrite) {
@@ -3160,8 +3092,8 @@
       pop_and_check_object(obj);
     }
     // Store into the field
-    do_oop_store(_masm, obj, off, Z_tos, false,
-                 oopStore_tmp1, oopStore_tmp2, oopStore_tmp3, _bs->kind(), false);
+    do_oop_store(_masm, Address(obj, off), Z_tos,
+                 oopStore_tmp1, oopStore_tmp2, oopStore_tmp3, IN_HEAP);
     if (do_rewrite) {
       patch_bytecode(Bytecodes::_fast_aputfield, bc, Z_ARG5, true, byte_no);
     }
@@ -3322,8 +3254,8 @@
   // access field
   switch (bytecode()) {
     case Bytecodes::_fast_aputfield:
-      do_oop_store(_masm, obj, field_offset, Z_tos, false,
-                   Z_ARG2, Z_ARG3, Z_ARG4, _bs->kind(), false);
+      do_oop_store(_masm, Address(obj, field_offset), Z_tos,
+                   Z_ARG2, Z_ARG3, Z_ARG4, IN_HEAP);
       break;
     case Bytecodes::_fast_lputfield:
       __ reg2mem_opt(Z_tos, field);
@@ -3414,7 +3346,7 @@
   // access field
   switch (bytecode()) {
     case Bytecodes::_fast_agetfield:
-      __ load_heap_oop(Z_tos, field);
+      do_oop_load(_masm, field, Z_tos, Z_tmp_1, Z_tmp_2, IN_HEAP);
       __ verify_oop(Z_tos);
       return;
     case Bytecodes::_fast_lgetfield:
@@ -3470,7 +3402,7 @@
       __ mem2reg_opt(Z_tos, Address(receiver, index), false);
       break;
     case atos:
-      __ load_heap_oop(Z_tos, Address(receiver, index));
+      do_oop_load(_masm, Address(receiver, index), Z_tos, Z_tmp_1, Z_tmp_2, IN_HEAP);
       __ verify_oop(Z_tos);
       break;
     case ftos:
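
Note that aastore passes IN_HEAP | IN_HEAP_ARRAY, which the card-table assembler (see cardTableBarrierSetAssembler_s390.cpp above) treats as "precise": it recomputes the element address and dirties that card, rather than the card of the object header. A toy computation showing why precision matters once an array spans multiple cards (card size assumed to be HotSpot's usual 512 bytes):

    #include <cassert>
    #include <cstdint>

    static const int kCardShift = 9; // 512-byte cards assumed

    // Card dirtied for a store at byte offset disp inside an object at base:
    // precise barriers mark the element's card, imprecise ones the header's.
    static uint64_t card_for(uint64_t base, uint64_t disp, bool precise) {
      uint64_t addr = precise ? base + disp : base;
      return addr >> kCardShift;
    }

    int main() {
      uint64_t array_base = 0x10000;   // toy array object
      uint64_t elem_disp  = 8000;      // element roughly 8 KiB into the array
      // Imprecise marking dirties the card of the array header...
      assert(card_for(array_base, elem_disp, false) == (0x10000 >> kCardShift));
      // ...but the element lives many cards later; array stores must mark the
      // element's card or the card scanner would never visit the new oop.
      assert(card_for(array_base, elem_disp, true) ==
             ((0x10000 + 8000) >> kCardShift));
      return 0;
    }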
--- a/src/hotspot/cpu/sparc/c1_Runtime1_sparc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/sparc/c1_Runtime1_sparc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -43,6 +43,7 @@
 #if INCLUDE_ALL_GCS
 #include "gc/g1/g1BarrierSet.hpp"
 #include "gc/g1/g1CardTable.hpp"
+#include "gc/g1/g1ThreadLocalData.hpp"
 #endif
 
 // Implementation of StubAssembler
@@ -761,7 +762,7 @@
 #if INCLUDE_ALL_GCS
     case g1_pre_barrier_slow_id:
       { // G4: previous value of memory
-        BarrierSet* bs = Universe::heap()->barrier_set();
+        BarrierSet* bs = BarrierSet::barrier_set();
         if (bs->kind() != BarrierSet::G1BarrierSet) {
           __ save_frame(0);
           __ set((int)id, O1);
@@ -777,15 +778,9 @@
         Register tmp2 = G3_scratch;
 
         Label refill, restart;
-        int satb_q_active_byte_offset =
-          in_bytes(JavaThread::satb_mark_queue_offset() +
-                   SATBMarkQueue::byte_offset_of_active());
-        int satb_q_index_byte_offset =
-          in_bytes(JavaThread::satb_mark_queue_offset() +
-                   SATBMarkQueue::byte_offset_of_index());
-        int satb_q_buf_byte_offset =
-          in_bytes(JavaThread::satb_mark_queue_offset() +
-                   SATBMarkQueue::byte_offset_of_buf());
+        int satb_q_active_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
+        int satb_q_index_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
+        int satb_q_buf_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
 
         // Is marking still active?
         if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
@@ -832,7 +827,7 @@
 
     case g1_post_barrier_slow_id:
       {
-        BarrierSet* bs = Universe::heap()->barrier_set();
+        BarrierSet* bs = BarrierSet::barrier_set();
         if (bs->kind() != BarrierSet::G1BarrierSet) {
           __ save_frame(0);
           __ set((int)id, O1);
@@ -886,12 +881,8 @@
         // these registers are now dead
         addr = cardtable = tmp = noreg;
 
-        int dirty_card_q_index_byte_offset =
-          in_bytes(JavaThread::dirty_card_queue_offset() +
-                   DirtyCardQueue::byte_offset_of_index());
-        int dirty_card_q_buf_byte_offset =
-          in_bytes(JavaThread::dirty_card_queue_offset() +
-                   DirtyCardQueue::byte_offset_of_buf());
+        int dirty_card_q_index_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
+        int dirty_card_q_buf_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
 
         __ bind(restart);
 
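
The SPARC hunks replace hand-assembled offset sums like JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_index() with single G1ThreadLocalData accessors. The fast path those offsets address is the same in both queues: decrement a byte-counted index, store into the buffer, and fall into the runtime refill when the index reaches zero. A simplified, self-contained model of that enqueue protocol (struct layout and names are illustrative only):

    #include <cassert>
    #include <cstddef>

    // Toy SATB / dirty-card queue as held in thread-local GC data.
    struct ToyQueue {
      size_t index;   // byte offset of the next free slot; counts down to 0
      void** buffer;  // enqueue buffer, filled from the top end downwards
    };

    // Fast path: buf[--index] = value. Returns false when the buffer is full,
    // mirroring the branch to the runtime refill in the assembly below.
    static bool try_enqueue(ToyQueue* q, void* value) {
      if (q->index == 0) return false;     // index == 0: caller calls runtime
      q->index -= sizeof(void*);           // index is kept in bytes
      q->buffer[q->index / sizeof(void*)] = value;
      return true;
    }

    int main() {
      void* buf[4] = {};
      ToyQueue q = { sizeof(buf), buf };
      int dummy;
      for (int i = 0; i < 4; i++) assert(try_enqueue(&q, &dummy));
      assert(!try_enqueue(&q, &dummy));    // full: would take the refill path
      assert(buf[0] == &dummy && buf[3] == &dummy);
      return 0;
    }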
--- a/src/hotspot/cpu/sparc/gc/g1/g1BarrierSetAssembler_sparc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/sparc/gc/g1/g1BarrierSetAssembler_sparc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -27,11 +27,10 @@
 #include "gc/g1/g1BarrierSet.hpp"
 #include "gc/g1/g1CardTable.hpp"
 #include "gc/g1/g1BarrierSetAssembler.hpp"
+#include "gc/g1/g1ThreadLocalData.hpp"
 #include "gc/g1/heapRegion.hpp"
-#include "gc/shared/collectedHeap.hpp"
 #include "interpreter/interp_masm.hpp"
 #include "runtime/sharedRuntime.hpp"
-#include "runtime/thread.hpp"
 #include "utilities/macros.hpp"
 
 #define __ masm->
@@ -46,11 +45,10 @@
     Label filtered;
     // Is marking active?
     if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
-      __ ld(G2, in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_active()), tmp);
+      __ ld(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
     } else {
-      guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1,
-                "Assumption");
-      __ ldsb(G2, in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_active()), tmp);
+      guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+      __ ldsb(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
     }
     // Is marking active?
     __ cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
@@ -91,3 +89,408 @@
   __ delayed()->mov(count->after_save(), O1);
   __ restore();
 }
+
+#undef __
+
+static address satb_log_enqueue_with_frame = NULL;
+static u_char* satb_log_enqueue_with_frame_end = NULL;
+
+static address satb_log_enqueue_frameless = NULL;
+static u_char* satb_log_enqueue_frameless_end = NULL;
+
+static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Buffer size in bytes, passed to BufferBlob::create.
+
+static void generate_satb_log_enqueue(bool with_frame) {
+  BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
+  CodeBuffer buf(bb);
+  MacroAssembler masm(&buf);
+
+#define __ masm.
+
+  address start = __ pc();
+  Register pre_val;
+
+  Label refill, restart;
+  if (with_frame) {
+    __ save_frame(0);
+    pre_val = I0;  // Was O0 before the save.
+  } else {
+    pre_val = O0;
+  }
+
+  int satb_q_index_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
+  int satb_q_buf_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
+
+  assert(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t) &&
+         in_bytes(SATBMarkQueue::byte_width_of_buf()) == sizeof(intptr_t),
+         "check sizes in assembly below");
+
+  __ bind(restart);
+
+  // Load the index into the SATB buffer. SATBMarkQueue::_index is a size_t
+  // so ld_ptr is appropriate.
+  __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
+
+  // index == 0?
+  __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
+
+  __ ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
+  __ sub(L0, oopSize, L0);
+
+  __ st_ptr(pre_val, L1, L0);  // [_buf + index] := pre_val
+  if (!with_frame) {
+    // Use return-from-leaf
+    __ retl();
+    __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
+  } else {
+    // Not delayed.
+    __ st_ptr(L0, G2_thread, satb_q_index_byte_offset);
+  }
+  if (with_frame) {
+    __ ret();
+    __ delayed()->restore();
+  }
+  __ bind(refill);
+
+  address handle_zero =
+    CAST_FROM_FN_PTR(address,
+                     &SATBMarkQueueSet::handle_zero_index_for_thread);
+  // This should be rare enough that we can afford to save all the
+  // scratch registers that the calling context might be using.
+  __ mov(G1_scratch, L0);
+  __ mov(G3_scratch, L1);
+  __ mov(G4, L2);
+  // We need the value of O0 above (for the write into the buffer), so we
+  // save and restore it.
+  __ mov(O0, L3);
+  // Since the call will overwrite O7, we save and restore that, as well.
+  __ mov(O7, L4);
+  __ call_VM_leaf(L5, handle_zero, G2_thread);
+  __ mov(L0, G1_scratch);
+  __ mov(L1, G3_scratch);
+  __ mov(L2, G4);
+  __ mov(L3, O0);
+  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
+  __ delayed()->mov(L4, O7);
+
+  if (with_frame) {
+    satb_log_enqueue_with_frame = start;
+    satb_log_enqueue_with_frame_end = __ pc();
+  } else {
+    satb_log_enqueue_frameless = start;
+    satb_log_enqueue_frameless_end = __ pc();
+  }
+
+#undef __
+}
+
+#define __ masm->
+
+void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
+                                                 Register obj,
+                                                 Register index,
+                                                 int offset,
+                                                 Register pre_val,
+                                                 Register tmp,
+                                                 bool preserve_o_regs) {
+  Label filtered;
+
+  if (obj == noreg) {
+    // We are not loading the previous value so make
+    // sure that we don't trash the value in pre_val
+    // with the code below.
+    assert_different_registers(pre_val, tmp);
+  } else {
+    // We will be loading the previous value
+    // in this code so...
+    assert(offset == 0 || index == noreg, "choose one");
+    assert(pre_val == noreg, "check this code");
+  }
+
+  // Is marking active?
+  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+    __ ld(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
+  } else {
+    guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+    __ ldsb(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
+  }
+
+  // Is marking active?
+  __ cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
+
+  // Do we need to load the previous value?
+  if (obj != noreg) {
+    // Load the previous value...
+    if (index == noreg) {
+      if (Assembler::is_simm13(offset)) {
+        __ load_heap_oop(obj, offset, tmp);
+      } else {
+        __ set(offset, tmp);
+        __ load_heap_oop(obj, tmp, tmp);
+      }
+    } else {
+      __ load_heap_oop(obj, index, tmp);
+    }
+    // Previous value has been loaded into tmp
+    pre_val = tmp;
+  }
+
+  assert(pre_val != noreg, "must have a real register");
+
+  // Is the previous value null?
+  __ cmp_and_brx_short(pre_val, G0, Assembler::equal, Assembler::pt, filtered);
+
+  // OK, it's not filtered, so we'll need to call enqueue. In the normal
+  // case, pre_val will be a scratch G-reg, but there are some cases in
+  // which it's an O-reg. In the former case, do a normal call; in the
+  // latter, save a frame here and call the frameless version.
+
+  guarantee(pre_val->is_global() || pre_val->is_out(),
+            "Or we need to think harder.");
+
+  if (pre_val->is_global() && !preserve_o_regs) {
+    __ call(satb_log_enqueue_with_frame);
+    __ delayed()->mov(pre_val, O0);
+  } else {
+    __ save_frame(0);
+    __ call(satb_log_enqueue_frameless);
+    __ delayed()->mov(pre_val->after_save(), O0);
+    __ restore();
+  }
+
+  __ bind(filtered);
+}
+
+#undef __
+
+static address dirty_card_log_enqueue = 0;
+static u_char* dirty_card_log_enqueue_end = 0;
+
+// This code assumes that O0 contains the object address.
+static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
+  BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
+  CodeBuffer buf(bb);
+  MacroAssembler masm(&buf);
+#define __ masm.
+  address start = __ pc();
+
+  Label not_already_dirty, restart, refill, young_card;
+
+  __ srlx(O0, CardTable::card_shift, O0);
+  AddressLiteral addrlit(byte_map_base);
+  __ set(addrlit, O1); // O1 := <card table base>
+  __ ldub(O0, O1, O2); // O2 := [O0 + O1]
+
+  __ cmp_and_br_short(O2, G1CardTable::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);
+
+  __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
+  __ ldub(O0, O1, O2); // O2 := [O0 + O1]
+
+  assert(G1CardTable::dirty_card_val() == 0, "otherwise check this code");
+  __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
+
+  __ bind(young_card);
+  // The card is young, or (falling through from the reload above) already
+  // dirty: nothing to enqueue, return.
+  // Use return-from-leaf
+  __ retl();
+  __ delayed()->nop();
+
+  // Not dirty.
+  __ bind(not_already_dirty);
+
+  // Get O0 + O1 into a reg by itself
+  __ add(O0, O1, O3);
+
+  // First, dirty it.
+  __ stb(G0, O3, G0);  // [cardPtr] := 0  (i.e., dirty).
+
+  int dirty_card_q_index_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
+  int dirty_card_q_buf_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
+  __ bind(restart);
+
+  // Load the index into the update buffer. DirtyCardQueue::_index is
+  // a size_t so ld_ptr is appropriate here.
+  __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);
+
+  // index == 0?
+  __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
+
+  __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
+  __ sub(L0, oopSize, L0);
+
+  __ st_ptr(O3, L1, L0);  // [_buf + index] := O3 (the card address)
+  // Use return-from-leaf
+  __ retl();
+  __ delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);
+
+  __ bind(refill);
+  address handle_zero =
+    CAST_FROM_FN_PTR(address,
+                     &DirtyCardQueueSet::handle_zero_index_for_thread);
+  // This should be rare enough that we can afford to save all the
+  // scratch registers that the calling context might be using.
+  __ mov(G1_scratch, L3);
+  __ mov(G3_scratch, L5);
+  // We need the value of O3 above (for the write into the buffer), so we
+  // save and restore it.
+  __ mov(O3, L6);
+  // Since the call will overwrite O7, we save and restore that, as well.
+  __ mov(O7, L4);
+
+  __ call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
+  __ mov(L3, G1_scratch);
+  __ mov(L5, G3_scratch);
+  __ mov(L6, O3);
+  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
+  __ delayed()->mov(L4, O7);
+
+  dirty_card_log_enqueue = start;
+  dirty_card_log_enqueue_end = __ pc();
+  // XXX Should have a guarantee here about not going off the end!
+  // Does it already do so?  Do an experiment...
+
+#undef __
+
+}
+
+#define __ masm->
+
+void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Register store_addr, Register new_val, Register tmp) {
+  Label filtered;
+  MacroAssembler* post_filter_masm = masm;
+
+  if (new_val == G0) return;
+
+  G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
+
+  if (G1RSBarrierRegionFilter) {
+    __ xor3(store_addr, new_val, tmp);
+    __ srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
+
+    __ cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
+  }
+
+  // If the "store_addr" register is an "in" or "local" register, move it to
+  // a scratch reg so we can pass it as an argument.
+  bool use_scr = !(store_addr->is_global() || store_addr->is_out());
+  // Pick a scratch register different from "tmp".
+  Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
+  // Make sure we use up the delay slot!
+  if (use_scr) {
+    post_filter_masm->mov(store_addr, scr);
+  } else {
+    post_filter_masm->nop();
+  }
+  __ save_frame(0);
+  __ call(dirty_card_log_enqueue);
+  if (use_scr) {
+    __ delayed()->mov(scr, O0);
+  } else {
+    __ delayed()->mov(store_addr->after_save(), O0);
+  }
+  __ restore();
+
+  __ bind(filtered);
+}
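The G1RSBarrierRegionFilter test above (xor3 followed by srlx) is G1's cross-region filter. In plain C++ terms (a sketch; the region size is an assumed value, the real one comes from HeapRegion::LogOfHRGrainBytes):

    #include <cstdint>

    const int LOG_REGION_BYTES = 20;  // assumed 1 MiB regions, for illustration

    // A store needs a dirty-card enqueue only if the field and the new value
    // lie in different heap regions; same-region references never need
    // remembered-set entries.
    bool crosses_region(uintptr_t store_addr, uintptr_t new_val) {
      return ((store_addr ^ new_val) >> LOG_REGION_BYTES) != 0;
    }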
+
+void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                         Register val, Address dst, Register tmp) {
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool in_concurrent_root = (decorators & IN_CONCURRENT_ROOT) != 0;
+
+  bool needs_pre_barrier = in_heap || in_concurrent_root;
+  // No need for post barrier if storing NULL
+  bool needs_post_barrier = val != G0 && in_heap;
+
+  bool on_array = (decorators & IN_HEAP_ARRAY) != 0;
+  bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
+  bool precise = on_array || on_anonymous;
+
+  Register index = dst.has_index() ? dst.index() : noreg;
+  int disp = dst.has_disp() ? dst.disp() : 0;
+
+  if (needs_pre_barrier) {
+    // Load and record the previous value.
+    g1_write_barrier_pre(masm, dst.base(), index, disp,
+                         noreg /* pre_val */,
+                         tmp, true /*preserve_o_regs*/);
+  }
+
+  Register new_val = val;
+  if (needs_post_barrier) {
+    // G1 barrier needs uncompressed oop for region cross check.
+    if (UseCompressedOops && val != G0) {
+      new_val = tmp;
+      __ mov(val, new_val);
+    }
+  }
+
+  BarrierSetAssembler::store_at(masm, decorators, type, val, dst, tmp);
+
+  if (needs_post_barrier) {
+    Register base = dst.base();
+    if (precise) {
+      if (!dst.has_index()) {
+        __ add(base, disp, base);
+      } else {
+        assert(!dst.has_disp(), "not supported yet");
+        __ add(base, index, base);
+      }
+    }
+    g1_write_barrier_post(masm, base, new_val, tmp);
+  }
+}
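Read as straight-line code, oop_store_at orders the barriers around the store like this (a sketch; the two helpers stand in for the emitted barrier code and are not real HotSpot functions):

    // Stand-ins for the emitted barrier code above.
    void satb_pre_barrier_log_old_value(void** field);
    void g1_post_barrier(void** field, void* new_val);

    void g1_oop_store_model(void** field, void* new_val) {
      satb_pre_barrier_log_old_value(field);  // log old *field if marking is active
      *field = new_val;                       // BarrierSetAssembler::store_at
      if (new_val != nullptr) {               // storing NULL needs no post barrier
        g1_post_barrier(field, new_val);      // dirty and enqueue the card if needed
      }
    }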
+
+void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                    Address src, Register dst, Register tmp) {
+  bool on_oop = type == T_OBJECT || type == T_ARRAY;
+  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
+  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
+  bool on_reference = on_weak || on_phantom;
+  // Load the value of the referent field.
+  ModRefBarrierSetAssembler::load_at(masm, decorators, type, src, dst, tmp);
+  if (on_oop && on_reference) {
+    // Generate the G1 pre-barrier code to log the value of
+    // the referent field in an SATB buffer. Note with
+    // these parameters the pre-barrier does not generate
+    // the load of the previous value
+
+    Register pre_val = dst;
+    bool saved = false;
+    if (pre_val->is_in()) {
+      // The g1_write_barrier_pre method assumes that the pre_val
+      // is not in an input register.
+      __ save_frame_and_mov(0, pre_val, O0);
+      pre_val = O0;
+      saved = true;
+    }
+
+    g1_write_barrier_pre(masm, noreg /* obj */, noreg /* index */, 0 /* offset */,
+                         pre_val /* pre_val */,
+                         tmp /* tmp */,
+                         true /* preserve_o_regs */);
+
+    if (saved) {
+      __ restore();
+    }
+  }
+}
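The pre-barrier requested here for weak/phantom reference loads is the same SATB filter-then-enqueue sequence used for stores, with the just-loaded referent as pre_val. Its logic, modeled in C++ (a sketch; the field names are placeholders for the offsets read from G1ThreadLocalData):

    #include <cstddef>

    struct SatbQueueModel {
      bool   active;  // is concurrent marking in progress?
      size_t index;   // byte offset of the next free slot, counts down to 0
      void** buf;
    };

    void satb_pre_barrier_model(SatbQueueModel& q, void* pre_val) {
      if (!q.active) return;           // filtered: not marking
      if (pre_val == nullptr) return;  // filtered: null never needs logging
      if (q.index == 0) {
        // slow path: SATBMarkQueueSet::handle_zero_index_for_thread
        // (the real code refills the buffer and retries the enqueue)
        return;
      }
      q.index -= sizeof(void*);
      q.buf[q.index / sizeof(void*)] = pre_val;
    }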
+
+#undef __
+
+void G1BarrierSetAssembler::barrier_stubs_init() {
+  if (dirty_card_log_enqueue == 0) {
+    G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
+    CardTable *ct = bs->card_table();
+    generate_dirty_card_log_enqueue(ct->byte_map_base());
+    assert(dirty_card_log_enqueue != 0, "postcondition.");
+  }
+  if (satb_log_enqueue_with_frame == 0) {
+    generate_satb_log_enqueue(true);
+    assert(satb_log_enqueue_with_frame != 0, "postcondition.");
+  }
+  if (satb_log_enqueue_frameless == 0) {
+    generate_satb_log_enqueue(false);
+    assert(satb_log_enqueue_frameless != 0, "postcondition.");
+  }
+}
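barrier_stubs_init subsumes the old free function g1_barrier_stubs_init() (removed from macroAssembler_sparc.cpp further down) as a virtual on the barrier set's assembler, so only barrier sets that need stubs generate them. The generate-once pattern, reduced to a sketch:

    static void* stub = nullptr;   // illustrative; stands for the three stub pointers
    void* generate_stub();         // stand-in for the generate_* functions above

    void barrier_stubs_init_model() {
      if (stub == nullptr) {
        stub = generate_stub();    // generated exactly once, then reused
      }
    }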
--- a/src/hotspot/cpu/sparc/gc/g1/g1BarrierSetAssembler_sparc.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/sparc/gc/g1/g1BarrierSetAssembler_sparc.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -30,10 +30,19 @@
 
 class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
 protected:
-  virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
-                                               Register addr, Register count);
-  virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
-                                                Register addr, Register count, Register tmp);
+  virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count);
+  virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp);
+
+  void g1_write_barrier_pre(MacroAssembler* masm, Register obj, Register index, int offset, Register pre_val, Register tmp, bool preserve_o_regs);
+  void g1_write_barrier_post(MacroAssembler* masm, Register store_addr, Register new_val, Register tmp);
+
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Register val, Address dst, Register tmp);
+
+public:
+  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                       Address src, Register dst, Register tmp);
+  virtual void barrier_stubs_init();
 };
 
 #endif // CPU_SPARC_GC_G1_G1BARRIERSETASSEMBLER_SPARC_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/cpu/sparc/gc/shared/barrierSetAssembler_sparc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "interpreter/interp_masm.hpp"
+
+#define __ masm->
+
+void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                   Register val, Address dst, Register tmp) {
+  bool on_heap = (decorators & IN_HEAP) != 0;
+  bool on_root = (decorators & IN_ROOT) != 0;
+  bool oop_not_null = (decorators & OOP_NOT_NULL) != 0;
+
+  switch (type) {
+  case T_ARRAY:
+  case T_OBJECT: {
+    if (on_heap) {
+      if (dst.has_disp() && !Assembler::is_simm13(dst.disp())) {
+        assert(!dst.has_index(), "not supported yet");
+        __ set(dst.disp(), tmp);
+        dst = Address(dst.base(), tmp);
+      }
+      if (UseCompressedOops) {
+        assert(dst.base() != val, "not enough registers");
+        if (oop_not_null) {
+          __ encode_heap_oop_not_null(val);
+        } else {
+          __ encode_heap_oop(val);
+        }
+        __ st(val, dst);
+      } else {
+        __ st_ptr(val, dst);
+      }
+    } else {
+      assert(on_root, "why else?");
+      __ st_ptr(val, dst);
+    }
+    break;
+  }
+  default: Unimplemented();
+  }
+}
+
+void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                  Address src, Register dst, Register tmp) {
+  bool on_heap = (decorators & IN_HEAP) != 0;
+  bool on_root = (decorators & IN_ROOT) != 0;
+  bool oop_not_null = (decorators & OOP_NOT_NULL) != 0;
+
+  switch (type) {
+  case T_ARRAY:
+  case T_OBJECT: {
+    if (on_heap) {
+      if (src.has_disp() && !Assembler::is_simm13(src.disp())) {
+        assert(!src.has_index(), "not supported yet");
+        __ set(src.disp(), tmp);
+        src = Address(src.base(), tmp);
+      }
+      if (UseCompressedOops) {
+        __ lduw(src, dst);
+        if (oop_not_null) {
+          __ decode_heap_oop_not_null(dst);
+        } else {
+          __ decode_heap_oop(dst);
+        }
+      } else {
+        __ ld_ptr(src, dst);
+      }
+    } else {
+      assert(on_root, "why else?");
+      __ ld_ptr(src, dst);
+    }
+    break;
+  }
+  default: Unimplemented();
+  }
+}
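Both functions pair the raw memory access with compressed-oop encoding or decoding when UseCompressedOops is set (lduw + decode on loads, encode + st on stores). The arithmetic, under assumed zero-base, shift-3 compression (HotSpot derives the real base and shift from the heap placement):

    #include <cstdint>

    const uintptr_t HEAP_BASE = 0;  // assumption: zero-based compressed oops
    const int       OOP_SHIFT = 3;  // assumption: 8-byte object alignment

    uint32_t encode_oop(uintptr_t oop) {
      return (uint32_t)((oop - HEAP_BASE) >> OOP_SHIFT);
    }

    uintptr_t decode_oop(uint32_t narrow_oop) {
      return HEAP_BASE + ((uintptr_t)narrow_oop << OOP_SHIFT);
    }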
--- a/src/hotspot/cpu/sparc/gc/shared/barrierSetAssembler_sparc.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/sparc/gc/shared/barrierSetAssembler_sparc.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -37,6 +37,14 @@
                                   Register src, Register dst, Register count) {}
   virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Register src, Register dst, Register count) {}
+
+  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                        Register src, Address dst, Register tmp);
+
+  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                       Address src, Register dst, Register tmp);
+
+  virtual void barrier_stubs_init() {}
 };
 
 #endif // CPU_SPARC_GC_SHARED_BARRIERSETASSEMBLER_SPARC_HPP
--- a/src/hotspot/cpu/sparc/gc/shared/cardTableBarrierSetAssembler_sparc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/sparc/gc/shared/cardTableBarrierSetAssembler_sparc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -29,7 +29,6 @@
 #include "gc/shared/cardTable.hpp"
 #include "gc/shared/cardTableBarrierSet.hpp"
 #include "gc/shared/cardTableBarrierSetAssembler.hpp"
-#include "gc/shared/collectedHeap.hpp"
 #include "interpreter/interp_masm.hpp"
 
 #define __ masm->
@@ -44,7 +43,7 @@
 
 void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                                     Register addr, Register count, Register tmp) {
-  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(Universe::heap()->barrier_set());
+  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
   CardTable* ct = ctbs->card_table();
   assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
   assert_different_registers(addr, count, tmp);
@@ -70,3 +69,45 @@
 
   __ BIND(L_done);
 }
+
+void CardTableBarrierSetAssembler::card_table_write(MacroAssembler* masm,
+                                                    jbyte* byte_map_base,
+                                                    Register tmp, Register obj) {
+  __ srlx(obj, CardTable::card_shift, obj);
+  assert(tmp != obj, "need separate temp reg");
+  __ set((address) byte_map_base, tmp);
+  __ stb(G0, tmp, obj);
+}
+
+void CardTableBarrierSetAssembler::card_write_barrier_post(MacroAssembler* masm, Register store_addr, Register new_val, Register tmp) {
+  // If we're writing constant NULL, we can skip the write barrier.
+  if (new_val == G0) return;
+  CardTableBarrierSet* bs = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
+  card_table_write(masm, bs->card_table()->byte_map_base(), tmp, store_addr);
+}
+
+void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                                Register val, Address dst, Register tmp) {
+  bool in_heap = (decorators & IN_HEAP) != 0;
+
+  bool on_array = (decorators & IN_HEAP_ARRAY) != 0;
+  bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
+  bool precise = on_array || on_anonymous;
+
+  // No need for post barrier if storing NULL
+  bool needs_post_barrier = val != G0 && in_heap;
+
+  BarrierSetAssembler::store_at(masm, decorators, type, val, dst, tmp);
+  if (needs_post_barrier) {
+    Register base = dst.base();
+    if (precise) {
+      if (!dst.has_index()) {
+        __ add(base, dst.disp(), base);
+      } else {
+        assert(!dst.has_disp(), "not supported yet");
+        __ add(base, dst.index(), base);
+      }
+    }
+    card_write_barrier_post(masm, base, val, tmp);
+  }
+}
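card_table_write above is the whole post-barrier: shift the address down to a card index and store a zero byte. As C++ (a sketch; the card size is an assumed value):

    #include <cstdint>

    const int     CARD_SHIFT = 9;  // assumed 512-byte cards
    const uint8_t DIRTY_CARD = 0;  // the stb of G0 stores zero: dirty_card_val() == 0

    // In HotSpot, byte_map_base is pre-biased so it can be indexed with the
    // shifted raw address. For precise (array) stores the element address is
    // used; otherwise the card of the object base suffices.
    void card_write_barrier(uint8_t* byte_map_base, uintptr_t store_addr) {
      byte_map_base[store_addr >> CARD_SHIFT] = DIRTY_CARD;
    }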
--- a/src/hotspot/cpu/sparc/gc/shared/cardTableBarrierSetAssembler_sparc.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/sparc/gc/shared/cardTableBarrierSetAssembler_sparc.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -32,6 +32,13 @@
 protected:
   virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                 Register addr, Register count, Register tmp);
+
+  void card_table_write(MacroAssembler* masm, jbyte* byte_map_base, Register tmp, Register obj);
+
+  void card_write_barrier_post(MacroAssembler* masm, Register store_addr, Register new_val, Register tmp);
+
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Register val, Address dst, Register tmp);
 };
 
 #endif // CPU_SPARC_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_SPARC_HPP
--- a/src/hotspot/cpu/sparc/gc/shared/modRefBarrierSetAssembler_sparc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/sparc/gc/shared/modRefBarrierSetAssembler_sparc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -55,3 +55,12 @@
     }
   }
 }
+
+void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                         Register val, Address dst, Register tmp) {
+  if (type == T_OBJECT || type == T_ARRAY) {
+    oop_store_at(masm, decorators, type, val, dst, tmp);
+  } else {
+    BarrierSetAssembler::store_at(masm, decorators, type, val, dst, tmp);
+  }
+}
--- a/src/hotspot/cpu/sparc/gc/shared/modRefBarrierSetAssembler_sparc.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/sparc/gc/shared/modRefBarrierSetAssembler_sparc.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -28,16 +28,27 @@
 #include "asm/macroAssembler.hpp"
 #include "gc/shared/barrierSetAssembler.hpp"
 
+// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other
+// than T_OBJECT/T_ARRAY (oops). The oop accesses are routed to the protected
+// accessors, which the concrete BarrierSetAssembler subclasses override.
+
 class ModRefBarrierSetAssembler: public BarrierSetAssembler {
 protected:
-  virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count) {}
-  virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp) {}
+  virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
+                                               Register addr, Register count) {}
+  virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
+                                                Register addr, Register count, Register tmp) {}
 
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Register val, Address dst, Register tmp) = 0;
 public:
   virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Register src, Register dst, Register count);
   virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Register src, Register dst, Register count);
+
+  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                        Register val, Address dst, Register tmp);
 };
 
 #endif // CPU_SPARC_GC_SHARED_MODREFBARRIERSETASSEMBLER_SPARC_HPP
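Taken together with the shared and GC-specific files above, the class shape after this patch looks like the following (illustrative; the real methods take MacroAssembler*, DecoratorSet, BasicType and register arguments):

    struct BarrierSetAssemblerModel {               // raw accesses, no barriers
      virtual void store_at() { /* plain store */ }
      virtual ~BarrierSetAssemblerModel() {}
    };

    struct ModRefModel : BarrierSetAssemblerModel {
      virtual void oop_store_at() = 0;              // concrete barrier sets fill this in
      void store_at() override { /* oops -> oop_store_at(); others -> raw store */ }
    };

    struct CardTableModel : ModRefModel {
      void oop_store_at() override { /* store, then card_write_barrier_post */ }
    };

    struct G1Model : ModRefModel {
      void oop_store_at() override { /* SATB pre-barrier, store, dirty-card post */ }
    };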
--- a/src/hotspot/cpu/sparc/interp_masm_sparc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/sparc/interp_masm_sparc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -753,7 +753,7 @@
   resolve_oop_handle(result);
   // Add in the index
   add(result, tmp, result);
-  load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result);
+  load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result, tmp);
 }
 
 
--- a/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -26,9 +26,9 @@
 #include "jvm.h"
 #include "asm/macroAssembler.inline.hpp"
 #include "compiler/disassembler.hpp"
-#include "gc/shared/cardTable.hpp"
-#include "gc/shared/cardTableBarrierSet.hpp"
 #include "gc/shared/collectedHeap.inline.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "interpreter/interpreter.hpp"
 #include "memory/resourceArea.hpp"
 #include "memory/universe.hpp"
@@ -45,12 +45,6 @@
 #include "runtime/stubRoutines.hpp"
 #include "utilities/align.hpp"
 #include "utilities/macros.hpp"
-#if INCLUDE_ALL_GCS
-#include "gc/g1/g1BarrierSet.hpp"
-#include "gc/g1/g1CardTable.hpp"
-#include "gc/g1/g1CollectedHeap.inline.hpp"
-#include "gc/g1/heapRegion.hpp"
-#endif // INCLUDE_ALL_GCS
 #ifdef COMPILER2
 #include "opto/intrinsicnode.hpp"
 #endif
@@ -174,6 +168,24 @@
   return r;
 }
 
+void MacroAssembler::resolve_jobject(Register value, Register tmp) {
+  Label done, not_weak;
+  br_null(value, false, Assembler::pn, done); // Use NULL as-is.
+  delayed()->andcc(value, JNIHandles::weak_tag_mask, G0); // Test for jweak
+  brx(Assembler::zero, true, Assembler::pt, not_weak);
+  delayed()->nop();
+  access_load_at(T_OBJECT, IN_ROOT | ON_PHANTOM_OOP_REF,
+                 Address(value, -JNIHandles::weak_tag_value), value, tmp);
+  verify_oop(value);
+  br (Assembler::always, true, Assembler::pt, done);
+  delayed()->nop();
+  bind(not_weak);
+  access_load_at(T_OBJECT, IN_ROOT | IN_CONCURRENT_ROOT,
+                 Address(value, 0), value, tmp);
+  verify_oop(value);
+  bind(done);
+}
+
 void MacroAssembler::null_check(Register reg, int offset) {
   if (needs_explicit_null_check((intptr_t)offset)) {
     // provoke OS NULL exception if reg = NULL by
@@ -659,14 +671,6 @@
   }
 }
 
-void MacroAssembler::card_table_write(jbyte* byte_map_base,
-                                      Register tmp, Register obj) {
-  srlx(obj, CardTable::card_shift, obj);
-  assert(tmp != obj, "need separate temp reg");
-  set((address) byte_map_base, tmp);
-  stb(G0, tmp, obj);
-}
-
 
 void MacroAssembler::internal_sethi(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
   address save_pc;
@@ -3387,361 +3391,6 @@
 
   bind(no_reserved_zone_enabling);
 }
-
-///////////////////////////////////////////////////////////////////////////////////
-#if INCLUDE_ALL_GCS
-
-static address satb_log_enqueue_with_frame = NULL;
-static u_char* satb_log_enqueue_with_frame_end = NULL;
-
-static address satb_log_enqueue_frameless = NULL;
-static u_char* satb_log_enqueue_frameless_end = NULL;
-
-static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions?
-
-static void generate_satb_log_enqueue(bool with_frame) {
-  BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
-  CodeBuffer buf(bb);
-  MacroAssembler masm(&buf);
-
-#define __ masm.
-
-  address start = __ pc();
-  Register pre_val;
-
-  Label refill, restart;
-  if (with_frame) {
-    __ save_frame(0);
-    pre_val = I0;  // Was O0 before the save.
-  } else {
-    pre_val = O0;
-  }
-
-  int satb_q_index_byte_offset =
-    in_bytes(JavaThread::satb_mark_queue_offset() +
-             SATBMarkQueue::byte_offset_of_index());
-
-  int satb_q_buf_byte_offset =
-    in_bytes(JavaThread::satb_mark_queue_offset() +
-             SATBMarkQueue::byte_offset_of_buf());
-
-  assert(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t) &&
-         in_bytes(SATBMarkQueue::byte_width_of_buf()) == sizeof(intptr_t),
-         "check sizes in assembly below");
-
-  __ bind(restart);
-
-  // Load the index into the SATB buffer. SATBMarkQueue::_index is a size_t
-  // so ld_ptr is appropriate.
-  __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
-
-  // index == 0?
-  __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
-
-  __ ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
-  __ sub(L0, oopSize, L0);
-
-  __ st_ptr(pre_val, L1, L0);  // [_buf + index] := I0
-  if (!with_frame) {
-    // Use return-from-leaf
-    __ retl();
-    __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
-  } else {
-    // Not delayed.
-    __ st_ptr(L0, G2_thread, satb_q_index_byte_offset);
-  }
-  if (with_frame) {
-    __ ret();
-    __ delayed()->restore();
-  }
-  __ bind(refill);
-
-  address handle_zero =
-    CAST_FROM_FN_PTR(address,
-                     &SATBMarkQueueSet::handle_zero_index_for_thread);
-  // This should be rare enough that we can afford to save all the
-  // scratch registers that the calling context might be using.
-  __ mov(G1_scratch, L0);
-  __ mov(G3_scratch, L1);
-  __ mov(G4, L2);
-  // We need the value of O0 above (for the write into the buffer), so we
-  // save and restore it.
-  __ mov(O0, L3);
-  // Since the call will overwrite O7, we save and restore that, as well.
-  __ mov(O7, L4);
-  __ call_VM_leaf(L5, handle_zero, G2_thread);
-  __ mov(L0, G1_scratch);
-  __ mov(L1, G3_scratch);
-  __ mov(L2, G4);
-  __ mov(L3, O0);
-  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
-  __ delayed()->mov(L4, O7);
-
-  if (with_frame) {
-    satb_log_enqueue_with_frame = start;
-    satb_log_enqueue_with_frame_end = __ pc();
-  } else {
-    satb_log_enqueue_frameless = start;
-    satb_log_enqueue_frameless_end = __ pc();
-  }
-
-#undef __
-}
-
-void MacroAssembler::g1_write_barrier_pre(Register obj,
-                                          Register index,
-                                          int offset,
-                                          Register pre_val,
-                                          Register tmp,
-                                          bool preserve_o_regs) {
-  Label filtered;
-
-  if (obj == noreg) {
-    // We are not loading the previous value so make
-    // sure that we don't trash the value in pre_val
-    // with the code below.
-    assert_different_registers(pre_val, tmp);
-  } else {
-    // We will be loading the previous value
-    // in this code so...
-    assert(offset == 0 || index == noreg, "choose one");
-    assert(pre_val == noreg, "check this code");
-  }
-
-  // Is marking active?
-  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
-    ld(G2,
-       in_bytes(JavaThread::satb_mark_queue_offset() +
-                SATBMarkQueue::byte_offset_of_active()),
-       tmp);
-  } else {
-    guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1,
-              "Assumption");
-    ldsb(G2,
-         in_bytes(JavaThread::satb_mark_queue_offset() +
-                  SATBMarkQueue::byte_offset_of_active()),
-         tmp);
-  }
-
-  // Is marking active?
-  cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
-
-  // Do we need to load the previous value?
-  if (obj != noreg) {
-    // Load the previous value...
-    if (index == noreg) {
-      if (Assembler::is_simm13(offset)) {
-        load_heap_oop(obj, offset, tmp);
-      } else {
-        set(offset, tmp);
-        load_heap_oop(obj, tmp, tmp);
-      }
-    } else {
-      load_heap_oop(obj, index, tmp);
-    }
-    // Previous value has been loaded into tmp
-    pre_val = tmp;
-  }
-
-  assert(pre_val != noreg, "must have a real register");
-
-  // Is the previous value null?
-  cmp_and_brx_short(pre_val, G0, Assembler::equal, Assembler::pt, filtered);
-
-  // OK, it's not filtered, so we'll need to call enqueue.  In the normal
-  // case, pre_val will be a scratch G-reg, but there are some cases in
-  // which it's an O-reg.  In the first case, do a normal call.  In the
-  // latter, do a save here and call the frameless version.
-
-  guarantee(pre_val->is_global() || pre_val->is_out(),
-            "Or we need to think harder.");
-
-  if (pre_val->is_global() && !preserve_o_regs) {
-    call(satb_log_enqueue_with_frame);
-    delayed()->mov(pre_val, O0);
-  } else {
-    save_frame(0);
-    call(satb_log_enqueue_frameless);
-    delayed()->mov(pre_val->after_save(), O0);
-    restore();
-  }
-
-  bind(filtered);
-}
-
-static address dirty_card_log_enqueue = 0;
-static u_char* dirty_card_log_enqueue_end = 0;
-
-// This gets to assume that o0 contains the object address.
-static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
-  BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
-  CodeBuffer buf(bb);
-  MacroAssembler masm(&buf);
-#define __ masm.
-  address start = __ pc();
-
-  Label not_already_dirty, restart, refill, young_card;
-
-  __ srlx(O0, CardTable::card_shift, O0);
-  AddressLiteral addrlit(byte_map_base);
-  __ set(addrlit, O1); // O1 := <card table base>
-  __ ldub(O0, O1, O2); // O2 := [O0 + O1]
-
-  __ cmp_and_br_short(O2, G1CardTable::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);
-
-  __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
-  __ ldub(O0, O1, O2); // O2 := [O0 + O1]
-
-  assert(CardTable::dirty_card_val() == 0, "otherwise check this code");
-  __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
-
-  __ bind(young_card);
-  // We didn't take the branch, so we're already dirty: return.
-  // Use return-from-leaf
-  __ retl();
-  __ delayed()->nop();
-
-  // Not dirty.
-  __ bind(not_already_dirty);
-
-  // Get O0 + O1 into a reg by itself
-  __ add(O0, O1, O3);
-
-  // First, dirty it.
-  __ stb(G0, O3, G0);  // [cardPtr] := 0  (i.e., dirty).
-
-  int dirty_card_q_index_byte_offset =
-    in_bytes(JavaThread::dirty_card_queue_offset() +
-             DirtyCardQueue::byte_offset_of_index());
-  int dirty_card_q_buf_byte_offset =
-    in_bytes(JavaThread::dirty_card_queue_offset() +
-             DirtyCardQueue::byte_offset_of_buf());
-  __ bind(restart);
-
-  // Load the index into the update buffer. DirtyCardQueue::_index is
-  // a size_t so ld_ptr is appropriate here.
-  __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);
-
-  // index == 0?
-  __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
-
-  __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
-  __ sub(L0, oopSize, L0);
-
-  __ st_ptr(O3, L1, L0);  // [_buf + index] := I0
-  // Use return-from-leaf
-  __ retl();
-  __ delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);
-
-  __ bind(refill);
-  address handle_zero =
-    CAST_FROM_FN_PTR(address,
-                     &DirtyCardQueueSet::handle_zero_index_for_thread);
-  // This should be rare enough that we can afford to save all the
-  // scratch registers that the calling context might be using.
-  __ mov(G1_scratch, L3);
-  __ mov(G3_scratch, L5);
-  // We need the value of O3 above (for the write into the buffer), so we
-  // save and restore it.
-  __ mov(O3, L6);
-  // Since the call will overwrite O7, we save and restore that, as well.
-  __ mov(O7, L4);
-
-  __ call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
-  __ mov(L3, G1_scratch);
-  __ mov(L5, G3_scratch);
-  __ mov(L6, O3);
-  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
-  __ delayed()->mov(L4, O7);
-
-  dirty_card_log_enqueue = start;
-  dirty_card_log_enqueue_end = __ pc();
-  // XXX Should have a guarantee here about not going off the end!
-  // Does it already do so?  Do an experiment...
-
-#undef __
-
-}
-
-void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
-
-  Label filtered;
-  MacroAssembler* post_filter_masm = this;
-
-  if (new_val == G0) return;
-
-  G1BarrierSet* bs =
-    barrier_set_cast<G1BarrierSet>(Universe::heap()->barrier_set());
-  CardTable* ct = bs->card_table();
-
-  if (G1RSBarrierRegionFilter) {
-    xor3(store_addr, new_val, tmp);
-    srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
-
-    // XXX Should I predict this taken or not?  Does it matter?
-    cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
-  }
-
-  // If the "store_addr" register is an "in" or "local" register, move it to
-  // a scratch reg so we can pass it as an argument.
-  bool use_scr = !(store_addr->is_global() || store_addr->is_out());
-  // Pick a scratch register different from "tmp".
-  Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
-  // Make sure we use up the delay slot!
-  if (use_scr) {
-    post_filter_masm->mov(store_addr, scr);
-  } else {
-    post_filter_masm->nop();
-  }
-  save_frame(0);
-  call(dirty_card_log_enqueue);
-  if (use_scr) {
-    delayed()->mov(scr, O0);
-  } else {
-    delayed()->mov(store_addr->after_save(), O0);
-  }
-  restore();
-
-  bind(filtered);
-}
-
-// Called from init_globals() after universe_init() and before interpreter_init()
-void g1_barrier_stubs_init() {
-  CollectedHeap* heap = Universe::heap();
-  if (heap->kind() == CollectedHeap::G1) {
-    // Only needed for G1
-    if (dirty_card_log_enqueue == 0) {
-      G1BarrierSet* bs =
-        barrier_set_cast<G1BarrierSet>(heap->barrier_set());
-      CardTable *ct = bs->card_table();
-      generate_dirty_card_log_enqueue(ct->byte_map_base());
-      assert(dirty_card_log_enqueue != 0, "postcondition.");
-    }
-    if (satb_log_enqueue_with_frame == 0) {
-      generate_satb_log_enqueue(true);
-      assert(satb_log_enqueue_with_frame != 0, "postcondition.");
-    }
-    if (satb_log_enqueue_frameless == 0) {
-      generate_satb_log_enqueue(false);
-      assert(satb_log_enqueue_frameless != 0, "postcondition.");
-    }
-  }
-}
-
-#endif // INCLUDE_ALL_GCS
-///////////////////////////////////////////////////////////////////////////////////
-
-void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
-  // If we're writing constant NULL, we can skip the write barrier.
-  if (new_val == G0) return;
-  CardTableBarrierSet* bs =
-    barrier_set_cast<CardTableBarrierSet>(Universe::heap()->barrier_set());
-  CardTable* ct = bs->card_table();
-
-  assert(bs->kind() == BarrierSet::CardTableBarrierSet, "wrong barrier");
-  card_table_write(ct->byte_map_base(), tmp, store_addr);
-}
-
 // ((OopHandle)result).resolve();
 void MacroAssembler::resolve_oop_handle(Register result) {
   // OopHandle::resolve is an indirection.
@@ -3786,65 +3435,63 @@
   }
 }
 
-void MacroAssembler::load_heap_oop(const Address& s, Register d) {
-  if (UseCompressedOops) {
-    lduw(s, d);
-    decode_heap_oop(d);
+void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
+                                     Register src, Address dst, Register tmp) {
+  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bool as_raw = (decorators & AS_RAW) != 0;
+  if (as_raw) {
+    bs->BarrierSetAssembler::store_at(this, decorators, type, src, dst, tmp);
   } else {
-    ld_ptr(s, d);
+    bs->store_at(this, decorators, type, src, dst, tmp);
   }
 }
 
-void MacroAssembler::load_heap_oop(Register s1, Register s2, Register d) {
-   if (UseCompressedOops) {
-    lduw(s1, s2, d);
-    decode_heap_oop(d, d);
+void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
+                                    Address src, Register dst, Register tmp) {
+  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bool as_raw = (decorators & AS_RAW) != 0;
+  if (as_raw) {
+    bs->BarrierSetAssembler::load_at(this, decorators, type, src, dst, tmp);
   } else {
-    ld_ptr(s1, s2, d);
+    bs->load_at(this, decorators, type, src, dst, tmp);
   }
 }
 
-void MacroAssembler::load_heap_oop(Register s1, int simm13a, Register d) {
-   if (UseCompressedOops) {
-    lduw(s1, simm13a, d);
-    decode_heap_oop(d, d);
+void MacroAssembler::load_heap_oop(const Address& s, Register d, Register tmp, DecoratorSet decorators) {
+  access_load_at(T_OBJECT, IN_HEAP | decorators, s, d, tmp);
+}
+
+void MacroAssembler::load_heap_oop(Register s1, Register s2, Register d, Register tmp, DecoratorSet decorators) {
+  access_load_at(T_OBJECT, IN_HEAP | decorators, Address(s1, s2), d, tmp);
+}
+
+void MacroAssembler::load_heap_oop(Register s1, int simm13a, Register d, Register tmp, DecoratorSet decorators) {
+  access_load_at(T_OBJECT, IN_HEAP | decorators, Address(s1, simm13a), d, tmp);
+}
+
+void MacroAssembler::load_heap_oop(Register s1, RegisterOrConstant s2, Register d, Register tmp, DecoratorSet decorators) {
+  if (s2.is_constant()) {
+    access_load_at(T_OBJECT, IN_HEAP | decorators, Address(s1, s2.as_constant()), d, tmp);
   } else {
-    ld_ptr(s1, simm13a, d);
+    access_load_at(T_OBJECT, IN_HEAP | decorators, Address(s1, s2.as_register()), d, tmp);
   }
 }
 
-void MacroAssembler::load_heap_oop(Register s1, RegisterOrConstant s2, Register d) {
-  if (s2.is_constant())  load_heap_oop(s1, s2.as_constant(), d);
-  else                   load_heap_oop(s1, s2.as_register(), d);
+void MacroAssembler::store_heap_oop(Register d, Register s1, Register s2, Register tmp, DecoratorSet decorators) {
+  access_store_at(T_OBJECT, IN_HEAP | decorators, d, Address(s1, s2), tmp);
 }
 
-void MacroAssembler::store_heap_oop(Register d, Register s1, Register s2) {
-  if (UseCompressedOops) {
-    assert(s1 != d && s2 != d, "not enough registers");
-    encode_heap_oop(d);
-    st(d, s1, s2);
+void MacroAssembler::store_heap_oop(Register d, Register s1, int simm13a, Register tmp, DecoratorSet decorators) {
+  access_store_at(T_OBJECT, IN_HEAP | decorators, d, Address(s1, simm13a), tmp);
+}
+
+void MacroAssembler::store_heap_oop(Register d, const Address& a, int offset, Register tmp, DecoratorSet decorators) {
+  if (a.has_index()) {
+    assert(!a.has_disp(), "not supported yet");
+    assert(offset == 0, "not supported yet");
+    access_store_at(T_OBJECT, IN_HEAP | decorators, d, Address(a.base(), a.index()), tmp);
   } else {
-    st_ptr(d, s1, s2);
-  }
-}
-
-void MacroAssembler::store_heap_oop(Register d, Register s1, int simm13a) {
-  if (UseCompressedOops) {
-    assert(s1 != d, "not enough registers");
-    encode_heap_oop(d);
-    st(d, s1, simm13a);
-  } else {
-    st_ptr(d, s1, simm13a);
-  }
-}
-
-void MacroAssembler::store_heap_oop(Register d, const Address& a, int offset) {
-  if (UseCompressedOops) {
-    assert(a.base() != d, "not enough registers");
-    encode_heap_oop(d);
-    st(d, a, offset);
-  } else {
-    st_ptr(d, a, offset);
+    access_store_at(T_OBJECT, IN_HEAP | decorators, d, Address(a.base(), a.disp() + offset), tmp);
   }
 }
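The new resolve_jobject relies on the JNI handle tagging scheme: jweaks carry a low tag bit that selects the decorator set and is subtracted before the load. A C++ model (a sketch; the tag constants are assumptions matching JNIHandles::weak_tag_mask and weak_tag_value):

    #include <cstdint>

    const uintptr_t WEAK_TAG_MASK  = 1;  // assumed low-bit tag for jweaks
    const uintptr_t WEAK_TAG_VALUE = 1;

    void* resolve_jobject_model(uintptr_t handle) {
      if (handle == 0) return nullptr;   // NULL handle is used as-is
      if (handle & WEAK_TAG_MASK) {
        // jweak: loaded with ON_PHANTOM_OOP_REF so the GC-aware load_at can
        // apply the keep-alive barrier weak handles need during marking
        return *(void**)(handle - WEAK_TAG_VALUE);
      }
      // strong jobject: loaded with IN_ROOT | IN_CONCURRENT_ROOT
      return *(void**)handle;
    }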
 
--- a/src/hotspot/cpu/sparc/macroAssembler_sparc.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -703,6 +703,9 @@
   // cas_ptr will perform cas for 32 bit VM's and casx for 64 bit VM's
   inline void cas_ptr(  Register s1, Register s2, Register d);
 
+  // Resolve a jobject or jweak
+  void resolve_jobject(Register value, Register tmp);
+
   // Functions for isolating 64 bit shifts for LP64
   inline void sll_ptr( Register s1, Register s2, Register d );
   inline void sll_ptr( Register s1, int imm6a,   Register d );
@@ -974,13 +977,25 @@
   void store_klass_gap(Register s, Register dst_oop);
 
    // oop manipulations
-  void load_heap_oop(const Address& s, Register d);
-  void load_heap_oop(Register s1, Register s2, Register d);
-  void load_heap_oop(Register s1, int simm13a, Register d);
-  void load_heap_oop(Register s1, RegisterOrConstant s2, Register d);
-  void store_heap_oop(Register d, Register s1, Register s2);
-  void store_heap_oop(Register d, Register s1, int simm13a);
-  void store_heap_oop(Register d, const Address& a, int offset = 0);
+  void access_store_at(BasicType type, DecoratorSet decorators,
+                       Register src, Address dst, Register tmp);
+  void access_load_at(BasicType type, DecoratorSet decorators,
+                      Address src, Register dst, Register tmp);
+
+  void load_heap_oop(const Address& s, Register d,
+                     Register tmp = noreg, DecoratorSet decorators = 0);
+  void load_heap_oop(Register s1, Register s2, Register d,
+                     Register tmp = noreg, DecoratorSet decorators = 0);
+  void load_heap_oop(Register s1, int simm13a, Register d,
+                     Register tmp = noreg, DecoratorSet decorators = 0);
+  void load_heap_oop(Register s1, RegisterOrConstant s2, Register d,
+                     Register tmp = noreg, DecoratorSet decorators = 0);
+  void store_heap_oop(Register d, Register s1, Register s2,
+                      Register tmp = noreg, DecoratorSet decorators = 0);
+  void store_heap_oop(Register d, Register s1, int simm13a,
+                      Register tmp = noreg, DecoratorSet decorators = 0);
+  void store_heap_oop(Register d, const Address& a, int offset = 0,
+                      Register tmp = noreg, DecoratorSet decorators = 0);
 
   void encode_heap_oop(Register src, Register dst);
   void encode_heap_oop(Register r) {
@@ -1043,19 +1058,6 @@
   // check_and_forward_exception to handle exceptions when it is safe
   void check_and_forward_exception(Register scratch_reg);
 
-  // Write to card table for - register is destroyed afterwards.
-  void card_table_write(jbyte* byte_map_base, Register tmp, Register obj);
-
-  void card_write_barrier_post(Register store_addr, Register new_val, Register tmp);
-
-#if INCLUDE_ALL_GCS
-  // General G1 pre-barrier generator.
-  void g1_write_barrier_pre(Register obj, Register index, int offset, Register pre_val, Register tmp, bool preserve_o_regs);
-
-  // General G1 post-barrier generator
-  void g1_write_barrier_post(Register store_addr, Register new_val, Register tmp);
-#endif // INCLUDE_ALL_GCS
-
   // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
   void push_fTOS();
 
--- a/src/hotspot/cpu/sparc/memset_with_concurrent_readers_sparc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/sparc/memset_with_concurrent_readers_sparc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -31,8 +31,6 @@
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/macros.hpp"
 
-#if INCLUDE_ALL_GCS
-
 // An implementation of memset, for use when there may be concurrent
 // readers of the region being stored into.
 //
@@ -156,5 +154,3 @@
   // Fill any partial word suffix.  Also the prefix if size < BytesPerWord.
   fill_subword(to, end, value);
 }
-
-#endif // INCLUDE_ALL_GCS
--- a/src/hotspot/cpu/sparc/methodHandles_sparc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/sparc/methodHandles_sparc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -180,13 +180,13 @@
 
   // Load the invoker, as MH -> MH.form -> LF.vmentry
   __ verify_oop(recv);
-  __ load_heap_oop(Address(recv,        NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())),   method_temp);
+  __ load_heap_oop(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes()), method_temp, temp2);
   __ verify_oop(method_temp);
-  __ load_heap_oop(Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())),  method_temp);
+  __ load_heap_oop(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes()), method_temp, temp2);
   __ verify_oop(method_temp);
-  __ load_heap_oop(Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())), method_temp);
+  __ load_heap_oop(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes()), method_temp, temp2);
   __ verify_oop(method_temp);
-  __ ld_ptr(       Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())),   method_temp);
+  __ ld_ptr(Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), method_temp);
 
   if (VerifyMethodHandles && !for_compiler_entry) {
     // make sure recv is already on stack
@@ -362,7 +362,7 @@
       if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) {
         Label L_ok;
         Register temp2_defc = temp2;
-        __ load_heap_oop(member_clazz, temp2_defc);
+        __ load_heap_oop(member_clazz, temp2_defc, temp3);
         load_klass_from_Class(_masm, temp2_defc, temp3, temp4);
         __ verify_klass_ptr(temp2_defc);
         __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, temp4, L_ok);
@@ -389,7 +389,7 @@
       if (VerifyMethodHandles) {
         verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp2);
       }
-      __ load_heap_oop(member_vmtarget, G5_method);
+      __ load_heap_oop(member_vmtarget, G5_method, temp3);
       __ ld_ptr(vmtarget_method, G5_method);
       break;
 
@@ -397,7 +397,7 @@
       if (VerifyMethodHandles) {
         verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp2);
       }
-      __ load_heap_oop(member_vmtarget, G5_method);
+      __ load_heap_oop(member_vmtarget, G5_method, temp3);
       __ ld_ptr(vmtarget_method, G5_method);
       break;
 
@@ -438,7 +438,7 @@
       }
 
       Register temp2_intf = temp2;
-      __ load_heap_oop(member_clazz, temp2_intf);
+      __ load_heap_oop(member_clazz, temp2_intf, temp3);
       load_klass_from_Class(_masm, temp2_intf, temp3, temp4);
       __ verify_klass_ptr(temp2_intf);
 
--- a/src/hotspot/cpu/sparc/sharedRuntime_sparc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/sparc/sharedRuntime_sparc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2516,28 +2516,7 @@
 
   // Unbox oop result, e.g. JNIHandles::resolve value in I0.
   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
-    Label done, not_weak;
-    __ br_null(I0, false, Assembler::pn, done); // Use NULL as-is.
-    __ delayed()->andcc(I0, JNIHandles::weak_tag_mask, G0); // Test for jweak
-    __ brx(Assembler::zero, true, Assembler::pt, not_weak);
-    __ delayed()->ld_ptr(I0, 0, I0); // Maybe resolve (untagged) jobject.
-    // Resolve jweak.
-    __ ld_ptr(I0, -JNIHandles::weak_tag_value, I0);
-#if INCLUDE_ALL_GCS
-    if (UseG1GC) {
-      // Copy to O0 because macro doesn't allow pre_val in input reg.
-      __ mov(I0, O0);
-      __ g1_write_barrier_pre(noreg /* obj */,
-                              noreg /* index */,
-                              0 /* offset */,
-                              O0 /* pre_val */,
-                              G3_scratch /* tmp */,
-                              true /* preserve_o_regs */);
-    }
-#endif // INCLUDE_ALL_GCS
-    __ bind(not_weak);
-    __ verify_oop(I0);
-    __ bind(done);
+    __ resolve_jobject(I0, G3_scratch);
   }
 
   if (CheckJNICalls) {
--- a/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -2277,7 +2277,7 @@
       decorators |= ARRAYCOPY_ALIGNED;
     }
 
-    BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, T_OBJECT, from, to, count);
 
     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
@@ -2334,7 +2334,7 @@
       decorators |= ARRAYCOPY_ALIGNED;
     }
 
-    BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, T_OBJECT, from, to, count);
 
     if (UseCompressedOops) {
@@ -2451,7 +2451,7 @@
       decorators |= AS_DEST_NOT_INITIALIZED;
     }
 
-    BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, T_OBJECT, O0_from, O1_to, O2_count);
 
     Label load_element, store_element, do_epilogue, fail, done;
@@ -2474,14 +2474,14 @@
 
     __ BIND(store_element);
     __ deccc(G1_remain);                // decrement the count
-    __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
+    __ store_heap_oop(G3_oop, O1_to, O5_offset, noreg, AS_RAW); // store the oop
     __ inc(O5_offset, heapOopSize);     // step to next offset
     __ brx(Assembler::zero, true, Assembler::pt, do_epilogue);
     __ delayed()->set(0, O0);           // return -1 on success
 
     // ======== loop entry is here ========
     __ BIND(load_element);
-    __ load_heap_oop(O0_from, O5_offset, G3_oop);  // load the oop
+    __ load_heap_oop(O0_from, O5_offset, G3_oop, noreg, AS_RAW);  // load the oop
     __ br_null_short(G3_oop, Assembler::pt, store_element);
 
     __ load_klass(G3_oop, G4_klass); // query the object klass
--- a/src/hotspot/cpu/sparc/templateInterpreterGenerator_sparc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/sparc/templateInterpreterGenerator_sparc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "asm/macroAssembler.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "interpreter/bytecodeHistogram.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterRuntime.hpp"
@@ -827,29 +828,18 @@
 
 // Method entry for java.lang.ref.Reference.get.
 address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {
-#if INCLUDE_ALL_GCS
   // Code: _aload_0, _getfield, _areturn
   // parameter size = 1
   //
   // The code that gets generated by this routine is split into 2 parts:
-  //    1. The "intrinsified" code for G1 (or any SATB based GC),
+  //    1. The "intrinsified" code performing an ON_WEAK_OOP_REF load,
   //    2. The slow path - which is an expansion of the regular method entry.
   //
   // Notes:-
-  // * In the G1 code we do not check whether we need to block for
-  //   a safepoint. If G1 is enabled then we must execute the specialized
-  //   code for Reference.get (except when the Reference object is null)
-  //   so that we can log the value in the referent field with an SATB
-  //   update buffer.
-  //   If the code for the getfield template is modified so that the
-  //   G1 pre-barrier code is executed when the current method is
-  //   Reference.get() then going through the normal method entry
-  //   will be fine.
-  // * The G1 code can, however, check the receiver object (the instance
-  //   of java.lang.Reference) and jump to the slow path if null. If the
-  //   Reference object is null then we obviously cannot fetch the referent
-  //   and so we don't need to call the G1 pre-barrier. Thus we can use the
-  //   regular method entry code to generate the NPE.
+  // * An intrinsic is always executed, where an ON_WEAK_OOP_REF load is performed.
+  // * We jump to the slow path if and only if the receiver is null. A null
+  //   Reference object has no referent to load, so the regular method entry
+  //   code can be used to generate the NPE.
   //
   // This code is based on generate_accessor_entry.
 
@@ -858,51 +848,27 @@
   const int referent_offset = java_lang_ref_Reference::referent_offset;
   guarantee(referent_offset > 0, "referent offset not initialized");
 
-  if (UseG1GC) {
-     Label slow_path;
+  Label slow_path;
 
-    // In the G1 code we don't check if we need to reach a safepoint. We
-    // continue and the thread will safepoint at the next bytecode dispatch.
+  // We don't check here whether we need to reach a safepoint. We
+  // continue and the thread will safepoint at the next bytecode dispatch.
 
-    // Check if local 0 != NULL
-    // If the receiver is null then it is OK to jump to the slow path.
-    __ ld_ptr(Gargs, G0, Otos_i ); // get local 0
-    // check if local 0 == NULL and go the slow path
-    __ cmp_and_brx_short(Otos_i, 0, Assembler::equal, Assembler::pn, slow_path);
+  // Check if local 0 != NULL
+  // If the receiver is null then it is OK to jump to the slow path.
+  __ ld_ptr(Gargs, G0, Otos_i ); // get local 0
+  // check if local 0 == NULL and go to the slow path
+  __ cmp_and_brx_short(Otos_i, 0, Assembler::equal, Assembler::pn, slow_path);
 
+  __ load_heap_oop(Otos_i, referent_offset, Otos_i, G3_scratch, ON_WEAK_OOP_REF);
 
-    // Load the value of the referent field.
-    if (Assembler::is_simm13(referent_offset)) {
-      __ load_heap_oop(Otos_i, referent_offset, Otos_i);
-    } else {
-      __ set(referent_offset, G3_scratch);
-      __ load_heap_oop(Otos_i, G3_scratch, Otos_i);
-    }
+  // _areturn
+  __ retl();                      // return from leaf routine
+  __ delayed()->mov(O5_savedSP, SP);
 
-    // Generate the G1 pre-barrier code to log the value of
-    // the referent field in an SATB buffer. Note with
-    // these parameters the pre-barrier does not generate
-    // the load of the previous value
-
-    __ g1_write_barrier_pre(noreg /* obj */, noreg /* index */, 0 /* offset */,
-                            Otos_i /* pre_val */,
-                            G3_scratch /* tmp */,
-                            true /* preserve_o_regs */);
-
-    // _areturn
-    __ retl();                      // return from leaf routine
-    __ delayed()->mov(O5_savedSP, SP);
-
-    // Generate regular method entry
-    __ bind(slow_path);
-    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
-    return entry;
-  }
-#endif // INCLUDE_ALL_GCS
-
-  // If G1 is not enabled then attempt to go through the accessor entry point
-  // Reference.get is an accessor
-  return NULL;
+  // Generate regular method entry
+  __ bind(slow_path);
+  __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
+  return entry;
 }
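The net effect of the rewritten entry, modeled in C++ (a sketch; ReferenceModel stands in for java.lang.ref.Reference with its referent field at referent_offset):

    struct ReferenceModel { void* referent; };
    void* regular_method_entry();   // stand-in for the zerolocals entry

    void* reference_get_model(ReferenceModel* receiver) {
      if (receiver == nullptr) {
        return regular_method_entry();  // slow path; the NPE comes from there
      }
      // load_heap_oop(..., ON_WEAK_OOP_REF): the barrier set's load_at keeps
      // the referent alive (G1 logs it in an SATB buffer while marking).
      return receiver->referent;
    }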
 
 /**
@@ -1469,33 +1435,15 @@
   // If we have an oop result, store it where it will be safe for any further gc
   // until we return, now that we've released the handle it might be protected by.
 
-  { Label no_oop, store_result;
+  { Label no_oop;
 
     __ set((intptr_t)AbstractInterpreter::result_handler(T_OBJECT), G3_scratch);
     __ cmp_and_brx_short(G3_scratch, Lscratch, Assembler::notEqual, Assembler::pt, no_oop);
-    // Unbox oop result, e.g. JNIHandles::resolve value in O0.
-    __ br_null(O0, false, Assembler::pn, store_result); // Use NULL as-is.
-    __ delayed()->andcc(O0, JNIHandles::weak_tag_mask, G0); // Test for jweak
-    __ brx(Assembler::zero, true, Assembler::pt, store_result);
-    __ delayed()->ld_ptr(O0, 0, O0); // Maybe resolve (untagged) jobject.
-    // Resolve jweak.
-    __ ld_ptr(O0, -JNIHandles::weak_tag_value, O0);
-#if INCLUDE_ALL_GCS
-    if (UseG1GC) {
-      __ g1_write_barrier_pre(noreg /* obj */,
-                              noreg /* index */,
-                              0 /* offset */,
-                              O0 /* pre_val */,
-                              G3_scratch /* tmp */,
-                              true /* preserve_o_regs */);
-    }
-#endif // INCLUDE_ALL_GCS
-    __ bind(store_result);
+    __ resolve_jobject(O0, G3_scratch);
     // Store it where gc will look for it and result handler expects it.
     __ st_ptr(O0, FP, (frame::interpreter_frame_oop_temp_offset*wordSize) + STACK_BIAS);
 
     __ bind(no_oop);
-
   }
 
 
--- a/src/hotspot/cpu/sparc/templateTable_sparc.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/sparc/templateTable_sparc.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterRuntime.hpp"
 #include "interpreter/interp_masm.hpp"
@@ -51,74 +52,31 @@
                          int offset,
                          Register val,
                          Register tmp,
-                         BarrierSet::Name barrier,
-                         bool precise) {
+                         DecoratorSet decorators = 0) {
   assert(tmp != val && tmp != base && tmp != index, "register collision");
   assert(index == noreg || offset == 0, "only one offset");
-  switch (barrier) {
-#if INCLUDE_ALL_GCS
-    case BarrierSet::G1BarrierSet:
-      {
-        // Load and record the previous value.
-        __ g1_write_barrier_pre(base, index, offset,
-                                noreg /* pre_val */,
-                                tmp, true /*preserve_o_regs*/);
-
-        // G1 barrier needs uncompressed oop for region cross check.
-        Register new_val = val;
-        if (UseCompressedOops && val != G0) {
-          new_val = tmp;
-          __ mov(val, new_val);
-        }
-
-        if (index == noreg ) {
-          assert(Assembler::is_simm13(offset), "fix this code");
-          __ store_heap_oop(val, base, offset);
-        } else {
-          __ store_heap_oop(val, base, index);
-        }
-
-        // No need for post barrier if storing NULL
-        if (val != G0) {
-          if (precise) {
-            if (index == noreg) {
-              __ add(base, offset, base);
-            } else {
-              __ add(base, index, base);
-            }
-          }
-          __ g1_write_barrier_post(base, new_val, tmp);
-        }
-      }
-      break;
-#endif // INCLUDE_ALL_GCS
-    case BarrierSet::CardTableBarrierSet:
-      {
-        if (index == noreg ) {
-          assert(Assembler::is_simm13(offset), "fix this code");
-          __ store_heap_oop(val, base, offset);
-        } else {
-          __ store_heap_oop(val, base, index);
-        }
-        // No need for post barrier if storing NULL
-        if (val != G0) {
-          if (precise) {
-            if (index == noreg) {
-              __ add(base, offset, base);
-            } else {
-              __ add(base, index, base);
-            }
-          }
-          __ card_write_barrier_post(base, val, tmp);
-        }
-      }
-      break;
-    case BarrierSet::ModRef:
-      ShouldNotReachHere();
-      break;
-    default      :
-      ShouldNotReachHere();
-
+  if (index == noreg) {
+    __ store_heap_oop(val, base, offset, tmp, decorators);
+  } else {
+    __ store_heap_oop(val, base, index, tmp, decorators);
+  }
+}
+
+// Do an oop load like val = *(base + index + offset)
+// index can be noreg.
+static void do_oop_load(InterpreterMacroAssembler* _masm,
+                        Register base,
+                        Register index,
+                        int offset,
+                        Register dst,
+                        Register tmp,
+                        DecoratorSet decorators = 0) {
+  assert(tmp != dst && tmp != base && tmp != index, "register collision");
+  assert(index == noreg || offset == 0, "only one offset");
+  if (index == noreg) {
+    __ load_heap_oop(base, offset, dst, tmp, decorators);
+  } else {
+    __ load_heap_oop(base, index, dst, tmp, decorators);
   }
 }
 
@@ -587,7 +545,13 @@
   // Otos_i: index
   // tos: array
   __ index_check(O2, Otos_i, UseCompressedOops ? 2 : LogBytesPerWord, G3_scratch, O3);
-  __ load_heap_oop(O3, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Otos_i);
+  do_oop_load(_masm,
+              O3,
+              noreg,
+              arrayOopDesc::base_offset_in_bytes(T_OBJECT),
+              Otos_i,
+              G3_scratch,
+              IN_HEAP_ARRAY);
   __ verify_oop(Otos_i);
 }
 
@@ -887,13 +851,13 @@
 
   // Store is OK.
   __ bind(store_ok);
-  do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Otos_i, G3_scratch, _bs->kind(), true);
+  do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Otos_i, G3_scratch, IN_HEAP_ARRAY);
 
   __ ba(done);
   __ delayed()->inc(Lesp, 3* Interpreter::stackElementSize); // adj sp (pops array, index and value)
 
   __ bind(is_null);
-  do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), G0, G4_scratch, _bs->kind(), true);
+  do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), G0, G4_scratch, IN_HEAP_ARRAY);
 
   __ profile_null_seen(G3_scratch);
   __ inc(Lesp, 3* Interpreter::stackElementSize);     // adj sp (pops array, index and value)
@@ -2155,7 +2119,7 @@
   __ delayed() ->cmp(Rflags, itos);
 
   // atos
-  __ load_heap_oop(Rclass, Roffset, Otos_i);
+  do_oop_load(_masm, Rclass, Roffset, 0, Otos_i, noreg);
   __ verify_oop(Otos_i);
   __ push(atos);
   if (!is_static && rc == may_rewrite) {
@@ -2354,7 +2318,7 @@
       __ ldf(FloatRegisterImpl::D, Otos_i, Roffset, Ftos_d);
       break;
     case Bytecodes::_fast_agetfield:
-      __ load_heap_oop(Otos_i, Roffset, Otos_i);
+      do_oop_load(_masm, Otos_i, Roffset, 0, Otos_i, noreg);
       break;
     default:
       ShouldNotReachHere();
@@ -2537,7 +2501,7 @@
     {
       __ pop_ptr();
       __ verify_oop(Otos_i);
-      do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false);
+      do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch);
       __ ba(checkVolatile);
       __ delayed()->tst(Lscratch);
     }
@@ -2582,7 +2546,7 @@
       __ pop_ptr();
       pop_and_check_object(Rclass);
       __ verify_oop(Otos_i);
-      do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false);
+      do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch);
       if (rc == may_rewrite) patch_bytecode(Bytecodes::_fast_aputfield, G3_scratch, G4_scratch, true, byte_no);
       __ ba(checkVolatile);
       __ delayed()->tst(Lscratch);
@@ -2763,7 +2727,7 @@
       __ stf(FloatRegisterImpl::D, Ftos_d, Rclass, Roffset);
       break;
     case Bytecodes::_fast_aputfield:
-      do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false);
+      do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch);
       break;
     default:
       ShouldNotReachHere();
@@ -2805,7 +2769,7 @@
   __ verify_oop(Rreceiver);
   __ null_check(Rreceiver);
   if (state == atos) {
-    __ load_heap_oop(Rreceiver, Roffset, Otos_i);
+    do_oop_load(_masm, Rreceiver, Roffset, 0, Otos_i, noreg);
   } else if (state == itos) {
     __ ld (Rreceiver, Roffset, Otos_i) ;
   } else if (state == ftos) {
--- a/src/hotspot/cpu/x86/assembler_x86.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -38,7 +38,6 @@
 #include "utilities/macros.hpp"
 #if INCLUDE_ALL_GCS
 #include "gc/g1/g1BarrierSet.hpp"
-#include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/heapRegion.hpp"
 #endif // INCLUDE_ALL_GCS
 
--- a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -44,6 +44,7 @@
 #if INCLUDE_ALL_GCS
 #include "gc/g1/g1BarrierSet.hpp"
 #include "gc/g1/g1CardTable.hpp"
+#include "gc/g1/g1ThreadLocalData.hpp"
 #endif
 
 
@@ -1562,7 +1563,7 @@
         StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments);
         // arg0 : previous value of memory
 
-        BarrierSet* bs = Universe::heap()->barrier_set();
+        BarrierSet* bs = BarrierSet::barrier_set();
         if (bs->kind() != BarrierSet::G1BarrierSet) {
           __ movptr(rax, (int)id);
           __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), rax);
@@ -1578,12 +1579,9 @@
 
         NOT_LP64(__ get_thread(thread);)
 
-        Address queue_active(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                              SATBMarkQueue::byte_offset_of_active()));
-        Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                             SATBMarkQueue::byte_offset_of_index()));
-        Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                        SATBMarkQueue::byte_offset_of_buf()));
+        Address queue_active(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+        Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
+        Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
 
         Label done;
         Label runtime;
@@ -1632,7 +1630,7 @@
       {
         StubFrame f(sasm, "g1_post_barrier", dont_gc_arguments);
 
-        BarrierSet* bs = Universe::heap()->barrier_set();
+        BarrierSet* bs = BarrierSet::barrier_set();
         if (bs->kind() != BarrierSet::G1BarrierSet) {
           __ movptr(rax, (int)id);
           __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), rax);
@@ -1652,10 +1650,8 @@
 
         const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
 
-        Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
-                                             DirtyCardQueue::byte_offset_of_index()));
-        Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
-                                        DirtyCardQueue::byte_offset_of_buf()));
+        Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
+        Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
 
         __ push(rax);
         __ push(rcx);
--- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -27,11 +27,10 @@
 #include "gc/g1/g1BarrierSet.hpp"
 #include "gc/g1/g1BarrierSetAssembler.hpp"
 #include "gc/g1/g1CardTable.hpp"
+#include "gc/g1/g1ThreadLocalData.hpp"
 #include "gc/g1/heapRegion.hpp"
-#include "gc/shared/collectedHeap.hpp"
 #include "interpreter/interp_masm.hpp"
 #include "runtime/sharedRuntime.hpp"
-#include "runtime/thread.hpp"
 #include "utilities/macros.hpp"
 
 #define __ masm->
@@ -48,8 +47,7 @@
 #endif
 
     Label filtered;
-    Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                         SATBMarkQueue::byte_offset_of_active()));
+    Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
     // Is marking active?
     if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
       __ cmpl(in_progress, 0);
@@ -111,3 +109,293 @@
 #endif
   __ popa();
 }
+
+void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                    Register dst, Address src, Register tmp1, Register tmp_thread) {
+  bool on_oop = type == T_OBJECT || type == T_ARRAY;
+  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
+  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
+  bool on_reference = on_weak || on_phantom;
+  ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
+  if (on_oop && on_reference) {
+    const Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
+    NOT_LP64(__ get_thread(thread));
+
+    // Generate the G1 pre-barrier code to log the value of
+    // the referent field in an SATB buffer.
+    g1_write_barrier_pre(masm /* masm */,
+                         noreg /* obj */,
+                         dst /* pre_val */,
+                         thread /* thread */,
+                         tmp1 /* tmp */,
+                         true /* tosca_live */,
+                         true /* expand_call */);
+  }
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
+                                                 Register obj,
+                                                 Register pre_val,
+                                                 Register thread,
+                                                 Register tmp,
+                                                 bool tosca_live,
+                                                 bool expand_call) {
+  // If expand_call is true then we expand the call_VM_leaf macro
+  // directly to skip generating the check by
+  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
+
+#ifdef _LP64
+  assert(thread == r15_thread, "must be");
+#endif // _LP64
+
+  Label done;
+  Label runtime;
+
+  assert(pre_val != noreg, "check this code");
+
+  if (obj != noreg) {
+    assert_different_registers(obj, pre_val, tmp);
+    assert(pre_val != rax, "check this code");
+  }
+
+  Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+  Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
+  Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
+
+  // Is marking active?
+  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+    __ cmpl(in_progress, 0);
+  } else {
+    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+    __ cmpb(in_progress, 0);
+  }
+  __ jcc(Assembler::equal, done);
+
+  // Do we need to load the previous value?
+  if (obj != noreg) {
+    __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
+  }
+
+  // Is the previous value null?
+  __ cmpptr(pre_val, (int32_t) NULL_WORD);
+  __ jcc(Assembler::equal, done);
+
+  // Can we store original value in the thread's buffer?
+  // Is index == 0?
+  // (The index field is typed as size_t.)
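+  // The queue fills from the end of the buffer towards index 0, so an
+  // index of 0 means the buffer is full and must be flushed in the runtime.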
+
+  __ movptr(tmp, index);                   // tmp := *index_adr
+  __ cmpptr(tmp, 0);                       // tmp == 0?
+  __ jcc(Assembler::equal, runtime);       // If yes, goto runtime
+
+  __ subptr(tmp, wordSize);                // tmp := tmp - wordSize
+  __ movptr(index, tmp);                   // *index_adr := tmp
+  __ addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr
+
+  // Record the previous value
+  __ movptr(Address(tmp, 0), pre_val);
+  __ jmp(done);
+
+  __ bind(runtime);
+  // save the live input values
+  if (tosca_live) __ push(rax);
+
+  if (obj != noreg && obj != rax)
+    __ push(obj);
+
+  if (pre_val != rax)
+    __ push(pre_val);
+
+  // Calling the runtime using the regular call_VM_leaf mechanism generates
+  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
+  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
+  //
+  // If we are generating the pre-barrier without a frame (e.g. in the
+  // intrinsified Reference.get() routine) then ebp might be pointing to
+  // the caller frame and so this check will most likely fail at runtime.
+  //
+  // Expanding the call directly bypasses the generation of the check.
+  // So when we do not have a full interpreter frame on the stack
+  // expand_call should be passed true.
+
+  NOT_LP64( __ push(thread); )
+
+  if (expand_call) {
+    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
+#ifdef _LP64
+    if (c_rarg1 != thread) {
+      __ mov(c_rarg1, thread);
+    }
+    if (c_rarg0 != pre_val) {
+      __ mov(c_rarg0, pre_val);
+    }
+#else
+    __ push(thread);
+    __ push(pre_val);
+#endif
+    __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
+  } else {
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
+  }
+
+  NOT_LP64( __ pop(thread); )
+
+  // restore the live input values
+  if (pre_val != rax)
+    __ pop(pre_val);
+
+  if (obj != noreg && obj != rax)
+    __ pop(obj);
+
+  if (tosca_live) __ pop(rax);
+
+  __ bind(done);
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
+                                                  Register store_addr,
+                                                  Register new_val,
+                                                  Register thread,
+                                                  Register tmp,
+                                                  Register tmp2) {
+#ifdef _LP64
+  assert(thread == r15_thread, "must be");
+#endif // _LP64
+
+  Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
+  Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
+
+  CardTableBarrierSet* ct =
+    barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
+  assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code");
+
+  Label done;
+  Label runtime;
+
+  // Does store cross heap regions?
+
+  __ movptr(tmp, store_addr);
+  __ xorptr(tmp, new_val);
+  __ shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
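+  // tmp is now zero iff store_addr and new_val lie in the same heap region,
+  // in which case no remembered set update is needed.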
+  __ jcc(Assembler::equal, done);
+
+  // crosses regions, storing NULL?
+
+  __ cmpptr(new_val, (int32_t) NULL_WORD);
+  __ jcc(Assembler::equal, done);
+
+  // storing a region-crossing non-NULL oop, is the card already dirty?
+
+  const Register card_addr = tmp;
+  const Register cardtable = tmp2;
+
+  __ movptr(card_addr, store_addr);
+  __ shrptr(card_addr, CardTable::card_shift);
+  // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
+  // a valid address and therefore is not properly handled by the relocation code.
+  __ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base());
+  __ addptr(card_addr, cardtable);
+
+  __ cmpb(Address(card_addr, 0), (int)G1CardTable::g1_young_card_val());
+  __ jcc(Assembler::equal, done);
+
+  __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
+  __ cmpb(Address(card_addr, 0), (int)G1CardTable::dirty_card_val());
+  __ jcc(Assembler::equal, done);
+
+  // storing a region-crossing non-NULL oop, the card is clean:
+  // dirty the card and log it.
+
+  __ movb(Address(card_addr, 0), (int)G1CardTable::dirty_card_val());
+
+  __ cmpl(queue_index, 0);
+  __ jcc(Assembler::equal, runtime);
+  __ subl(queue_index, wordSize);
+  __ movptr(tmp2, buffer);
+#ifdef _LP64
+  __ movslq(rscratch1, queue_index);
+  __ addq(tmp2, rscratch1);
+  __ movq(Address(tmp2, 0), card_addr);
+#else
+  __ addl(tmp2, queue_index);
+  __ movl(Address(tmp2, 0), card_addr);
+#endif
+  __ jmp(done);
+
+  __ bind(runtime);
+  // save the live input values
+  __ push(store_addr);
+  __ push(new_val);
+#ifdef _LP64
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
+#else
+  __ push(thread);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
+  __ pop(thread);
+#endif
+  __ pop(new_val);
+  __ pop(store_addr);
+
+  __ bind(done);
+}
+
+void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                         Address dst, Register val, Register tmp1, Register tmp2) {
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool in_concurrent_root = (decorators & IN_CONCURRENT_ROOT) != 0;
+
+  bool needs_pre_barrier = in_heap || in_concurrent_root;
+  bool needs_post_barrier = val != noreg && in_heap;
+
+  Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi);
+  Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
+  // Flatten the object address if needed. We do this regardless of
+  // 'precise' because we need the flattened address in a register anyway.
+  if (dst.index() == noreg && dst.disp() == 0) {
+    if (dst.base() != tmp1) {
+      __ movptr(tmp1, dst.base());
+    }
+  } else {
+    __ lea(tmp1, dst);
+  }
+
+#ifndef _LP64
+  InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm);
+#endif
+
+  NOT_LP64(__ get_thread(rcx));
+  NOT_LP64(imasm->save_bcp());
+
+  if (needs_pre_barrier) {
+    g1_write_barrier_pre(masm /*masm*/,
+                         tmp1 /* obj */,
+                         tmp2 /* pre_val */,
+                         rthread /* thread */,
+                         tmp3  /* tmp */,
+                         val != noreg /* tosca_live */,
+                         false /* expand_call */);
+  }
+  if (val == noreg) {
+    BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
+  } else {
+    Register new_val = val;
+    if (needs_post_barrier) {
+      // G1 barrier needs uncompressed oop for region cross check.
+      if (UseCompressedOops) {
+        new_val = tmp2;
+        __ movptr(new_val, val);
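+        // (store_at below encodes val in place under compressed oops, so an
+        // uncompressed copy is kept for the region cross check.)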
+      }
+    }
+    BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
+    if (needs_post_barrier) {
+      g1_write_barrier_post(masm /*masm*/,
+                            tmp1 /* store_adr */,
+                            new_val /* new_val */,
+                            rthread /* thread */,
+                            tmp3 /* tmp */,
+                            tmp2 /* tmp2 */);
+    }
+  }
+  NOT_LP64(imasm->restore_bcp());
+}
--- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -30,10 +30,30 @@
 
 class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
  protected:
-  virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
-                                               Register addr, Register count);
-  virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
-                                                Register addr, Register count, Register tmp);
+  virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count);
+  virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp);
+
+  void g1_write_barrier_pre(MacroAssembler* masm,
+                            Register obj,
+                            Register pre_val,
+                            Register thread,
+                            Register tmp,
+                            bool tosca_live,
+                            bool expand_call);
+
+  void g1_write_barrier_post(MacroAssembler* masm,
+                             Register store_addr,
+                             Register new_val,
+                             Register thread,
+                             Register tmp,
+                             Register tmp2);
+
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Address dst, Register val, Register tmp1, Register tmp2);
+
+ public:
+  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                       Register dst, Address src, Register tmp1, Register tmp_thread);
 };
 
 #endif // CPU_X86_GC_G1_G1BARRIERSETASSEMBLER_X86_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "interpreter/interp_masm.hpp"
+
+#define __ masm->
+
+void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                  Register dst, Address src, Register tmp1, Register tmp_thread) {
+  bool on_heap = (decorators & IN_HEAP) != 0;
+  bool on_root = (decorators & IN_ROOT) != 0;
+  bool oop_not_null = (decorators & OOP_NOT_NULL) != 0;
+
+  switch (type) {
+  case T_OBJECT:
+  case T_ARRAY: {
+    if (on_heap) {
+#ifdef _LP64
+      if (UseCompressedOops) {
+        __ movl(dst, src);
+        if (oop_not_null) {
+          __ decode_heap_oop_not_null(dst);
+        } else {
+          __ decode_heap_oop(dst);
+        }
+      } else
+#endif
+      {
+        __ movptr(dst, src);
+      }
+    } else {
+      assert(on_root, "why else?");
+      __ movptr(dst, src);
+    }
+    break;
+  }
+  default: Unimplemented();
+  }
+}
+
+void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                   Address dst, Register val, Register tmp1, Register tmp2) {
+  bool on_heap = (decorators & IN_HEAP) != 0;
+  bool on_root = (decorators & IN_ROOT) != 0;
+  bool oop_not_null = (decorators & OOP_NOT_NULL) != 0;
+
+  switch (type) {
+  case T_OBJECT:
+  case T_ARRAY: {
+    if (on_heap) {
+      if (val == noreg) {
+        assert(!oop_not_null, "inconsistent access");
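+        // Storing NULL: a 32-bit store suffices for compressed oops;
+        // otherwise store a full (sign-extended) NULL_WORD.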
+#ifdef _LP64
+        if (UseCompressedOops) {
+          __ movl(dst, (int32_t)NULL_WORD);
+        } else {
+          __ movslq(dst, (int32_t)NULL_WORD);
+        }
+#else
+        __ movl(dst, (int32_t)NULL_WORD);
+#endif
+      } else {
+#ifdef _LP64
+        if (UseCompressedOops) {
+          assert(!dst.uses(val), "not enough registers");
+          if (oop_not_null) {
+            __ encode_heap_oop_not_null(val);
+          } else {
+            __ encode_heap_oop(val);
+          }
+          __ movl(dst, val);
+        } else
+#endif
+        {
+          __ movptr(dst, val);
+        }
+      }
+    } else {
+      assert(on_root, "why else?");
+      assert(val != noreg, "not supported");
+      __ movptr(dst, val);
+    }
+    break;
+  }
+  default: Unimplemented();
+  }
+}
--- a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -22,8 +22,8 @@
  *
  */
 
-#ifndef CPU_X86_GC_G1_BARRIERSETASSEMBLER_X86_HPP
-#define CPU_X86_GC_G1_BARRIERSETASSEMBLER_X86_HPP
+#ifndef CPU_X86_GC_SHARED_BARRIERSETASSEMBLER_X86_HPP
+#define CPU_X86_GC_SHARED_BARRIERSETASSEMBLER_X86_HPP
 
 #include "asm/macroAssembler.hpp"
 #include "memory/allocation.hpp"
@@ -38,6 +38,13 @@
                                   Register src, Register dst, Register count) {}
   virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Register src, Register dst, Register count) {}
+
+  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                       Register dst, Address src, Register tmp1, Register tmp_thread);
+  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                        Address dst, Register val, Register tmp1, Register tmp2);
+
+  virtual void barrier_stubs_init() {}
 };
 
-#endif // CPU_X86_GC_G1_BARRIERSETASSEMBLER_X86_HPP
+#endif // CPU_X86_GC_SHARED_BARRIERSETASSEMBLER_X86_HPP
--- a/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -28,7 +28,6 @@
 #include "gc/shared/cardTable.hpp"
 #include "gc/shared/cardTableBarrierSet.hpp"
 #include "gc/shared/cardTableBarrierSetAssembler.hpp"
-#include "gc/shared/collectedHeap.hpp"
 
 #define __ masm->
 
@@ -44,7 +43,7 @@
 
 void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                                     Register addr, Register count, Register tmp) {
-  BarrierSet *bs = Universe::heap()->barrier_set();
+  BarrierSet *bs = BarrierSet::barrier_set();
   CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
   CardTable* ct = ctbs->card_table();
   assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
@@ -85,3 +84,70 @@
 
 __ BIND(L_done);
 }
+
+void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) {
+  // Does a store check for the oop in register obj. The content of
+  // register obj is destroyed afterwards.
+  BarrierSet* bs = BarrierSet::barrier_set();
+
+  CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(bs);
+  assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code");
+
+  __ shrptr(obj, CardTable::card_shift);
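+  // obj now holds the card index (address >> card_shift); adding
+  // byte_map_base below turns it into the address of the card byte.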
+
+  Address card_addr;
+
+  // The calculation for byte_map_base is as follows:
+  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
+  // So this essentially converts an address to a displacement and it will
+  // never need to be relocated. On 64bit however the value may be too
+  // large for a 32bit displacement.
+  intptr_t disp = (intptr_t) ct->card_table()->byte_map_base();
+  if (__ is_simm32(disp)) {
+    card_addr = Address(noreg, obj, Address::times_1, disp);
+  } else {
+    // By doing it as an ExternalAddress 'disp' could be converted to a rip-relative
+    // displacement and done in a single instruction given favorable mapping and a
+    // smarter version of as_Address. However, 'ExternalAddress' generates a relocation
+    // entry and that entry is not properly handled by the relocation code.
+    AddressLiteral cardtable((address)ct->card_table()->byte_map_base(), relocInfo::none);
+    Address index(noreg, obj, Address::times_1);
+    card_addr = __ as_Address(ArrayAddress(cardtable, index));
+  }
+
+  int dirty = CardTable::dirty_card_val();
+  if (UseCondCardMark) {
+    Label L_already_dirty;
+    if (UseConcMarkSweepGC) {
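+      // CMS cleans cards concurrently during precleaning; a StoreLoad
+      // barrier keeps the card load below from floating above the oop store.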
+      __ membar(Assembler::StoreLoad);
+    }
+    __ cmpb(card_addr, dirty);
+    __ jcc(Assembler::equal, L_already_dirty);
+    __ movb(card_addr, dirty);
+    __ bind(L_already_dirty);
+  } else {
+    __ movb(card_addr, dirty);
+  }
+}
+
+void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                                Address dst, Register val, Register tmp1, Register tmp2) {
+  bool in_heap = (decorators & IN_HEAP) != 0;
+
+  bool on_array = (decorators & IN_HEAP_ARRAY) != 0;
+  bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
+  bool precise = on_array || on_anonymous;
+
+  bool needs_post_barrier = val != noreg && in_heap;
+
+  BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg);
+  if (needs_post_barrier) {
+    // flatten object address if needed
+    if (!precise || (dst.index() == noreg && dst.disp() == 0)) {
+      store_check(masm, dst.base(), dst);
+    } else {
+      __ lea(tmp1, dst);
+      store_check(masm, tmp1, dst);
+    }
+  }
+}
--- a/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -30,8 +30,12 @@
 
 class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler {
 protected:
-  virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr,
-                                                Register count, Register tmp);
+  void store_check(MacroAssembler* masm, Register obj, Address dst);
+
+  virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp);
+
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Address dst, Register val, Register tmp1, Register tmp2);
 };
 
 #endif // CPU_X86_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_X86_HPP
--- a/src/hotspot/cpu/x86/gc/shared/modRefBarrierSetAssembler_x86.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/x86/gc/shared/modRefBarrierSetAssembler_x86.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -78,3 +78,12 @@
     gen_write_ref_array_post_barrier(masm, decorators, dst, count, tmp);
   }
 }
+
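+// Only oop stores (T_OBJECT/T_ARRAY) need a GC barrier; all other types go
+// straight to the raw BarrierSetAssembler::store_at.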
+void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                         Address dst, Register val, Register tmp1, Register tmp2) {
+  if (type == T_OBJECT || type == T_ARRAY) {
+    oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2);
+  } else {
+    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
+  }
+}
--- a/src/hotspot/cpu/x86/gc/shared/modRefBarrierSetAssembler_x86.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/x86/gc/shared/modRefBarrierSetAssembler_x86.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -28,16 +28,26 @@
 #include "asm/macroAssembler.hpp"
 #include "gc/shared/barrierSetAssembler.hpp"
 
+// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other
+// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected
+// accesses, which are overridden in the concrete BarrierSetAssembler.
+
 class ModRefBarrierSetAssembler: public BarrierSetAssembler {
 protected:
-  virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count) {}
-  virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp) {}
-
+  virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
+                                               Register addr, Register count) {}
+  virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
+                                                Register addr, Register count, Register tmp) {}
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Address dst, Register val, Register tmp1, Register tmp2) = 0;
 public:
   virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Register src, Register dst, Register count);
   virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Register src, Register dst, Register count);
+
+  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                        Address dst, Register val, Register tmp1, Register tmp2);
 };
 
 #endif // CPU_X86_GC_SHARED_MODREFBARRIERSETASSEMBLER_X86_HPP
--- a/src/hotspot/cpu/x86/interp_masm_x86.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/x86/interp_masm_x86.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -516,9 +516,7 @@
   resolve_oop_handle(result);
   // Add in the index
   addptr(result, tmp);
-  load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
-  // The resulting oop is null if the reference is not yet resolved.
-  // It is Universe::the_null_sentinel() if the reference resolved to NULL via condy.
+  load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), tmp);
 }
 
 // load cpool->resolved_klass_at(index)
--- a/src/hotspot/cpu/x86/interp_masm_x86.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/x86/interp_masm_x86.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,12 +35,13 @@
 typedef ByteSize (*OffsetFunction)(uint);
 
 class InterpreterMacroAssembler: public MacroAssembler {
-
- protected:
+ public:
   // Interpreter specific version of call_VM_base
   virtual void call_VM_leaf_base(address entry_point,
                                  int number_of_arguments);
 
+ protected:
+
   virtual void call_VM_base(Register oop_result,
                             Register java_thread,
                             Register last_java_sp,
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -27,12 +27,13 @@
 #include "asm/assembler.hpp"
 #include "asm/assembler.inline.hpp"
 #include "compiler/disassembler.hpp"
-#include "gc/shared/cardTable.hpp"
-#include "gc/shared/cardTableBarrierSet.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "gc/shared/collectedHeap.inline.hpp"
 #include "interpreter/interpreter.hpp"
 #include "memory/resourceArea.hpp"
 #include "memory/universe.hpp"
+#include "oops/access.hpp"
 #include "oops/klass.inline.hpp"
 #include "prims/methodHandles.hpp"
 #include "runtime/biasedLocking.hpp"
@@ -45,12 +46,6 @@
 #include "runtime/stubRoutines.hpp"
 #include "runtime/thread.hpp"
 #include "utilities/macros.hpp"
-#if INCLUDE_ALL_GCS
-#include "gc/g1/g1BarrierSet.hpp"
-#include "gc/g1/g1CardTable.hpp"
-#include "gc/g1/g1CollectedHeap.inline.hpp"
-#include "gc/g1/heapRegion.hpp"
-#endif // INCLUDE_ALL_GCS
 #include "crc32c.h"
 #ifdef COMPILER2
 #include "opto/intrinsicnode.hpp"
@@ -5240,6 +5235,12 @@
   }
 }
 
+void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
+  const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
+  STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
+  // The inverted mask is sign-extended
+  andptr(possibly_jweak, inverted_jweak_mask);
+}
 
 void MacroAssembler::resolve_jobject(Register value,
                                      Register thread,
@@ -5251,296 +5252,18 @@
   testptr(value, JNIHandles::weak_tag_mask); // Test for jweak tag.
   jcc(Assembler::zero, not_weak);
   // Resolve jweak.
-  movptr(value, Address(value, -JNIHandles::weak_tag_value));
+  access_load_at(T_OBJECT, IN_ROOT | ON_PHANTOM_OOP_REF,
+                 value, Address(value, -JNIHandles::weak_tag_value), tmp, thread);
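+  // ON_PHANTOM_OOP_REF lets a SATB collector such as G1 apply its
+  // pre-barrier to the loaded referent.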
   verify_oop(value);
-#if INCLUDE_ALL_GCS
-  if (UseG1GC) {
-    g1_write_barrier_pre(noreg /* obj */,
-                         value /* pre_val */,
-                         thread /* thread */,
-                         tmp /* tmp */,
-                         true /* tosca_live */,
-                         true /* expand_call */);
-  }
-#endif // INCLUDE_ALL_GCS
   jmp(done);
   bind(not_weak);
   // Resolve (untagged) jobject.
-  movptr(value, Address(value, 0));
+  access_load_at(T_OBJECT, IN_ROOT | IN_CONCURRENT_ROOT,
+                 value, Address(value, 0), tmp, thread);
   verify_oop(value);
   bind(done);
 }
 
-void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
-  const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
-  STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
-  // The inverted mask is sign-extended
-  andptr(possibly_jweak, inverted_jweak_mask);
-}
-
-//////////////////////////////////////////////////////////////////////////////////
-#if INCLUDE_ALL_GCS
-
-void MacroAssembler::g1_write_barrier_pre(Register obj,
-                                          Register pre_val,
-                                          Register thread,
-                                          Register tmp,
-                                          bool tosca_live,
-                                          bool expand_call) {
-
-  // If expand_call is true then we expand the call_VM_leaf macro
-  // directly to skip generating the check by
-  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
-
-#ifdef _LP64
-  assert(thread == r15_thread, "must be");
-#endif // _LP64
-
-  Label done;
-  Label runtime;
-
-  assert(pre_val != noreg, "check this code");
-
-  if (obj != noreg) {
-    assert_different_registers(obj, pre_val, tmp);
-    assert(pre_val != rax, "check this code");
-  }
-
-  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                       SATBMarkQueue::byte_offset_of_active()));
-  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                       SATBMarkQueue::byte_offset_of_index()));
-  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                       SATBMarkQueue::byte_offset_of_buf()));
-
-
-  // Is marking active?
-  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
-    cmpl(in_progress, 0);
-  } else {
-    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
-    cmpb(in_progress, 0);
-  }
-  jcc(Assembler::equal, done);
-
-  // Do we need to load the previous value?
-  if (obj != noreg) {
-    load_heap_oop(pre_val, Address(obj, 0));
-  }
-
-  // Is the previous value null?
-  cmpptr(pre_val, (int32_t) NULL_WORD);
-  jcc(Assembler::equal, done);
-
-  // Can we store original value in the thread's buffer?
-  // Is index == 0?
-  // (The index field is typed as size_t.)
-
-  movptr(tmp, index);                   // tmp := *index_adr
-  cmpptr(tmp, 0);                       // tmp == 0?
-  jcc(Assembler::equal, runtime);       // If yes, goto runtime
-
-  subptr(tmp, wordSize);                // tmp := tmp - wordSize
-  movptr(index, tmp);                   // *index_adr := tmp
-  addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr
-
-  // Record the previous value
-  movptr(Address(tmp, 0), pre_val);
-  jmp(done);
-
-  bind(runtime);
-  // save the live input values
-  if(tosca_live) push(rax);
-
-  if (obj != noreg && obj != rax)
-    push(obj);
-
-  if (pre_val != rax)
-    push(pre_val);
-
-  // Calling the runtime using the regular call_VM_leaf mechanism generates
-  // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
-  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
-  //
-  // If we care generating the pre-barrier without a frame (e.g. in the
-  // intrinsified Reference.get() routine) then ebp might be pointing to
-  // the caller frame and so this check will most likely fail at runtime.
-  //
-  // Expanding the call directly bypasses the generation of the check.
-  // So when we do not have have a full interpreter frame on the stack
-  // expand_call should be passed true.
-
-  NOT_LP64( push(thread); )
-
-  if (expand_call) {
-    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
-    pass_arg1(this, thread);
-    pass_arg0(this, pre_val);
-    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
-  } else {
-    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
-  }
-
-  NOT_LP64( pop(thread); )
-
-  // save the live input values
-  if (pre_val != rax)
-    pop(pre_val);
-
-  if (obj != noreg && obj != rax)
-    pop(obj);
-
-  if(tosca_live) pop(rax);
-
-  bind(done);
-}
-
-void MacroAssembler::g1_write_barrier_post(Register store_addr,
-                                           Register new_val,
-                                           Register thread,
-                                           Register tmp,
-                                           Register tmp2) {
-#ifdef _LP64
-  assert(thread == r15_thread, "must be");
-#endif // _LP64
-
-  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
-                                       DirtyCardQueue::byte_offset_of_index()));
-  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
-                                       DirtyCardQueue::byte_offset_of_buf()));
-
-  CardTableBarrierSet* ctbs =
-    barrier_set_cast<CardTableBarrierSet>(Universe::heap()->barrier_set());
-  CardTable* ct = ctbs->card_table();
-  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
-
-  Label done;
-  Label runtime;
-
-  // Does store cross heap regions?
-
-  movptr(tmp, store_addr);
-  xorptr(tmp, new_val);
-  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
-  jcc(Assembler::equal, done);
-
-  // crosses regions, storing NULL?
-
-  cmpptr(new_val, (int32_t) NULL_WORD);
-  jcc(Assembler::equal, done);
-
-  // storing region crossing non-NULL, is card already dirty?
-
-  const Register card_addr = tmp;
-  const Register cardtable = tmp2;
-
-  movptr(card_addr, store_addr);
-  shrptr(card_addr, CardTable::card_shift);
-  // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
-  // a valid address and therefore is not properly handled by the relocation code.
-  movptr(cardtable, (intptr_t)ct->byte_map_base());
-  addptr(card_addr, cardtable);
-
-  cmpb(Address(card_addr, 0), (int)G1CardTable::g1_young_card_val());
-  jcc(Assembler::equal, done);
-
-  membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
-  cmpb(Address(card_addr, 0), (int)CardTable::dirty_card_val());
-  jcc(Assembler::equal, done);
-
-
-  // storing a region crossing, non-NULL oop, card is clean.
-  // dirty card and log.
-
-  movb(Address(card_addr, 0), (int)CardTable::dirty_card_val());
-
-  cmpl(queue_index, 0);
-  jcc(Assembler::equal, runtime);
-  subl(queue_index, wordSize);
-  movptr(tmp2, buffer);
-#ifdef _LP64
-  movslq(rscratch1, queue_index);
-  addq(tmp2, rscratch1);
-  movq(Address(tmp2, 0), card_addr);
-#else
-  addl(tmp2, queue_index);
-  movl(Address(tmp2, 0), card_addr);
-#endif
-  jmp(done);
-
-  bind(runtime);
-  // save the live input values
-  push(store_addr);
-  push(new_val);
-#ifdef _LP64
-  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
-#else
-  push(thread);
-  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
-  pop(thread);
-#endif
-  pop(new_val);
-  pop(store_addr);
-
-  bind(done);
-}
-
-#endif // INCLUDE_ALL_GCS
-//////////////////////////////////////////////////////////////////////////////////
-
-
-void MacroAssembler::store_check(Register obj, Address dst) {
-  store_check(obj);
-}
-
-void MacroAssembler::store_check(Register obj) {
-  // Does a store check for the oop in register obj. The content of
-  // register obj is destroyed afterwards.
-  BarrierSet* bs = Universe::heap()->barrier_set();
-  assert(bs->kind() == BarrierSet::CardTableBarrierSet,
-         "Wrong barrier set kind");
-
-  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
-  CardTable* ct = ctbs->card_table();
-  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
-
-  shrptr(obj, CardTable::card_shift);
-
-  Address card_addr;
-
-  // The calculation for byte_map_base is as follows:
-  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
-  // So this essentially converts an address to a displacement and it will
-  // never need to be relocated. On 64bit however the value may be too
-  // large for a 32bit displacement.
-  intptr_t disp = (intptr_t) ct->byte_map_base();
-  if (is_simm32(disp)) {
-    card_addr = Address(noreg, obj, Address::times_1, disp);
-  } else {
-    // By doing it as an ExternalAddress 'disp' could be converted to a rip-relative
-    // displacement and done in a single instruction given favorable mapping and a
-    // smarter version of as_Address. However, 'ExternalAddress' generates a relocation
-    // entry and that entry is not properly handled by the relocation code.
-    AddressLiteral cardtable((address)ct->byte_map_base(), relocInfo::none);
-    Address index(noreg, obj, Address::times_1);
-    card_addr = as_Address(ArrayAddress(cardtable, index));
-  }
-
-  int dirty = CardTable::dirty_card_val();
-  if (UseCondCardMark) {
-    Label L_already_dirty;
-    if (UseConcMarkSweepGC) {
-      membar(Assembler::StoreLoad);
-    }
-    cmpb(card_addr, dirty);
-    jcc(Assembler::equal, L_already_dirty);
-    movb(card_addr, dirty);
-    bind(L_already_dirty);
-  } else {
-    movb(card_addr, dirty);
-  }
-}
-
 void MacroAssembler::subptr(Register dst, int32_t imm32) {
   LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
 }
@@ -6591,69 +6314,47 @@
     movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
 }
 
-void MacroAssembler::load_heap_oop(Register dst, Address src) {
-#ifdef _LP64
-  // FIXME: Must change all places where we try to load the klass.
-  if (UseCompressedOops) {
-    movl(dst, src);
-    decode_heap_oop(dst);
-  } else
-#endif
-    movptr(dst, src);
+void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
+                                    Register tmp1, Register thread_tmp) {
+  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bool as_raw = (decorators & AS_RAW) != 0;
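+  // AS_RAW statically binds to the base BarrierSetAssembler, emitting a
+  // plain access (only compressed oop handling) with no GC barriers.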
+  if (as_raw) {
+    bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
+  } else {
+    bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
+  }
+}
+
+void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src,
+                                     Register tmp1, Register tmp2) {
+  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bool as_raw = (decorators & AS_RAW) != 0;
+  if (as_raw) {
+    bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2);
+  } else {
+    bs->store_at(this, decorators, type, dst, src, tmp1, tmp2);
+  }
+}
+
+void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
+                                   Register thread_tmp, DecoratorSet decorators) {
+  access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
 }
 
 // Doesn't do verification, generates fixed size code
-void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {
-#ifdef _LP64
-  if (UseCompressedOops) {
-    movl(dst, src);
-    decode_heap_oop_not_null(dst);
-  } else
-#endif
-    movptr(dst, src);
-}
-
-void MacroAssembler::store_heap_oop(Address dst, Register src) {
-#ifdef _LP64
-  if (UseCompressedOops) {
-    assert(!dst.uses(src), "not enough registers");
-    encode_heap_oop(src);
-    movl(dst, src);
-  } else
-#endif
-    movptr(dst, src);
-}
-
-void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) {
-  assert_different_registers(src1, tmp);
-#ifdef _LP64
-  if (UseCompressedOops) {
-    bool did_push = false;
-    if (tmp == noreg) {
-      tmp = rax;
-      push(tmp);
-      did_push = true;
-      assert(!src2.uses(rsp), "can't push");
-    }
-    load_heap_oop(tmp, src2);
-    cmpptr(src1, tmp);
-    if (did_push)  pop(tmp);
-  } else
-#endif
-    cmpptr(src1, src2);
+void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
+                                            Register thread_tmp, DecoratorSet decorators) {
+  access_load_at(T_OBJECT, IN_HEAP | OOP_NOT_NULL | decorators, dst, src, tmp1, thread_tmp);
+}
+
+void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1,
+                                    Register tmp2, DecoratorSet decorators) {
+  access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2);
 }
 
 // Used for storing NULLs.
 void MacroAssembler::store_heap_oop_null(Address dst) {
-#ifdef _LP64
-  if (UseCompressedOops) {
-    movl(dst, (int32_t)NULL_WORD);
-  } else {
-    movslq(dst, (int32_t)NULL_WORD);
-  }
-#else
-  movl(dst, (int32_t)NULL_WORD);
-#endif
+  access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg);
 }
 
 #ifdef _LP64
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp	Fri Apr 13 14:06:39 2018 +0200
@@ -38,11 +38,7 @@
   friend class LIR_Assembler;
   friend class Runtime1;      // as_Address()
 
- protected:
-
-  Address as_Address(AddressLiteral adr);
-  Address as_Address(ArrayAddress adr);
-
+ public:
   // Support for VM calls
   //
   // This is the base routine called by the different versions of call_VM_leaf. The interpreter
@@ -54,6 +50,7 @@
     int     number_of_arguments        // the number of arguments to pop after the call
   );
 
+ protected:
   // This is the base routine called by the different versions of call_VM. The interpreter
   // may customize this version by overriding it for its purposes (e.g., to save/restore
   // additional registers when doing a VM call).
@@ -87,6 +84,9 @@
  virtual void check_and_handle_popframe(Register java_thread);
  virtual void check_and_handle_earlyret(Register java_thread);
 
+  Address as_Address(AddressLiteral adr);
+  Address as_Address(ArrayAddress adr);
+
   // Support for NULL-checks
   //
   // Generates code that causes a NULL OS exception if the content of reg is NULL.
@@ -293,29 +293,9 @@
   // thread in the default location (r15_thread on 64bit)
   void reset_last_Java_frame(bool clear_fp);
 
-  // Stores
-  void store_check(Register obj);                // store check for obj - register is destroyed afterwards
-  void store_check(Register obj, Address dst);   // same as above, dst is exact store location (reg. is destroyed)
-
+  // jobjects
+  void clear_jweak_tag(Register possibly_jweak);
   void resolve_jobject(Register value, Register thread, Register tmp);
-  void clear_jweak_tag(Register possibly_jweak);
-
-#if INCLUDE_ALL_GCS
-
-  void g1_write_barrier_pre(Register obj,
-                            Register pre_val,
-                            Register thread,
-                            Register tmp,
-                            bool tosca_live,
-                            bool expand_call);
-
-  void g1_write_barrier_post(Register store_addr,
-                             Register new_val,
-                             Register thread,
-                             Register tmp,
-                             Register tmp2);
-
-#endif // INCLUDE_ALL_GCS
 
   // C 'boolean' to Java boolean: x == 0 ? 0 : 1
   void c2bool(Register x);
@@ -334,10 +314,17 @@
   void load_klass(Register dst, Register src);
   void store_klass(Register dst, Register src);
 
-  void load_heap_oop(Register dst, Address src);
-  void load_heap_oop_not_null(Register dst, Address src);
-  void store_heap_oop(Address dst, Register src);
-  void cmp_heap_oop(Register src1, Address src2, Register tmp = noreg);
+  void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
+                      Register tmp1, Register thread_tmp);
+  void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src,
+                       Register tmp1, Register tmp2);
+
+  void load_heap_oop(Register dst, Address src, Register tmp1 = noreg,
+                     Register thread_tmp = noreg, DecoratorSet decorators = 0);
+  void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg,
+                              Register thread_tmp = noreg, DecoratorSet decorators = 0);
+  void store_heap_oop(Address dst, Register src, Register tmp1 = noreg,
+                      Register tmp2 = noreg, DecoratorSet decorators = 0);
 
   // Used for storing NULL. All other oop constants should be
   // stored using routines that take a jobject.
--- a/src/hotspot/cpu/x86/methodHandles_x86.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/x86/methodHandles_x86.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -168,11 +168,11 @@
 
   // Load the invoker, as MH -> MH.form -> LF.vmentry
   __ verify_oop(recv);
-  __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())));
+  __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2);
   __ verify_oop(method_temp);
-  __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())));
+  __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2);
   __ verify_oop(method_temp);
-  __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())));
+  __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())), temp2);
   __ verify_oop(method_temp);
   __ movptr(method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())));
 
@@ -361,7 +361,7 @@
       if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) {
         Label L_ok;
         Register temp2_defc = temp2;
-        __ load_heap_oop(temp2_defc, member_clazz);
+        __ load_heap_oop(temp2_defc, member_clazz, temp3);
         load_klass_from_Class(_masm, temp2_defc);
         __ verify_klass_ptr(temp2_defc);
         __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok);
@@ -437,6 +437,8 @@
         verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3);
       }
 
+      BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+
       Register temp3_intf = temp3;
       __ load_heap_oop(temp3_intf, member_clazz);
       load_klass_from_Class(_masm, temp3_intf);
--- a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -845,7 +845,7 @@
       decorators |= ARRAYCOPY_ALIGNED;
     }
 
-    BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, t, from, to, count);
 
     __ subptr(to, from); // to --> to_from
@@ -1034,7 +1034,7 @@
       decorators |= ARRAYCOPY_ALIGNED;
     }
 
-    BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, t, from, to, count);
 
     // copy from high to low
@@ -1389,7 +1389,7 @@
     }
 
     BasicType type = T_OBJECT;
-    BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
 
     // Copy from low to high addresses, indexed from the end of each array.
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -1829,7 +1829,7 @@
     }
 
     BasicType type = is_oop ? T_OBJECT : T_INT;
-    BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
 
     // 'from', 'to' and 'count' are now valid
@@ -1923,7 +1923,7 @@
     }
 
     BasicType type = is_oop ? T_OBJECT : T_INT;
-    BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     // no registers are destroyed by this call
     bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
 
@@ -2027,7 +2027,7 @@
     }
 
     BasicType type = is_oop ? T_OBJECT : T_LONG;
-    BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
 
     // Copy from low to high addresses.  Use 'to' as scratch.
@@ -2120,7 +2120,7 @@
     }
 
     BasicType type = is_oop ? T_OBJECT : T_LONG;
-    BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
 
     __ jmp(L_copy_bytes);
@@ -2300,7 +2300,7 @@
     }
 
     BasicType type = T_OBJECT;
-    BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
 
     // Copy from low to high addresses, indexed from the end of each array.
@@ -2323,13 +2323,13 @@
     __ align(OptoLoopAlignment);
 
     __ BIND(L_store_element);
-    __ store_heap_oop(to_element_addr, rax_oop);  // store the oop
+    __ store_heap_oop(to_element_addr, rax_oop, noreg, noreg, AS_RAW);  // store the oop
     __ increment(count);               // increment the count toward zero
     __ jcc(Assembler::zero, L_do_card_marks);
 
     // ======== loop entry is here ========
     __ BIND(L_load_element);
-    __ load_heap_oop(rax_oop, from_element_addr); // load the oop
+    __ load_heap_oop(rax_oop, from_element_addr, noreg, noreg, AS_RAW); // load the oop
     __ testptr(rax_oop, rax_oop);
     __ jcc(Assembler::zero, L_store_element);
 
--- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "asm/macroAssembler.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "interpreter/bytecodeHistogram.hpp"
 #include "interpreter/interp_masm.hpp"
 #include "interpreter/interpreter.hpp"
@@ -696,29 +697,18 @@
 
 // Method entry for java.lang.ref.Reference.get.
 address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {
-#if INCLUDE_ALL_GCS
   // Code: _aload_0, _getfield, _areturn
   // parameter size = 1
   //
   // The code that gets generated by this routine is split into 2 parts:
-  //    1. The "intrinsified" code for G1 (or any SATB based GC),
+  //    1. The "intrinsified" code performing an ON_WEAK_OOP_REF load,
   //    2. The slow path - which is an expansion of the regular method entry.
   //
   // Notes:-
-  // * In the G1 code we do not check whether we need to block for
-  //   a safepoint. If G1 is enabled then we must execute the specialized
-  //   code for Reference.get (except when the Reference object is null)
-  //   so that we can log the value in the referent field with an SATB
-  //   update buffer.
-  //   If the code for the getfield template is modified so that the
-  //   G1 pre-barrier code is executed when the current method is
-  //   Reference.get() then going through the normal method entry
-  //   will be fine.
-  // * The G1 code can, however, check the receiver object (the instance
-  //   of java.lang.Reference) and jump to the slow path if null. If the
-  //   Reference object is null then we obviously cannot fetch the referent
-  //   and so we don't need to call the G1 pre-barrier. Thus we can use the
-  //   regular method entry code to generate the NPE.
+  // * The intrinsified code is always executed; it performs an ON_WEAK_OOP_REF load.
+  // * We jump to the slow path iff the receiver is null. If the
+  //   Reference object is null then we cannot perform the ON_WEAK_OOP_REF
+  //   load, and thus we use the regular method entry code to generate the NPE.
   //
   // rbx: Method*
 
@@ -729,63 +719,41 @@
   const int referent_offset = java_lang_ref_Reference::referent_offset;
   guarantee(referent_offset > 0, "referent offset not initialized");
 
-  if (UseG1GC) {
-    Label slow_path;
-    // rbx: method
+  Label slow_path;
+  // rbx: method
 
-    // Check if local 0 != NULL
-    // If the receiver is null then it is OK to jump to the slow path.
-    __ movptr(rax, Address(rsp, wordSize));
+  // Check if local 0 != NULL
+  // If the receiver is null then it is OK to jump to the slow path.
+  __ movptr(rax, Address(rsp, wordSize));
 
-    __ testptr(rax, rax);
-    __ jcc(Assembler::zero, slow_path);
+  __ testptr(rax, rax);
+  __ jcc(Assembler::zero, slow_path);
 
-    // rax: local 0
-    // rbx: method (but can be used as scratch now)
-    // rdx: scratch
-    // rdi: scratch
+  // rax: local 0
+  // rbx: method (but can be used as scratch now)
+  // rdx: scratch
+  // rdi: scratch
 
-    // Preserve the sender sp in case the pre-barrier
-    // calls the runtime
-    NOT_LP64(__ push(rsi));
+  // Preserve the sender sp in case the load barrier
+  // calls the runtime
+  NOT_LP64(__ push(rsi));
 
-    // Generate the G1 pre-barrier code to log the value of
-    // the referent field in an SATB buffer.
+  // Load the value of the referent field.
+  const Address field_address(rax, referent_offset);
+  __ load_heap_oop(rax, field_address, /*tmp1*/ rbx, /*tmp_thread*/ rdx, ON_WEAK_OOP_REF);
 
-    // Load the value of the referent field.
-    const Address field_address(rax, referent_offset);
-    __ load_heap_oop(rax, field_address);
+  // _areturn
+  const Register sender_sp = NOT_LP64(rsi) LP64_ONLY(r13);
+  NOT_LP64(__ pop(rsi));      // get sender sp
+  __ pop(rdi);                // get return address
+  __ mov(rsp, sender_sp);     // set sp to sender sp
+  __ jmp(rdi);
+  __ ret(0);
 
-    const Register sender_sp = NOT_LP64(rsi) LP64_ONLY(r13);
-    const Register thread = NOT_LP64(rcx) LP64_ONLY(r15_thread);
-    NOT_LP64(__ get_thread(thread));
-
-    // Generate the G1 pre-barrier code to log the value of
-    // the referent field in an SATB buffer.
-    __ g1_write_barrier_pre(noreg /* obj */,
-                            rax /* pre_val */,
-                            thread /* thread */,
-                            rbx /* tmp */,
-                            true /* tosca_live */,
-                            true /* expand_call */);
-
-    // _areturn
-    NOT_LP64(__ pop(rsi));      // get sender sp
-    __ pop(rdi);                // get return address
-    __ mov(rsp, sender_sp);     // set sp to sender sp
-    __ jmp(rdi);
-    __ ret(0);
-
-    // generate a vanilla interpreter entry as the slow path
-    __ bind(slow_path);
-    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
-    return entry;
-  }
-#endif // INCLUDE_ALL_GCS
-
-  // If G1 is not enabled then attempt to go through the accessor entry point
-  // Reference.get is an accessor
-  return NULL;
+  // generate a vanilla interpreter entry as the slow path
+  __ bind(slow_path);
+  __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
+  return entry;
 }
 
 void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
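The single ON_WEAK_OOP_REF load above replaces the hand-rolled G1 pre-barrier: load_heap_oop forwards its decorators to the active BarrierSetAssembler, which decides whether the referent must be logged in an SATB buffer or can simply be read. With that dispatch in place the entry needs neither the INCLUDE_ALL_GCS guard nor a UseG1GC check. A sketch of the forwarding step, assuming the decorated access helper this changeset introduces:

    // Sketch: the decorated load is one indirection away from the barrier set.
    void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
                                       Register thread_tmp, DecoratorSet decorators) {
      access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
    }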
--- a/src/hotspot/cpu/x86/templateTable_x86.cpp	Fri Apr 13 03:05:19 2018 +0200
+++ b/src/hotspot/cpu/x86/templateTable_x86.cpp	Fri Apr 13 14:06:39 2018 +0200
@@ -149,84 +149,18 @@
 
 
 static void do_oop_store(InterpreterMacroAssembler* _masm,
-                         Address obj,
+                         Address dst,
                          Register val,
-                         BarrierSet::Name barrier,
-                         bool precise) {
+                         DecoratorSet decorators = 0) {
   assert(val == noreg || val == rax, "parameter is just for looks");
-  switch (barrier) {
-#if INCLUDE_ALL_GCS
-    case BarrierSet::G1BarrierSet:
-      {
-        // flatten object address if needed
-        // We do it regardless of precise because we need the registers
-        if (obj.index() == noreg && obj.disp() == 0) {
-          if (obj.base() != rdx) {
-            __ movptr(rdx, obj.base());
-          }
-        } else {
-          __ lea(rdx, obj);
-        }
-
-        Register rtmp    = LP64_ONLY(r8)         NOT_LP64(rsi);
-        Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
-
-        NOT_LP64(__ get_thread(rcx));
-        NOT_LP64(__ save_bcp());
-
-        __ g1_write_barrier_pre(rdx /* obj */,
-                                rbx /* pre_val */,
-                                rthread /* thread */,
-                                rtmp  /* tmp */,
-                                val != noreg /* tosca_live */,
-                                false /* expand_call */);
-        if (val == noreg) {
-          __ store_heap_oop_null(Address(rdx, 0));
-        } else {
-          // G1 barrier needs uncompressed oop for region cross check.
-          Register new_val = val;
-          if (UseCompressedOops) {
-            new_val = rbx;
-            __ movptr(new_val, val);
-          }
-          __ store_heap_oop(Address(rdx, 0), val);
-          __ g1_write_barrier_post(rdx /* store_adr */,
-                                   new_val /* new_val */,
-                                   rthread /* thread */,
-                                   rtmp /* tmp */,
-                                   rbx /* tmp2 */);
-        }
-        NOT_LP64( __ restore_bcp());
-      }
-      break;
-#endif // INCLUDE_ALL_GCS
-    case BarrierSet::CardTableBarrierSet:
-      {
-        if (val == noreg) {
-          __ store_heap_oop_null(obj);
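With the per-collector switch removed, do_oop_store collapses to a single decorated store through the macro assembler. A sketch of the likely shape of the new body, under the signature shown above (the temp registers handed to store_heap_oop are assumptions; the remainder of the hunk is not visible in this section):

    static void do_oop_store(InterpreterMacroAssembler* _masm,
                             Address dst,
                             Register val,
                             DecoratorSet decorators = 0) {
      assert(val == noreg || val == rax, "parameter is just for looks");
      __ store_heap_oop(dst, val, rdx /*tmp1*/, rbx /*thread_tmp*/, decorators);
    }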