changeset 48195:fb0275c320a0

8189871: Refactor GC barriers to use declarative semantics Reviewed-by: pliden, rkennke, coleenp, dholmes, kbarrett, stefank
author eosterlund
date Mon, 20 Nov 2017 13:07:44 +0100
parents 55c43e677ded
children d8486f1f5a84
files src/hotspot/share/c1/c1_Runtime1.cpp src/hotspot/share/classfile/javaClasses.cpp src/hotspot/share/classfile/javaClasses.hpp src/hotspot/share/classfile/javaClasses.inline.hpp src/hotspot/share/gc/g1/g1SATBCardTableModRefBS.cpp src/hotspot/share/gc/g1/g1SATBCardTableModRefBS.hpp src/hotspot/share/gc/g1/g1SATBCardTableModRefBS.inline.hpp src/hotspot/share/gc/parallel/cardTableExtension.hpp src/hotspot/share/gc/shared/accessBarrierSupport.cpp src/hotspot/share/gc/shared/accessBarrierSupport.hpp src/hotspot/share/gc/shared/accessBarrierSupport.inline.hpp src/hotspot/share/gc/shared/barrierSet.cpp src/hotspot/share/gc/shared/barrierSet.hpp src/hotspot/share/gc/shared/barrierSet.inline.hpp src/hotspot/share/gc/shared/barrierSetConfig.hpp src/hotspot/share/gc/shared/barrierSetConfig.inline.hpp src/hotspot/share/gc/shared/cardTableModRefBS.cpp src/hotspot/share/gc/shared/cardTableModRefBS.hpp src/hotspot/share/gc/shared/cardTableModRefBS.inline.hpp src/hotspot/share/gc/shared/cardTableModRefBSForCTRS.hpp src/hotspot/share/gc/shared/cardTableRS.hpp src/hotspot/share/gc/shared/collectedHeap.cpp src/hotspot/share/gc/shared/modRefBarrierSet.hpp src/hotspot/share/gc/shared/modRefBarrierSet.inline.hpp src/hotspot/share/gc/shared/referenceProcessor.cpp src/hotspot/share/jvmci/jvmciJavaClasses.hpp src/hotspot/share/oops/access.hpp src/hotspot/share/oops/access.inline.hpp src/hotspot/share/oops/accessBackend.cpp src/hotspot/share/oops/accessBackend.hpp src/hotspot/share/oops/accessBackend.inline.hpp src/hotspot/share/oops/klass.hpp src/hotspot/share/oops/klass.inline.hpp src/hotspot/share/oops/objArrayKlass.cpp src/hotspot/share/oops/objArrayOop.cpp src/hotspot/share/oops/objArrayOop.hpp src/hotspot/share/oops/objArrayOop.inline.hpp src/hotspot/share/oops/oop.cpp src/hotspot/share/oops/oop.hpp src/hotspot/share/oops/oop.inline.hpp src/hotspot/share/prims/jni.cpp src/hotspot/share/prims/jvm.cpp src/hotspot/share/prims/unsafe.cpp src/hotspot/share/runtime/stubRoutines.cpp 
src/hotspot/share/runtime/vmStructs.cpp
diffstat 45 files changed, 3458 insertions(+), 806 deletions(-) [+]
line wrap: on
line diff
--- a/src/hotspot/share/c1/c1_Runtime1.cpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/c1/c1_Runtime1.cpp	Mon Nov 20 13:07:44 2017 +0100
@@ -46,6 +46,7 @@
 #include "memory/allocation.inline.hpp"
 #include "memory/oopFactory.hpp"
 #include "memory/resourceArea.hpp"
+#include "oops/access.inline.hpp"
 #include "oops/objArrayKlass.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/atomic.hpp"
@@ -1367,25 +1368,16 @@
 template <class T> int obj_arraycopy_work(oopDesc* src, T* src_addr,
                                           oopDesc* dst, T* dst_addr,
                                           int length) {
-
-  // For performance reasons, we assume we are using a card marking write
-  // barrier. The assert will fail if this is not the case.
-  // Note that we use the non-virtual inlineable variant of write_ref_array.
-  BarrierSet* bs = Universe::heap()->barrier_set();
   if (src == dst) {
     // same object, no check
-    bs->write_ref_array_pre(dst_addr, length);
-    Copy::conjoint_oops_atomic(src_addr, dst_addr, length);
-    bs->write_ref_array((HeapWord*)dst_addr, length);
+    HeapAccess<>::oop_arraycopy(arrayOop(src), arrayOop(dst), src_addr, dst_addr, length);
     return ac_ok;
   } else {
     Klass* bound = ObjArrayKlass::cast(dst->klass())->element_klass();
     Klass* stype = ObjArrayKlass::cast(src->klass())->element_klass();
     if (stype == bound || stype->is_subtype_of(bound)) {
       // Elements are guaranteed to be subtypes, so no check necessary
-      bs->write_ref_array_pre(dst_addr, length);
-      Copy::conjoint_oops_atomic(src_addr, dst_addr, length);
-      bs->write_ref_array((HeapWord*)dst_addr, length);
+      HeapAccess<ARRAYCOPY_DISJOINT>::oop_arraycopy(arrayOop(src), arrayOop(dst), src_addr, dst_addr, length);
       return ac_ok;
     }
   }
--- a/src/hotspot/share/classfile/javaClasses.cpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/classfile/javaClasses.cpp	Mon Nov 20 13:07:44 2017 +0100
@@ -3064,6 +3064,25 @@
   }
 }
 
+// Support for java_lang_ref_Reference
+
+bool java_lang_ref_Reference::is_referent_field(oop obj, ptrdiff_t offset) {
+  assert(!oopDesc::is_null(obj), "sanity");
+  if (offset != java_lang_ref_Reference::referent_offset) {
+    return false;
+  }
+  // The offset matches; now check that obj really is a Reference instance.
+  Klass* k = obj->klass();
+  if (!k->is_instance_klass()) {
+    return false;
+  }
+  // Reuse the klass loaded above instead of loading it from obj again.
+  InstanceKlass* ik = InstanceKlass::cast(k);
+  bool is_reference = ik->reference_type() != REF_NONE;
+  assert(!is_reference || ik->is_subclass_of(SystemDictionary::Reference_klass()), "sanity");
+  return is_reference;
+}
+
 // Support for java_lang_ref_SoftReference
 
 jlong java_lang_ref_SoftReference::timestamp(oop ref) {
--- a/src/hotspot/share/classfile/javaClasses.hpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/classfile/javaClasses.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -893,6 +893,8 @@
   static inline void set_discovered(oop ref, oop value);
   static inline void set_discovered_raw(oop ref, oop value);
   static inline HeapWord* discovered_addr(oop ref);
+  static bool is_referent_field(oop obj, ptrdiff_t offset);
+  static inline bool is_phantom(oop ref);
 };
 
 
--- a/src/hotspot/share/classfile/javaClasses.inline.hpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/classfile/javaClasses.inline.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -121,6 +121,9 @@
 HeapWord* java_lang_ref_Reference::discovered_addr(oop ref) {
   return ref->obj_field_addr<HeapWord>(discovered_offset);
 }
+bool java_lang_ref_Reference::is_phantom(oop ref) {
+  return InstanceKlass::cast(ref->klass())->reference_type() == REF_PHANTOM;
+}
 
 inline void java_lang_invoke_CallSite::set_target_volatile(oop site, oop target) {
   site->obj_field_put_volatile(_target_offset, target);
--- a/src/hotspot/share/gc/g1/g1SATBCardTableModRefBS.cpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/gc/g1/g1SATBCardTableModRefBS.cpp	Mon Nov 20 13:07:44 2017 +0100
@@ -73,6 +73,7 @@
     write_ref_array_pre_work(dst, count);
   }
 }
+
 void G1SATBCardTableModRefBS::write_ref_array_pre(narrowOop* dst, int count, bool dest_uninitialized) {
   if (!dest_uninitialized) {
     write_ref_array_pre_work(dst, count);
@@ -154,14 +155,9 @@
   log_trace(gc, barrier)("    byte_map_base: " INTPTR_FORMAT,  p2i(byte_map_base));
 }
 
-void
-G1SATBCardTableLoggingModRefBS::write_ref_field_work(void* field,
-                                                     oop new_val,
-                                                     bool release) {
-  volatile jbyte* byte = byte_for(field);
-  if (*byte == g1_young_gen) {
-    return;
-  }
+void G1SATBCardTableLoggingModRefBS::write_ref_field_post_slow(volatile jbyte* byte) {
+  // In the slow path, we know a card is not young
+  assert(*byte != g1_young_gen, "slow path invoked without filtering");
   OrderAccess::storeload();
   if (*byte != dirty_card) {
     *byte = dirty_card;
--- a/src/hotspot/share/gc/g1/g1SATBCardTableModRefBS.hpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/gc/g1/g1SATBCardTableModRefBS.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -54,18 +54,15 @@
   // pre-marking object graph.
   static void enqueue(oop pre_val);
 
-  // We export this to make it available in cases where the static
-  // type of the barrier set is known.  Note that it is non-virtual.
-  template <class T> inline void inline_write_ref_field_pre(T* field, oop newVal);
-
-  // These are the more general virtual versions.
-  inline virtual void write_ref_field_pre_work(oop* field, oop new_val);
-  inline virtual void write_ref_field_pre_work(narrowOop* field, oop new_val);
+  static void enqueue_if_weak(DecoratorSet decorators, oop value);
 
   template <class T> void write_ref_array_pre_work(T* dst, int count);
   virtual void write_ref_array_pre(oop* dst, int count, bool dest_uninitialized);
   virtual void write_ref_array_pre(narrowOop* dst, int count, bool dest_uninitialized);
 
+  template <DecoratorSet decorators, typename T>
+  void write_ref_field_pre(T* field);
+
 /*
    Claimed and deferred bits are used together in G1 during the evacuation
    pause. These bits can have the following state transitions:
@@ -102,6 +99,11 @@
   static const BarrierSet::Name value = BarrierSet::G1SATBCT;
 };
 
+template<>
+struct BarrierSet::GetType<BarrierSet::G1SATBCT> {
+  typedef G1SATBCardTableModRefBS type;
+};
+
 class G1SATBCardTableLoggingModRefBSChangedListener : public G1MappingChangedListener {
  private:
   G1SATBCardTableLoggingModRefBS* _card_table;
@@ -121,9 +123,6 @@
   G1SATBCardTableLoggingModRefBSChangedListener _listener;
   DirtyCardQueueSet& _dcqs;
 
- protected:
-  virtual void write_ref_field_work(void* field, oop new_val, bool release);
-
  public:
   static size_t compute_size(size_t mem_region_size_in_words) {
     size_t number_of_slots = (mem_region_size_in_words / card_size_in_words);
@@ -148,6 +147,33 @@
 
   void write_region_work(MemRegion mr)    { invalidate(mr); }
   void write_ref_array_work(MemRegion mr) { invalidate(mr); }
+
+  template <DecoratorSet decorators, typename T>
+  void write_ref_field_post(T* field, oop new_val);
+  void write_ref_field_post_slow(volatile jbyte* byte);
+
+  // Callbacks for runtime accesses.
+  template <DecoratorSet decorators, typename BarrierSetT = G1SATBCardTableLoggingModRefBS>
+  class AccessBarrier: public ModRefBarrierSet::AccessBarrier<decorators, BarrierSetT> {
+    typedef ModRefBarrierSet::AccessBarrier<decorators, BarrierSetT> ModRef;
+    typedef BarrierSet::AccessBarrier<decorators, BarrierSetT> Raw;
+
+  public:
+    // Needed for loads on non-heap weak references
+    template <typename T>
+    static oop oop_load_not_in_heap(T* addr);
+
+    // Needed for non-heap stores
+    template <typename T>
+    static void oop_store_not_in_heap(T* addr, oop new_value);
+
+    // Needed for weak references
+    static oop oop_load_in_heap_at(oop base, ptrdiff_t offset);
+
+    // Defensive: will catch weak oops at addresses in heap
+    template <typename T>
+    static oop oop_load_in_heap(T* addr);
+  };
 };
 
 template<>
@@ -155,4 +181,9 @@
   static const BarrierSet::Name value = BarrierSet::G1SATBCTLogging;
 };
 
+template<>
+struct BarrierSet::GetType<BarrierSet::G1SATBCTLogging> {
+  typedef G1SATBCardTableLoggingModRefBS type;
+};
+
 #endif // SHARE_VM_GC_G1_G1SATBCARDTABLEMODREFBS_HPP
--- a/src/hotspot/share/gc/g1/g1SATBCardTableModRefBS.inline.hpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/gc/g1/g1SATBCardTableModRefBS.inline.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,24 +25,30 @@
 #ifndef SHARE_VM_GC_G1_G1SATBCARDTABLEMODREFBS_INLINE_HPP
 #define SHARE_VM_GC_G1_G1SATBCARDTABLEMODREFBS_INLINE_HPP
 
+#include "gc/shared/accessBarrierSupport.inline.hpp"
 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
 #include "oops/oop.inline.hpp"
 
-// We export this to make it available in cases where the static
-// type of the barrier set is known.  Note that it is non-virtual.
-template <class T> void G1SATBCardTableModRefBS::inline_write_ref_field_pre(T* field, oop newVal) {
+template <DecoratorSet decorators, typename T>
+inline void G1SATBCardTableModRefBS::write_ref_field_pre(T* field) {
+  if (HasDecorator<decorators, ARRAYCOPY_DEST_NOT_INITIALIZED>::value ||
+      HasDecorator<decorators, AS_NO_KEEPALIVE>::value) {
+    return;
+  }
+
   T heap_oop = oopDesc::load_heap_oop(field);
   if (!oopDesc::is_null(heap_oop)) {
-    enqueue(oopDesc::decode_heap_oop(heap_oop));
+    enqueue(oopDesc::decode_heap_oop_not_null(heap_oop));
   }
 }
 
-// These are the more general virtual versions.
-void G1SATBCardTableModRefBS::write_ref_field_pre_work(oop* field, oop new_val) {
-  inline_write_ref_field_pre(field, new_val);
-}
-void G1SATBCardTableModRefBS::write_ref_field_pre_work(narrowOop* field, oop new_val) {
-  inline_write_ref_field_pre(field, new_val);
+template <DecoratorSet decorators, typename T>
+inline void G1SATBCardTableLoggingModRefBS::write_ref_field_post(T* field, oop new_val) {
+  volatile jbyte* byte = byte_for(field);
+  if (*byte != g1_young_gen) {
+    // Take a slow path for cards in old
+    write_ref_field_post_slow(byte);
+  }
 }
 
 void G1SATBCardTableModRefBS::set_card_claimed(size_t card_index) {
@@ -55,4 +61,53 @@
   _byte_map[card_index] = val;
 }
 
+inline void G1SATBCardTableModRefBS::enqueue_if_weak(DecoratorSet decorators, oop value) {
+  assert((decorators & ON_UNKNOWN_OOP_REF) == 0, "Reference strength must be known");
+  const bool on_strong_oop_ref = (decorators & ON_STRONG_OOP_REF) != 0;
+  const bool peek              = (decorators & AS_NO_KEEPALIVE) != 0;
+
+  if (!peek && !on_strong_oop_ref && value != NULL) {
+    enqueue(value);
+  }
+}
+
+template <DecoratorSet decorators, typename BarrierSetT>
+template <typename T>
+inline oop G1SATBCardTableLoggingModRefBS::AccessBarrier<decorators, BarrierSetT>::
+oop_load_not_in_heap(T* addr) {
+  oop value = ModRef::oop_load_not_in_heap(addr);
+  enqueue_if_weak(decorators, value);
+  return value;
+}
+
+template <DecoratorSet decorators, typename BarrierSetT>
+template <typename T>
+inline oop G1SATBCardTableLoggingModRefBS::AccessBarrier<decorators, BarrierSetT>::
+oop_load_in_heap(T* addr) {
+  oop value = ModRef::oop_load_in_heap(addr);
+  enqueue_if_weak(decorators, value);
+  return value;
+}
+
+template <DecoratorSet decorators, typename BarrierSetT>
+inline oop G1SATBCardTableLoggingModRefBS::AccessBarrier<decorators, BarrierSetT>::
+oop_load_in_heap_at(oop base, ptrdiff_t offset) {
+  oop value = ModRef::oop_load_in_heap_at(base, offset);
+  enqueue_if_weak(AccessBarrierSupport::resolve_possibly_unknown_oop_ref_strength<decorators>(base, offset), value);
+  return value;
+}
+
+template <DecoratorSet decorators, typename BarrierSetT>
+template <typename T>
+inline void G1SATBCardTableLoggingModRefBS::AccessBarrier<decorators, BarrierSetT>::
+oop_store_not_in_heap(T* addr, oop new_value) {
+  if (HasDecorator<decorators, IN_CONCURRENT_ROOT>::value) {
+    // For roots not scanned in a safepoint, we have to apply SATB barriers
+    // even for roots.
+    G1SATBCardTableLoggingModRefBS *bs = barrier_set_cast<G1SATBCardTableLoggingModRefBS>(BarrierSet::barrier_set());
+    bs->write_ref_field_pre<decorators>(addr);
+  }
+  Raw::oop_store(addr, new_value);
+}
+
 #endif // SHARE_VM_GC_G1_G1SATBCARDTABLEMODREFBS_INLINE_HPP
--- a/src/hotspot/share/gc/parallel/cardTableExtension.hpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/gc/parallel/cardTableExtension.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -115,4 +115,9 @@
   static const BarrierSet::Name value = BarrierSet::CardTableExtension;
 };
 
+template<>
+struct BarrierSet::GetType<BarrierSet::CardTableExtension> {
+  typedef ::CardTableExtension type;
+};
+
 #endif // SHARE_VM_GC_PARALLEL_CARDTABLEEXTENSION_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/gc/shared/accessBarrierSupport.cpp	Mon Nov 20 13:07:44 2017 +0100
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "classfile/javaClasses.inline.hpp"
+#include "gc/shared/accessBarrierSupport.inline.hpp"
+#include "oops/access.hpp"
+
+DecoratorSet AccessBarrierSupport::resolve_unknown_oop_ref_strength(DecoratorSet decorators, oop base, ptrdiff_t offset) {
+  DecoratorSet ds = decorators & ~ON_UNKNOWN_OOP_REF;
+  if (!java_lang_ref_Reference::is_referent_field(base, offset)) {
+    ds |= ON_STRONG_OOP_REF;
+  } else if (java_lang_ref_Reference::is_phantom(base)) {
+    ds |= ON_PHANTOM_OOP_REF;
+  } else {
+    ds |= ON_WEAK_OOP_REF;
+  }
+  return ds;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/gc/shared/accessBarrierSupport.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_SHARED_ACCESSBARRIERSUPPORT_HPP
+#define SHARE_VM_GC_SHARED_ACCESSBARRIERSUPPORT_HPP
+
+#include "memory/allocation.hpp"
+#include "oops/access.hpp"
+
+class AccessBarrierSupport: AllStatic {
+private:
+  static DecoratorSet resolve_unknown_oop_ref_strength(DecoratorSet decorators, oop base, ptrdiff_t offset);
+
+public:
+  // Some collectors, like G1, need to keep referents alive when loading them.
+  // Therefore, for APIs that accept unknown oop ref strength (e.g. unsafe),
+  // we need to dynamically find out if a given field is on a java.lang.ref.Reference
+  // object and, in that case, what strength it has.
+  template<DecoratorSet decorators>
+  static DecoratorSet resolve_possibly_unknown_oop_ref_strength(oop base, ptrdiff_t offset);
+};
+
+#endif // SHARE_VM_GC_SHARED_ACCESSBARRIERSUPPORT_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/gc/shared/accessBarrierSupport.inline.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_SHARED_ACCESSBARRIERSUPPORT_INLINE_HPP
+#define SHARE_VM_GC_SHARED_ACCESSBARRIERSUPPORT_INLINE_HPP
+
+#include "gc/shared/accessBarrierSupport.hpp"
+
+template <DecoratorSet decorators>
+DecoratorSet AccessBarrierSupport::resolve_possibly_unknown_oop_ref_strength(oop base, ptrdiff_t offset) {
+  if (!HasDecorator<decorators, ON_UNKNOWN_OOP_REF>::value) {
+    return decorators;
+  } else {
+    return resolve_unknown_oop_ref_strength(decorators, base, offset);
+  }
+}
+
+#endif // SHARE_VM_GC_SHARED_ACCESSBARRIERSUPPORT_INLINE_HPP
--- a/src/hotspot/share/gc/shared/barrierSet.cpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/gc/shared/barrierSet.cpp	Mon Nov 20 13:07:44 2017 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,8 @@
 #include "gc/shared/collectedHeap.hpp"
 #include "memory/universe.hpp"
 
+BarrierSet* BarrierSet::_bs = NULL;
+
 // count is number of array elements being written
 void BarrierSet::static_write_ref_array_pre(HeapWord* start, size_t count) {
   assert(count <= (size_t)max_intx, "count too large");
--- a/src/hotspot/share/gc/shared/barrierSet.hpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/gc/shared/barrierSet.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -25,7 +25,10 @@
 #ifndef SHARE_VM_GC_SHARED_BARRIERSET_HPP
 #define SHARE_VM_GC_SHARED_BARRIERSET_HPP
 
+#include "gc/shared/barrierSetConfig.hpp"
 #include "memory/memRegion.hpp"
+#include "oops/access.hpp"
+#include "oops/accessBackend.hpp"
 #include "oops/oopsHierarchy.hpp"
 #include "utilities/fakeRttiSupport.hpp"
 
@@ -34,7 +37,20 @@
 
 class BarrierSet: public CHeapObj<mtGC> {
   friend class VMStructs;
+
+  static BarrierSet* _bs;
+
 public:
+  enum Name {
+#define BARRIER_SET_DECLARE_BS_ENUM(bs_name) bs_name ,
+    FOR_EACH_BARRIER_SET_DO(BARRIER_SET_DECLARE_BS_ENUM)
+#undef BARRIER_SET_DECLARE_BS_ENUM
+    UnknownBS
+  };
+
+  static BarrierSet* barrier_set() { return _bs; }
+
+protected:
   // Fake RTTI support.  For a derived class T to participate
   // - T must have a corresponding Name entry.
   // - GetName<T> must be specialized to return the corresponding Name
@@ -45,32 +61,20 @@
   // - If T is a concrete class, the constructor must create a
   //   FakeRtti object whose tag set includes the corresponding Name
   //   entry, and pass it up to its base class.
-
-  enum Name {                   // associated class
-    ModRef,                     // ModRefBarrierSet
-    CardTableModRef,            // CardTableModRefBS
-    CardTableForRS,             // CardTableModRefBSForCTRS
-    CardTableExtension,         // CardTableExtension
-    G1SATBCT,                   // G1SATBCardTableModRefBS
-    G1SATBCTLogging             // G1SATBCardTableLoggingModRefBS
-  };
-
-protected:
   typedef FakeRttiSupport<BarrierSet, Name> FakeRtti;
 
 private:
   FakeRtti _fake_rtti;
 
+public:
   // Metafunction mapping a class derived from BarrierSet to the
   // corresponding Name enum tag.
   template<typename T> struct GetName;
 
-  // Downcast argument to a derived barrier set type.
-  // The cast is checked in a debug build.
-  // T must have a specialization for BarrierSet::GetName<T>.
-  template<typename T> friend T* barrier_set_cast(BarrierSet* bs);
+  // Metafunction mapping a Name enum type to the corresponding
+  // class derived from BarrierSet.
+  template<BarrierSet::Name T> struct GetType;
 
-public:
   // Note: This is not presently the Name corresponding to the
   // concrete class of this object.
   BarrierSet::Name kind() const { return _fake_rtti.concrete_tag(); }
@@ -85,23 +89,6 @@
   ~BarrierSet() { }
 
 public:
-  // Invoke the barrier, if any, necessary when writing "new_val" into the
-  // ref field at "offset" in "obj".
-  // (For efficiency reasons, this operation is specialized for certain
-  // barrier types.  Semantically, it should be thought of as a call to the
-  // virtual "_work" function below, which must implement the barrier.)
-  // First the pre-write versions...
-  template <class T> inline void write_ref_field_pre(T* field, oop new_val);
-
-  // ...then the post-write version.
-  inline void write_ref_field(void* field, oop new_val, bool release = false);
-
-protected:
-  virtual void write_ref_field_pre_work(      oop* field, oop new_val) {};
-  virtual void write_ref_field_pre_work(narrowOop* field, oop new_val) {};
-  virtual void write_ref_field_work(void* field, oop new_val, bool release) = 0;
-
-public:
   // Operations on arrays, or general regions (e.g., for "clone") may be
   // optimized by some barriers.
 
@@ -144,6 +131,147 @@
 
   // Print a description of the memory for the barrier set
   virtual void print_on(outputStream* st) const = 0;
+
+  static void set_bs(BarrierSet* bs) { _bs = bs; }
+
+  // The AccessBarrier of a BarrierSet subclass is called by the Access API
+  // (cf. oops/access.hpp) to perform decorated accesses. GC implementations
+  // may override these default access operations by declaring an
+  // AccessBarrier class in its BarrierSet. Its accessors will then be
+  // automatically resolved at runtime.
+  //
+  // In order to register a new FooBarrierSet::AccessBarrier with the Access API,
+  // the following steps should be taken:
+  // 1) Provide an enum "name" for the BarrierSet in barrierSetConfig.hpp
+  // 2) Make sure the barrier set headers are included from barrierSetConfig.inline.hpp
+  // 3) Provide specializations for BarrierSet::GetName and BarrierSet::GetType.
+  template <DecoratorSet decorators, typename BarrierSetT>
+  class AccessBarrier: protected RawAccessBarrier<decorators> {
+  protected:
+    typedef RawAccessBarrier<decorators> Raw;
+    typedef typename BarrierSetT::template AccessBarrier<decorators> CRTPAccessBarrier;
+
+  public:
+    // Primitive heap accesses. These accessors get resolved when
+    // IN_HEAP is set (e.g. when using the HeapAccess API), it is
+    // not an oop_* overload, and the barrier strength is AS_NORMAL.
+    template <typename T>
+    static T load_in_heap(T* addr) {
+      return Raw::template load<T>(addr);
+    }
+
+    template <typename T>
+    static T load_in_heap_at(oop base, ptrdiff_t offset) {
+      return Raw::template load_at<T>(base, offset);
+    }
+
+    template <typename T>
+    static void store_in_heap(T* addr, T value) {
+      Raw::store(addr, value);
+    }
+
+    template <typename T>
+    static void store_in_heap_at(oop base, ptrdiff_t offset, T value) {
+      Raw::store_at(base, offset, value);
+    }
+
+    template <typename T>
+    static T atomic_cmpxchg_in_heap(T new_value, T* addr, T compare_value) {
+      return Raw::atomic_cmpxchg(new_value, addr, compare_value);
+    }
+
+    template <typename T>
+    static T atomic_cmpxchg_in_heap_at(T new_value, oop base, ptrdiff_t offset, T compare_value) {
+      return Raw::atomic_cmpxchg_at(new_value, base, offset, compare_value); // primitive access: use the non-oop raw accessor
+    }
+
+    template <typename T>
+    static T atomic_xchg_in_heap(T new_value, T* addr) {
+      return Raw::atomic_xchg(new_value, addr);
+    }
+
+    template <typename T>
+    static T atomic_xchg_in_heap_at(T new_value, oop base, ptrdiff_t offset) {
+      return Raw::atomic_xchg_at(new_value, base, offset);
+    }
+
+    template <typename T>
+    static bool arraycopy_in_heap(arrayOop src_obj, arrayOop dst_obj, T* src, T* dst, size_t length) {
+      return Raw::arraycopy(src_obj, dst_obj, src, dst, length);
+    }
+
+    // Heap oop accesses. These accessors get resolved when
+    // IN_HEAP is set (e.g. when using the HeapAccess API), it is
+    // an oop_* overload, and the barrier strength is AS_NORMAL.
+    template <typename T>
+    static oop oop_load_in_heap(T* addr) {
+      return Raw::template oop_load<oop>(addr);
+    }
+
+    static oop oop_load_in_heap_at(oop base, ptrdiff_t offset) {
+      return Raw::template oop_load_at<oop>(base, offset);
+    }
+
+    template <typename T>
+    static void oop_store_in_heap(T* addr, oop value) {
+      Raw::oop_store(addr, value);
+    }
+
+    static void oop_store_in_heap_at(oop base, ptrdiff_t offset, oop value) {
+      Raw::oop_store_at(base, offset, value);
+    }
+
+    template <typename T>
+    static oop oop_atomic_cmpxchg_in_heap(oop new_value, T* addr, oop compare_value) {
+      return Raw::oop_atomic_cmpxchg(new_value, addr, compare_value);
+    }
+
+    static oop oop_atomic_cmpxchg_in_heap_at(oop new_value, oop base, ptrdiff_t offset, oop compare_value) {
+      return Raw::oop_atomic_cmpxchg_at(new_value, base, offset, compare_value);
+    }
+
+    template <typename T>
+    static oop oop_atomic_xchg_in_heap(oop new_value, T* addr) {
+      return Raw::oop_atomic_xchg(new_value, addr);
+    }
+
+    static oop oop_atomic_xchg_in_heap_at(oop new_value, oop base, ptrdiff_t offset) {
+      return Raw::oop_atomic_xchg_at(new_value, base, offset);
+    }
+
+    template <typename T>
+    static bool oop_arraycopy_in_heap(arrayOop src_obj, arrayOop dst_obj, T* src, T* dst, size_t length) {
+      return Raw::oop_arraycopy(src_obj, dst_obj, src, dst, length);
+    }
+
+    // Off-heap oop accesses. These accessors get resolved when
+    // IN_HEAP is not set (e.g. when using the RootAccess API), it is
+    // an oop_* overload, and the barrier strength is AS_NORMAL.
+    template <typename T>
+    static oop oop_load_not_in_heap(T* addr) {
+      return Raw::template oop_load<oop>(addr);
+    }
+
+    template <typename T>
+    static void oop_store_not_in_heap(T* addr, oop value) {
+      Raw::oop_store(addr, value);
+    }
+
+    template <typename T>
+    static oop oop_atomic_cmpxchg_not_in_heap(oop new_value, T* addr, oop compare_value) {
+      return Raw::oop_atomic_cmpxchg(new_value, addr, compare_value);
+    }
+
+    template <typename T>
+    static oop oop_atomic_xchg_not_in_heap(oop new_value, T* addr) {
+      return Raw::oop_atomic_xchg(new_value, addr);
+    }
+
+    // Clone barrier support
+    static void clone_in_heap(oop src, oop dst, size_t size) {
+      Raw::clone(src, dst, size);
+    }
+  };
 };
 
 template<typename T>
--- a/src/hotspot/share/gc/shared/barrierSet.inline.hpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/gc/shared/barrierSet.inline.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -26,17 +26,9 @@
 #define SHARE_VM_GC_SHARED_BARRIERSET_INLINE_HPP
 
 #include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetConfig.inline.hpp"
 #include "utilities/align.hpp"
 
-
-template <class T> void BarrierSet::write_ref_field_pre(T* field, oop new_val) {
-  write_ref_field_pre_work(field, new_val);
-}
-
-void BarrierSet::write_ref_field(void* field, oop new_val, bool release) {
-  write_ref_field_work(field, new_val, release);
-}
-
 // count is number of array elements being written
 void BarrierSet::write_ref_array(HeapWord* start, size_t count) {
   assert(count <= (size_t)max_intx, "count too large");
@@ -60,7 +52,6 @@
   write_ref_array_work(MemRegion(aligned_start, aligned_end));
 }
 
-
 inline void BarrierSet::write_region(MemRegion mr) {
   write_region_work(mr);
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/gc/shared/barrierSetConfig.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_SHARED_BARRIERSETCONFIG_HPP
+#define SHARE_VM_GC_SHARED_BARRIERSETCONFIG_HPP
+
+#include "utilities/macros.hpp"
+
+#if INCLUDE_ALL_GCS  // Barrier sets listed as concrete only when INCLUDE_ALL_GCS is non-zero.
+#define FOR_EACH_CONCRETE_INCLUDE_ALL_GC_BARRIER_SET_DO(f) \
+  f(CardTableExtension)                                    \
+  f(G1SATBCTLogging)
+#else  // !INCLUDE_ALL_GCS: the list is empty, so f is never applied.
+#define FOR_EACH_CONCRETE_INCLUDE_ALL_GC_BARRIER_SET_DO(f)
+#endif // INCLUDE_ALL_GCS
+
+// Applies f to each concrete barrier set in this build: CardTableForRS always, plus the INCLUDE_ALL_GCS-only ones.
+#define FOR_EACH_CONCRETE_BARRIER_SET_DO(f)          \
+  f(CardTableForRS)                                  \
+  FOR_EACH_CONCRETE_INCLUDE_ALL_GC_BARRIER_SET_DO(f)
+
+// Applies f to each known barrier set name; unlike the concrete list, this one is not conditional on INCLUDE_ALL_GCS.
+#define FOR_EACH_BARRIER_SET_DO(f)    \
+  f(ModRef)                           \
+  f(CardTableModRef)                  \
+  f(CardTableForRS)                   \
+  f(CardTableExtension)               \
+  f(G1SATBCT)                         \
+  f(G1SATBCTLogging)
+
+// To enable runtime-resolution of GC barriers on primitives, please
+// define SUPPORT_BARRIER_ON_PRIMITIVES.
+#ifdef SUPPORT_BARRIER_ON_PRIMITIVES  // chooses the value of BT_BUILDTIME_DECORATORS
+#define BT_BUILDTIME_DECORATORS INTERNAL_BT_BARRIER_ON_PRIMITIVES
+#else  // SUPPORT_BARRIER_ON_PRIMITIVES not defined
+#define BT_BUILDTIME_DECORATORS INTERNAL_EMPTY
+#endif // SUPPORT_BARRIER_ON_PRIMITIVES
+
+#endif // SHARE_VM_GC_SHARED_BARRIERSETCONFIG_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/gc/shared/barrierSetConfig.inline.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_SHARED_BARRIERSETCONFIG_INLINE_HPP
+#define SHARE_VM_GC_SHARED_BARRIERSETCONFIG_INLINE_HPP
+
+#include "gc/shared/barrierSetConfig.hpp"
+
+#include "gc/shared/modRefBarrierSet.inline.hpp"
+#include "gc/shared/cardTableModRefBS.inline.hpp"
+#include "gc/shared/cardTableModRefBSForCTRS.hpp"
+
+#if INCLUDE_ALL_GCS
+#include "gc/parallel/cardTableExtension.hpp"       // Parallel support
+#include "gc/g1/g1SATBCardTableModRefBS.inline.hpp" // G1 support
+#endif
+
+#endif // SHARE_VM_GC_SHARED_BARRIERSETCONFIG_INLINE_HPP
--- a/src/hotspot/share/gc/shared/cardTableModRefBS.cpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/gc/shared/cardTableModRefBS.cpp	Mon Nov 20 13:07:44 2017 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,8 +27,9 @@
 #include "gc/shared/collectedHeap.hpp"
 #include "gc/shared/genCollectedHeap.hpp"
 #include "gc/shared/space.inline.hpp"
+#include "logging/log.hpp"
 #include "memory/virtualspace.hpp"
-#include "logging/log.hpp"
+#include "oops/oop.inline.hpp"
 #include "services/memTracker.hpp"
 #include "utilities/align.hpp"
 #include "utilities/macros.hpp"
@@ -363,11 +364,6 @@
 // Note that these versions are precise!  The scanning code has to handle the
 // fact that the write barrier may be either precise or imprecise.
 
-void CardTableModRefBS::write_ref_field_work(void* field, oop newVal, bool release) {
-  inline_write_ref_field(field, newVal, release);
-}
-
-
 void CardTableModRefBS::dirty_MemRegion(MemRegion mr) {
   assert(align_down(mr.start(), HeapWordSize) == mr.start(), "Unaligned start");
   assert(align_up  (mr.end(),   HeapWordSize) == mr.end(),   "Unaligned end"  );
@@ -525,4 +521,3 @@
   st->print_cr("Card table byte_map: [" INTPTR_FORMAT "," INTPTR_FORMAT "] byte_map_base: " INTPTR_FORMAT,
                p2i(_byte_map), p2i(_byte_map + _byte_map_size), p2i(byte_map_base));
 }
-
--- a/src/hotspot/share/gc/shared/cardTableModRefBS.hpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/gc/shared/cardTableModRefBS.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -26,7 +26,6 @@
 #define SHARE_VM_GC_SHARED_CARDTABLEMODREFBS_HPP
 
 #include "gc/shared/modRefBarrierSet.hpp"
-#include "oops/oop.hpp"
 #include "utilities/align.hpp"
 
 // This kind of "BarrierSet" allows a "CollectedHeap" to detect and
@@ -181,14 +180,6 @@
   CardTableModRefBS(MemRegion whole_heap, const BarrierSet::FakeRtti& fake_rtti);
   ~CardTableModRefBS();
 
-  // Record a reference update. Note that these versions are precise!
-  // The scanning code has to handle the fact that the write barrier may be
-  // either precise or imprecise. We make non-virtual inline variants of
-  // these functions here for performance.
-
-  void write_ref_field_work(oop obj, size_t offset, oop newVal);
-  virtual void write_ref_field_work(void* field, oop newVal, bool release);
-
  protected:
   void write_region_work(MemRegion mr) {
     dirty_MemRegion(mr);
@@ -206,9 +197,12 @@
 
   // *** Card-table-barrier-specific things.
 
-  template <class T> inline void inline_write_ref_field_pre(T* field, oop newVal) {}
-
-  template <class T> inline void inline_write_ref_field(T* field, oop newVal, bool release);
+  // Record a reference update. Note that this version is precise!
+  // The scanning code has to handle the fact that the write barrier may be
+  // either precise or imprecise. This is a non-virtual inline template
+  // member for performance.
+  template <DecoratorSet decorators, typename T>
+  void write_ref_field_post(T* field, oop newVal);
 
   // These are used by G1, when it uses the card table as a temporary data
   // structure for card claiming.
@@ -319,6 +313,9 @@
   void verify_region(MemRegion mr, jbyte val, bool val_equals) PRODUCT_RETURN;
   void verify_not_dirty_region(MemRegion mr) PRODUCT_RETURN;
   void verify_dirty_region(MemRegion mr) PRODUCT_RETURN;
+
+  template <DecoratorSet decorators, typename BarrierSetT = CardTableModRefBS>  // BarrierSetT defaults to CardTableModRefBS
+  class AccessBarrier: public ModRefBarrierSet::AccessBarrier<decorators, BarrierSetT> {};  // adds no members of its own
 };
 
 template<>
@@ -326,5 +323,9 @@
   static const BarrierSet::Name value = BarrierSet::CardTableModRef;
 };
 
+template<>  // Maps the BarrierSet::CardTableModRef name to the class CardTableModRefBS.
+struct BarrierSet::GetType<BarrierSet::CardTableModRef> {
+  typedef CardTableModRefBS type;
+};
 
 #endif // SHARE_VM_GC_SHARED_CARDTABLEMODREFBS_HPP
--- a/src/hotspot/share/gc/shared/cardTableModRefBS.inline.hpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/gc/shared/cardTableModRefBS.inline.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -26,13 +26,14 @@
 #define SHARE_VM_GC_SHARED_CARDTABLEMODREFBS_INLINE_HPP
 
 #include "gc/shared/cardTableModRefBS.hpp"
-#include "oops/oopsHierarchy.hpp"
 #include "runtime/orderAccess.inline.hpp"
 
-template <class T> inline void CardTableModRefBS::inline_write_ref_field(T* field, oop newVal, bool release) {
-  volatile jbyte* byte = byte_for((void*)field);
-  if (release) {
-    // Perform a releasing store if requested.
+template <DecoratorSet decorators, typename T>
+inline void CardTableModRefBS::write_ref_field_post(T* field, oop newVal) {
+  volatile jbyte* byte = byte_for(field);
+  if (UseConcMarkSweepGC) {
+    // Perform a releasing store if using CMS so that it may
+    // scan and clear the cards concurrently during pre-cleaning.
     OrderAccess::release_store(byte, jbyte(dirty_card));
   } else {
     *byte = dirty_card;
--- a/src/hotspot/share/gc/shared/cardTableModRefBSForCTRS.hpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/gc/shared/cardTableModRefBSForCTRS.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -139,5 +139,9 @@
   static const BarrierSet::Name value = BarrierSet::CardTableForRS;
 };
 
-#endif // include guard
+template<>  // Maps the BarrierSet::CardTableForRS name to the class CardTableModRefBSForCTRS.
+struct BarrierSet::GetType<BarrierSet::CardTableForRS> {
+  typedef CardTableModRefBSForCTRS type;
+};
 
+#endif // SHARE_VM_GC_SHARED_CARDTABLEMODREFBSFORCTRS_HPP
--- a/src/hotspot/share/gc/shared/cardTableRS.hpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/gc/shared/cardTableRS.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -28,6 +28,7 @@
 #include "gc/shared/cardTableModRefBSForCTRS.hpp"
 #include "memory/memRegion.hpp"
 
+class Generation;
 class Space;
 class OopsInGenClosure;
 
--- a/src/hotspot/share/gc/shared/collectedHeap.cpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/gc/shared/collectedHeap.cpp	Mon Nov 20 13:07:44 2017 +0100
@@ -235,7 +235,7 @@
 
 void CollectedHeap::set_barrier_set(BarrierSet* barrier_set) {
   _barrier_set = barrier_set;
-  oopDesc::set_bs(_barrier_set);
+  BarrierSet::set_bs(barrier_set);
 }
 
 void CollectedHeap::pre_initialize() {
--- a/src/hotspot/share/gc/shared/modRefBarrierSet.hpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/gc/shared/modRefBarrierSet.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -26,13 +26,9 @@
 #define SHARE_VM_GC_SHARED_MODREFBARRIERSET_HPP
 
 #include "gc/shared/barrierSet.hpp"
+#include "memory/memRegion.hpp"
 
-// This kind of "BarrierSet" allows a "CollectedHeap" to detect and
-// enumerate ref fields that have been modified (since the last
-// enumeration), using a card table.
-
-class OopClosure;
-class Generation;
+class Klass;
 
 class ModRefBarrierSet: public BarrierSet {
 protected:
@@ -41,12 +37,49 @@
   ~ModRefBarrierSet() { }
 
 public:
+  template <DecoratorSet decorators, typename T>  // Pre-write hook taking the slot address; empty in this class.
+  inline void write_ref_field_pre(T* addr) {}
+
+  template <DecoratorSet decorators, typename T>  // Post-write hook taking the slot address and new value; empty in this class.
+  inline void write_ref_field_post(T *addr, oop new_value) {}
+
   // Causes all refs in "mr" to be assumed to be modified.
   virtual void invalidate(MemRegion mr) = 0;
 
   // The caller guarantees that "mr" contains no references.  (Perhaps it's
   // objects have been moved elsewhere.)
   virtual void clear(MemRegion mr) = 0;
+
+  // The ModRef abstraction introduces pre and post barriers for in-heap oop writes.
+  template <DecoratorSet decorators, typename BarrierSetT>
+  class AccessBarrier: public BarrierSet::AccessBarrier<decorators, BarrierSetT> {
+    typedef BarrierSet::AccessBarrier<decorators, BarrierSetT> Raw;  // alias for the base class
+
+  public:
+    template <typename T>  // Declarations only; the definitions are in modRefBarrierSet.inline.hpp.
+    static void oop_store_in_heap(T* addr, oop value);
+    template <typename T>
+    static oop oop_atomic_cmpxchg_in_heap(oop new_value, T* addr, oop compare_value);
+    template <typename T>
+    static oop oop_atomic_xchg_in_heap(oop new_value, T* addr);
+
+    template <typename T>
+    static bool oop_arraycopy_in_heap(arrayOop src_obj, arrayOop dst_obj, T* src, T* dst, size_t length);
+
+    static void clone_in_heap(oop src, oop dst, size_t size);
+
+    static void oop_store_in_heap_at(oop base, ptrdiff_t offset, oop value) {  // (base, offset) form of oop_store_in_heap
+      oop_store_in_heap(AccessInternal::oop_field_addr<decorators>(base, offset), value);  // address comes from oop_field_addr
+    }
+
+    static oop oop_atomic_xchg_in_heap_at(oop new_value, oop base, ptrdiff_t offset) {  // (base, offset) form of oop_atomic_xchg_in_heap
+      return oop_atomic_xchg_in_heap(new_value, AccessInternal::oop_field_addr<decorators>(base, offset));
+    }
+
+    static oop oop_atomic_cmpxchg_in_heap_at(oop new_value, oop base, ptrdiff_t offset, oop compare_value) {  // (base, offset) form of oop_atomic_cmpxchg_in_heap
+      return oop_atomic_cmpxchg_in_heap(new_value, AccessInternal::oop_field_addr<decorators>(base, offset), compare_value);
+    }
+  };
 };
 
 template<>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/gc/shared/modRefBarrierSet.inline.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_SHARED_MODREFBARRIERSET_INLINE_HPP
+#define SHARE_VM_GC_SHARED_MODREFBARRIERSET_INLINE_HPP
+
+#include "gc/shared/modRefBarrierSet.hpp"
+#include "oops/klass.inline.hpp"
+#include "oops/objArrayOop.hpp"
+#include "oops/oop.hpp"
+
+template <DecoratorSet decorators, typename BarrierSetT>  // In-heap oop store bracketed by the barrier set's pre/post hooks.
+template <typename T>
+inline void ModRefBarrierSet::AccessBarrier<decorators, BarrierSetT>::
+oop_store_in_heap(T* addr, oop value) {
+  BarrierSetT *bs = barrier_set_cast<BarrierSetT>(barrier_set());  // barrier_set() result cast to the concrete BarrierSetT
+  bs->template write_ref_field_pre<decorators>(addr);  // pre hook runs before the slot is written
+  Raw::oop_store(addr, value);  // the store itself is done by the base class
+  bs->template write_ref_field_post<decorators>(addr, value);  // post hook receives the slot and the stored value
+}
+
+template <DecoratorSet decorators, typename BarrierSetT>  // In-heap oop compare-and-exchange with pre/post hooks.
+template <typename T>
+inline oop ModRefBarrierSet::AccessBarrier<decorators, BarrierSetT>::
+oop_atomic_cmpxchg_in_heap(oop new_value, T* addr, oop compare_value) {
+  BarrierSetT *bs = barrier_set_cast<BarrierSetT>(barrier_set());  // barrier_set() result cast to the concrete BarrierSetT
+  bs->template write_ref_field_pre<decorators>(addr);  // pre hook runs unconditionally, before the outcome is known
+  oop result = Raw::oop_atomic_cmpxchg(new_value, addr, compare_value);
+  if (result == compare_value) {  // equality with compare_value is treated as "the exchange happened"
+    bs->template write_ref_field_post<decorators>(addr, new_value);  // post hook only on that path
+  }
+  return result;  // Raw's result is returned on both paths
+}
+
+template <DecoratorSet decorators, typename BarrierSetT>  // In-heap oop exchange bracketed by pre/post hooks.
+template <typename T>
+inline oop ModRefBarrierSet::AccessBarrier<decorators, BarrierSetT>::
+oop_atomic_xchg_in_heap(oop new_value, T* addr) {
+  BarrierSetT *bs = barrier_set_cast<BarrierSetT>(barrier_set());  // barrier_set() result cast to the concrete BarrierSetT
+  bs->template write_ref_field_pre<decorators>(addr);  // pre hook runs before the exchange
+  oop result = Raw::oop_atomic_xchg(new_value, addr);
+  bs->template write_ref_field_post<decorators>(addr, new_value);  // post hook always runs (no success test, unlike cmpxchg)
+  return result;  // Raw's result is returned unchanged
+}
+
+template <DecoratorSet decorators, typename BarrierSetT>  // In-heap oop array copy; returns false if a checkcast copy stops early.
+template <typename T>
+inline bool ModRefBarrierSet::AccessBarrier<decorators, BarrierSetT>::
+oop_arraycopy_in_heap(arrayOop src_obj, arrayOop dst_obj, T* src, T* dst, size_t length) {
+  BarrierSetT *bs = barrier_set_cast<BarrierSetT>(barrier_set());  // barrier_set() result cast to the concrete BarrierSetT
+
+  if (!HasDecorator<decorators, ARRAYCOPY_CHECKCAST>::value) {  // path is selected by the decorators template argument
+    // Optimized covariant case: one bulk pre barrier, one bulk copy, one bulk post barrier.
+    bs->write_ref_array_pre(dst, (int)length,  // length is narrowed to int for this call
+                            HasDecorator<decorators, ARRAYCOPY_DEST_NOT_INITIALIZED>::value);
+    Raw::oop_arraycopy(src_obj, dst_obj, src, dst, length);  // Raw's return value is not inspected on this path
+    bs->write_ref_array((HeapWord*)dst, length);
+  } else {
+    Klass* bound = objArrayOop(dst_obj)->element_klass();  // every copied element is checked against this klass
+    T* from = src;
+    T* end = from + length;
+    for (T* p = dst; from < end; from++, p++) {  // from walks the source, p walks the destination in lock step
+      T element = *from;
+      if (bound->is_instanceof_or_null(element)) {
+        bs->template write_ref_field_pre<decorators>(p);  // per-element pre hook before the slot is overwritten
+        *p = element;
+      } else {
+        // We must do a barrier to cover the partial copy.
+        const size_t pd = pointer_delta(p, dst, (size_t)heapOopSize);  // count of destination slots already written
+        // pointer delta is scaled to number of elements (length field in
+        // objArrayOop) which we assume is 32 bit.
+        assert(pd == (size_t)(int)pd, "length field overflow");
+        bs->write_ref_array((HeapWord*)dst, pd);
+        return false;  // stop at the first element that fails the check
+      }
+    }
+    bs->write_ref_array((HeapWord*)dst, length);  // all elements copied: post barrier over the whole range
+  }
+  return true;
+}
+
+template <DecoratorSet decorators, typename BarrierSetT>  // In-heap clone followed by a write_region barrier on dst.
+inline void ModRefBarrierSet::AccessBarrier<decorators, BarrierSetT>::
+clone_in_heap(oop src, oop dst, size_t size) {
+  Raw::clone(src, dst, size);  // the copy itself is done by the base class, before any barrier
+  BarrierSetT *bs = barrier_set_cast<BarrierSetT>(barrier_set());  // barrier_set() result cast to the concrete BarrierSetT
+  bs->write_region(MemRegion((HeapWord*)(void*)dst, size));  // region starts at dst; size presumably in HeapWords - TODO confirm
+}
+
+#endif // SHARE_VM_GC_SHARED_MODREFBARRIERSET_INLINE_HPP
--- a/src/hotspot/share/gc/shared/referenceProcessor.cpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/gc/shared/referenceProcessor.cpp	Mon Nov 20 13:07:44 2017 +0100
@@ -34,6 +34,7 @@
 #include "logging/log.hpp"
 #include "memory/allocation.hpp"
 #include "memory/resourceArea.hpp"
+#include "oops/access.inline.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/java.hpp"
 
@@ -294,14 +295,13 @@
     // Self-loop next, so as to make Ref not active.
     java_lang_ref_Reference::set_next_raw(obj, obj);
     if (next_d != obj) {
-      oopDesc::bs()->write_ref_field(java_lang_ref_Reference::discovered_addr(obj), next_d);
+      HeapAccess<AS_NO_KEEPALIVE>::oop_store_at(obj, java_lang_ref_Reference::discovered_offset, next_d);
     } else {
       // This is the last object.
       // Swap refs_list into pending list and set obj's
       // discovered to what we read from the pending list.
       oop old = Universe::swap_reference_pending_list(refs_list.head());
-      java_lang_ref_Reference::set_discovered_raw(obj, old); // old may be NULL
-      oopDesc::bs()->write_ref_field(java_lang_ref_Reference::discovered_addr(obj), old);
+      HeapAccess<AS_NO_KEEPALIVE>::oop_store_at(obj, java_lang_ref_Reference::discovered_offset, old);
     }
   }
 }
@@ -382,7 +382,7 @@
 
 void DiscoveredListIterator::remove() {
   assert(oopDesc::is_oop(_ref), "Dropping a bad reference");
-  oop_store_raw(_discovered_addr, NULL);
+  RawAccess<>::oop_store(_discovered_addr, oop(NULL));
 
   // First _prev_next ref actually points into DiscoveredList (gross).
   oop new_next;
@@ -397,13 +397,13 @@
   // Remove Reference object from discovered list. Note that G1 does not need a
   // pre-barrier here because we know the Reference has already been found/marked,
   // that's how it ended up in the discovered list in the first place.
-  oop_store_raw(_prev_next, new_next);
+  RawAccess<>::oop_store(_prev_next, new_next);
   NOT_PRODUCT(_removed++);
   _refs_list.dec_length(1);
 }
 
 void DiscoveredListIterator::clear_referent() {
-  oop_store_raw(_referent_addr, NULL);
+  RawAccess<>::oop_store(_referent_addr, oop(NULL));
 }
 
 // NOTE: process_phase*() are largely similar, and at a high level
@@ -917,8 +917,8 @@
   // The last ref must have its discovered field pointing to itself.
   oop next_discovered = (current_head != NULL) ? current_head : obj;
 
-  oop retest = oopDesc::atomic_compare_exchange_oop(next_discovered, discovered_addr,
-                                                    NULL);
+  oop retest = RawAccess<>::oop_atomic_cmpxchg(next_discovered, discovered_addr, oop(NULL));
+
   if (retest == NULL) {
     // This thread just won the right to enqueue the object.
     // We have separate lists for enqueueing, so no synchronization
@@ -933,8 +933,8 @@
     // The reference has already been discovered...
     log_develop_trace(gc, ref)("Already discovered reference (" INTPTR_FORMAT ": %s)",
                                p2i(obj), obj->klass()->internal_name());
-    }
   }
+}
 
 #ifndef PRODUCT
 // Non-atomic (i.e. concurrent) discovery might allow us
@@ -1076,7 +1076,7 @@
     oop next_discovered = (current_head != NULL) ? current_head : obj;
 
     assert(discovered == NULL, "control point invariant");
-    oop_store_raw(discovered_addr, next_discovered);
+    RawAccess<>::oop_store(discovered_addr, next_discovered);
     list->set_head(obj);
     list->inc_length(1);
 
--- a/src/hotspot/share/jvmci/jvmciJavaClasses.hpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/jvmci/jvmciJavaClasses.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -25,6 +25,7 @@
 #define SHARE_VM_JVMCI_JVMCIJAVACLASSES_HPP
 
 #include "classfile/systemDictionary.hpp"
+#include "oops/access.inline.hpp"
 #include "oops/instanceMirrorKlass.hpp"
 #include "oops/oop.inline.hpp"
 
@@ -351,22 +352,15 @@
       assert(klassName::klass() != NULL && klassName::klass()->is_linked(), "Class not yet linked: " #klassName); \
       InstanceKlass* ik = klassName::klass();                                                                  \
       address addr = ik->static_field_addr(_##name##_offset - InstanceMirrorKlass::offset_of_static_fields()); \
-      if (UseCompressedOops) {                                                                                 \
-        return (type) oopDesc::load_decode_heap_oop((narrowOop *)addr);                                        \
-      } else {                                                                                                 \
-        return (type) oopDesc::load_decode_heap_oop((oop*)addr);                                               \
-      }                                                                                                        \
+      oop result = HeapAccess<>::oop_load((HeapWord*)addr);                                                    \
+      return type(result);                                                                                     \
     }                                                                                                          \
     static void set_##name(type x) {                                                                           \
       assert(klassName::klass() != NULL && klassName::klass()->is_linked(), "Class not yet linked: " #klassName); \
       assert(klassName::klass() != NULL, "Class not yet loaded: " #klassName);                                 \
       InstanceKlass* ik = klassName::klass();                                                                  \
       address addr = ik->static_field_addr(_##name##_offset - InstanceMirrorKlass::offset_of_static_fields()); \
-      if (UseCompressedOops) {                                                                                 \
-        oop_store((narrowOop *)addr, x);                                                                       \
-      } else {                                                                                                 \
-        oop_store((oop*)addr, x);                                                                              \
-      }                                                                                                        \
+      HeapAccess<>::oop_store((HeapWord*)addr, x);                                                             \
     }
 #define STATIC_PRIMITIVE_FIELD(klassName, name, jtypename)                                                     \
     static int _##name##_offset;                                                                               \
@@ -374,13 +368,13 @@
       assert(klassName::klass() != NULL && klassName::klass()->is_linked(), "Class not yet linked: " #klassName); \
       InstanceKlass* ik = klassName::klass();                                                                  \
       address addr = ik->static_field_addr(_##name##_offset - InstanceMirrorKlass::offset_of_static_fields()); \
-      return *((jtypename *)addr);                                                                             \
+      return HeapAccess<>::load((jtypename*)addr);                                                             \
     }                                                                                                          \
     static void set_##name(jtypename x) {                                                                      \
       assert(klassName::klass() != NULL && klassName::klass()->is_linked(), "Class not yet linked: " #klassName); \
       InstanceKlass* ik = klassName::klass();                                                                  \
       address addr = ik->static_field_addr(_##name##_offset - InstanceMirrorKlass::offset_of_static_fields()); \
-      *((jtypename *)addr) = x;                                                                                \
+      HeapAccess<>::store((jtypename*)addr, x);                                                                \
     }
 
 #define STATIC_INT_FIELD(klassName, name) STATIC_PRIMITIVE_FIELD(klassName, name, jint)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/oops/access.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -0,0 +1,519 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_RUNTIME_ACCESS_HPP
+#define SHARE_VM_RUNTIME_ACCESS_HPP
+
+#include "memory/allocation.hpp"
+#include "metaprogramming/decay.hpp"
+#include "metaprogramming/integralConstant.hpp"
+#include "oops/oopsHierarchy.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+// = GENERAL =
+// Access is an API for performing accesses with declarative semantics. Each access can have a number of "decorators".
+// A decorator is an attribute or property that affects the way a memory access is performed in some way.
+// There are different groups of decorators. Some have to do with memory ordering, others to do with,
+// e.g. strength of references, strength of GC barriers, or whether compression should be applied or not.
+// Some decorators are set at buildtime, such as whether primitives require GC barriers or not, others
+// at callsites such as whether an access is in the heap or not, and others are resolved at runtime
+// such as GC-specific barriers and encoding/decoding compressed oops.
+// By pipelining handling of these decorators, the design of the Access API allows separation of concern
+// over the different orthogonal concerns of decorators, while providing a powerful way of
+// expressing these orthogonal semantic properties in a unified way.
+
+// == OPERATIONS ==
+// * load: Load a value from an address.
+// * load_at: Load a value from an internal pointer relative to a base object.
+// * store: Store a value at an address.
+// * store_at: Store a value in an internal pointer relative to a base object.
+// * atomic_cmpxchg: Atomically compare-and-swap a new value at an address if previous value matched the compared value.
+// * atomic_cmpxchg_at: Atomically compare-and-swap a new value at an internal pointer address if previous value matched the compared value.
+// * atomic_xchg: Atomically swap a new value at an address (unconditionally; no compare value is involved).
+// * atomic_xchg_at: Atomically swap a new value at an internal pointer address (unconditionally; no compare value is involved).
+// * arraycopy: Copy data from one heap array to another heap array.
+// * clone: Clone the contents of an object to a newly allocated object.
+
+typedef uint64_t DecoratorSet;
+
+// == Internal Decorators - do not use ==
+// * INTERNAL_EMPTY: This is the name for the empty decorator set (in absence of other decorators).
+// * INTERNAL_CONVERT_COMPRESSED_OOP: This is an oop access that will require converting an oop
+//   to a narrowOop or vice versa, if UseCompressedOops is known to be set.
+// * INTERNAL_VALUE_IS_OOP: Remember that the involved access is on an oop rather than a primitive.
+const DecoratorSet INTERNAL_EMPTY                    = UCONST64(0);
+const DecoratorSet INTERNAL_CONVERT_COMPRESSED_OOP   = UCONST64(1) << 1;
+const DecoratorSet INTERNAL_VALUE_IS_OOP             = UCONST64(1) << 2;
+
+// == Internal build-time Decorators ==
+// * INTERNAL_BT_BARRIER_ON_PRIMITIVES: This is set in the barrierSetConfig.hpp file.
+const DecoratorSet INTERNAL_BT_BARRIER_ON_PRIMITIVES = UCONST64(1) << 3;
+
+// == Internal run-time Decorators ==
+// * INTERNAL_RT_USE_COMPRESSED_OOPS: This decorator will be set in runtime resolved
+//   access backends iff UseCompressedOops is true.
+const DecoratorSet INTERNAL_RT_USE_COMPRESSED_OOPS   = UCONST64(1) << 4;
+
+const DecoratorSet INTERNAL_DECORATOR_MASK           = INTERNAL_CONVERT_COMPRESSED_OOP | INTERNAL_VALUE_IS_OOP |
+                                                       INTERNAL_BT_BARRIER_ON_PRIMITIVES | INTERNAL_RT_USE_COMPRESSED_OOPS;
+
+// == Memory Ordering Decorators ==
+// The memory ordering decorators can be described in the following way:
+// === Decorator Rules ===
+// The different types of memory ordering guarantees have a strict order of strength.
+// Explicitly specifying the stronger ordering implies that the guarantees of the weaker
+// property holds too. The names come from the C++11 atomic operations, and typically
+// have a JMM equivalent property.
+// The equivalence may be viewed like this:
+// MO_UNORDERED is equivalent to JMM plain.
+// MO_VOLATILE has no equivalence in JMM, because it's a C++ thing.
+// MO_RELAXED is equivalent to JMM opaque.
+// MO_ACQUIRE is equivalent to JMM acquire.
+// MO_RELEASE is equivalent to JMM release.
+// MO_SEQ_CST is equivalent to JMM volatile.
+//
+// === Stores ===
+//  * MO_UNORDERED (Default): No guarantees.
+//    - The compiler and hardware are free to reorder aggressively. And they will.
+//  * MO_VOLATILE: Volatile stores (in the C++ sense).
+//    - The stores are not reordered by the compiler (but possibly the HW) w.r.t. other
+//      volatile accesses in program order (but possibly non-volatile accesses).
+//  * MO_RELAXED: Relaxed atomic stores.
+//    - The stores are atomic.
+//    - Guarantees from volatile stores hold.
+//  * MO_RELEASE: Releasing stores.
+//    - The releasing store will make its preceding memory accesses observable to memory accesses
+//      subsequent to an acquiring load observing this releasing store.
+//    - Guarantees from relaxed stores hold.
+//  * MO_SEQ_CST: Sequentially consistent stores.
+//    - The stores are observed in the same order by MO_SEQ_CST loads on other processors
+//    - Preceding loads and stores in program order are not reordered with subsequent loads and stores in program order.
+//    - Guarantees from releasing stores hold.
+// === Loads ===
+//  * MO_UNORDERED (Default): No guarantees
+//    - The compiler and hardware are free to reorder aggressively. And they will.
+//  * MO_VOLATILE: Volatile loads (in the C++ sense).
+//    - The loads are not reordered by the compiler (but possibly the HW) w.r.t. other
+//      volatile accesses in program order (but possibly non-volatile accesses).
+//  * MO_RELAXED: Relaxed atomic loads.
+//    - The loads are atomic.
+//    - Guarantees from volatile loads hold.
+//  * MO_ACQUIRE: Acquiring loads.
+//    - An acquiring load will make subsequent memory accesses observe the memory accesses
+//      preceding the releasing store that the acquiring load observed.
+//    - Guarantees from relaxed loads hold.
+//  * MO_SEQ_CST: Sequentially consistent loads.
+//    - These loads observe MO_SEQ_CST stores in the same order on other processors
+//    - Preceding loads and stores in program order are not reordered with subsequent loads and stores in program order.
+//    - Guarantees from acquiring loads hold.
+// === Atomic Cmpxchg ===
+//  * MO_RELAXED: Atomic but relaxed cmpxchg.
+//    - Guarantees from MO_RELAXED loads and MO_RELAXED stores hold unconditionally.
+//  * MO_SEQ_CST: Sequentially consistent cmpxchg.
+//    - Guarantees from MO_SEQ_CST loads and MO_SEQ_CST stores hold unconditionally.
+// === Atomic Xchg ===
+//  * MO_RELAXED: Atomic but relaxed atomic xchg.
+//    - Guarantees from MO_RELAXED loads and MO_RELAXED stores hold.
+//  * MO_SEQ_CST: Sequentially consistent xchg.
+//    - Guarantees from MO_SEQ_CST loads and MO_SEQ_CST stores hold.
+const DecoratorSet MO_UNORDERED      = UCONST64(1) << 5;
+const DecoratorSet MO_VOLATILE       = UCONST64(1) << 6;
+const DecoratorSet MO_RELAXED        = UCONST64(1) << 7;
+const DecoratorSet MO_ACQUIRE        = UCONST64(1) << 8;
+const DecoratorSet MO_RELEASE        = UCONST64(1) << 9;
+const DecoratorSet MO_SEQ_CST        = UCONST64(1) << 10;
+const DecoratorSet MO_DECORATOR_MASK = MO_UNORDERED | MO_VOLATILE | MO_RELAXED |
+                                       MO_ACQUIRE | MO_RELEASE | MO_SEQ_CST;
+
+// === Barrier Strength Decorators ===
+// * AS_RAW: The access will translate into a raw memory access, hence ignoring all semantic concerns
+//   except memory ordering and compressed oops. This will bypass runtime function pointer dispatching
+//   in the pipeline and hardwire to raw accesses without going through the GC access barriers.
+//  - Accesses on oop* translate to raw memory accesses without runtime checks
+//  - Accesses on narrowOop* translate to encoded/decoded memory accesses without runtime checks
+//  - Accesses on HeapWord* translate to a runtime check choosing one of the above
+//  - Accesses on other types translate to raw memory accesses without runtime checks
+// * AS_NO_KEEPALIVE: The barrier is used only on oop references and will not keep any involved objects
+//   alive, regardless of the type of reference being accessed. It will however perform the memory access
+//   in a consistent way w.r.t. e.g. concurrent compaction, so that the right field is being accessed,
+//   or maintain, e.g. intergenerational or interregional pointers if applicable. This should be used with
+//   extreme caution in isolated scopes.
+// * AS_NORMAL: The accesses will be resolved to an accessor on the BarrierSet class, giving the
+//   responsibility of performing the access and what barriers to be performed to the GC. This is the default.
+//   Note that primitive accesses will only be resolved on the barrier set if the appropriate build-time
+//   decorator for enabling primitive barriers is enabled for the build.
+const DecoratorSet AS_RAW            = UCONST64(1) << 11;
+const DecoratorSet AS_NO_KEEPALIVE   = UCONST64(1) << 12;
+const DecoratorSet AS_NORMAL         = UCONST64(1) << 13;
+const DecoratorSet AS_DECORATOR_MASK = AS_RAW | AS_NO_KEEPALIVE | AS_NORMAL;
+
+// === Reference Strength Decorators ===
+// These decorators only apply to accesses on oop-like types (oop/narrowOop).
+// * ON_STRONG_OOP_REF: Memory access is performed on a strongly reachable reference.
+// * ON_WEAK_OOP_REF: The memory access is performed on a weakly reachable reference.
+// * ON_PHANTOM_OOP_REF: The memory access is performed on a phantomly reachable reference.
+//   This is the same ring of strength as jweak and weak oops in the VM.
+// * ON_UNKNOWN_OOP_REF: The memory access is performed on a reference of unknown strength.
+//   This could for example come from the unsafe API.
+// * Default (no explicit reference strength specified): ON_STRONG_OOP_REF
+const DecoratorSet ON_STRONG_OOP_REF  = UCONST64(1) << 14;
+const DecoratorSet ON_WEAK_OOP_REF    = UCONST64(1) << 15;
+const DecoratorSet ON_PHANTOM_OOP_REF = UCONST64(1) << 16;
+const DecoratorSet ON_UNKNOWN_OOP_REF = UCONST64(1) << 17;
+const DecoratorSet ON_DECORATOR_MASK  = ON_STRONG_OOP_REF | ON_WEAK_OOP_REF |
+                                        ON_PHANTOM_OOP_REF | ON_UNKNOWN_OOP_REF;
+
+// === Access Location ===
+// Accesses can take place in, e.g. the heap, old or young generation and different native roots.
+// The location is important to the GC as it may imply different actions. The following decorators are used:
+// * IN_HEAP: The access is performed in the heap. Many barriers such as card marking will
+//   be omitted if this decorator is not set.
+// * IN_HEAP_ARRAY: The access is performed on a heap allocated array. This is sometimes a special case
+//   for some GCs, and implies that it is an IN_HEAP.
+// * IN_ROOT: The access is performed in an off-heap data structure pointing into the Java heap.
+// * IN_CONCURRENT_ROOT: The access is performed in an off-heap data structure pointing into the Java heap,
+//   but is notably not scanned during safepoints. This is sometimes a special case for some GCs and
+//   implies that it is also an IN_ROOT.
+const DecoratorSet IN_HEAP            = UCONST64(1) << 18;
+const DecoratorSet IN_HEAP_ARRAY      = UCONST64(1) << 19;
+const DecoratorSet IN_ROOT            = UCONST64(1) << 20;
+const DecoratorSet IN_CONCURRENT_ROOT = UCONST64(1) << 21;
+const DecoratorSet IN_DECORATOR_MASK  = IN_HEAP | IN_HEAP_ARRAY |
+                                        IN_ROOT | IN_CONCURRENT_ROOT;
+
+// == Value Decorators ==
+// * OOP_NOT_NULL: This property can make certain barriers faster such as compressing oops.
+const DecoratorSet OOP_NOT_NULL       = UCONST64(1) << 22;
+const DecoratorSet OOP_DECORATOR_MASK = OOP_NOT_NULL;
+
+// == Arraycopy Decorators ==
+// * ARRAYCOPY_DEST_NOT_INITIALIZED: This property can be important to e.g. SATB barriers by
+//   marking that the previous value is uninitialized nonsense rather than a real value.
+// * ARRAYCOPY_CHECKCAST: This property means that the class of the objects in source
+//   are not guaranteed to be subclasses of the class of the destination array. This requires
+//   a check-cast barrier during the copying operation. If this is not set, it is assumed
+//   that the array is covariant: (the source array type is-a destination array type)
+// * ARRAYCOPY_DISJOINT: This property means that it is known that the two array ranges
+//   are disjoint.
+// * ARRAYCOPY_ARRAYOF: The copy is in the arrayof form.
+// * ARRAYCOPY_ATOMIC: The accesses have to be atomic over the size of its elements.
+// * ARRAYCOPY_ALIGNED: The accesses have to be aligned on a HeapWord.
+const DecoratorSet ARRAYCOPY_DEST_NOT_INITIALIZED = UCONST64(1) << 24;
+const DecoratorSet ARRAYCOPY_CHECKCAST            = UCONST64(1) << 25;
+const DecoratorSet ARRAYCOPY_DISJOINT             = UCONST64(1) << 26;
+const DecoratorSet ARRAYCOPY_ARRAYOF              = UCONST64(1) << 27;
+const DecoratorSet ARRAYCOPY_ATOMIC               = UCONST64(1) << 28;
+const DecoratorSet ARRAYCOPY_ALIGNED              = UCONST64(1) << 29;
+const DecoratorSet ARRAYCOPY_DECORATOR_MASK       = ARRAYCOPY_DEST_NOT_INITIALIZED | // all six arraycopy decorators,
+                                                    ARRAYCOPY_CHECKCAST |            // each listed exactly once
+                                                    ARRAYCOPY_DISJOINT | ARRAYCOPY_ARRAYOF |
+                                                    ARRAYCOPY_ATOMIC | ARRAYCOPY_ALIGNED;
+
+// The HasDecorator trait can help at compile-time determining whether a decorator set
+// has an intersection with a certain other decorator set
+template <DecoratorSet decorators, DecoratorSet decorator>
+struct HasDecorator: public IntegralConstant<bool, (decorators & decorator) != 0> {};
+
+namespace AccessInternal {
+  template <typename T>
+  struct OopOrNarrowOopInternal: AllStatic {
+    typedef oop type;
+  };
+
+  template <>
+  struct OopOrNarrowOopInternal<narrowOop>: AllStatic {
+    typedef narrowOop type;
+  };
+
+  // This metafunction returns a canonicalized oop/narrowOop type for the oop-like
+  // types passed in from the oop_* overloads, where the user has sworn that the
+  // passed in values are oop-like (e.g. oop, oopDesc*, arrayOop, narrowOop,
+  // instanceOopDesc*, and random other things).
+  // In the oop_* overloads, it must hold that if the passed in type T is not
+  // narrowOop, then it by contract has to be one of many oop-like types implicitly
+  // convertible to oop, and hence returns oop as the canonical oop type.
+  // If it turns out it was not, then the implicit conversion to oop will fail
+  // to compile, as desired.
+  template <typename T>
+  struct OopOrNarrowOop: AllStatic {
+    typedef typename OopOrNarrowOopInternal<typename Decay<T>::type>::type type;
+  };
+
+  inline void* field_addr(oop base, ptrdiff_t byte_offset) {
+    return reinterpret_cast<void*>(reinterpret_cast<intptr_t>((void*)base) + byte_offset);
+  }
+
+  template <DecoratorSet decorators, typename T>
+  void store_at(oop base, ptrdiff_t offset, T value);
+
+  template <DecoratorSet decorators, typename T>
+  T load_at(oop base, ptrdiff_t offset);
+
+  template <DecoratorSet decorators, typename T>
+  T atomic_cmpxchg_at(T new_value, oop base, ptrdiff_t offset, T compare_value);
+
+  template <DecoratorSet decorators, typename T>
+  T atomic_xchg_at(T new_value, oop base, ptrdiff_t offset);
+
+  template <DecoratorSet decorators, typename P, typename T>
+  void store(P* addr, T value);
+
+  template <DecoratorSet decorators, typename P, typename T>
+  T load(P* addr);
+
+  template <DecoratorSet decorators, typename P, typename T>
+  T atomic_cmpxchg(T new_value, P* addr, T compare_value);
+
+  template <DecoratorSet decorators, typename P, typename T>
+  T atomic_xchg(T new_value, P* addr);
+
+  template <DecoratorSet decorators, typename T>
+  bool arraycopy(arrayOop src_obj, arrayOop dst_obj, T *src, T *dst, size_t length);
+
+  template <DecoratorSet decorators>
+  void clone(oop src, oop dst, size_t size);
+
+  // Infer the type that should be returned from a load.
+  template <typename P, DecoratorSet decorators>
+  class LoadProxy: public StackObj {
+  private:
+    P *const _addr;
+  public:
+    LoadProxy(P* addr) : _addr(addr) {}
+
+    template <typename T>
+    inline operator T() {
+      return load<decorators, P, T>(_addr);
+    }
+
+    inline operator P() {
+      return load<decorators, P, P>(_addr);
+    }
+  };
+
+  // Infer the type that should be returned from a load_at.
+  template <DecoratorSet decorators>
+  class LoadAtProxy: public StackObj {
+  private:
+    const oop _base;
+    const ptrdiff_t _offset;
+  public:
+    LoadAtProxy(oop base, ptrdiff_t offset) : _base(base), _offset(offset) {}
+
+    template <typename T>
+    inline operator T() const {
+      return load_at<decorators, T>(_base, _offset);
+    }
+  };
+}
+
+template <DecoratorSet decorators = INTERNAL_EMPTY>
+class Access: public AllStatic {
+  // This function asserts that if an access gets passed in a decorator outside
+  // of the expected_decorators, then something is wrong. It additionally checks
+  // the consistency of the decorators so that supposedly disjoint decorators are indeed
+  // disjoint. For example, an access can not be both in heap and on root at the
+  // same time.
+  template <DecoratorSet expected_decorators>
+  static void verify_decorators();
+
+  template <DecoratorSet expected_mo_decorators>
+  static void verify_primitive_decorators() {
+    const DecoratorSet primitive_decorators = (AS_DECORATOR_MASK ^ AS_NO_KEEPALIVE) | IN_HEAP |
+                                               IN_HEAP_ARRAY | MO_DECORATOR_MASK;
+    verify_decorators<expected_mo_decorators | primitive_decorators>();
+  }
+
+  template <DecoratorSet expected_mo_decorators>
+  static void verify_oop_decorators() {
+    const DecoratorSet oop_decorators = AS_DECORATOR_MASK | IN_DECORATOR_MASK |
+                                        (ON_DECORATOR_MASK ^ ON_UNKNOWN_OOP_REF) | // no unknown oop refs outside of the heap
+                                        OOP_DECORATOR_MASK | MO_DECORATOR_MASK;
+    verify_decorators<expected_mo_decorators | oop_decorators>();
+  }
+
+  template <DecoratorSet expected_mo_decorators>
+  static void verify_heap_oop_decorators() {
+    const DecoratorSet heap_oop_decorators = AS_DECORATOR_MASK | ON_DECORATOR_MASK |
+                                             OOP_DECORATOR_MASK | (IN_DECORATOR_MASK ^
+                                                                  (IN_ROOT ^ IN_CONCURRENT_ROOT)) | // no root accesses in the heap
+                                             MO_DECORATOR_MASK;
+    verify_decorators<expected_mo_decorators | heap_oop_decorators>();
+  }
+
+  static const DecoratorSet load_mo_decorators = MO_UNORDERED | MO_VOLATILE | MO_RELAXED | MO_ACQUIRE | MO_SEQ_CST;
+  static const DecoratorSet store_mo_decorators = MO_UNORDERED | MO_VOLATILE | MO_RELAXED | MO_RELEASE | MO_SEQ_CST;
+  static const DecoratorSet atomic_xchg_mo_decorators = MO_SEQ_CST;
+  static const DecoratorSet atomic_cmpxchg_mo_decorators = MO_RELAXED | MO_SEQ_CST;
+
+public:
+  // Primitive heap accesses
+  static inline AccessInternal::LoadAtProxy<decorators> load_at(oop base, ptrdiff_t offset) {
+    verify_primitive_decorators<load_mo_decorators>();
+    return AccessInternal::LoadAtProxy<decorators>(base, offset);
+  }
+
+  template <typename T>
+  static inline void store_at(oop base, ptrdiff_t offset, T value) {
+    verify_primitive_decorators<store_mo_decorators>();
+    AccessInternal::store_at<decorators>(base, offset, value);
+  }
+
+  template <typename T>
+  static inline T atomic_cmpxchg_at(T new_value, oop base, ptrdiff_t offset, T compare_value) {
+    verify_primitive_decorators<atomic_cmpxchg_mo_decorators>();
+    return AccessInternal::atomic_cmpxchg_at<decorators>(new_value, base, offset, compare_value);
+  }
+
+  template <typename T>
+  static inline T atomic_xchg_at(T new_value, oop base, ptrdiff_t offset) {
+    verify_primitive_decorators<atomic_xchg_mo_decorators>();
+    return AccessInternal::atomic_xchg_at<decorators>(new_value, base, offset);
+  }
+
+  template <typename T>
+  static inline bool arraycopy(arrayOop src_obj, arrayOop dst_obj, T *src, T *dst, size_t length) {
+    verify_decorators<ARRAYCOPY_DECORATOR_MASK | IN_HEAP |
+                      AS_DECORATOR_MASK>();
+    return AccessInternal::arraycopy<decorators>(src_obj, dst_obj, src, dst, length);
+  }
+
+  // Oop heap accesses
+  static inline AccessInternal::LoadAtProxy<decorators | INTERNAL_VALUE_IS_OOP> oop_load_at(oop base, ptrdiff_t offset) {
+    verify_heap_oop_decorators<load_mo_decorators>();
+    return AccessInternal::LoadAtProxy<decorators | INTERNAL_VALUE_IS_OOP>(base, offset);
+  }
+
+  template <typename T>
+  static inline void oop_store_at(oop base, ptrdiff_t offset, T value) {
+    verify_heap_oop_decorators<store_mo_decorators>();
+    typedef typename AccessInternal::OopOrNarrowOop<T>::type OopType;
+    OopType oop_value = value;
+    AccessInternal::store_at<decorators | INTERNAL_VALUE_IS_OOP>(base, offset, oop_value);
+  }
+
+  template <typename T>
+  static inline T oop_atomic_cmpxchg_at(T new_value, oop base, ptrdiff_t offset, T compare_value) {
+    verify_heap_oop_decorators<atomic_cmpxchg_mo_decorators>();
+    typedef typename AccessInternal::OopOrNarrowOop<T>::type OopType;
+    OopType new_oop_value = new_value;
+    OopType compare_oop_value = compare_value;
+    return AccessInternal::atomic_cmpxchg_at<decorators | INTERNAL_VALUE_IS_OOP>(new_oop_value, base, offset, compare_oop_value);
+  }
+
+  template <typename T>
+  static inline T oop_atomic_xchg_at(T new_value, oop base, ptrdiff_t offset) {
+    verify_heap_oop_decorators<atomic_xchg_mo_decorators>();
+    typedef typename AccessInternal::OopOrNarrowOop<T>::type OopType;
+    OopType new_oop_value = new_value;
+    return AccessInternal::atomic_xchg_at<decorators | INTERNAL_VALUE_IS_OOP>(new_oop_value, base, offset);
+  }
+
+  template <typename T>
+  static inline bool oop_arraycopy(arrayOop src_obj, arrayOop dst_obj, T *src, T *dst, size_t length) {
+    verify_decorators<ARRAYCOPY_DECORATOR_MASK | IN_HEAP | AS_DECORATOR_MASK>();
+    return AccessInternal::arraycopy<decorators | INTERNAL_VALUE_IS_OOP>(src_obj, dst_obj, src, dst, length);
+  }
+
+  // Clone an object from src to dst
+  static inline void clone(oop src, oop dst, size_t size) {
+    verify_decorators<IN_HEAP>();
+    AccessInternal::clone<decorators>(src, dst, size);
+  }
+
+  // Primitive accesses
+  template <typename P>
+  static inline P load(P* addr) {
+    verify_primitive_decorators<load_mo_decorators>();
+    return AccessInternal::load<decorators, P, P>(addr);
+  }
+
+  template <typename P, typename T>
+  static inline void store(P* addr, T value) {
+    verify_primitive_decorators<store_mo_decorators>();
+    AccessInternal::store<decorators>(addr, value);
+  }
+
+  template <typename P, typename T>
+  static inline T atomic_cmpxchg(T new_value, P* addr, T compare_value) {
+    verify_primitive_decorators<atomic_cmpxchg_mo_decorators>();
+    return AccessInternal::atomic_cmpxchg<decorators>(new_value, addr, compare_value);
+  }
+
+  template <typename P, typename T>
+  static inline T atomic_xchg(T new_value, P* addr) {
+    verify_primitive_decorators<atomic_xchg_mo_decorators>();
+    return AccessInternal::atomic_xchg<decorators>(new_value, addr);
+  }
+
+  // Oop accesses
+  template <typename P>
+  static inline AccessInternal::LoadProxy<P, decorators | INTERNAL_VALUE_IS_OOP> oop_load(P* addr) {
+    verify_oop_decorators<load_mo_decorators>();
+    return AccessInternal::LoadProxy<P, decorators | INTERNAL_VALUE_IS_OOP>(addr);
+  }
+
+  template <typename P, typename T>
+  static inline void oop_store(P* addr, T value) {
+    verify_oop_decorators<store_mo_decorators>();
+    typedef typename AccessInternal::OopOrNarrowOop<T>::type OopType;
+    OopType oop_value = value;
+    AccessInternal::store<decorators | INTERNAL_VALUE_IS_OOP>(addr, oop_value);
+  }
+
+  template <typename P, typename T>
+  static inline T oop_atomic_cmpxchg(T new_value, P* addr, T compare_value) {
+    verify_oop_decorators<atomic_cmpxchg_mo_decorators>();
+    typedef typename AccessInternal::OopOrNarrowOop<T>::type OopType;
+    OopType new_oop_value = new_value;
+    OopType compare_oop_value = compare_value;
+    return AccessInternal::atomic_cmpxchg<decorators | INTERNAL_VALUE_IS_OOP>(new_oop_value, addr, compare_oop_value);
+  }
+
+  template <typename P, typename T>
+  static inline T oop_atomic_xchg(T new_value, P* addr) {
+    verify_oop_decorators<atomic_xchg_mo_decorators>();
+    typedef typename AccessInternal::OopOrNarrowOop<T>::type OopType;
+    OopType new_oop_value = new_value;
+    return AccessInternal::atomic_xchg<decorators | INTERNAL_VALUE_IS_OOP>(new_oop_value, addr);
+  }
+};
+
+// Helper for performing raw accesses (knows only of memory ordering
+// atomicity decorators as well as compressed oops)
+template <DecoratorSet decorators = INTERNAL_EMPTY>
+class RawAccess: public Access<AS_RAW | decorators> {};
+
+// Helper for performing normal accesses on the heap. These accesses
+// may resolve an accessor on a GC barrier set
+template <DecoratorSet decorators = INTERNAL_EMPTY>
+class HeapAccess: public Access<IN_HEAP | decorators> {};
+
+// Helper for performing normal accesses in roots. These accesses
+// may resolve an accessor on a GC barrier set
+template <DecoratorSet decorators = INTERNAL_EMPTY>
+class RootAccess: public Access<IN_ROOT | decorators> {};
+
+#endif // SHARE_VM_RUNTIME_ACCESS_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/oops/access.inline.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -0,0 +1,1044 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_RUNTIME_ACCESS_INLINE_HPP
+#define SHARE_VM_RUNTIME_ACCESS_INLINE_HPP
+
+#include "gc/shared/barrierSet.inline.hpp"
+#include "metaprogramming/conditional.hpp"
+#include "metaprogramming/isFloatingPoint.hpp"
+#include "metaprogramming/isIntegral.hpp"
+#include "metaprogramming/isPointer.hpp"
+#include "metaprogramming/isVolatile.hpp"
+#include "oops/access.hpp"
+#include "oops/accessBackend.inline.hpp"
+#include "runtime/atomic.hpp"
+#include "runtime/orderAccess.inline.hpp"
+
+// This file outlines the template pipeline of accesses going through the Access
+// API. There are essentially 5 steps for each access.
+// * Step 1: Set default decorators and decay types. This step gets rid of CV qualifiers
+//           and sets default decorators to sensible values.
+// * Step 2: Reduce types. This step makes sure there is only a single T type and not
+//           multiple types. The P type of the address and T type of the value must
+//           match.
+// * Step 3: Pre-runtime dispatch. This step checks whether a runtime call can be
+//           avoided, and in that case avoids it (calling raw accesses or
+//           primitive accesses in a build that does not require primitive GC barriers)
+// * Step 4: Runtime-dispatch. This step performs a runtime dispatch to the corresponding
+//           BarrierSet::AccessBarrier accessor that attaches GC-required barriers
+//           to the access.
+// * Step 5: Post-runtime dispatch. This step now casts previously unknown types such
+//           as the address type of an oop on the heap (is it oop* or narrowOop*) to
+//           the appropriate type. It also splits sufficiently orthogonal accesses into
+//           different functions, such as whether the access involves oops or primitives
+//           and whether the access is performed on the heap or outside. Then the
+//           appropriate BarrierSet::AccessBarrier is called to perform the access.
+
+namespace AccessInternal {
+
+  // Step 5: Post-runtime dispatch.
+  // This class is the last step before calling the BarrierSet::AccessBarrier.
+  // Here we make sure to figure out types that were not known prior to the
+  // runtime dispatch, such as whether an oop on the heap is oop or narrowOop.
+  // We also split orthogonal barriers such as handling primitives vs oops
+  // and on-heap vs off-heap into different calls to the barrier set.
+  template <class GCBarrierType, BarrierType type, DecoratorSet decorators>
+  struct PostRuntimeDispatch: public AllStatic { };  // primary template is empty; each BarrierType gets its own partial specialization
+
+  template <class GCBarrierType, DecoratorSet decorators>
+  struct PostRuntimeDispatch<GCBarrierType, BARRIER_STORE, decorators>: public AllStatic {
+    template <typename T>
+    static void access_barrier(void* addr, T value) {  // non-oop store: addr is reinterpreted as T*
+      GCBarrierType::store_in_heap(reinterpret_cast<T*>(addr), value);
+    }
+
+    static void oop_access_barrier(void* addr, oop value) {  // oop store: picks the in-heap or not-in-heap entry point
+      typedef typename HeapOopType<decorators>::type HeapOop;
+      if (!HasDecorator<decorators, IN_HEAP>::value) {
+        GCBarrierType::oop_store_not_in_heap(reinterpret_cast<HeapOop*>(addr), value);
+      } else {
+        GCBarrierType::oop_store_in_heap(reinterpret_cast<HeapOop*>(addr), value);
+      }
+    }
+  };
+
+  template <class GCBarrierType, DecoratorSet decorators>
+  struct PostRuntimeDispatch<GCBarrierType, BARRIER_LOAD, decorators>: public AllStatic {
+    template <typename T>
+    static T access_barrier(void* addr) {  // non-oop load: addr is reinterpreted as T*
+      return GCBarrierType::load_in_heap(reinterpret_cast<T*>(addr));
+    }
+
+    static oop oop_access_barrier(void* addr) {  // oop load: picks the in-heap or not-in-heap entry point
+      typedef typename HeapOopType<decorators>::type HeapOop;
+      HeapOop* const slot = reinterpret_cast<HeapOop*>(addr);
+      if (!HasDecorator<decorators, IN_HEAP>::value) {
+        return GCBarrierType::oop_load_not_in_heap(slot);
+      }
+      return GCBarrierType::oop_load_in_heap(slot);
+    }
+  };
+
+  template <class GCBarrierType, DecoratorSet decorators>
+  struct PostRuntimeDispatch<GCBarrierType, BARRIER_ATOMIC_XCHG, decorators>: public AllStatic {
+    template <typename T>
+    static T access_barrier(T new_value, void* addr) {  // non-oop exchange: addr is reinterpreted as T*
+      return GCBarrierType::atomic_xchg_in_heap(new_value, reinterpret_cast<T*>(addr));
+    }
+
+    static oop oop_access_barrier(oop new_value, void* addr) {  // oop exchange: picks the in-heap or not-in-heap entry point
+      typedef typename HeapOopType<decorators>::type HeapOop;
+      HeapOop* const slot = reinterpret_cast<HeapOop*>(addr);
+      if (!HasDecorator<decorators, IN_HEAP>::value) {
+        return GCBarrierType::oop_atomic_xchg_not_in_heap(new_value, slot);
+      }
+      return GCBarrierType::oop_atomic_xchg_in_heap(new_value, slot);
+    }
+  };
+
+  template <class GCBarrierType, DecoratorSet decorators>
+  struct PostRuntimeDispatch<GCBarrierType, BARRIER_ATOMIC_CMPXCHG, decorators>: public AllStatic {
+    template <typename T>
+    static T access_barrier(T new_value, void* addr, T compare_value) {  // non-oop cmpxchg: addr is reinterpreted as T*
+      return GCBarrierType::atomic_cmpxchg_in_heap(new_value, reinterpret_cast<T*>(addr), compare_value);
+    }
+
+    static oop oop_access_barrier(oop new_value, void* addr, oop compare_value) {  // oop cmpxchg: in-heap or not-in-heap entry point
+      typedef typename HeapOopType<decorators>::type HeapOop;
+      HeapOop* const slot = reinterpret_cast<HeapOop*>(addr);
+      if (!HasDecorator<decorators, IN_HEAP>::value) {
+        return GCBarrierType::oop_atomic_cmpxchg_not_in_heap(new_value, slot, compare_value);
+      }
+      return GCBarrierType::oop_atomic_cmpxchg_in_heap(new_value, slot, compare_value);
+    }
+  };
+
+  template <class GCBarrierType, DecoratorSet decorators>
+  struct PostRuntimeDispatch<GCBarrierType, BARRIER_ARRAYCOPY, decorators>: public AllStatic {
+    template <typename T>
+    static bool access_barrier(arrayOop src_obj, arrayOop dst_obj, T* src, T* dst, size_t length) {  // non-oop copy: forwarded as-is
+      return GCBarrierType::arraycopy_in_heap(src_obj, dst_obj, src, dst, length);
+    }
+
+    template <typename T>
+    static bool oop_access_barrier(arrayOop src_obj, arrayOop dst_obj, T* src, T* dst, size_t length) {  // oop copy: element pointers are re-typed
+      typedef typename HeapOopType<decorators>::type HeapOop;
+      HeapOop* const from = reinterpret_cast<HeapOop*>(src);
+      HeapOop* const to = reinterpret_cast<HeapOop*>(dst);
+      return GCBarrierType::oop_arraycopy_in_heap(src_obj, dst_obj, from, to, length);
+    }
+  };
+
+  template <class GCBarrierType, DecoratorSet decorators>
+  struct PostRuntimeDispatch<GCBarrierType, BARRIER_STORE_AT, decorators>: public AllStatic {
+    template <typename T>
+    static void access_barrier(oop base, ptrdiff_t offset, T value) {  // non-oop store addressed by (base, offset)
+      GCBarrierType::store_in_heap_at(base, offset, value);
+    }
+
+    static void oop_access_barrier(oop base, ptrdiff_t offset, oop value) {  // oop store addressed by (base, offset)
+      GCBarrierType::oop_store_in_heap_at(base, offset, value);
+    }
+  };
+
+  template <class GCBarrierType, DecoratorSet decorators>
+  struct PostRuntimeDispatch<GCBarrierType, BARRIER_LOAD_AT, decorators>: public AllStatic {
+    template <typename T>
+    static T access_barrier(oop base, ptrdiff_t offset) {  // non-oop load addressed by (base, offset)
+      return GCBarrierType::template load_in_heap_at<T>(base, offset);  // T is not deducible from the arguments, so it is passed explicitly
+    }
+
+    static oop oop_access_barrier(oop base, ptrdiff_t offset) {  // oop load addressed by (base, offset)
+      return GCBarrierType::oop_load_in_heap_at(base, offset);
+    }
+  };
+
+  template <class GCBarrierType, DecoratorSet decorators>
+  struct PostRuntimeDispatch<GCBarrierType, BARRIER_ATOMIC_XCHG_AT, decorators>: public AllStatic {
+    template <typename T>
+    static T access_barrier(T new_value, oop base, ptrdiff_t offset) {  // non-oop exchange addressed by (base, offset)
+      return GCBarrierType::atomic_xchg_in_heap_at(new_value, base, offset);
+    }
+
+    static oop oop_access_barrier(oop new_value, oop base, ptrdiff_t offset) {  // oop exchange addressed by (base, offset)
+      return GCBarrierType::oop_atomic_xchg_in_heap_at(new_value, base, offset);
+    }
+  };
+
+  template <class GCBarrierType, DecoratorSet decorators>
+  struct PostRuntimeDispatch<GCBarrierType, BARRIER_ATOMIC_CMPXCHG_AT, decorators>: public AllStatic {
+    template <typename T>
+    static T access_barrier(T new_value, oop base, ptrdiff_t offset, T compare_value) {  // non-oop cmpxchg addressed by (base, offset)
+      return GCBarrierType::atomic_cmpxchg_in_heap_at(new_value, base, offset, compare_value);
+    }
+
+    static oop oop_access_barrier(oop new_value, oop base, ptrdiff_t offset, oop compare_value) {  // oop cmpxchg addressed by (base, offset)
+      return GCBarrierType::oop_atomic_cmpxchg_in_heap_at(new_value, base, offset, compare_value);
+    }
+  };
+
+  template <class GCBarrierType, DecoratorSet decorators>
+  struct PostRuntimeDispatch<GCBarrierType, BARRIER_CLONE, decorators>: public AllStatic {
+    static void access_barrier(oop src, oop dst, size_t size) {  // clone has a single entry point; there is no oop_access_barrier here
+      GCBarrierType::clone_in_heap(src, dst, size);
+    }
+  };
+
+  // Resolving accessors with barriers from the barrier set happens in two steps.
+  // 1. Expand paths with runtime-decorators, e.g. is UseCompressedOops on or off.
+  // 2. Expand paths for each BarrierSet available in the system.
+  template <DecoratorSet decorators, typename FunctionPointerT, BarrierType barrier_type>
+  struct BarrierResolver: public AllStatic {  // picks the PostRuntimeDispatch accessor matching the active BarrierSet
+    template <DecoratorSet ds>
+    static typename EnableIf<
+      HasDecorator<ds, INTERNAL_VALUE_IS_OOP>::value,
+      FunctionPointerT>::type
+    resolve_barrier_gc() {  // oop variant: returns the oop_access_barrier entry point
+      BarrierSet* bs = BarrierSet::barrier_set();
+      assert(bs != NULL, "GC barriers invoked before BarrierSet is set");
+      switch (bs->kind()) {  // one case is generated per concrete barrier set by the macro below
+#define BARRIER_SET_RESOLVE_BARRIER_CLOSURE(bs_name)                    \
+        case BarrierSet::bs_name: {                                     \
+          return PostRuntimeDispatch<typename BarrierSet::GetType<BarrierSet::bs_name>::type:: \
+            AccessBarrier<ds>, barrier_type, ds>::oop_access_barrier; \
+        }                                                               \
+        break;
+        FOR_EACH_CONCRETE_BARRIER_SET_DO(BARRIER_SET_RESOLVE_BARRIER_CLOSURE)
+#undef BARRIER_SET_RESOLVE_BARRIER_CLOSURE
+
+      default:  // barrier set kind with no generated case
+        fatal("BarrierSet AccessBarrier resolving not implemented");
+        return NULL;  // presumably unreachable after fatal(); gives the function a return on this path
+      };
+    }
+
+    template <DecoratorSet ds>
+    static typename EnableIf<
+      !HasDecorator<ds, INTERNAL_VALUE_IS_OOP>::value,
+      FunctionPointerT>::type
+    resolve_barrier_gc() {  // non-oop variant: returns the access_barrier entry point
+      BarrierSet* bs = BarrierSet::barrier_set();
+      assert(bs != NULL, "GC barriers invoked before BarrierSet is set");
+      switch (bs->kind()) {  // one case is generated per concrete barrier set by the macro below
+#define BARRIER_SET_RESOLVE_BARRIER_CLOSURE(bs_name)                    \
+        case BarrierSet::bs_name: {                                       \
+          return PostRuntimeDispatch<typename BarrierSet::GetType<BarrierSet::bs_name>::type:: \
+            AccessBarrier<ds>, barrier_type, ds>::access_barrier; \
+        }                                                                 \
+        break;
+        FOR_EACH_CONCRETE_BARRIER_SET_DO(BARRIER_SET_RESOLVE_BARRIER_CLOSURE)
+#undef BARRIER_SET_RESOLVE_BARRIER_CLOSURE
+
+      default:  // barrier set kind with no generated case
+        fatal("BarrierSet AccessBarrier resolving not implemented");
+        return NULL;  // presumably unreachable after fatal(); gives the function a return on this path
+      };
+    }
+
+    static FunctionPointerT resolve_barrier_rt() {  // folds the runtime UseCompressedOops flag into the decorator set
+      if (UseCompressedOops) {
+        const DecoratorSet expanded_decorators = decorators | INTERNAL_RT_USE_COMPRESSED_OOPS;
+        return resolve_barrier_gc<expanded_decorators>();
+      } else {
+        return resolve_barrier_gc<decorators>();
+      }
+    }
+
+    static FunctionPointerT resolve_barrier() {  // entry point used by the RuntimeDispatch *_init functions
+      return resolve_barrier_rt();
+    }
+  };
+
+  // Step 4: Runtime dispatch
+  // The RuntimeDispatch class is responsible for performing a runtime dispatch of the
+  // accessor. This is required when the access either depends on whether compressed oops
+  // is being used, or it depends on which GC implementation was chosen (e.g. requires GC
+  // barriers). The way it works is that a function pointer initially pointing to an
+  // accessor resolution function gets called for each access. Upon first invocation,
+  // it resolves which accessor is to be used in future invocations and patches the
+  // function pointer to point to this new accessor.
+
+  template <DecoratorSet decorators, typename T, BarrierType type>
+  struct RuntimeDispatch: AllStatic {};  // primary template is empty; each BarrierType gets its own partial specialization
+
+  template <DecoratorSet decorators, typename T>
+  struct RuntimeDispatch<decorators, T, BARRIER_STORE>: AllStatic {
+    typedef typename AccessFunction<decorators, T, BARRIER_STORE>::type func_t;
+    static func_t _store_func;  // accessor currently used by store()
+
+    static void store_init(void* addr, T value) {  // resolves the accessor, patches _store_func, then performs the store
+      const func_t resolved = BarrierResolver<decorators, func_t, BARRIER_STORE>::resolve_barrier();
+      _store_func = resolved;
+      resolved(addr, value);
+    }
+
+    static inline void store(void* addr, T value) {
+      (*_store_func)(addr, value);
+    }
+  };
+
+  template <DecoratorSet decorators, typename T>
+  struct RuntimeDispatch<decorators, T, BARRIER_STORE_AT>: AllStatic {
+    typedef typename AccessFunction<decorators, T, BARRIER_STORE_AT>::type func_t;
+    static func_t _store_at_func;  // accessor currently used by store_at()
+
+    static void store_at_init(oop base, ptrdiff_t offset, T value) {  // resolves, patches _store_at_func, then performs the store
+      const func_t resolved = BarrierResolver<decorators, func_t, BARRIER_STORE_AT>::resolve_barrier();
+      _store_at_func = resolved;
+      resolved(base, offset, value);
+    }
+
+    static inline void store_at(oop base, ptrdiff_t offset, T value) {
+      (*_store_at_func)(base, offset, value);
+    }
+  };
+
+  template <DecoratorSet decorators, typename T>
+  struct RuntimeDispatch<decorators, T, BARRIER_LOAD>: AllStatic {
+    typedef typename AccessFunction<decorators, T, BARRIER_LOAD>::type func_t;
+    static func_t _load_func;  // accessor currently used by load()
+
+    static T load_init(void* addr) {  // resolves the accessor, patches _load_func, then performs the load
+      const func_t resolved = BarrierResolver<decorators, func_t, BARRIER_LOAD>::resolve_barrier();
+      _load_func = resolved;
+      return resolved(addr);
+    }
+
+    static inline T load(void* addr) {
+      return (*_load_func)(addr);
+    }
+  };
+
+  template <DecoratorSet decorators, typename T>
+  struct RuntimeDispatch<decorators, T, BARRIER_LOAD_AT>: AllStatic {
+    typedef typename AccessFunction<decorators, T, BARRIER_LOAD_AT>::type func_t;
+    static func_t _load_at_func;  // accessor currently used by load_at()
+
+    static T load_at_init(oop base, ptrdiff_t offset) {  // resolves the accessor, patches _load_at_func, then performs the load
+      const func_t resolved = BarrierResolver<decorators, func_t, BARRIER_LOAD_AT>::resolve_barrier();
+      _load_at_func = resolved;
+      return resolved(base, offset);
+    }
+
+    static inline T load_at(oop base, ptrdiff_t offset) {
+      return (*_load_at_func)(base, offset);
+    }
+  };
+
+  template <DecoratorSet decorators, typename T>
+  struct RuntimeDispatch<decorators, T, BARRIER_ATOMIC_CMPXCHG>: AllStatic {
+    typedef typename AccessFunction<decorators, T, BARRIER_ATOMIC_CMPXCHG>::type func_t;
+    static func_t _atomic_cmpxchg_func;  // accessor currently used by atomic_cmpxchg()
+
+    static T atomic_cmpxchg_init(T new_value, void* addr, T compare_value) {  // resolves, patches the pointer, then performs the access
+      const func_t resolved = BarrierResolver<decorators, func_t, BARRIER_ATOMIC_CMPXCHG>::resolve_barrier();
+      _atomic_cmpxchg_func = resolved;
+      return resolved(new_value, addr, compare_value);
+    }
+
+    static inline T atomic_cmpxchg(T new_value, void* addr, T compare_value) {
+      return (*_atomic_cmpxchg_func)(new_value, addr, compare_value);
+    }
+  };
+
+  template <DecoratorSet decorators, typename T>
+  struct RuntimeDispatch<decorators, T, BARRIER_ATOMIC_CMPXCHG_AT>: AllStatic {
+    typedef typename AccessFunction<decorators, T, BARRIER_ATOMIC_CMPXCHG_AT>::type func_t;
+    static func_t _atomic_cmpxchg_at_func;  // accessor currently used by atomic_cmpxchg_at()
+
+    static T atomic_cmpxchg_at_init(T new_value, oop base, ptrdiff_t offset, T compare_value) {  // resolves, patches, then performs the access
+      const func_t resolved = BarrierResolver<decorators, func_t, BARRIER_ATOMIC_CMPXCHG_AT>::resolve_barrier();
+      _atomic_cmpxchg_at_func = resolved;
+      return resolved(new_value, base, offset, compare_value);
+    }
+
+    static inline T atomic_cmpxchg_at(T new_value, oop base, ptrdiff_t offset, T compare_value) {
+      return (*_atomic_cmpxchg_at_func)(new_value, base, offset, compare_value);
+    }
+  };
+
+  template <DecoratorSet decorators, typename T>
+  struct RuntimeDispatch<decorators, T, BARRIER_ATOMIC_XCHG>: AllStatic {
+    typedef typename AccessFunction<decorators, T, BARRIER_ATOMIC_XCHG>::type func_t;
+    static func_t _atomic_xchg_func;  // accessor currently used by atomic_xchg()
+
+    static T atomic_xchg_init(T new_value, void* addr) {  // resolves the accessor, patches _atomic_xchg_func, then performs the access
+      const func_t resolved = BarrierResolver<decorators, func_t, BARRIER_ATOMIC_XCHG>::resolve_barrier();
+      _atomic_xchg_func = resolved;
+      return resolved(new_value, addr);
+    }
+
+    static inline T atomic_xchg(T new_value, void* addr) {
+      return (*_atomic_xchg_func)(new_value, addr);
+    }
+  };
+
+  template <DecoratorSet decorators, typename T>
+  struct RuntimeDispatch<decorators, T, BARRIER_ATOMIC_XCHG_AT>: AllStatic {
+    typedef typename AccessFunction<decorators, T, BARRIER_ATOMIC_XCHG_AT>::type func_t;
+    static func_t _atomic_xchg_at_func;  // accessor currently used by atomic_xchg_at()
+
+    static T atomic_xchg_at_init(T new_value, oop base, ptrdiff_t offset) {  // resolves, patches the pointer, then performs the access
+      const func_t resolved = BarrierResolver<decorators, func_t, BARRIER_ATOMIC_XCHG_AT>::resolve_barrier();
+      _atomic_xchg_at_func = resolved;
+      return resolved(new_value, base, offset);
+    }
+
+    static inline T atomic_xchg_at(T new_value, oop base, ptrdiff_t offset) {
+      return (*_atomic_xchg_at_func)(new_value, base, offset);
+    }
+  };
+
+  template <DecoratorSet decorators, typename T>
+  struct RuntimeDispatch<decorators, T, BARRIER_ARRAYCOPY>: AllStatic {
+    typedef typename AccessFunction<decorators, T, BARRIER_ARRAYCOPY>::type func_t;
+    static func_t _arraycopy_func;  // accessor currently used by arraycopy()
+
+    static bool arraycopy_init(arrayOop src_obj, arrayOop dst_obj, T *src, T* dst, size_t length) {  // resolves, patches, then copies
+      const func_t resolved = BarrierResolver<decorators, func_t, BARRIER_ARRAYCOPY>::resolve_barrier();
+      _arraycopy_func = resolved;
+      return resolved(src_obj, dst_obj, src, dst, length);
+    }
+
+    static inline bool arraycopy(arrayOop src_obj, arrayOop dst_obj, T *src, T* dst, size_t length) {
+      return (*_arraycopy_func)(src_obj, dst_obj, src, dst, length);
+    }
+  };
+
+  template <DecoratorSet decorators, typename T>
+  struct RuntimeDispatch<decorators, T, BARRIER_CLONE>: AllStatic {
+    typedef typename AccessFunction<decorators, T, BARRIER_CLONE>::type func_t;
+    static func_t _clone_func;  // accessor currently used by clone()
+
+    static void clone_init(oop src, oop dst, size_t size) {  // resolves the accessor, patches _clone_func, then performs the clone
+      const func_t resolved = BarrierResolver<decorators, func_t, BARRIER_CLONE>::resolve_barrier();
+      _clone_func = resolved;
+      resolved(src, dst, size);
+    }
+
+    static inline void clone(oop src, oop dst, size_t size) {
+      (*_clone_func)(src, dst, size);
+    }
+  };
+
+  // Initialize the function pointers to point to the resolving function.
+  template <DecoratorSet decorators, typename T>
+  typename AccessFunction<decorators, T, BARRIER_STORE>::type
+  RuntimeDispatch<decorators, T, BARRIER_STORE>::_store_func = &store_init;  // starts at the resolver; store_init replaces it
+
+  template <DecoratorSet decorators, typename T>
+  typename AccessFunction<decorators, T, BARRIER_STORE_AT>::type
+  RuntimeDispatch<decorators, T, BARRIER_STORE_AT>::_store_at_func = &store_at_init;  // starts at the resolver
+
+  template <DecoratorSet decorators, typename T>
+  typename AccessFunction<decorators, T, BARRIER_LOAD>::type
+  RuntimeDispatch<decorators, T, BARRIER_LOAD>::_load_func = &load_init;  // starts at the resolver
+
+  template <DecoratorSet decorators, typename T>
+  typename AccessFunction<decorators, T, BARRIER_LOAD_AT>::type
+  RuntimeDispatch<decorators, T, BARRIER_LOAD_AT>::_load_at_func = &load_at_init;  // starts at the resolver
+
+  template <DecoratorSet decorators, typename T>
+  typename AccessFunction<decorators, T, BARRIER_ATOMIC_CMPXCHG>::type
+  RuntimeDispatch<decorators, T, BARRIER_ATOMIC_CMPXCHG>::_atomic_cmpxchg_func = &atomic_cmpxchg_init;  // starts at the resolver
+
+  template <DecoratorSet decorators, typename T>
+  typename AccessFunction<decorators, T, BARRIER_ATOMIC_CMPXCHG_AT>::type
+  RuntimeDispatch<decorators, T, BARRIER_ATOMIC_CMPXCHG_AT>::_atomic_cmpxchg_at_func = &atomic_cmpxchg_at_init;  // starts at the resolver
+
+  template <DecoratorSet decorators, typename T>
+  typename AccessFunction<decorators, T, BARRIER_ATOMIC_XCHG>::type
+  RuntimeDispatch<decorators, T, BARRIER_ATOMIC_XCHG>::_atomic_xchg_func = &atomic_xchg_init;  // starts at the resolver
+
+  template <DecoratorSet decorators, typename T>
+  typename AccessFunction<decorators, T, BARRIER_ATOMIC_XCHG_AT>::type
+  RuntimeDispatch<decorators, T, BARRIER_ATOMIC_XCHG_AT>::_atomic_xchg_at_func = &atomic_xchg_at_init;  // starts at the resolver
+
+  template <DecoratorSet decorators, typename T>
+  typename AccessFunction<decorators, T, BARRIER_ARRAYCOPY>::type
+  RuntimeDispatch<decorators, T, BARRIER_ARRAYCOPY>::_arraycopy_func = &arraycopy_init;  // starts at the resolver
+
+  template <DecoratorSet decorators, typename T>
+  typename AccessFunction<decorators, T, BARRIER_CLONE>::type
+  RuntimeDispatch<decorators, T, BARRIER_CLONE>::_clone_func = &clone_init;  // starts at the resolver
+
+  // Step 3: Pre-runtime dispatching.
+  // The PreRuntimeDispatch class is responsible for filtering the barrier strength
+  // decorators. That is, for AS_RAW, it hardwires the accesses without a runtime
+  // dispatch point. Otherwise it goes through a runtime check if hardwiring was
+  // not possible.
+  struct PreRuntimeDispatch: AllStatic {
+    template<DecoratorSet decorators>
+    static bool can_hardwire_raw() {
+      return !HasDecorator<decorators, INTERNAL_VALUE_IS_OOP>::value || // primitive access
+             !HasDecorator<decorators, INTERNAL_CONVERT_COMPRESSED_OOP>::value || // don't care about compressed oops (oop* address)
+             HasDecorator<decorators, INTERNAL_RT_USE_COMPRESSED_OOPS>::value; // we can infer we use compressed oops (narrowOop* address)
+    }
+
+    static const DecoratorSet convert_compressed_oops = INTERNAL_RT_USE_COMPRESSED_OOPS | INTERNAL_CONVERT_COMPRESSED_OOP;
+
+    template<DecoratorSet decorators>
+    static bool is_hardwired_primitive() {
+      return !HasDecorator<decorators, INTERNAL_BT_BARRIER_ON_PRIMITIVES>::value &&
+             !HasDecorator<decorators, INTERNAL_VALUE_IS_OOP>::value;
+    }
+
+    template <DecoratorSet decorators, typename T>
+    inline static typename EnableIf<
+      HasDecorator<decorators, AS_RAW>::value>::type
+    store(void* addr, T value) {
+      typedef RawAccessBarrier<decorators & RAW_DECORATOR_MASK> Raw;
+      if (can_hardwire_raw<decorators>()) {
+        if (HasDecorator<decorators, INTERNAL_VALUE_IS_OOP>::value) {
+          Raw::oop_store(addr, value);
+        } else {
+          Raw::store(addr, value);
+        }
+      } else if (UseCompressedOops) {
+        const DecoratorSet expanded_decorators = decorators | convert_compressed_oops;
+        PreRuntimeDispatch::store<expanded_decorators>(addr, value);
+      } else {
+        const DecoratorSet expanded_decorators = decorators & ~convert_compressed_oops;
+        PreRuntimeDispatch::store<expanded_decorators>(addr, value);
+      }
+    }
+
+    template <DecoratorSet decorators, typename T>
+    inline static typename EnableIf<
+      !HasDecorator<decorators, AS_RAW>::value>::type
+    store(void* addr, T value) {
+      if (is_hardwired_primitive<decorators>()) {
+        const DecoratorSet expanded_decorators = decorators | AS_RAW;
+        PreRuntimeDispatch::store<expanded_decorators>(addr, value);
+      } else {
+        RuntimeDispatch<decorators, T, BARRIER_STORE>::store(addr, value);
+      }
+    }
+
+    template <DecoratorSet decorators, typename T>
+    inline static typename EnableIf<
+      HasDecorator<decorators, AS_RAW>::value>::type
+    store_at(oop base, ptrdiff_t offset, T value) {
+      store<decorators>(field_addr(base, offset), value);
+    }
+
+    template <DecoratorSet decorators, typename T>
+    inline static typename EnableIf<
+      !HasDecorator<decorators, AS_RAW>::value>::type
+    store_at(oop base, ptrdiff_t offset, T value) {
+      if (is_hardwired_primitive<decorators>()) {
+        const DecoratorSet expanded_decorators = decorators | AS_RAW;
+        PreRuntimeDispatch::store_at<expanded_decorators>(base, offset, value);
+      } else {
+        RuntimeDispatch<decorators, T, BARRIER_STORE_AT>::store_at(base, offset, value);
+      }
+    }
+
+    template <DecoratorSet decorators, typename T>
+    inline static typename EnableIf<
+      HasDecorator<decorators, AS_RAW>::value, T>::type
+    load(void* addr) {
+      typedef RawAccessBarrier<decorators & RAW_DECORATOR_MASK> Raw;
+      if (can_hardwire_raw<decorators>()) {
+        if (HasDecorator<decorators, INTERNAL_VALUE_IS_OOP>::value) {
+          return Raw::template oop_load<T>(addr);
+        } else {
+          return Raw::template load<T>(addr);
+        }
+      } else if (UseCompressedOops) {
+        const DecoratorSet expanded_decorators = decorators | convert_compressed_oops;
+        return PreRuntimeDispatch::load<expanded_decorators, T>(addr);
+      } else {
+        const DecoratorSet expanded_decorators = decorators & ~convert_compressed_oops;
+        return PreRuntimeDispatch::load<expanded_decorators, T>(addr);
+      }
+    }
+
+    template <DecoratorSet decorators, typename T>
+    inline static typename EnableIf<
+      !HasDecorator<decorators, AS_RAW>::value, T>::type
+    load(void* addr) {
+      if (is_hardwired_primitive<decorators>()) {
+        const DecoratorSet expanded_decorators = decorators | AS_RAW;
+        return PreRuntimeDispatch::load<expanded_decorators, T>(addr);
+      } else {
+        return RuntimeDispatch<decorators, T, BARRIER_LOAD>::load(addr);
+      }
+    }
+
+    template <DecoratorSet decorators, typename T>
+    inline static typename EnableIf<
+      HasDecorator<decorators, AS_RAW>::value, T>::type
+    load_at(oop base, ptrdiff_t offset) {
+      return load<decorators, T>(field_addr(base, offset));
+    }
+
+    template <DecoratorSet decorators, typename T>
+    inline static typename EnableIf<
+      !HasDecorator<decorators, AS_RAW>::value, T>::type
+    load_at(oop base, ptrdiff_t offset) {
+      if (is_hardwired_primitive<decorators>()) {
+        const DecoratorSet expanded_decorators = decorators | AS_RAW;
+        return PreRuntimeDispatch::load_at<expanded_decorators, T>(base, offset);
+      } else {
+        return RuntimeDispatch<decorators, T, BARRIER_LOAD_AT>::load_at(base, offset);
+      }
+    }
+
+    template <DecoratorSet decorators, typename T>
+    inline static typename EnableIf<
+      HasDecorator<decorators, AS_RAW>::value, T>::type
+    atomic_cmpxchg(T new_value, void* addr, T compare_value) {
+      typedef RawAccessBarrier<decorators & RAW_DECORATOR_MASK> Raw;
+      if (can_hardwire_raw<decorators>()) {
+        if (HasDecorator<decorators, INTERNAL_VALUE_IS_OOP>::value) {
+          return Raw::oop_atomic_cmpxchg(new_value, addr, compare_value);
+        } else {
+          return Raw::atomic_cmpxchg(new_value, addr, compare_value);
+        }
+      } else if (UseCompressedOops) {
+        const DecoratorSet expanded_decorators = decorators | convert_compressed_oops;
+        return PreRuntimeDispatch::atomic_cmpxchg<expanded_decorators>(new_value, addr, compare_value);
+      } else {
+        const DecoratorSet expanded_decorators = decorators & ~convert_compressed_oops;
+        return PreRuntimeDispatch::atomic_cmpxchg<expanded_decorators>(new_value, addr, compare_value);
+      }
+    }
+
+    template <DecoratorSet decorators, typename T>
+    inline static typename EnableIf<
+      !HasDecorator<decorators, AS_RAW>::value, T>::type
+    atomic_cmpxchg(T new_value, void* addr, T compare_value) {
+      typedef RawAccessBarrier<decorators & RAW_DECORATOR_MASK> Raw;
+      if (is_hardwired_primitive<decorators>()) {
+        const DecoratorSet expanded_decorators = decorators | AS_RAW;
+        return PreRuntimeDispatch::atomic_cmpxchg<expanded_decorators>(new_value, addr, compare_value);
+      } else {
+        return RuntimeDispatch<decorators, T, BARRIER_ATOMIC_CMPXCHG>::atomic_cmpxchg(new_value, addr, compare_value);
+      }
+    }
+
+    template <DecoratorSet decorators, typename T>
+    inline static typename EnableIf<
+      HasDecorator<decorators, AS_RAW>::value, T>::type
+    atomic_cmpxchg_at(T new_value, oop base, ptrdiff_t offset, T compare_value) {
+      return atomic_cmpxchg<decorators>(new_value, field_addr(base, offset), compare_value);
+    }
+
+    template <DecoratorSet decorators, typename T>
+    inline static typename EnableIf<
+      !HasDecorator<decorators, AS_RAW>::value, T>::type
+    atomic_cmpxchg_at(T new_value, oop base, ptrdiff_t offset, T compare_value) {
+      if (is_hardwired_primitive<decorators>()) {
+        const DecoratorSet expanded_decorators = decorators | AS_RAW;
+        return PreRuntimeDispatch::atomic_cmpxchg_at<expanded_decorators>(new_value, base, offset, compare_value);
+      } else {
+        return RuntimeDispatch<decorators, T, BARRIER_ATOMIC_CMPXCHG_AT>::atomic_cmpxchg_at(new_value, base, offset, compare_value);
+      }
+    }
+
+    template <DecoratorSet decorators, typename T>
+    inline static typename EnableIf<
+      HasDecorator<decorators, AS_RAW>::value, T>::type
+    atomic_xchg(T new_value, void* addr) {
+      typedef RawAccessBarrier<decorators & RAW_DECORATOR_MASK> Raw;
+      if (can_hardwire_raw<decorators>()) {
+        if (HasDecorator<decorators, INTERNAL_VALUE_IS_OOP>::value) {
+          return Raw::oop_atomic_xchg(new_value, addr);
+        } else {
+          return Raw::atomic_xchg(new_value, addr);
+        }
+      } else if (UseCompressedOops) {
+        const DecoratorSet expanded_decorators = decorators | convert_compressed_oops;
+        return PreRuntimeDispatch::atomic_xchg<expanded_decorators>(new_value, addr);
+      } else {
+        const DecoratorSet expanded_decorators = decorators & ~convert_compressed_oops;
+        return PreRuntimeDispatch::atomic_xchg<expanded_decorators>(new_value, addr);
+      }
+    }
+
+    template <DecoratorSet decorators, typename T>
+    inline static typename EnableIf<
+      !HasDecorator<decorators, AS_RAW>::value, T>::type
+    atomic_xchg(T new_value, void* addr) {
+      if (is_hardwired_primitive<decorators>()) {
+        const DecoratorSet expanded_decorators = decorators | AS_RAW;
+        return PreRuntimeDispatch::atomic_xchg<expanded_decorators>(new_value, addr);
+      } else {
+        return RuntimeDispatch<decorators, T, BARRIER_ATOMIC_XCHG>::atomic_xchg(new_value, addr);
+      }
+    }
+
+    template <DecoratorSet decorators, typename T>
+    inline static typename EnableIf<
+      HasDecorator<decorators, AS_RAW>::value, T>::type
+    atomic_xchg_at(T new_value, oop base, ptrdiff_t offset) {
+      return atomic_xchg<decorators>(new_value, field_addr(base, offset));
+    }
+
+    // Atomic exchange on a field (base + offset) for accesses without AS_RAW.
+    // Hardwired primitive accesses are re-dispatched to atomic_xchg_at with
+    // AS_RAW added; everything else is forwarded to RuntimeDispatch for
+    // BARRIER_ATOMIC_XCHG_AT.
+    template <DecoratorSet decorators, typename T>
+    inline static typename EnableIf<
+      !HasDecorator<decorators, AS_RAW>::value, T>::type
+    atomic_xchg_at(T new_value, oop base, ptrdiff_t offset) {
+      if (is_hardwired_primitive<decorators>()) {
+        const DecoratorSet expanded_decorators = decorators | AS_RAW;
+        // Must target the (value, base, offset) overload set: the address-based
+        // atomic_xchg only takes (value, addr) and cannot accept these arguments.
+        return PreRuntimeDispatch::atomic_xchg_at<expanded_decorators>(new_value, base, offset);
+      } else {
+        return RuntimeDispatch<decorators, T, BARRIER_ATOMIC_XCHG_AT>::atomic_xchg_at(new_value, base, offset);
+      }
+    }
+
+    // Array copy for accesses decorated with AS_RAW.
+    // Forwards directly to RawAccessBarrier::arraycopy with the decorators
+    // masked by RAW_DECORATOR_MASK. src_obj and dst_obj are not used on this
+    // path; only the element pointers and the length are passed on.
+    template <DecoratorSet decorators, typename T>
+    inline static typename EnableIf<
+      HasDecorator<decorators, AS_RAW>::value, bool>::type
+    arraycopy(arrayOop src_obj, arrayOop dst_obj, T *src, T* dst, size_t length) {
+      return RawAccessBarrier<decorators & RAW_DECORATOR_MASK>::arraycopy(src, dst, length);
+    }
+
+    // Array copy for accesses without AS_RAW.
+    // Hardwired primitive accesses are re-dispatched with AS_RAW added;
+    // everything else is forwarded to RuntimeDispatch for BARRIER_ARRAYCOPY.
+    // Returns whatever the selected implementation returns.
+    template <DecoratorSet decorators, typename T>
+    inline static typename EnableIf<
+      !HasDecorator<decorators, AS_RAW>::value, bool>::type
+    arraycopy(arrayOop src_obj, arrayOop dst_obj, T *src, T* dst, size_t length) {
+      if (is_hardwired_primitive<decorators>()) {
+        const DecoratorSet expanded_decorators = decorators | AS_RAW;
+        return PreRuntimeDispatch::arraycopy<expanded_decorators>(src_obj, dst_obj, src, dst, length);
+      } else {
+        return RuntimeDispatch<decorators, T, BARRIER_ARRAYCOPY>::arraycopy(src_obj, dst_obj, src, dst, length);
+      }
+    }
+
+    // Object clone for accesses decorated with AS_RAW: forwards directly to
+    // RawAccessBarrier::clone with the decorators masked by RAW_DECORATOR_MASK.
+    template <DecoratorSet decorators>
+    inline static typename EnableIf<
+      HasDecorator<decorators, AS_RAW>::value>::type
+    clone(oop src, oop dst, size_t size) {
+      RawAccessBarrier<decorators & RAW_DECORATOR_MASK>::clone(src, dst, size);
+    }
+
+    // Object clone for accesses without AS_RAW: forwards to RuntimeDispatch
+    // for BARRIER_CLONE.
+    template <DecoratorSet decorators>
+    inline static typename EnableIf<
+      !HasDecorator<decorators, AS_RAW>::value>::type
+    clone(oop src, oop dst, size_t size) {
+      typedef RuntimeDispatch<decorators, oop, BARRIER_CLONE> CloneDispatch;
+      CloneDispatch::clone(src, dst, size);
+    }
+  };
+
+  // Completes a decorator set with the decorators implied by the ones given,
+  // e.g. a default reference strength and a default memory ordering.
+  // Each constant below builds on the previous one; the finished set is
+  // exposed as DecoratorFixup<...>::value.
+  template <DecoratorSet input_decorators>
+  struct DecoratorFixup: AllStatic {
+    // An oop access that names no ON_* reference strength becomes a strong one.
+    static const DecoratorSet ref_strength_default = input_decorators |
+      (((input_decorators & INTERNAL_VALUE_IS_OOP) != 0 && (input_decorators & ON_DECORATOR_MASK) == 0) ?
+       ON_STRONG_OOP_REF : INTERNAL_EMPTY);
+    // An access that names no MO_* memory ordering becomes unordered.
+    static const DecoratorSet memory_ordering_default = ref_strength_default |
+      ((ref_strength_default & MO_DECORATOR_MASK) != 0 ? INTERNAL_EMPTY : MO_UNORDERED);
+    // An access that names no AS_* barrier strength becomes a normal one.
+    static const DecoratorSet barrier_strength_default = memory_ordering_default |
+      ((memory_ordering_default & AS_DECORATOR_MASK) != 0 ? INTERNAL_EMPTY : AS_NORMAL);
+    // IN_HEAP_ARRAY implies IN_HEAP.
+    static const DecoratorSet heap_array_is_in_heap = barrier_strength_default |
+      ((barrier_strength_default & IN_HEAP_ARRAY) == 0 ? INTERNAL_EMPTY : IN_HEAP);
+    // IN_CONCURRENT_ROOT implies IN_ROOT.
+    static const DecoratorSet conc_root_is_root = heap_array_is_in_heap |
+      ((heap_array_is_in_heap & IN_CONCURRENT_ROOT) == 0 ? INTERNAL_EMPTY : IN_ROOT);
+    // The build-time decorators are always added to the final result.
+    static const DecoratorSet value = BT_BUILDTIME_DECORATORS | conc_root_is_root;
+  };
+
+  // Step 2: Reduce types.
+  // Enforce that for non-oop types, T and P have to be strictly the same.
+  // P is the type of the address and T is the type of the values.
+  // As for oop types, it is allowed to send T in {narrowOop, oop} and
+  // P in {narrowOop, oop, HeapWord*}. The following rules apply according to
+  // the subsequent table. (columns are P, rows are T)
+  // |           | HeapWord  |   oop   | narrowOop |
+  // |   oop     |  rt-comp  | hw-none |  hw-comp  |
+  // | narrowOop |     x     |    x    |  hw-none  |
+  //
+  // x means not allowed
+  // rt-comp means it must be checked at runtime whether the oop is compressed.
+  // hw-none means it is statically known the oop will not be compressed.
+  // hw-comp means it is statically known the oop will be compressed.
+
+  // Store where the address and the value share the same type T: the
+  // decorators are passed on unchanged.
+  template <DecoratorSet decorators, typename T>
+  inline void store_reduce_types(T* addr, T value) {
+    const DecoratorSet unchanged_decorators = decorators;
+    PreRuntimeDispatch::store<unchanged_decorators>(addr, value);
+  }
+
+  // Store of an oop through a narrowOop*: both INTERNAL_CONVERT_COMPRESSED_OOP
+  // and INTERNAL_RT_USE_COMPRESSED_OOPS are added to the decorators.
+  template <DecoratorSet decorators>
+  inline void store_reduce_types(narrowOop* addr, oop value) {
+    const DecoratorSet compressed_decorators = decorators |
+                                               INTERNAL_RT_USE_COMPRESSED_OOPS |
+                                               INTERNAL_CONVERT_COMPRESSED_OOP;
+    PreRuntimeDispatch::store<compressed_decorators>(addr, value);
+  }
+
+  // Store of an oop through a HeapWord*: only INTERNAL_CONVERT_COMPRESSED_OOP
+  // is added to the decorators.
+  template <DecoratorSet decorators>
+  inline void store_reduce_types(HeapWord* addr, oop value) {
+    const DecoratorSet converting_decorators = INTERNAL_CONVERT_COMPRESSED_OOP | decorators;
+    PreRuntimeDispatch::store<converting_decorators>(addr, value);
+  }
+
+  // Compare-and-exchange where the address and the values share the same
+  // type T: the decorators are passed on unchanged.
+  template <DecoratorSet decorators, typename T>
+  inline T atomic_cmpxchg_reduce_types(T new_value, T* addr, T compare_value) {
+    const DecoratorSet unchanged_decorators = decorators;
+    return PreRuntimeDispatch::atomic_cmpxchg<unchanged_decorators>(new_value, addr, compare_value);
+  }
+
+  // Compare-and-exchange of oops through a narrowOop*: both
+  // INTERNAL_CONVERT_COMPRESSED_OOP and INTERNAL_RT_USE_COMPRESSED_OOPS are
+  // added to the decorators.
+  template <DecoratorSet decorators>
+  inline oop atomic_cmpxchg_reduce_types(oop new_value, narrowOop* addr, oop compare_value) {
+    const DecoratorSet compressed_decorators = decorators |
+                                               INTERNAL_RT_USE_COMPRESSED_OOPS |
+                                               INTERNAL_CONVERT_COMPRESSED_OOP;
+    return PreRuntimeDispatch::atomic_cmpxchg<compressed_decorators>(new_value, addr, compare_value);
+  }
+
+  // Compare-and-exchange of oops through a HeapWord*: only
+  // INTERNAL_CONVERT_COMPRESSED_OOP is added to the decorators.
+  template <DecoratorSet decorators>
+  inline oop atomic_cmpxchg_reduce_types(oop new_value, HeapWord* addr, oop compare_value) {
+    const DecoratorSet converting_decorators = INTERNAL_CONVERT_COMPRESSED_OOP | decorators;
+    return PreRuntimeDispatch::atomic_cmpxchg<converting_decorators>(new_value, addr, compare_value);
+  }
+
+  // Exchange where the address and the value share the same type T: the
+  // decorators are passed on unchanged.
+  template <DecoratorSet decorators, typename T>
+  inline T atomic_xchg_reduce_types(T new_value, T* addr) {
+    return PreRuntimeDispatch::atomic_xchg<decorators>(new_value, addr);
+  }
+
+  // Exchange of an oop through a narrowOop*: both
+  // INTERNAL_CONVERT_COMPRESSED_OOP and INTERNAL_RT_USE_COMPRESSED_OOPS are
+  // added to the decorators.
+  template <DecoratorSet decorators>
+  inline oop atomic_xchg_reduce_types(oop new_value, narrowOop* addr) {
+    const DecoratorSet compressed_decorators = decorators |
+                                               INTERNAL_RT_USE_COMPRESSED_OOPS |
+                                               INTERNAL_CONVERT_COMPRESSED_OOP;
+    return PreRuntimeDispatch::atomic_xchg<compressed_decorators>(new_value, addr);
+  }
+
+  // Exchange of an oop through a HeapWord*: only
+  // INTERNAL_CONVERT_COMPRESSED_OOP is added to the decorators.
+  template <DecoratorSet decorators>
+  inline oop atomic_xchg_reduce_types(oop new_value, HeapWord* addr) {
+    const DecoratorSet converting_decorators = INTERNAL_CONVERT_COMPRESSED_OOP | decorators;
+    return PreRuntimeDispatch::atomic_xchg<converting_decorators>(new_value, addr);
+  }
+
+  // Load where the address type matches the requested value type T: the
+  // decorators are passed on unchanged.
+  template <DecoratorSet decorators, typename T>
+  inline T load_reduce_types(T* addr) {
+    const DecoratorSet unchanged_decorators = decorators;
+    return PreRuntimeDispatch::load<unchanged_decorators, T>(addr);
+  }
+
+  // Load through a narrowOop*: both INTERNAL_CONVERT_COMPRESSED_OOP and
+  // INTERNAL_RT_USE_COMPRESSED_OOPS are added to the decorators.
+  // T is not used by this overload; the result is always an oop.
+  template <DecoratorSet decorators, typename T>
+  inline oop load_reduce_types(narrowOop* addr) {
+    const DecoratorSet compressed_decorators = decorators |
+                                               INTERNAL_RT_USE_COMPRESSED_OOPS |
+                                               INTERNAL_CONVERT_COMPRESSED_OOP;
+    return PreRuntimeDispatch::load<compressed_decorators, oop>(addr);
+  }
+
+  // Load through a HeapWord*: only INTERNAL_CONVERT_COMPRESSED_OOP is added
+  // to the decorators. T is not used by this overload; the result is always
+  // an oop.
+  template <DecoratorSet decorators, typename T>
+  inline oop load_reduce_types(HeapWord* addr) {
+    const DecoratorSet converting_decorators = INTERNAL_CONVERT_COMPRESSED_OOP | decorators;
+    return PreRuntimeDispatch::load<converting_decorators, oop>(addr);
+  }
+
+  // Step 1: Set default decorators. This step remembers if a type was volatile
+  // and then sets the MO_VOLATILE decorator by default. Otherwise, a default
+  // memory ordering is set for the access, and the implied decorator rules
+  // are applied to select sensible defaults for decorators that have not been
+  // explicitly set. For example, default object referent strength is set to strong.
+  // This step also decays the types passed in (e.g. getting rid of CV qualifiers
+  // and references from the types). This step also performs some type verification
+  // that the passed in types make sense.
+
+  // Compile-time check of the value type T used with an access.
+  // Accesses marked INTERNAL_VALUE_IS_OOP are accepted as-is; any other
+  // access must use a pointer, integral or floating point type.
+  template <DecoratorSet decorators, typename T>
+  static void verify_types(){
+    // A compile failure here means a type that is not recognized as a valid
+    // primitive type was passed to a primitive Access function.
+    STATIC_ASSERT((IsPointer<T>::value ||
+                   IsIntegral<T>::value ||
+                   IsFloatingPoint<T>::value || // not allowed primitive type
+                   HasDecorator<decorators, INTERNAL_VALUE_IS_OOP>::value)); // oops have already been validated
+  }
+
+  // Stores value through addr.
+  // Verifies the value type, decays P and T, fills in default decorators via
+  // DecoratorFixup and hands over to store_reduce_types.
+  template <DecoratorSet decorators, typename P, typename T>
+  inline void store(P* addr, T value) {
+    verify_types<decorators, T>();
+    typedef typename Decay<T>::type ValueType;
+    typedef typename Decay<P>::type AddrType;
+    // A volatile address without an explicit MO_* decorator defaults to
+    // MO_VOLATILE.
+    const bool volatile_by_default = IsVolatile<P>::value &&
+                                     !HasDecorator<decorators, MO_DECORATOR_MASK>::value;
+    const DecoratorSet ordered_decorators = volatile_by_default ? (MO_VOLATILE | decorators) : decorators;
+    const DecoratorSet fixed_decorators = DecoratorFixup<ordered_decorators>::value;
+    ValueType plain_value = value;
+    store_reduce_types<fixed_decorators>(const_cast<AddrType*>(addr), plain_value);
+  }
+
+  // Stores value into the field at base + offset.
+  // Oop accesses (INTERNAL_VALUE_IS_OOP) additionally get
+  // INTERNAL_CONVERT_COMPRESSED_OOP before the defaults are filled in by
+  // DecoratorFixup.
+  template <DecoratorSet decorators, typename T>
+  inline void store_at(oop base, ptrdiff_t offset, T value) {
+    verify_types<decorators, T>();
+    typedef typename Decay<T>::type ValueType;
+    const DecoratorSet oop_conversion = HasDecorator<decorators, INTERNAL_VALUE_IS_OOP>::value ?
+                                        INTERNAL_CONVERT_COMPRESSED_OOP : INTERNAL_EMPTY;
+    const DecoratorSet fixed_decorators = DecoratorFixup<decorators | oop_conversion>::value;
+    ValueType plain_value = value;
+    PreRuntimeDispatch::store_at<fixed_decorators>(base, offset, plain_value);
+  }
+
+  // Loads a value of type T through addr.
+  // For oop accesses the loaded type is OopOrNarrowOop<T>::type, otherwise
+  // the decayed T. Default decorators are filled in via DecoratorFixup before
+  // handing over to load_reduce_types.
+  template <DecoratorSet decorators, typename P, typename T>
+  inline T load(P* addr) {
+    verify_types<decorators, T>();
+    typedef typename Conditional<HasDecorator<decorators, INTERNAL_VALUE_IS_OOP>::value,
+                                 typename OopOrNarrowOop<T>::type,
+                                 typename Decay<T>::type>::type LoadedType;
+    typedef typename Decay<P>::type AddrType;
+    // A volatile address without an explicit MO_* decorator defaults to
+    // MO_VOLATILE.
+    const bool volatile_by_default = IsVolatile<P>::value &&
+                                     !HasDecorator<decorators, MO_DECORATOR_MASK>::value;
+    const DecoratorSet ordered_decorators = volatile_by_default ? (MO_VOLATILE | decorators) : decorators;
+    const DecoratorSet fixed_decorators = DecoratorFixup<ordered_decorators>::value;
+    return load_reduce_types<fixed_decorators, LoadedType>(const_cast<AddrType*>(addr));
+  }
+
+  // Loads a value of type T from the field at base + offset.
+  // For oop accesses the loaded type is OopOrNarrowOop<T>::type, otherwise
+  // the decayed T.
+  template <DecoratorSet decorators, typename T>
+  inline T load_at(oop base, ptrdiff_t offset) {
+    verify_types<decorators, T>();
+    typedef typename Conditional<HasDecorator<decorators, INTERNAL_VALUE_IS_OOP>::value,
+                                 typename OopOrNarrowOop<T>::type,
+                                 typename Decay<T>::type>::type LoadedType;
+    // Oop accesses need to remember compressed oop awareness.
+    const DecoratorSet oop_conversion = HasDecorator<decorators, INTERNAL_VALUE_IS_OOP>::value ?
+                                        INTERNAL_CONVERT_COMPRESSED_OOP : INTERNAL_EMPTY;
+    // Fill in sensible defaults for everything not set explicitly.
+    const DecoratorSet fixed_decorators = DecoratorFixup<decorators | oop_conversion>::value;
+    return PreRuntimeDispatch::load_at<fixed_decorators, LoadedType>(base, offset);
+  }
+
+  // Atomic compare-and-exchange through addr.
+  // When no MO_* decorator is given, MO_SEQ_CST is used. The values and the
+  // address type are decayed before handing over to
+  // atomic_cmpxchg_reduce_types.
+  template <DecoratorSet decorators, typename P, typename T>
+  inline T atomic_cmpxchg(T new_value, P* addr, T compare_value) {
+    verify_types<decorators, T>();
+    typedef typename Decay<T>::type ValueType;
+    typedef typename Decay<P>::type AddrType;
+    const bool has_memory_ordering = HasDecorator<decorators, MO_DECORATOR_MASK>::value;
+    const DecoratorSet ordered_decorators = has_memory_ordering ? decorators : (MO_SEQ_CST | decorators);
+    const DecoratorSet fixed_decorators = DecoratorFixup<ordered_decorators>::value;
+    ValueType plain_new_value = new_value;
+    ValueType plain_compare_value = compare_value;
+    return atomic_cmpxchg_reduce_types<fixed_decorators>(plain_new_value,
+                                                         const_cast<AddrType*>(addr),
+                                                         plain_compare_value);
+  }
+
+  // Atomic compare-and-exchange on the field at base + offset.
+  // When no MO_* decorator is given, MO_SEQ_CST is used. Oop accesses
+  // additionally get INTERNAL_CONVERT_COMPRESSED_OOP, which is added after
+  // DecoratorFixup has filled in the defaults.
+  template <DecoratorSet decorators, typename T>
+  inline T atomic_cmpxchg_at(T new_value, oop base, ptrdiff_t offset, T compare_value) {
+    verify_types<decorators, T>();
+    typedef typename Decay<T>::type ValueType;
+    // Default memory ordering.
+    const bool has_memory_ordering = HasDecorator<decorators, MO_DECORATOR_MASK>::value;
+    const DecoratorSet ordered_decorators = has_memory_ordering ? decorators : (MO_SEQ_CST | decorators);
+    const DecoratorSet fixed_decorators = DecoratorFixup<ordered_decorators>::value;
+    // Compressed oop awareness for oop accesses.
+    const DecoratorSet oop_conversion = HasDecorator<decorators, INTERNAL_VALUE_IS_OOP>::value ?
+                                        INTERNAL_CONVERT_COMPRESSED_OOP : INTERNAL_EMPTY;
+    const DecoratorSet final_decorators = fixed_decorators | oop_conversion;
+    ValueType plain_new_value = new_value;
+    ValueType plain_compare_value = compare_value;
+    return PreRuntimeDispatch::atomic_cmpxchg_at<final_decorators>(plain_new_value, base,
+                                                                   offset, plain_compare_value);
+  }
+
+  // Atomic exchange through addr.
+  // MO_SEQ_CST is always added, since atomic_xchg is only available in the
+  // SEQ_CST flavour. The value and the address type are decayed before
+  // handing over to atomic_xchg_reduce_types.
+  template <DecoratorSet decorators, typename P, typename T>
+  inline T atomic_xchg(T new_value, P* addr) {
+    verify_types<decorators, T>();
+    typedef typename Decay<T>::type ValueType;
+    typedef typename Decay<P>::type AddrType;
+    const DecoratorSet seq_cst_decorators = MO_SEQ_CST | decorators;
+    const DecoratorSet fixed_decorators = DecoratorFixup<seq_cst_decorators>::value;
+    ValueType plain_new_value = new_value;
+    return atomic_xchg_reduce_types<fixed_decorators>(plain_new_value,
+                                                      const_cast<AddrType*>(addr));
+  }
+
+  // Atomic exchange on the field at base + offset.
+  // MO_SEQ_CST is always added, since atomic_xchg is only available in the
+  // SEQ_CST flavour. Oop accesses additionally get
+  // INTERNAL_CONVERT_COMPRESSED_OOP.
+  template <DecoratorSet decorators, typename T>
+  inline T atomic_xchg_at(T new_value, oop base, ptrdiff_t offset) {
+    verify_types<decorators, T>();
+    typedef typename Decay<T>::type ValueType;
+    const DecoratorSet oop_conversion = HasDecorator<decorators, INTERNAL_VALUE_IS_OOP>::value ?
+                                        INTERNAL_CONVERT_COMPRESSED_OOP : INTERNAL_EMPTY;
+    const DecoratorSet fixed_decorators = DecoratorFixup<decorators | MO_SEQ_CST | oop_conversion>::value;
+    ValueType plain_new_value = new_value;
+    return PreRuntimeDispatch::atomic_xchg_at<fixed_decorators>(plain_new_value, base, offset);
+  }
+
+  // Copies length elements from src to dst.
+  // IN_HEAP_ARRAY and IN_HEAP are always added to the decorators; oop
+  // accesses additionally get INTERNAL_CONVERT_COMPRESSED_OOP. The element
+  // pointers are cast to the decayed element type before dispatching.
+  // Returns the result of PreRuntimeDispatch::arraycopy.
+  template <DecoratorSet decorators, typename T>
+  inline bool arraycopy(arrayOop src_obj, arrayOop dst_obj, T *src, T *dst, size_t length) {
+    verify_types<decorators, T>();
+    typedef typename Decay<T>::type ElementType;
+    const DecoratorSet oop_conversion = HasDecorator<decorators, INTERNAL_VALUE_IS_OOP>::value ?
+                                        INTERNAL_CONVERT_COMPRESSED_OOP : INTERNAL_EMPTY;
+    const DecoratorSet fixed_decorators =
+      DecoratorFixup<decorators | IN_HEAP_ARRAY | IN_HEAP | oop_conversion>::value;
+    ElementType* const plain_src = const_cast<ElementType*>(src);
+    ElementType* const plain_dst = const_cast<ElementType*>(dst);
+    return PreRuntimeDispatch::arraycopy<fixed_decorators>(src_obj, dst_obj,
+                                                           plain_src, plain_dst,
+                                                           length);
+  }
+
+  // Clones src into dst after filling in default decorators via
+  // DecoratorFixup. size is passed on to PreRuntimeDispatch::clone unchanged.
+  template <DecoratorSet decorators>
+  inline void clone(oop src, oop dst, size_t size) {
+    PreRuntimeDispatch::clone<DecoratorFixup<decorators>::value>(src, dst, size);
+  }
+}
+
+// Compile-time validation of the decorators of an Access.
+// Every decorator used must be part of expected_decorators, and within each
+// decorator category (barrier strength, reference strength, memory ordering,
+// location) either nothing is set or exactly one of the accepted values
+// listed below is set.
+template <DecoratorSet decorators>
+template <DecoratorSet expected_decorators>
+void Access<decorators>::verify_decorators() {
+  STATIC_ASSERT((decorators & ~expected_decorators) == 0); // unexpected decorator used
+  const DecoratorSet barrier_strength_decorators = decorators & AS_DECORATOR_MASK;
+  STATIC_ASSERT(barrier_strength_decorators == 0 || ( // make sure barrier strength decorators are disjoint if set
+    barrier_strength_decorators == AS_NO_KEEPALIVE ||
+    barrier_strength_decorators == AS_RAW ||
+    barrier_strength_decorators == AS_NORMAL
+  ));
+  const DecoratorSet ref_strength_decorators = decorators & ON_DECORATOR_MASK;
+  STATIC_ASSERT(ref_strength_decorators == 0 || ( // make sure ref strength decorators are disjoint if set
+    ref_strength_decorators == ON_STRONG_OOP_REF ||
+    ref_strength_decorators == ON_WEAK_OOP_REF ||
+    ref_strength_decorators == ON_PHANTOM_OOP_REF ||
+    ref_strength_decorators == ON_UNKNOWN_OOP_REF
+  ));
+  const DecoratorSet memory_ordering_decorators = decorators & MO_DECORATOR_MASK;
+  STATIC_ASSERT(memory_ordering_decorators == 0 || ( // make sure memory ordering decorators are disjoint if set
+    memory_ordering_decorators == MO_UNORDERED ||
+    memory_ordering_decorators == MO_VOLATILE ||
+    memory_ordering_decorators == MO_RELAXED ||
+    memory_ordering_decorators == MO_ACQUIRE ||
+    memory_ordering_decorators == MO_RELEASE ||
+    memory_ordering_decorators == MO_SEQ_CST
+  ));
+  const DecoratorSet location_decorators = decorators & IN_DECORATOR_MASK;
+  STATIC_ASSERT(location_decorators == 0 || ( // make sure location decorators are disjoint if set
+    location_decorators == IN_ROOT ||
+    location_decorators == IN_HEAP ||
+    location_decorators == (IN_HEAP | IN_HEAP_ARRAY) ||
+    location_decorators == (IN_ROOT | IN_CONCURRENT_ROOT)
+  ));
+}
+
+#endif // SHARE_VM_RUNTIME_ACCESS_INLINE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/oops/accessBackend.cpp	Mon Nov 20 13:07:44 2017 +0100
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "accessBackend.inline.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/vm_version.hpp"
+#include "utilities/copy.hpp"
+
+namespace AccessInternal {
+  // VM_Version::supports_cx8() is a surrogate for 'supports atomic long memory ops'.
+  //
+  // On platforms which do not support atomic compare-and-swap of jlong (8 byte)
+  // values we have to use a lock-based scheme to enforce atomicity. This has to be
+  // applied to all Unsafe operations that set the value of a jlong field. Even so
+  // the compareAndSwapLong operation will not be atomic with respect to direct stores
+  // to the field from Java code. It is important therefore that any Java code that
+  // utilizes these Unsafe jlong operations does not perform direct stores. To permit
+  // direct loads of the field from Java code we must also use Atomic::store within the
+  // locked regions. And for good measure, in case there are direct stores, we also
+  // employ Atomic::load within those regions. Note that the field in question must be
+  // volatile and so must have atomic load/store accesses applied at the Java level.
+  //
+  // The locking scheme could utilize a range of strategies for controlling the locking
+  // granularity: from a lock per-field through to a single global lock. The latter is
+  // the simplest and is used for the current implementation. Note that the Java object
+  // that contains the field, can not, in general, be used for locking. To do so can lead
+  // to deadlocks as we may introduce locking into what appears to the Java code to be a
+  // lock-free path.
+  //
+  // As all the locked-regions are very short and themselves non-blocking we can treat
+  // them as leaf routines and elide safepoint checks (i.e. we don't perform any thread
+  // state transitions even when blocking for the lock). Note that if we do choose to
+  // add safepoint checks and thread state transitions, we must ensure that we calculate
+  // the address of the field _after_ we have acquired the lock, else the object may have
+  // been moved by the GC.
+
+#ifndef SUPPORTS_NATIVE_CX8
+
+  // Deliberately defined in the cpp file and not in the .inline.hpp file:
+  // keeping vm_version.hpp out of the header is meant to buy faster JDK build
+  // times, at the price of slightly slower atomic jlong accesses on 32 bit
+  // machines that do support 64 bit atomics.
+  // Returns true when VM_Version::supports_cx8() reports no support.
+  bool wide_atomic_needs_locking() {
+    const bool has_cx8 = VM_Version::supports_cx8();
+    return !has_cx8;
+  }
+
+  // Acquires UnsafeJlong_lock for the lifetime of the AccessLocker object.
+  // The lock is taken without a safepoint check. The assert records that this
+  // path is only expected when VM_Version::supports_cx8() is false.
+  AccessLocker::AccessLocker() {
+    assert(!VM_Version::supports_cx8(), "why else?");
+    UnsafeJlong_lock->lock_without_safepoint_check();
+  }
+
+  // Releases UnsafeJlong_lock, which the constructor acquired.
+  AccessLocker::~AccessLocker() {
+    UnsafeJlong_lock->unlock();
+  }
+
+#endif
+
+// These forward copying calls to Copy without exposing the Copy type in headers unnecessarily
+
+  // Forwards to Copy::arrayof_conjoint_oops, viewing both untyped pointers
+  // as HeapWord*.
+  void arraycopy_arrayof_conjoint_oops(void* src, void* dst, size_t length) {
+    HeapWord* const from = reinterpret_cast<HeapWord*>(src);
+    HeapWord* const to = reinterpret_cast<HeapWord*>(dst);
+    Copy::arrayof_conjoint_oops(from, to, length);
+  }
+
+  // Forwards an oop* copy of length elements to Copy::conjoint_oops_atomic.
+  void arraycopy_conjoint_oops(oop* src, oop* dst, size_t length) {
+    Copy::conjoint_oops_atomic(src, dst, length);
+  }
+
+  // Forwards a narrowOop* copy of length elements to Copy::conjoint_oops_atomic.
+  void arraycopy_conjoint_oops(narrowOop* src, narrowOop* dst, size_t length) {
+    Copy::conjoint_oops_atomic(src, dst, length);
+  }
+
+  // Forwards to Copy::disjoint_words, viewing both untyped pointers as
+  // HeapWord*.
+  void arraycopy_disjoint_words(void* src, void* dst, size_t length) {
+    HeapWord* const from = reinterpret_cast<HeapWord*>(src);
+    HeapWord* const to = reinterpret_cast<HeapWord*>(dst);
+    Copy::disjoint_words(from, to, length);
+  }
+
+  // Forwards to Copy::disjoint_words_atomic, viewing both untyped pointers
+  // as HeapWord*.
+  void arraycopy_disjoint_words_atomic(void* src, void* dst, size_t length) {
+    HeapWord* const from = reinterpret_cast<HeapWord*>(src);
+    HeapWord* const to = reinterpret_cast<HeapWord*>(dst);
+    Copy::disjoint_words_atomic(from, to, length);
+  }
+
+  // Specializations of arraycopy_conjoint, one per Java primitive width.
+  // Each forwards to the matching Copy routine. The jbyte version calls
+  // Copy::conjoint_jbytes, while the wider types call the *_atomic variants.
+
+  // jbyte elements: forwards to Copy::conjoint_jbytes.
+  template<>
+  void arraycopy_conjoint<jbyte>(jbyte* src, jbyte* dst, size_t length) {
+    Copy::conjoint_jbytes(src, dst, length);
+  }
+
+  // jshort elements: forwards to Copy::conjoint_jshorts_atomic.
+  template<>
+  void arraycopy_conjoint<jshort>(jshort* src, jshort* dst, size_t length) {
+    Copy::conjoint_jshorts_atomic(src, dst, length);
+  }
+
+  // jint elements: forwards to Copy::conjoint_jints_atomic.
+  template<>
+  void arraycopy_conjoint<jint>(jint* src, jint* dst, size_t length) {
+    Copy::conjoint_jints_atomic(src, dst, length);
+  }
+
+  // jlong elements: forwards to Copy::conjoint_jlongs_atomic.
+  template<>
+  void arraycopy_conjoint<jlong>(jlong* src, jlong* dst, size_t length) {
+    Copy::conjoint_jlongs_atomic(src, dst, length);
+  }
+
+  // Specializations of arraycopy_arrayof_conjoint, one per Java primitive
+  // width. Each views the typed element pointers as HeapWord* and forwards
+  // to the matching Copy::arrayof_conjoint_* routine.
+
+  // jbyte elements: forwards to Copy::arrayof_conjoint_jbytes.
+  template<>
+  void arraycopy_arrayof_conjoint<jbyte>(jbyte* src, jbyte* dst, size_t length) {
+    HeapWord* const from = reinterpret_cast<HeapWord*>(src);
+    HeapWord* const to = reinterpret_cast<HeapWord*>(dst);
+    Copy::arrayof_conjoint_jbytes(from, to, length);
+  }
+
+  // jshort elements: forwards to Copy::arrayof_conjoint_jshorts.
+  template<>
+  void arraycopy_arrayof_conjoint<jshort>(jshort* src, jshort* dst, size_t length) {
+    HeapWord* const from = reinterpret_cast<HeapWord*>(src);
+    HeapWord* const to = reinterpret_cast<HeapWord*>(dst);
+    Copy::arrayof_conjoint_jshorts(from, to, length);
+  }
+
+  // jint elements: forwards to Copy::arrayof_conjoint_jints.
+  template<>
+  void arraycopy_arrayof_conjoint<jint>(jint* src, jint* dst, size_t length) {
+    HeapWord* const from = reinterpret_cast<HeapWord*>(src);
+    HeapWord* const to = reinterpret_cast<HeapWord*>(dst);
+    Copy::arrayof_conjoint_jints(from, to, length);
+  }
+
+  // jlong elements: forwards to Copy::arrayof_conjoint_jlongs.
+  template<>
+  void arraycopy_arrayof_conjoint<jlong>(jlong* src, jlong* dst, size_t length) {
+    HeapWord* const from = reinterpret_cast<HeapWord*>(src);
+    HeapWord* const to = reinterpret_cast<HeapWord*>(dst);
+    Copy::arrayof_conjoint_jlongs(from, to, length);
+  }
+
+  // Specializations of arraycopy_conjoint_atomic, one per Java primitive
+  // width. Each forwards to the matching Copy::conjoint_*_atomic routine.
+
+  // jbyte elements: forwards to Copy::conjoint_jbytes_atomic.
+  template<>
+  void arraycopy_conjoint_atomic<jbyte>(jbyte* src, jbyte* dst, size_t length) {
+    Copy::conjoint_jbytes_atomic(src, dst, length);
+  }
+
+  // jshort elements: forwards to Copy::conjoint_jshorts_atomic.
+  template<>
+  void arraycopy_conjoint_atomic<jshort>(jshort* src, jshort* dst, size_t length) {
+    Copy::conjoint_jshorts_atomic(src, dst, length);
+  }
+
+  // jint elements: forwards to Copy::conjoint_jints_atomic.
+  template<>
+  void arraycopy_conjoint_atomic<jint>(jint* src, jint* dst, size_t length) {
+    Copy::conjoint_jints_atomic(src, dst, length);
+  }
+
+  // jlong elements: forwards to Copy::conjoint_jlongs_atomic.
+  template<>
+  void arraycopy_conjoint_atomic<jlong>(jlong* src, jlong* dst, size_t length) {
+    Copy::conjoint_jlongs_atomic(src, dst, length);
+  }
+}
+
+// Explicit instantiations for every primitive width of the three arraycopy
+// helper templates that are specialized inside namespace AccessInternal above.
+// NOTE(review): these templates are already explicitly specialized in this
+// file; presumably the instantiations are kept to make sure the symbols are
+// emitted from this translation unit -- confirm they are still required.
+template void AccessInternal::arraycopy_conjoint<jbyte>(jbyte* src, jbyte* dst, size_t length);
+template void AccessInternal::arraycopy_conjoint<jshort>(jshort* src, jshort* dst, size_t length);
+template void AccessInternal::arraycopy_conjoint<jint>(jint* src, jint* dst, size_t length);
+template void AccessInternal::arraycopy_conjoint<jlong>(jlong* src, jlong* dst, size_t length);
+
+template void AccessInternal::arraycopy_arrayof_conjoint<jbyte>(jbyte* src, jbyte* dst, size_t length);
+template void AccessInternal::arraycopy_arrayof_conjoint<jshort>(jshort* src, jshort* dst, size_t length);
+template void AccessInternal::arraycopy_arrayof_conjoint<jint>(jint* src, jint* dst, size_t length);
+template void AccessInternal::arraycopy_arrayof_conjoint<jlong>(jlong* src, jlong* dst, size_t length);
+
+template void AccessInternal::arraycopy_conjoint_atomic<jbyte>(jbyte* src, jbyte* dst, size_t length);
+template void AccessInternal::arraycopy_conjoint_atomic<jshort>(jshort* src, jshort* dst, size_t length);
+template void AccessInternal::arraycopy_conjoint_atomic<jint>(jint* src, jint* dst, size_t length);
+template void AccessInternal::arraycopy_conjoint_atomic<jlong>(jlong* src, jlong* dst, size_t length);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/oops/accessBackend.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_RUNTIME_ACCESSBACKEND_HPP
+#define SHARE_VM_RUNTIME_ACCESSBACKEND_HPP
+
+#include "metaprogramming/conditional.hpp"
+#include "metaprogramming/enableIf.hpp"
+#include "metaprogramming/integralConstant.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+// This metafunction returns either oop or narrowOop depending on whether
+// an access needs to use compressed oops or not.
+template <DecoratorSet decorators>
+struct HeapOopType: AllStatic {
+  static const bool needs_oop_compress = HasDecorator<decorators, INTERNAL_CONVERT_COMPRESSED_OOP>::value &&
+                                         HasDecorator<decorators, INTERNAL_RT_USE_COMPRESSED_OOPS>::value;
+  typedef typename Conditional<needs_oop_compress, narrowOop, oop>::type type;
+};
+
+namespace AccessInternal {
+  enum BarrierType {
+    BARRIER_STORE,
+    BARRIER_STORE_AT,
+    BARRIER_LOAD,
+    BARRIER_LOAD_AT,
+    BARRIER_ATOMIC_CMPXCHG,
+    BARRIER_ATOMIC_CMPXCHG_AT,
+    BARRIER_ATOMIC_XCHG,
+    BARRIER_ATOMIC_XCHG_AT,
+    BARRIER_ARRAYCOPY,
+    BARRIER_CLONE
+  };
+
+  template <DecoratorSet decorators>
+  struct MustConvertCompressedOop: public IntegralConstant<bool,
+    HasDecorator<decorators, INTERNAL_VALUE_IS_OOP>::value &&
+    HasDecorator<decorators, INTERNAL_CONVERT_COMPRESSED_OOP>::value &&
+    HasDecorator<decorators, INTERNAL_RT_USE_COMPRESSED_OOPS>::value> {};
+
+  // This metafunction returns an appropriate oop type if the value is oop-like
+  // and otherwise returns the same type T.
+  template <DecoratorSet decorators, typename T>
+  struct EncodedType: AllStatic {
+    typedef typename Conditional<
+      HasDecorator<decorators, INTERNAL_VALUE_IS_OOP>::value,
+      typename HeapOopType<decorators>::type, T>::type type;
+  };
+
+  template <DecoratorSet decorators>
+  inline typename HeapOopType<decorators>::type*
+  oop_field_addr(oop base, ptrdiff_t byte_offset) {
+    return reinterpret_cast<typename HeapOopType<decorators>::type*>(
+             reinterpret_cast<intptr_t>((void*)base) + byte_offset);
+  }
+
+  // This metafunction tells whether atomics on T may need a lock: never with
+  // SUPPORTS_NATIVE_CX8, otherwise only when T is wider than 4 bytes.
+  template <typename T>
+#ifdef SUPPORTS_NATIVE_CX8
+  struct PossiblyLockedAccess: public IntegralConstant<bool, false> {};
+#else
+  struct PossiblyLockedAccess: public IntegralConstant<bool, (sizeof(T) > 4)> {};
+#endif
+
+  template <DecoratorSet decorators, typename T>
+  struct AccessFunctionTypes {
+    typedef T (*load_at_func_t)(oop base, ptrdiff_t offset);
+    typedef void (*store_at_func_t)(oop base, ptrdiff_t offset, T value);
+    typedef T (*atomic_cmpxchg_at_func_t)(T new_value, oop base, ptrdiff_t offset, T compare_value);
+    typedef T (*atomic_xchg_at_func_t)(T new_value, oop base, ptrdiff_t offset);
+
+    typedef T (*load_func_t)(void* addr);
+    typedef void (*store_func_t)(void* addr, T value);
+    typedef T (*atomic_cmpxchg_func_t)(T new_value, void* addr, T compare_value);
+    typedef T (*atomic_xchg_func_t)(T new_value, void* addr);
+
+    typedef bool (*arraycopy_func_t)(arrayOop src_obj, arrayOop dst_obj, T* src, T* dst, size_t length);
+    typedef void (*clone_func_t)(oop src, oop dst, size_t size);
+  };
+
+  template <DecoratorSet decorators, typename T, BarrierType barrier> struct AccessFunction {};
+
+#define ACCESS_GENERATE_ACCESS_FUNCTION(bt, func)                   \
+  template <DecoratorSet decorators, typename T>                    \
+  struct AccessFunction<decorators, T, bt>: AllStatic{              \
+    typedef typename AccessFunctionTypes<decorators, T>::func type; \
+  }
+  ACCESS_GENERATE_ACCESS_FUNCTION(BARRIER_STORE, store_func_t);
+  ACCESS_GENERATE_ACCESS_FUNCTION(BARRIER_STORE_AT, store_at_func_t);
+  ACCESS_GENERATE_ACCESS_FUNCTION(BARRIER_LOAD, load_func_t);
+  ACCESS_GENERATE_ACCESS_FUNCTION(BARRIER_LOAD_AT, load_at_func_t);
+  ACCESS_GENERATE_ACCESS_FUNCTION(BARRIER_ATOMIC_CMPXCHG, atomic_cmpxchg_func_t);
+  ACCESS_GENERATE_ACCESS_FUNCTION(BARRIER_ATOMIC_CMPXCHG_AT, atomic_cmpxchg_at_func_t);
+  ACCESS_GENERATE_ACCESS_FUNCTION(BARRIER_ATOMIC_XCHG, atomic_xchg_func_t);
+  ACCESS_GENERATE_ACCESS_FUNCTION(BARRIER_ATOMIC_XCHG_AT, atomic_xchg_at_func_t);
+  ACCESS_GENERATE_ACCESS_FUNCTION(BARRIER_ARRAYCOPY, arraycopy_func_t);
+  ACCESS_GENERATE_ACCESS_FUNCTION(BARRIER_CLONE, clone_func_t);
+#undef ACCESS_GENERATE_ACCESS_FUNCTION
+
+  template <DecoratorSet decorators, typename T, BarrierType barrier_type>
+  typename AccessFunction<decorators, T, barrier_type>::type resolve_barrier();
+
+  template <DecoratorSet decorators, typename T, BarrierType barrier_type>
+  typename AccessFunction<decorators, T, barrier_type>::type resolve_oop_barrier();
+
+  class AccessLocker VALUE_OBJ_CLASS_SPEC {
+  public:
+    AccessLocker();
+    ~AccessLocker();
+  };
+  bool wide_atomic_needs_locking();
+
+  void* field_addr(oop base, ptrdiff_t offset);
+
+  // Forward calls to Copy:: in the cpp file to reduce dependencies and allow
+  // faster build times, given how frequently included access is.
+  void arraycopy_arrayof_conjoint_oops(void* src, void* dst, size_t length);
+  void arraycopy_conjoint_oops(oop* src, oop* dst, size_t length);
+  void arraycopy_conjoint_oops(narrowOop* src, narrowOop* dst, size_t length);
+
+  void arraycopy_disjoint_words(void* src, void* dst, size_t length);
+  void arraycopy_disjoint_words_atomic(void* src, void* dst, size_t length);
+
+  template<typename T>
+  void arraycopy_conjoint(T* src, T* dst, size_t length);
+  template<typename T>
+  void arraycopy_arrayof_conjoint(T* src, T* dst, size_t length);
+  template<typename T>
+  void arraycopy_conjoint_atomic(T* src, T* dst, size_t length);
+}
+
+// This mask specifies what decorators are relevant for raw accesses. When passing
+// accesses to the raw layer, irrelevant decorators are removed.
+const DecoratorSet RAW_DECORATOR_MASK = INTERNAL_DECORATOR_MASK | MO_DECORATOR_MASK |
+                                        ARRAYCOPY_DECORATOR_MASK | OOP_DECORATOR_MASK;
+
+// The RawAccessBarrier performs raw accesses with additional knowledge of
+// memory ordering, so that OrderAccess/Atomic is called when necessary.
+// It additionally handles compressed oops, and hence is not completely "raw"
+// strictly speaking.
+template <DecoratorSet decorators>
+class RawAccessBarrier: public AllStatic {
+protected:
+  static inline void* field_addr(oop base, ptrdiff_t byte_offset) {
+    return AccessInternal::field_addr(base, byte_offset);
+  }
+
+protected:
+  // Only encode oop values that need compression (see MustConvertCompressedOop)
+  template <DecoratorSet idecorators, typename T>
+  static inline typename EnableIf<
+    AccessInternal::MustConvertCompressedOop<idecorators>::value,
+    typename HeapOopType<idecorators>::type>::type
+  encode_internal(T value);
+
+  template <DecoratorSet idecorators, typename T>
+  static inline typename EnableIf<
+    !AccessInternal::MustConvertCompressedOop<idecorators>::value, T>::type
+  encode_internal(T value) {
+    return value;
+  }
+
+  template <typename T>
+  static inline typename AccessInternal::EncodedType<decorators, T>::type
+  encode(T value) {
+    return encode_internal<decorators, T>(value);
+  }
+
+  // Only decode oop values that were compressed (see MustConvertCompressedOop)
+  template <DecoratorSet idecorators, typename T>
+  static inline typename EnableIf<
+    AccessInternal::MustConvertCompressedOop<idecorators>::value, T>::type
+  decode_internal(typename HeapOopType<idecorators>::type value);
+
+  template <DecoratorSet idecorators, typename T>
+  static inline typename EnableIf<
+    !AccessInternal::MustConvertCompressedOop<idecorators>::value, T>::type
+  decode_internal(T value) {
+    return value;
+  }
+
+  template <typename T>
+  static inline T decode(typename AccessInternal::EncodedType<decorators, T>::type value) {
+    return decode_internal<decorators, T>(value);
+  }
+
+protected:
+  template <DecoratorSet ds, typename T>
+  static typename EnableIf<
+    HasDecorator<ds, MO_SEQ_CST>::value, T>::type
+  load_internal(void* addr);
+
+  template <DecoratorSet ds, typename T>
+  static typename EnableIf<
+    HasDecorator<ds, MO_ACQUIRE>::value, T>::type
+  load_internal(void* addr);
+
+  template <DecoratorSet ds, typename T>
+  static typename EnableIf<
+    HasDecorator<ds, MO_RELAXED>::value, T>::type
+  load_internal(void* addr);
+
+  template <DecoratorSet ds, typename T>
+  static inline typename EnableIf<
+    HasDecorator<ds, MO_VOLATILE>::value, T>::type
+  load_internal(void* addr) {
+    return *reinterpret_cast<const volatile T*>(addr);
+  }
+
+  template <DecoratorSet ds, typename T>
+  static inline typename EnableIf<
+    HasDecorator<ds, MO_UNORDERED>::value, T>::type
+  load_internal(void* addr) {
+    return *reinterpret_cast<const T*>(addr);
+  }
+
+  template <DecoratorSet ds, typename T>
+  static typename EnableIf<
+    HasDecorator<ds, MO_SEQ_CST>::value>::type
+  store_internal(void* addr, T value);
+
+  template <DecoratorSet ds, typename T>
+  static typename EnableIf<
+    HasDecorator<ds, MO_RELEASE>::value>::type
+  store_internal(void* addr, T value);
+
+  template <DecoratorSet ds, typename T>
+  static typename EnableIf<
+    HasDecorator<ds, MO_RELAXED>::value>::type
+  store_internal(void* addr, T value);
+
+  template <DecoratorSet ds, typename T>
+  static inline typename EnableIf<
+    HasDecorator<ds, MO_VOLATILE>::value>::type
+  store_internal(void* addr, T value) {
+    (void)const_cast<T&>(*reinterpret_cast<volatile T*>(addr) = value);
+  }
+
+  template <DecoratorSet ds, typename T>
+  static inline typename EnableIf<
+    HasDecorator<ds, MO_UNORDERED>::value>::type
+  store_internal(void* addr, T value) {
+    *reinterpret_cast<T*>(addr) = value;
+  }
+
+  template <DecoratorSet ds, typename T>
+  static typename EnableIf<
+    HasDecorator<ds, MO_SEQ_CST>::value, T>::type
+  atomic_cmpxchg_internal(T new_value, void* addr, T compare_value);
+
+  template <DecoratorSet ds, typename T>
+  static typename EnableIf<
+    HasDecorator<ds, MO_RELAXED>::value, T>::type
+  atomic_cmpxchg_internal(T new_value, void* addr, T compare_value);
+
+  template <DecoratorSet ds, typename T>
+  static typename EnableIf<
+    HasDecorator<ds, MO_SEQ_CST>::value, T>::type
+  atomic_xchg_internal(T new_value, void* addr);
+
+  // The following *_maybe_locked mechanisms handle atomic operations on values
+  // wider than the machine can access atomically; such operations may fall
+  // back to a slower path that performs the operation under a mutex.
+
+  template <DecoratorSet ds, typename T>
+  static inline typename EnableIf<
+    !AccessInternal::PossiblyLockedAccess<T>::value, T>::type
+  atomic_cmpxchg_maybe_locked(T new_value, void* addr, T compare_value) {
+    return atomic_cmpxchg_internal<ds>(new_value, addr, compare_value);
+  }
+
+  template <DecoratorSet ds, typename T>
+  static typename EnableIf<
+    AccessInternal::PossiblyLockedAccess<T>::value, T>::type
+  atomic_cmpxchg_maybe_locked(T new_value, void* addr, T compare_value);
+
+  template <DecoratorSet ds, typename T>
+  static inline typename EnableIf<
+    !AccessInternal::PossiblyLockedAccess<T>::value, T>::type
+  atomic_xchg_maybe_locked(T new_value, void* addr) {
+    return atomic_xchg_internal<ds>(new_value, addr);
+  }
+
+  template <DecoratorSet ds, typename T>
+  static typename EnableIf<
+    AccessInternal::PossiblyLockedAccess<T>::value, T>::type
+  atomic_xchg_maybe_locked(T new_value, void* addr);
+
+public:
+  template <typename T>
+  static inline void store(void* addr, T value) {
+    store_internal<decorators>(addr, value);
+  }
+
+  template <typename T>
+  static inline T load(void* addr) {
+    return load_internal<decorators, T>(addr);
+  }
+
+  template <typename T>
+  static inline T atomic_cmpxchg(T new_value, void* addr, T compare_value) {
+    return atomic_cmpxchg_maybe_locked<decorators>(new_value, addr, compare_value);
+  }
+
+  template <typename T>
+  static inline T atomic_xchg(T new_value, void* addr) {
+    return atomic_xchg_maybe_locked<decorators>(new_value, addr);
+  }
+
+  template <typename T>
+  static bool arraycopy(T* src, T* dst, size_t length);
+
+  template <typename T>
+  static void oop_store(void* addr, T value);
+  template <typename T>
+  static void oop_store_at(oop base, ptrdiff_t offset, T value);
+
+  template <typename T>
+  static T oop_load(void* addr);
+  template <typename T>
+  static T oop_load_at(oop base, ptrdiff_t offset);
+
+  template <typename T>
+  static T oop_atomic_cmpxchg(T new_value, void* addr, T compare_value);
+  template <typename T>
+  static T oop_atomic_cmpxchg_at(T new_value, oop base, ptrdiff_t offset, T compare_value);
+
+  template <typename T>
+  static T oop_atomic_xchg(T new_value, void* addr);
+  template <typename T>
+  static T oop_atomic_xchg_at(T new_value, oop base, ptrdiff_t offset);
+
+  template <typename T>
+  static void store_at(oop base, ptrdiff_t offset, T value) {
+    store(field_addr(base, offset), value);
+  }
+
+  template <typename T>
+  static T load_at(oop base, ptrdiff_t offset) {
+    return load<T>(field_addr(base, offset));
+  }
+
+  template <typename T>
+  static T atomic_cmpxchg_at(T new_value, oop base, ptrdiff_t offset, T compare_value) {
+    return atomic_cmpxchg(new_value, field_addr(base, offset), compare_value);
+  }
+
+  template <typename T>
+  static T atomic_xchg_at(T new_value, oop base, ptrdiff_t offset) {
+    return atomic_xchg(new_value, field_addr(base, offset));
+  }
+
+  template <typename T>
+  static bool oop_arraycopy(arrayOop src_obj, arrayOop dst_obj, T* src, T* dst, size_t length);
+  static bool oop_arraycopy(arrayOop src_obj, arrayOop dst_obj, HeapWord* src, HeapWord* dst, size_t length);
+
+  static void clone(oop src, oop dst, size_t size);
+};
+
+#endif // SHARE_VM_RUNTIME_ACCESSBACKEND_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/oops/accessBackend.inline.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_RUNTIME_ACCESSBACKEND_INLINE_HPP
+#define SHARE_VM_RUNTIME_ACCESSBACKEND_INLINE_HPP
+
+#include "oops/access.hpp"
+#include "oops/accessBackend.hpp"
+#include "oops/oop.inline.hpp"
+
+template <DecoratorSet decorators>
+template <DecoratorSet idecorators, typename T>
+inline typename EnableIf<
+  AccessInternal::MustConvertCompressedOop<idecorators>::value, T>::type
+RawAccessBarrier<decorators>::decode_internal(typename HeapOopType<idecorators>::type value) {
+  if (HasDecorator<decorators, OOP_NOT_NULL>::value) {
+    return oopDesc::decode_heap_oop_not_null(value);
+  } else {
+    return oopDesc::decode_heap_oop(value);
+  }
+}
+
+template <DecoratorSet decorators>
+template <DecoratorSet idecorators, typename T>
+inline typename EnableIf<
+  AccessInternal::MustConvertCompressedOop<idecorators>::value,
+  typename HeapOopType<idecorators>::type>::type
+RawAccessBarrier<decorators>::encode_internal(T value) {
+  if (HasDecorator<decorators, OOP_NOT_NULL>::value) {
+    return oopDesc::encode_heap_oop_not_null(value);
+  } else {
+    return oopDesc::encode_heap_oop(value);
+  }
+}
+
+template <DecoratorSet decorators>
+template <typename T>
+inline void RawAccessBarrier<decorators>::oop_store(void* addr, T value) {
+  typedef typename AccessInternal::EncodedType<decorators, T>::type Encoded;
+  Encoded encoded = encode(value);
+  store(reinterpret_cast<Encoded*>(addr), encoded);
+}
+
+template <DecoratorSet decorators>
+template <typename T>
+inline void RawAccessBarrier<decorators>::oop_store_at(oop base, ptrdiff_t offset, T value) {
+  oop_store(field_addr(base, offset), value);
+}
+
+template <DecoratorSet decorators>
+template <typename T>
+inline T RawAccessBarrier<decorators>::oop_load(void* addr) {
+  typedef typename AccessInternal::EncodedType<decorators, T>::type Encoded;
+  Encoded encoded = load<Encoded>(reinterpret_cast<Encoded*>(addr));
+  return decode<T>(encoded);
+}
+
+template <DecoratorSet decorators>
+template <typename T>
+inline T RawAccessBarrier<decorators>::oop_load_at(oop base, ptrdiff_t offset) {
+  return oop_load<T>(field_addr(base, offset));
+}
+
+template <DecoratorSet decorators>
+template <typename T>
+inline T RawAccessBarrier<decorators>::oop_atomic_cmpxchg(T new_value, void* addr, T compare_value) {
+  typedef typename AccessInternal::EncodedType<decorators, T>::type Encoded;
+  Encoded encoded_new = encode(new_value);
+  Encoded encoded_compare = encode(compare_value);
+  Encoded encoded_result = atomic_cmpxchg(encoded_new,
+                                          reinterpret_cast<Encoded*>(addr),
+                                          encoded_compare);
+  return decode<T>(encoded_result);
+}
+
+template <DecoratorSet decorators>
+template <typename T>
+inline T RawAccessBarrier<decorators>::oop_atomic_cmpxchg_at(T new_value, oop base, ptrdiff_t offset, T compare_value) {
+  return oop_atomic_cmpxchg(new_value, field_addr(base, offset), compare_value);
+}
+
+template <DecoratorSet decorators>
+template <typename T>
+inline T RawAccessBarrier<decorators>::oop_atomic_xchg(T new_value, void* addr) {
+  typedef typename AccessInternal::EncodedType<decorators, T>::type Encoded;
+  Encoded encoded_new = encode(new_value);
+  Encoded encoded_result = atomic_xchg(encoded_new, reinterpret_cast<Encoded*>(addr));
+  return decode<T>(encoded_result);
+}
+
+template <DecoratorSet decorators>
+template <typename T>
+inline T RawAccessBarrier<decorators>::oop_atomic_xchg_at(T new_value, oop base, ptrdiff_t offset) {
+  return oop_atomic_xchg(new_value, field_addr(base, offset));
+}
+
+template <DecoratorSet decorators>
+template <typename T>
+inline bool RawAccessBarrier<decorators>::oop_arraycopy(arrayOop src_obj, arrayOop dst_obj, T* src, T* dst, size_t length) {
+  return arraycopy(src, dst, length);
+}
+
+template <DecoratorSet decorators>
+inline bool RawAccessBarrier<decorators>::oop_arraycopy(arrayOop src_obj, arrayOop dst_obj, HeapWord* src, HeapWord* dst, size_t length) {
+  bool needs_oop_compress = HasDecorator<decorators, INTERNAL_CONVERT_COMPRESSED_OOP>::value &&
+                            HasDecorator<decorators, INTERNAL_RT_USE_COMPRESSED_OOPS>::value;
+  if (needs_oop_compress) {
+    return arraycopy(reinterpret_cast<narrowOop*>(src), reinterpret_cast<narrowOop*>(dst), length);
+  } else {
+    return arraycopy(reinterpret_cast<oop*>(src), reinterpret_cast<oop*>(dst), length);
+  }
+}
+
+template <DecoratorSet decorators>
+template <DecoratorSet ds, typename T>
+inline typename EnableIf<
+  HasDecorator<ds, MO_SEQ_CST>::value, T>::type
+RawAccessBarrier<decorators>::load_internal(void* addr) {
+  if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+    OrderAccess::fence();
+  }
+  return OrderAccess::load_acquire(reinterpret_cast<const volatile T*>(addr));
+}
+
+template <DecoratorSet decorators>
+template <DecoratorSet ds, typename T>
+inline typename EnableIf<
+  HasDecorator<ds, MO_ACQUIRE>::value, T>::type
+RawAccessBarrier<decorators>::load_internal(void* addr) {
+  return OrderAccess::load_acquire(reinterpret_cast<const volatile T*>(addr));
+}
+
+template <DecoratorSet decorators>
+template <DecoratorSet ds, typename T>
+inline typename EnableIf<
+  HasDecorator<ds, MO_RELAXED>::value, T>::type
+RawAccessBarrier<decorators>::load_internal(void* addr) {
+  return Atomic::load(reinterpret_cast<const volatile T*>(addr));
+}
+
+template <DecoratorSet decorators>
+template <DecoratorSet ds, typename T>
+inline typename EnableIf<
+  HasDecorator<ds, MO_SEQ_CST>::value>::type
+RawAccessBarrier<decorators>::store_internal(void* addr, T value) {
+  OrderAccess::release_store_fence(reinterpret_cast<volatile T*>(addr), value);
+}
+
+template <DecoratorSet decorators>
+template <DecoratorSet ds, typename T>
+inline typename EnableIf<
+  HasDecorator<ds, MO_RELEASE>::value>::type
+RawAccessBarrier<decorators>::store_internal(void* addr, T value) {
+  OrderAccess::release_store(reinterpret_cast<volatile T*>(addr), value);
+}
+
+template <DecoratorSet decorators>
+template <DecoratorSet ds, typename T>
+inline typename EnableIf<
+  HasDecorator<ds, MO_RELAXED>::value>::type
+RawAccessBarrier<decorators>::store_internal(void* addr, T value) {
+  Atomic::store(value, reinterpret_cast<volatile T*>(addr));
+}
+
+template <DecoratorSet decorators>
+template <DecoratorSet ds, typename T>
+inline typename EnableIf<
+  HasDecorator<ds, MO_RELAXED>::value, T>::type
+RawAccessBarrier<decorators>::atomic_cmpxchg_internal(T new_value, void* addr, T compare_value) {
+  return Atomic::cmpxchg(new_value,
+                         reinterpret_cast<volatile T*>(addr),
+                         compare_value,
+                         memory_order_relaxed);
+}
+
+template <DecoratorSet decorators>
+template <DecoratorSet ds, typename T>
+inline typename EnableIf<
+  HasDecorator<ds, MO_SEQ_CST>::value, T>::type
+RawAccessBarrier<decorators>::atomic_cmpxchg_internal(T new_value, void* addr, T compare_value) {
+  return Atomic::cmpxchg(new_value,
+                         reinterpret_cast<volatile T*>(addr),
+                         compare_value,
+                         memory_order_conservative);
+}
+
+template <DecoratorSet decorators>
+template <DecoratorSet ds, typename T>
+inline typename EnableIf<
+  HasDecorator<ds, MO_SEQ_CST>::value, T>::type
+RawAccessBarrier<decorators>::atomic_xchg_internal(T new_value, void* addr) {
+  return Atomic::xchg(new_value,
+                      reinterpret_cast<volatile T*>(addr));
+}
+
+// For platforms that do not have native support for wide atomics,
+// we can emulate the atomicity using a lock. So here we check
+// whether that is necessary or not.
+
+template <DecoratorSet ds>
+template <DecoratorSet decorators, typename T>
+inline typename EnableIf<
+  AccessInternal::PossiblyLockedAccess<T>::value, T>::type
+RawAccessBarrier<ds>::atomic_xchg_maybe_locked(T new_value, void* addr) {
+  if (!AccessInternal::wide_atomic_needs_locking()) {
+    return atomic_xchg_internal<ds>(new_value, addr);
+  } else {
+    AccessInternal::AccessLocker access_lock;
+    volatile T* p = reinterpret_cast<volatile T*>(addr);
+    T old_val = RawAccess<>::load(p);
+    RawAccess<>::store(p, new_value);
+    return old_val;
+  }
+}
+
+template <DecoratorSet ds>
+template <DecoratorSet decorators, typename T>
+inline typename EnableIf<
+  AccessInternal::PossiblyLockedAccess<T>::value, T>::type
+RawAccessBarrier<ds>::atomic_cmpxchg_maybe_locked(T new_value, void* addr, T compare_value) {
+  if (!AccessInternal::wide_atomic_needs_locking()) {
+    return atomic_cmpxchg_internal<ds>(new_value, addr, compare_value);
+  } else {
+    AccessInternal::AccessLocker access_lock;
+    volatile T* p = reinterpret_cast<volatile T*>(addr);
+    T old_val = RawAccess<>::load(p);
+    if (old_val == compare_value) {
+      RawAccess<>::store(p, new_value);
+    }
+    return old_val;
+  }
+}
+
+class RawAccessBarrierArrayCopy: public AllStatic {
+public:
+  template <DecoratorSet decorators, typename T>
+  static inline typename EnableIf<
+  HasDecorator<decorators, INTERNAL_VALUE_IS_OOP>::value>::type
+  arraycopy(T* src, T* dst, size_t length) {
+    // We do not check for ARRAYCOPY_ATOMIC for oops, because they are unconditionally always atomic.
+    if (HasDecorator<decorators, ARRAYCOPY_ARRAYOF>::value) {
+      AccessInternal::arraycopy_arrayof_conjoint_oops(src, dst, length);
+    } else {
+      typedef typename HeapOopType<decorators>::type OopType;
+      AccessInternal::arraycopy_conjoint_oops(reinterpret_cast<OopType*>(src),
+                                              reinterpret_cast<OopType*>(dst), length);
+    }
+  }
+
+  template <DecoratorSet decorators, typename T>
+  static inline typename EnableIf<
+    !HasDecorator<decorators, INTERNAL_VALUE_IS_OOP>::value>::type
+  arraycopy(T* src, T* dst, size_t length) {
+    if (HasDecorator<decorators, ARRAYCOPY_ARRAYOF>::value) {
+      AccessInternal::arraycopy_arrayof_conjoint(src, dst, length);
+    } else if (HasDecorator<decorators, ARRAYCOPY_DISJOINT>::value && sizeof(T) == HeapWordSize) {
+      // There is only a disjoint optimization for word granularity copying
+      if (HasDecorator<decorators, ARRAYCOPY_ATOMIC>::value) {
+        AccessInternal::arraycopy_disjoint_words_atomic(src, dst, length);
+      } else {
+        AccessInternal::arraycopy_disjoint_words(src, dst, length);
+      }
+    } else {
+      if (HasDecorator<decorators, ARRAYCOPY_ATOMIC>::value) {
+        AccessInternal::arraycopy_conjoint_atomic(src, dst, length);
+      } else {
+        AccessInternal::arraycopy_conjoint(src, dst, length);
+      }
+    }
+  }
+};
+
+template <DecoratorSet decorators>
+template <typename T>
+inline bool RawAccessBarrier<decorators>::arraycopy(T* src, T* dst, size_t length) {
+  RawAccessBarrierArrayCopy::arraycopy<decorators>(src, dst, length);
+  return true;
+}
+
+template <DecoratorSet decorators>
+inline void RawAccessBarrier<decorators>::clone(oop src, oop dst, size_t size) {
+  // 4839641 (4840070): We must do an oop-atomic copy, because if another thread
+  // is modifying a reference field in the clonee, a non-oop-atomic copy might
+  // be suspended in the middle of copying the pointer and end up with parts
+  // of two different pointers in the field.  Subsequent dereferences will crash.
+  // 4846409: an oop-copy of objects with long or double fields or arrays of same
+  // won't copy the longs/doubles atomically in 32-bit vm's, so we copy jlongs instead
+  // of oops.  We know objects are aligned on a minimum of a jlong boundary.
+  // The same is true of StubRoutines::object_copy and the various oop_copy
+  // variants, and of the code generated by the inline_native_clone intrinsic.
+
+  assert(MinObjAlignmentInBytes >= BytesPerLong, "objects misaligned");
+  AccessInternal::arraycopy_conjoint_atomic(reinterpret_cast<jlong*>((oopDesc*)src),
+                                            reinterpret_cast<jlong*>((oopDesc*)dst),
+                                            align_object_size(size) / HeapWordsPerLong);
+  // Clear the header
+  dst->init_mark();
+}
+
+#endif // SHARE_VM_RUNTIME_ACCESSBACKEND_INLINE_HPP
--- a/src/hotspot/share/oops/klass.hpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/oops/klass.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -408,6 +408,11 @@
       return search_secondary_supers(k);
     }
   }
+
+  // Is an oop/narrowOop null or subtype of this Klass?
+  template <typename T>
+  bool is_instanceof_or_null(T element);
+
   bool search_secondary_supers(Klass* k) const;
 
   // Find LCA in class hierarchy
--- a/src/hotspot/share/oops/klass.inline.hpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/oops/klass.inline.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -71,4 +71,13 @@
   return is_null(v) ? (Klass*)NULL : decode_klass_not_null(v);
 }
 
+template <typename T>
+bool Klass::is_instanceof_or_null(T element) {
+  if (oopDesc::is_null(element)) {
+    return true;
+  }
+  oop obj = oopDesc::decode_heap_oop_not_null(element);
+  return obj->klass()->is_subtype_of(this);
+}
+
 #endif // SHARE_VM_OOPS_KLASS_INLINE_HPP
--- a/src/hotspot/share/oops/objArrayKlass.cpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/oops/objArrayKlass.cpp	Mon Nov 20 13:07:44 2017 +0100
@@ -44,7 +44,6 @@
 #include "oops/symbol.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/mutexLocker.hpp"
-#include "utilities/copy.hpp"
 #include "utilities/macros.hpp"
 
 ObjArrayKlass* ObjArrayKlass::allocate(ClassLoaderData* loader_data, int n, Klass* k, Symbol* name, TRAPS) {
@@ -221,55 +220,25 @@
 // Either oop or narrowOop depending on UseCompressedOops.
 template <class T> void ObjArrayKlass::do_copy(arrayOop s, T* src,
                                arrayOop d, T* dst, int length, TRAPS) {
-
-  BarrierSet* bs = Universe::heap()->barrier_set();
-  // For performance reasons, we assume we are that the write barrier we
-  // are using has optimized modes for arrays of references.  At least one
-  // of the asserts below will fail if this is not the case.
-
   if (s == d) {
     // since source and destination are equal we do not need conversion checks.
     assert(length > 0, "sanity check");
-    bs->write_ref_array_pre(dst, length);
-    Copy::conjoint_oops_atomic(src, dst, length);
+    HeapAccess<>::oop_arraycopy(s, d, src, dst, length);
   } else {
     // We have to make sure all elements conform to the destination array
     Klass* bound = ObjArrayKlass::cast(d->klass())->element_klass();
     Klass* stype = ObjArrayKlass::cast(s->klass())->element_klass();
     if (stype == bound || stype->is_subtype_of(bound)) {
       // elements are guaranteed to be subtypes, so no check necessary
-      bs->write_ref_array_pre(dst, length);
-      Copy::conjoint_oops_atomic(src, dst, length);
+      HeapAccess<ARRAYCOPY_DISJOINT>::oop_arraycopy(s, d, src, dst, length);
     } else {
       // slow case: need individual subtype checks
       // note: don't use obj_at_put below because it includes a redundant store check
-      T* from = src;
-      T* end = from + length;
-      for (T* p = dst; from < end; from++, p++) {
-        // XXX this is going to be slow.
-        T element = *from;
-        // even slower now
-        bool element_is_null = oopDesc::is_null(element);
-        oop new_val = element_is_null ? oop(NULL)
-                                      : oopDesc::decode_heap_oop_not_null(element);
-        if (element_is_null ||
-            (new_val->klass())->is_subtype_of(bound)) {
-          bs->write_ref_field_pre(p, new_val);
-          *p = element;
-        } else {
-          // We must do a barrier to cover the partial copy.
-          const size_t pd = pointer_delta(p, dst, (size_t)heapOopSize);
-          // pointer delta is scaled to number of elements (length field in
-          // objArrayOop) which we assume is 32 bit.
-          assert(pd == (size_t)(int)pd, "length field overflow");
-          bs->write_ref_array((HeapWord*)dst, pd);
-          THROW(vmSymbols::java_lang_ArrayStoreException());
-          return;
-        }
+      if (!HeapAccess<ARRAYCOPY_DISJOINT | ARRAYCOPY_CHECKCAST>::oop_arraycopy(s, d, src, dst, length)) {
+        THROW(vmSymbols::java_lang_ArrayStoreException());
       }
     }
   }
-  bs->write_ref_array((HeapWord*)dst, length);
 }
 
 void ObjArrayKlass::copy_array(arrayOop s, int src_pos, arrayOop d,
--- a/src/hotspot/share/oops/objArrayOop.cpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/oops/objArrayOop.cpp	Mon Nov 20 13:07:44 2017 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "gc/shared/specialized_oop_closures.hpp"
+#include "oops/access.inline.hpp"
 #include "oops/objArrayKlass.hpp"
 #include "oops/objArrayOop.hpp"
 #include "oops/oop.inline.hpp"
@@ -36,12 +37,11 @@
   } else {
     dest = (HeapWord*)obj_at_addr<oop>(index);
   }
-  oop res = oopDesc::atomic_compare_exchange_oop(exchange_value, dest, compare_value, true);
-  // update card mark if success
-  if (res == compare_value) {
-    update_barrier_set((void*)dest, exchange_value);
-  }
-  return res;
+  return HeapAccess<>::oop_atomic_cmpxchg(exchange_value, dest, compare_value);
+}
+
+Klass* objArrayOopDesc::element_klass() {
+  return ObjArrayKlass::cast(klass())->element_klass();
 }
 
 #define ObjArrayOop_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)                    \
--- a/src/hotspot/share/oops/objArrayOop.hpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/oops/objArrayOop.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,6 +29,8 @@
 #include "oops/arrayOop.hpp"
 #include "utilities/align.hpp"
 
+class Klass;
+
 // An objArrayOop is an array containing oops.
 // Evaluating "String arg[10]" will create an objArrayOop.
 
@@ -44,6 +46,11 @@
     return &((T*)base())[index];
   }
 
+  template <class T>
+  static ptrdiff_t obj_at_offset(int index) {
+    return base_offset_in_bytes() + sizeof(T) * index;
+  }
+
 private:
   // Give size of objArrayOop in HeapWords minus the header
   static int array_size(int length) {
@@ -82,7 +89,7 @@
   // Accessing
   oop obj_at(int index) const;
 
-  void inline obj_at_put(int index, oop value);
+  void obj_at_put(int index, oop value);
 
   oop atomic_compare_exchange_oop(int index, oop exchange_value, oop compare_value);
 
@@ -99,6 +106,8 @@
     return (int)osz;
   }
 
+  Klass* element_klass();
+
   // special iterators for index ranges, returns size of object
 #define ObjArrayOop_OOP_ITERATE_DECL(OopClosureType, nv_suffix)     \
   void oop_iterate_range(OopClosureType* blk, int start, int end);
--- a/src/hotspot/share/oops/objArrayOop.inline.hpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/oops/objArrayOop.inline.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,26 +25,19 @@
 #ifndef SHARE_VM_OOPS_OBJARRAYOOP_INLINE_HPP
 #define SHARE_VM_OOPS_OBJARRAYOOP_INLINE_HPP
 
+#include "oops/access.inline.hpp"
 #include "oops/objArrayOop.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/globals.hpp"
 
 inline oop objArrayOopDesc::obj_at(int index) const {
-  // With UseCompressedOops decode the narrow oop in the objArray to an
-  // uncompressed oop.  Otherwise this is simply a "*" operator.
-  if (UseCompressedOops) {
-    return load_decode_heap_oop(obj_at_addr<narrowOop>(index));
-  } else {
-    return load_decode_heap_oop(obj_at_addr<oop>(index));
-  }
+  ptrdiff_t offset = UseCompressedOops ? obj_at_offset<narrowOop>(index) : obj_at_offset<oop>(index);
+  return HeapAccess<IN_HEAP_ARRAY>::oop_load_at(as_oop(), offset);
 }
 
-void objArrayOopDesc::obj_at_put(int index, oop value) {
-  if (UseCompressedOops) {
-    oop_store(obj_at_addr<narrowOop>(index), value);
-  } else {
-    oop_store(obj_at_addr<oop>(index), value);
-  }
+inline void objArrayOopDesc::obj_at_put(int index, oop value) {
+  ptrdiff_t offset = UseCompressedOops ? obj_at_offset<narrowOop>(index) : obj_at_offset<oop>(index);
+  HeapAccess<IN_HEAP_ARRAY>::oop_store_at(as_oop(), offset, value);
 }
 
 #endif // SHARE_VM_OOPS_OBJARRAYOOP_INLINE_HPP
--- a/src/hotspot/share/oops/oop.cpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/oops/oop.cpp	Mon Nov 20 13:07:44 2017 +0100
@@ -37,8 +37,6 @@
 
 bool always_do_update_barrier = false;
 
-BarrierSet* oopDesc::_bs = NULL;
-
 void oopDesc::print_on(outputStream* st) const {
   if (this == NULL) {
     st->print_cr("NULL");
@@ -175,6 +173,48 @@
   return UseCompressedClassPointers;
 }
 
+oop oopDesc::obj_field_acquire(int offset) const                      { return HeapAccess<MO_ACQUIRE>::oop_load_at(as_oop(), offset); }
+
+void oopDesc::obj_field_put_raw(int offset, oop value)                { RawAccess<>::oop_store_at(as_oop(), offset, value); }
+void oopDesc::release_obj_field_put(int offset, oop value)            { HeapAccess<MO_RELEASE>::oop_store_at(as_oop(), offset, value); }
+void oopDesc::obj_field_put_volatile(int offset, oop value)           { HeapAccess<MO_SEQ_CST>::oop_store_at(as_oop(), offset, value); }
+
+address oopDesc::address_field(int offset) const                      { return HeapAccess<>::load_at(as_oop(), offset); }
+address oopDesc::address_field_acquire(int offset) const              { return HeapAccess<MO_ACQUIRE>::load_at(as_oop(), offset); }
+
+void oopDesc::address_field_put(int offset, address value)            { HeapAccess<>::store_at(as_oop(), offset, value); }
+void oopDesc::release_address_field_put(int offset, address value)    { HeapAccess<MO_RELEASE>::store_at(as_oop(), offset, value); }
+
+Metadata* oopDesc::metadata_field(int offset) const                   { return HeapAccess<>::load_at(as_oop(), offset); }
+void oopDesc::metadata_field_put(int offset, Metadata* value)         { HeapAccess<>::store_at(as_oop(), offset, value); }
+
+Metadata* oopDesc::metadata_field_acquire(int offset) const           { return HeapAccess<MO_ACQUIRE>::load_at(as_oop(), offset); }
+void oopDesc::release_metadata_field_put(int offset, Metadata* value) { HeapAccess<MO_RELEASE>::store_at(as_oop(), offset, value); }
+
+jbyte oopDesc::byte_field_acquire(int offset) const                   { return HeapAccess<MO_ACQUIRE>::load_at(as_oop(), offset); }
+void oopDesc::release_byte_field_put(int offset, jbyte value)         { HeapAccess<MO_RELEASE>::store_at(as_oop(), offset, value); }
+
+jchar oopDesc::char_field_acquire(int offset) const                   { return HeapAccess<MO_ACQUIRE>::load_at(as_oop(), offset); }
+void oopDesc::release_char_field_put(int offset, jchar value)         { HeapAccess<MO_RELEASE>::store_at(as_oop(), offset, value); }
+
+jboolean oopDesc::bool_field_acquire(int offset) const                { return HeapAccess<MO_ACQUIRE>::load_at(as_oop(), offset); }
+void oopDesc::release_bool_field_put(int offset, jboolean value)      { HeapAccess<MO_RELEASE>::store_at(as_oop(), offset, jboolean(value & 1)); }
+
+jint oopDesc::int_field_acquire(int offset) const                     { return HeapAccess<MO_ACQUIRE>::load_at(as_oop(), offset); }
+void oopDesc::release_int_field_put(int offset, jint value)           { HeapAccess<MO_RELEASE>::store_at(as_oop(), offset, value); }
+
+jshort oopDesc::short_field_acquire(int offset) const                 { return HeapAccess<MO_ACQUIRE>::load_at(as_oop(), offset); }
+void oopDesc::release_short_field_put(int offset, jshort value)       { HeapAccess<MO_RELEASE>::store_at(as_oop(), offset, value); }
+
+jlong oopDesc::long_field_acquire(int offset) const                   { return HeapAccess<MO_ACQUIRE>::load_at(as_oop(), offset); }
+void oopDesc::release_long_field_put(int offset, jlong value)         { HeapAccess<MO_RELEASE>::store_at(as_oop(), offset, value); }
+
+jfloat oopDesc::float_field_acquire(int offset) const                 { return HeapAccess<MO_ACQUIRE>::load_at(as_oop(), offset); }
+void oopDesc::release_float_field_put(int offset, jfloat value)       { HeapAccess<MO_RELEASE>::store_at(as_oop(), offset, value); }
+
+jdouble oopDesc::double_field_acquire(int offset) const               { return HeapAccess<MO_ACQUIRE>::load_at(as_oop(), offset); }
+void oopDesc::release_double_field_put(int offset, jdouble value)     { HeapAccess<MO_RELEASE>::store_at(as_oop(), offset, value); }
+
 #if INCLUDE_CDS_JAVA_HEAP
 bool oopDesc::is_archive_object(oop p) {
   return (p == NULL) ? false : G1ArchiveAllocator::is_archive_object(p);
--- a/src/hotspot/share/oops/oop.hpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/oops/oop.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -38,10 +38,6 @@
 //
 // no virtual functions allowed
 
-// store into oop with store check
-template <class T> inline void oop_store(T* p, oop v);
-template <class T> inline void oop_store(volatile T* p, oop v);
-
 extern bool always_do_update_barrier;
 
 // Forward declarations.
@@ -65,9 +61,6 @@
     narrowKlass _compressed_klass;
   } _metadata;
 
-  // Fast access to barrier set. Must be initialized.
-  static BarrierSet* _bs;
-
  public:
   markOop  mark()      const { return _mark; }
   markOop* mark_addr() const { return (markOop*) &_mark; }
@@ -122,6 +115,9 @@
   bool is_objArray_noinline()          const;
   bool is_typeArray_noinline()         const;
 
+ protected:
+  inline oop        as_oop() const { return const_cast<oopDesc*>(this); }
+
  private:
   // field addresses in oop
   inline void*      field_base(int offset)          const;
@@ -162,107 +158,93 @@
 
   // Load an oop out of the Java heap as is without decoding.
   // Called by GC to check for null before decoding.
-  static inline narrowOop load_heap_oop(narrowOop* p) { return *p; }
-  static inline oop       load_heap_oop(oop* p)       { return *p; }
+  static inline narrowOop load_heap_oop(narrowOop* p);
+  static inline oop       load_heap_oop(oop* p);
 
   // Load an oop out of Java heap and decode it to an uncompressed oop.
   static inline oop load_decode_heap_oop_not_null(narrowOop* p);
-  static inline oop load_decode_heap_oop_not_null(oop* p) { return *p; }
+  static inline oop load_decode_heap_oop_not_null(oop* p);
   static inline oop load_decode_heap_oop(narrowOop* p);
-  static inline oop load_decode_heap_oop(oop* p) { return *p; }
+  static inline oop load_decode_heap_oop(oop* p);
 
   // Store already encoded heap oop into the heap.
-  static inline void store_heap_oop(narrowOop* p, narrowOop v) { *p = v; }
-  static inline void store_heap_oop(oop* p, oop v)             { *p = v; }
+  static inline void store_heap_oop(narrowOop* p, narrowOop v);
+  static inline void store_heap_oop(oop* p, oop v);
 
   // Encode oop if UseCompressedOops and store into the heap.
   static inline void encode_store_heap_oop_not_null(narrowOop* p, oop v);
-  static inline void encode_store_heap_oop_not_null(oop* p, oop v) { *p = v; }
+  static inline void encode_store_heap_oop_not_null(oop* p, oop v);
   static inline void encode_store_heap_oop(narrowOop* p, oop v);
-  static inline void encode_store_heap_oop(oop* p, oop v) { *p = v; }
-
-  static inline void release_store_heap_oop(volatile narrowOop* p, narrowOop v);
-  static inline void release_store_heap_oop(volatile oop* p, oop v);
-
-  static inline void release_encode_store_heap_oop_not_null(volatile narrowOop* p, oop v);
-  static inline void release_encode_store_heap_oop_not_null(volatile oop* p, oop v);
-  static inline void release_encode_store_heap_oop(volatile narrowOop* p, oop v);
-  static inline void release_encode_store_heap_oop(volatile oop* p, oop v);
-
-  static inline oop atomic_exchange_oop(oop exchange_value, volatile HeapWord *dest);
-  static inline oop atomic_compare_exchange_oop(oop exchange_value,
-                                                volatile HeapWord *dest,
-                                                oop compare_value,
-                                                bool prebarrier = false);
+  static inline void encode_store_heap_oop(oop* p, oop v);
 
   // Access to fields in a instanceOop through these methods.
-  inline oop obj_field(int offset) const;
-  inline void obj_field_put(int offset, oop value);
-  inline void obj_field_put_raw(int offset, oop value);
-  inline void obj_field_put_volatile(int offset, oop value);
+  oop obj_field(int offset) const;
+  void obj_field_put(int offset, oop value);
+  void obj_field_put_raw(int offset, oop value);
+  void obj_field_put_volatile(int offset, oop value);
 
-  inline Metadata* metadata_field(int offset) const;
-  inline void metadata_field_put(int offset, Metadata* value);
+  Metadata* metadata_field(int offset) const;
+  void metadata_field_put(int offset, Metadata* value);
 
-  inline Metadata* metadata_field_acquire(int offset) const;
-  inline void release_metadata_field_put(int offset, Metadata* value);
+  Metadata* metadata_field_acquire(int offset) const;
+  void release_metadata_field_put(int offset, Metadata* value);
 
-  inline jbyte byte_field(int offset) const;
-  inline void byte_field_put(int offset, jbyte contents);
+  jbyte byte_field(int offset) const;
+  void byte_field_put(int offset, jbyte contents);
 
-  inline jchar char_field(int offset) const;
-  inline void char_field_put(int offset, jchar contents);
+  jchar char_field(int offset) const;
+  void char_field_put(int offset, jchar contents);
 
-  inline jboolean bool_field(int offset) const;
-  inline void bool_field_put(int offset, jboolean contents);
+  jboolean bool_field(int offset) const;
+  void bool_field_put(int offset, jboolean contents);
 
-  inline jint int_field(int offset) const;
-  inline void int_field_put(int offset, jint contents);
+  jint int_field(int offset) const;
+  void int_field_put(int offset, jint contents);
 
-  inline jshort short_field(int offset) const;
-  inline void short_field_put(int offset, jshort contents);
+  jshort short_field(int offset) const;
+  void short_field_put(int offset, jshort contents);
 
-  inline jlong long_field(int offset) const;
-  inline void long_field_put(int offset, jlong contents);
+  jlong long_field(int offset) const;
+  void long_field_put(int offset, jlong contents);
 
-  inline jfloat float_field(int offset) const;
-  inline void float_field_put(int offset, jfloat contents);
+  jfloat float_field(int offset) const;
+  void float_field_put(int offset, jfloat contents);
 
-  inline jdouble double_field(int offset) const;
-  inline void double_field_put(int offset, jdouble contents);
+  jdouble double_field(int offset) const;
+  void double_field_put(int offset, jdouble contents);
 
-  inline address address_field(int offset) const;
-  inline void address_field_put(int offset, address contents);
+  address address_field(int offset) const;
+  void address_field_put(int offset, address contents);
 
-  inline oop obj_field_acquire(int offset) const;
-  inline void release_obj_field_put(int offset, oop value);
+  oop obj_field_acquire(int offset) const;
+  void release_obj_field_put(int offset, oop value);
 
-  inline jbyte byte_field_acquire(int offset) const;
-  inline void release_byte_field_put(int offset, jbyte contents);
+  jbyte byte_field_acquire(int offset) const;
+  void release_byte_field_put(int offset, jbyte contents);
 
-  inline jchar char_field_acquire(int offset) const;
-  inline void release_char_field_put(int offset, jchar contents);
+  jchar char_field_acquire(int offset) const;
+  void release_char_field_put(int offset, jchar contents);
 
-  inline jboolean bool_field_acquire(int offset) const;
-  inline void release_bool_field_put(int offset, jboolean contents);
+  jboolean bool_field_acquire(int offset) const;
+  void release_bool_field_put(int offset, jboolean contents);
 
-  inline jint int_field_acquire(int offset) const;
-  inline void release_int_field_put(int offset, jint contents);
+  jint int_field_acquire(int offset) const;
+  void release_int_field_put(int offset, jint contents);
 
-  inline jshort short_field_acquire(int offset) const;
-  inline void release_short_field_put(int offset, jshort contents);
+  jshort short_field_acquire(int offset) const;
+  void release_short_field_put(int offset, jshort contents);
 
-  inline jlong long_field_acquire(int offset) const;
-  inline void release_long_field_put(int offset, jlong contents);
+  jlong long_field_acquire(int offset) const;
+  void release_long_field_put(int offset, jlong contents);
 
-  inline jfloat float_field_acquire(int offset) const;
-  inline void release_float_field_put(int offset, jfloat contents);
+  jfloat float_field_acquire(int offset) const;
+  void release_float_field_put(int offset, jfloat contents);
 
-  inline jdouble double_field_acquire(int offset) const;
-  inline void release_double_field_put(int offset, jdouble contents);
+  jdouble double_field_acquire(int offset) const;
+  void release_double_field_put(int offset, jdouble contents);
 
-  inline address address_field_acquire(int offset) const;
-  inline void release_address_field_put(int offset, address contents);
+  address address_field_acquire(int offset) const;
+  void release_address_field_put(int offset, address contents);
 
   // printing functions for VM debugging
   void print_on(outputStream* st) const;         // First level print
@@ -322,10 +304,6 @@
   // mark-sweep support
   void follow_body(int begin, int end);
 
-  // Fast access to barrier set
-  static BarrierSet* bs()            { return _bs; }
-  static void set_bs(BarrierSet* bs) { _bs = bs; }
-
   // Garbage Collection support
 
 #if INCLUDE_ALL_GCS
--- a/src/hotspot/share/oops/oop.inline.hpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/oops/oop.inline.hpp	Mon Nov 20 13:07:44 2017 +0100
@@ -26,11 +26,10 @@
 #define SHARE_VM_OOPS_OOP_INLINE_HPP
 
 #include "gc/shared/ageTable.hpp"
-#include "gc/shared/barrierSet.inline.hpp"
-#include "gc/shared/cardTableModRefBS.hpp"
 #include "gc/shared/collectedHeap.inline.hpp"
 #include "gc/shared/genCollectedHeap.hpp"
 #include "gc/shared/generation.hpp"
+#include "oops/access.inline.hpp"
 #include "oops/arrayKlass.hpp"
 #include "oops/arrayOop.hpp"
 #include "oops/klass.inline.hpp"
@@ -42,50 +41,6 @@
 #include "utilities/align.hpp"
 #include "utilities/macros.hpp"
 
-inline void update_barrier_set(void* p, oop v, bool release = false) {
-  assert(oopDesc::bs() != NULL, "Uninitialized bs in oop!");
-  oopDesc::bs()->write_ref_field(p, v, release);
-}
-
-template <class T> inline void update_barrier_set_pre(T* p, oop v) {
-  oopDesc::bs()->write_ref_field_pre(p, v);
-}
-
-template <class T> void oop_store(T* p, oop v) {
-  if (always_do_update_barrier) {
-    oop_store((volatile T*)p, v);
-  } else {
-    update_barrier_set_pre(p, v);
-    oopDesc::encode_store_heap_oop(p, v);
-    // always_do_update_barrier == false =>
-    // Either we are at a safepoint (in GC) or CMS is not used. In both
-    // cases it's unnecessary to mark the card as dirty with release sematics.
-    update_barrier_set((void*)p, v, false /* release */);  // cast away type
-  }
-}
-
-template <class T> void oop_store(volatile T* p, oop v) {
-  update_barrier_set_pre((T*)p, v);   // cast away volatile
-  // Used by release_obj_field_put, so use release_store.
-  oopDesc::release_encode_store_heap_oop(p, v);
-  // When using CMS we must mark the card corresponding to p as dirty
-  // with release sematics to prevent that CMS sees the dirty card but
-  // not the new value v at p due to reordering of the two
-  // stores. Note that CMS has a concurrent precleaning phase, where
-  // it reads the card table while the Java threads are running.
-  update_barrier_set((void*)p, v, true /* release */);    // cast away type
-}
-
-// Should replace *addr = oop assignments where addr type depends on UseCompressedOops
-// (without having to remember the function name this calls).
-inline void oop_store_raw(HeapWord* addr, oop value) {
-  if (UseCompressedOops) {
-    oopDesc::encode_store_heap_oop((narrowOop*)addr, value);
-  } else {
-    oopDesc::encode_store_heap_oop((oop*)addr, value);
-  }
-}
-
 // Implementation of all inlined member functions defined in oop.hpp
 // We need a separate file to avoid circular references
 
@@ -339,16 +294,28 @@
   return (is_null(v)) ? (narrowOop)0 : encode_heap_oop_not_null(v);
 }
 
+narrowOop oopDesc::load_heap_oop(narrowOop* p) { return *p; }
+oop       oopDesc::load_heap_oop(oop* p)       { return *p; }
+
+void oopDesc::store_heap_oop(narrowOop* p, narrowOop v) { *p = v; }
+void oopDesc::store_heap_oop(oop* p, oop v)             { *p = v; }
+
 // Load and decode an oop out of the Java heap into a wide oop.
 oop oopDesc::load_decode_heap_oop_not_null(narrowOop* p) {
-  return decode_heap_oop_not_null(*p);
+  return decode_heap_oop_not_null(load_heap_oop(p));
 }
 
 // Load and decode an oop out of the heap accepting null
 oop oopDesc::load_decode_heap_oop(narrowOop* p) {
-  return decode_heap_oop(*p);
+  return decode_heap_oop(load_heap_oop(p));
 }
 
+oop oopDesc::load_decode_heap_oop_not_null(oop* p) { return *p; }
+oop oopDesc::load_decode_heap_oop(oop* p)          { return *p; }
+
+void oopDesc::encode_store_heap_oop_not_null(oop* p, oop v) { *p = v; }
+void oopDesc::encode_store_heap_oop(oop* p, oop v)          { *p = v; }
+
 // Encode and store a heap oop.
 void oopDesc::encode_store_heap_oop_not_null(narrowOop* p, oop v) {
   *p = encode_heap_oop_not_null(v);
@@ -359,167 +326,32 @@
   *p = encode_heap_oop(v);
 }
 
-// Store heap oop as is for volatile fields.
-void oopDesc::release_store_heap_oop(volatile oop* p, oop v) {
-  OrderAccess::release_store(p, v);
-}
-void oopDesc::release_store_heap_oop(volatile narrowOop* p, narrowOop v) {
-  OrderAccess::release_store(p, v);
-}
+inline oop  oopDesc::obj_field(int offset) const                    { return HeapAccess<>::oop_load_at(as_oop(), offset);  }
+inline void oopDesc::obj_field_put(int offset, oop value)           { HeapAccess<>::oop_store_at(as_oop(), offset, value); }
 
-void oopDesc::release_encode_store_heap_oop_not_null(volatile narrowOop* p, oop v) {
-  // heap oop is not pointer sized.
-  OrderAccess::release_store(p, encode_heap_oop_not_null(v));
-}
-void oopDesc::release_encode_store_heap_oop_not_null(volatile oop* p, oop v) {
-  OrderAccess::release_store(p, v);
-}
+inline jbyte oopDesc::byte_field(int offset) const                  { return HeapAccess<>::load_at(as_oop(), offset);  }
+inline void  oopDesc::byte_field_put(int offset, jbyte value)       { HeapAccess<>::store_at(as_oop(), offset, value); }
 
-void oopDesc::release_encode_store_heap_oop(volatile oop* p, oop v) {
-  OrderAccess::release_store(p, v);
-}
-void oopDesc::release_encode_store_heap_oop(volatile narrowOop* p, oop v) {
-  OrderAccess::release_store(p, encode_heap_oop(v));
-}
+inline jchar oopDesc::char_field(int offset) const                  { return HeapAccess<>::load_at(as_oop(), offset);  }
+inline void  oopDesc::char_field_put(int offset, jchar value)       { HeapAccess<>::store_at(as_oop(), offset, value); }
 
-// These functions are only used to exchange oop fields in instances,
-// not headers.
-oop oopDesc::atomic_exchange_oop(oop exchange_value, volatile HeapWord *dest) {
-  if (UseCompressedOops) {
-    // encode exchange value from oop to T
-    narrowOop val = encode_heap_oop(exchange_value);
-    narrowOop old = Atomic::xchg(val, (narrowOop*)dest);
-    // decode old from T to oop
-    return decode_heap_oop(old);
-  } else {
-    return Atomic::xchg(exchange_value, (oop*)dest);
-  }
-}
+inline jboolean oopDesc::bool_field(int offset) const               { return HeapAccess<>::load_at(as_oop(), offset);                }
+inline void     oopDesc::bool_field_put(int offset, jboolean value) { HeapAccess<>::store_at(as_oop(), offset, jboolean(value & 1)); }
 
-oop oopDesc::atomic_compare_exchange_oop(oop exchange_value,
-                                         volatile HeapWord *dest,
-                                         oop compare_value,
-                                         bool prebarrier) {
-  if (UseCompressedOops) {
-    if (prebarrier) {
-      update_barrier_set_pre((narrowOop*)dest, exchange_value);
-    }
-    // encode exchange and compare value from oop to T
-    narrowOop val = encode_heap_oop(exchange_value);
-    narrowOop cmp = encode_heap_oop(compare_value);
+inline jshort oopDesc::short_field(int offset) const                { return HeapAccess<>::load_at(as_oop(), offset);  }
+inline void   oopDesc::short_field_put(int offset, jshort value)    { HeapAccess<>::store_at(as_oop(), offset, value); }
 
-    narrowOop old = Atomic::cmpxchg(val, (narrowOop*)dest, cmp);
-    // decode old from T to oop
-    return decode_heap_oop(old);
-  } else {
-    if (prebarrier) {
-      update_barrier_set_pre((oop*)dest, exchange_value);
-    }
-    return Atomic::cmpxchg(exchange_value, (oop*)dest, compare_value);
-  }
-}
+inline jint oopDesc::int_field(int offset) const                    { return HeapAccess<>::load_at(as_oop(), offset);  }
+inline void oopDesc::int_field_put(int offset, jint value)          { HeapAccess<>::store_at(as_oop(), offset, value); }
 
-// In order to put or get a field out of an instance, must first check
-// if the field has been compressed and uncompress it.
-oop oopDesc::obj_field(int offset) const {
-  return UseCompressedOops ?
-    load_decode_heap_oop(obj_field_addr<narrowOop>(offset)) :
-    load_decode_heap_oop(obj_field_addr<oop>(offset));
-}
+inline jlong oopDesc::long_field(int offset) const                  { return HeapAccess<>::load_at(as_oop(), offset);  }
+inline void  oopDesc::long_field_put(int offset, jlong value)       { HeapAccess<>::store_at(as_oop(), offset, value); }
 
-void oopDesc::obj_field_put(int offset, oop value) {
-  UseCompressedOops ? oop_store(obj_field_addr<narrowOop>(offset), value) :
-                      oop_store(obj_field_addr<oop>(offset),       value);
-}
+inline jfloat oopDesc::float_field(int offset) const                { return HeapAccess<>::load_at(as_oop(), offset);  }
+inline void   oopDesc::float_field_put(int offset, jfloat value)    { HeapAccess<>::store_at(as_oop(), offset, value); }
 
-void oopDesc::obj_field_put_raw(int offset, oop value) {
-  UseCompressedOops ?
-    encode_store_heap_oop(obj_field_addr<narrowOop>(offset), value) :
-    encode_store_heap_oop(obj_field_addr<oop>(offset),       value);
-}
-void oopDesc::obj_field_put_volatile(int offset, oop value) {
-  OrderAccess::release();
-  obj_field_put(offset, value);
-  OrderAccess::fence();
-}
-
-Metadata* oopDesc::metadata_field(int offset) const           { return *metadata_field_addr(offset);   }
-void oopDesc::metadata_field_put(int offset, Metadata* value) { *metadata_field_addr(offset) = value;  }
-
-Metadata* oopDesc::metadata_field_acquire(int offset) const   {
-  return OrderAccess::load_acquire(metadata_field_addr(offset));
-}
-
-void oopDesc::release_metadata_field_put(int offset, Metadata* value) {
-  OrderAccess::release_store(metadata_field_addr(offset), value);
-}
-
-jbyte oopDesc::byte_field(int offset) const                   { return (jbyte) *byte_field_addr(offset);    }
-void oopDesc::byte_field_put(int offset, jbyte contents)      { *byte_field_addr(offset) = (jint) contents; }
-
-jchar oopDesc::char_field(int offset) const                   { return (jchar) *char_field_addr(offset);    }
-void oopDesc::char_field_put(int offset, jchar contents)      { *char_field_addr(offset) = (jint) contents; }
-
-jboolean oopDesc::bool_field(int offset) const                { return (jboolean) *bool_field_addr(offset); }
-void oopDesc::bool_field_put(int offset, jboolean contents)   { *bool_field_addr(offset) = (((jint) contents) & 1); }
-
-jint oopDesc::int_field(int offset) const                     { return *int_field_addr(offset);        }
-void oopDesc::int_field_put(int offset, jint contents)        { *int_field_addr(offset) = contents;    }
-
-jshort oopDesc::short_field(int offset) const                 { return (jshort) *short_field_addr(offset);  }
-void oopDesc::short_field_put(int offset, jshort contents)    { *short_field_addr(offset) = (jint) contents;}
-
-jlong oopDesc::long_field(int offset) const                   { return *long_field_addr(offset);       }
-void oopDesc::long_field_put(int offset, jlong contents)      { *long_field_addr(offset) = contents;   }
-
-jfloat oopDesc::float_field(int offset) const                 { return *float_field_addr(offset);      }
-void oopDesc::float_field_put(int offset, jfloat contents)    { *float_field_addr(offset) = contents;  }
-
-jdouble oopDesc::double_field(int offset) const               { return *double_field_addr(offset);     }
-void oopDesc::double_field_put(int offset, jdouble contents)  { *double_field_addr(offset) = contents; }
-
-address oopDesc::address_field(int offset) const              { return *address_field_addr(offset);     }
-void oopDesc::address_field_put(int offset, address contents) { *address_field_addr(offset) = contents; }
-
-oop oopDesc::obj_field_acquire(int offset) const {
-  return UseCompressedOops ?
-             decode_heap_oop((narrowOop)
-               OrderAccess::load_acquire(obj_field_addr<narrowOop>(offset)))
-           : decode_heap_oop(
-                OrderAccess::load_acquire(obj_field_addr<oop>(offset)));
-}
-void oopDesc::release_obj_field_put(int offset, oop value) {
-  UseCompressedOops ?
-    oop_store((volatile narrowOop*)obj_field_addr<narrowOop>(offset), value) :
-    oop_store((volatile oop*)      obj_field_addr<oop>(offset),       value);
-}
-
-jbyte oopDesc::byte_field_acquire(int offset) const                   { return OrderAccess::load_acquire(byte_field_addr(offset));     }
-void oopDesc::release_byte_field_put(int offset, jbyte contents)      { OrderAccess::release_store(byte_field_addr(offset), contents); }
-
-jchar oopDesc::char_field_acquire(int offset) const                   { return OrderAccess::load_acquire(char_field_addr(offset));     }
-void oopDesc::release_char_field_put(int offset, jchar contents)      { OrderAccess::release_store(char_field_addr(offset), contents); }
-
-jboolean oopDesc::bool_field_acquire(int offset) const                { return OrderAccess::load_acquire(bool_field_addr(offset));     }
-void oopDesc::release_bool_field_put(int offset, jboolean contents)   { OrderAccess::release_store(bool_field_addr(offset), jboolean(contents & 1)); }
-
-jint oopDesc::int_field_acquire(int offset) const                     { return OrderAccess::load_acquire(int_field_addr(offset));      }
-void oopDesc::release_int_field_put(int offset, jint contents)        { OrderAccess::release_store(int_field_addr(offset), contents);  }
-
-jshort oopDesc::short_field_acquire(int offset) const                 { return (jshort)OrderAccess::load_acquire(short_field_addr(offset)); }
-void oopDesc::release_short_field_put(int offset, jshort contents)    { OrderAccess::release_store(short_field_addr(offset), contents);     }
-
-jlong oopDesc::long_field_acquire(int offset) const                   { return OrderAccess::load_acquire(long_field_addr(offset));       }
-void oopDesc::release_long_field_put(int offset, jlong contents)      { OrderAccess::release_store(long_field_addr(offset), contents);   }
-
-jfloat oopDesc::float_field_acquire(int offset) const                 { return OrderAccess::load_acquire(float_field_addr(offset));      }
-void oopDesc::release_float_field_put(int offset, jfloat contents)    { OrderAccess::release_store(float_field_addr(offset), contents);  }
-
-jdouble oopDesc::double_field_acquire(int offset) const               { return OrderAccess::load_acquire(double_field_addr(offset));     }
-void oopDesc::release_double_field_put(int offset, jdouble contents)  { OrderAccess::release_store(double_field_addr(offset), contents); }
-
-address oopDesc::address_field_acquire(int offset) const              { return OrderAccess::load_acquire(address_field_addr(offset)); }
-void oopDesc::release_address_field_put(int offset, address contents) { OrderAccess::release_store(address_field_addr(offset), contents); }
+inline jdouble oopDesc::double_field(int offset) const              { return HeapAccess<>::load_at(as_oop(), offset);  }
+inline void    oopDesc::double_field_put(int offset, jdouble value) { HeapAccess<>::store_at(as_oop(), offset, value); }
 
 bool oopDesc::is_locked() const {
   return mark()->is_locked();
--- a/src/hotspot/share/prims/jni.cpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/prims/jni.cpp	Mon Nov 20 13:07:44 2017 +0100
@@ -43,6 +43,7 @@
 #include "memory/oopFactory.hpp"
 #include "memory/resourceArea.hpp"
 #include "memory/universe.inline.hpp"
+#include "oops/access.inline.hpp"
 #include "oops/instanceKlass.hpp"
 #include "oops/instanceOop.hpp"
 #include "oops/markOop.hpp"
@@ -84,9 +85,6 @@
 #include "utilities/internalVMTests.hpp"
 #include "utilities/macros.hpp"
 #include "utilities/vmError.hpp"
-#if INCLUDE_ALL_GCS
-#include "gc/g1/g1SATBCardTableModRefBS.hpp"
-#endif // INCLUDE_ALL_GCS
 #if INCLUDE_JVMCI
 #include "jvmci/jvmciCompiler.hpp"
 #include "jvmci/jvmciRuntime.hpp"
@@ -2069,28 +2067,9 @@
   if (JvmtiExport::should_post_field_access()) {
     o = JvmtiExport::jni_GetField_probe(thread, obj, o, k, fieldID, false);
   }
-  jobject ret = JNIHandles::make_local(env, o->obj_field(offset));
-#if INCLUDE_ALL_GCS
-  // If G1 is enabled and we are accessing the value of the referent
-  // field in a reference object then we need to register a non-null
-  // referent with the SATB barrier.
-  if (UseG1GC) {
-    bool needs_barrier = false;
-
-    if (ret != NULL &&
-        offset == java_lang_ref_Reference::referent_offset &&
-        InstanceKlass::cast(k)->reference_type() != REF_NONE) {
-      assert(InstanceKlass::cast(k)->is_subclass_of(SystemDictionary::Reference_klass()), "sanity");
-      needs_barrier = true;
-    }
-
-    if (needs_barrier) {
-      oop referent = JNIHandles::resolve(ret);
-      G1SATBCardTableModRefBS::enqueue(referent);
-    }
-  }
-#endif // INCLUDE_ALL_GCS
-HOTSPOT_JNI_GETOBJECTFIELD_RETURN(ret);
+  oop loaded_obj = HeapAccess<ON_UNKNOWN_OOP_REF>::oop_load_at(o, offset);
+  jobject ret = JNIHandles::make_local(env, loaded_obj);
+  HOTSPOT_JNI_GETOBJECTFIELD_RETURN(ret);
   return ret;
 JNI_END
 
@@ -2187,7 +2166,7 @@
     field_value.l = value;
     o = JvmtiExport::jni_SetField_probe_nh(thread, obj, o, k, fieldID, false, 'L', (jvalue *)&field_value);
   }
-  o->obj_field_put(offset, JNIHandles::resolve(value));
+  HeapAccess<ON_UNKNOWN_OOP_REF>::oop_store_at(o, offset, JNIHandles::resolve(value));
   HOTSPOT_JNI_SETOBJECTFIELD_RETURN();
 JNI_END
 
--- a/src/hotspot/share/prims/jvm.cpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/prims/jvm.cpp	Mon Nov 20 13:07:44 2017 +0100
@@ -35,12 +35,12 @@
 #include "classfile/stringTable.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
-#include "gc/shared/barrierSet.inline.hpp"
 #include "gc/shared/collectedHeap.inline.hpp"
 #include "interpreter/bytecode.hpp"
 #include "memory/oopFactory.hpp"
 #include "memory/resourceArea.hpp"
 #include "memory/universe.inline.hpp"
+#include "oops/access.inline.hpp"
 #include "oops/fieldStreams.hpp"
 #include "oops/instanceKlass.hpp"
 #include "oops/method.hpp"
@@ -652,24 +652,7 @@
     new_obj_oop = CollectedHeap::obj_allocate(klass, size, CHECK_NULL);
   }
 
-  // 4839641 (4840070): We must do an oop-atomic copy, because if another thread
-  // is modifying a reference field in the clonee, a non-oop-atomic copy might
-  // be suspended in the middle of copying the pointer and end up with parts
-  // of two different pointers in the field.  Subsequent dereferences will crash.
-  // 4846409: an oop-copy of objects with long or double fields or arrays of same
-  // won't copy the longs/doubles atomically in 32-bit vm's, so we copy jlongs instead
-  // of oops.  We know objects are aligned on a minimum of an jlong boundary.
-  // The same is true of StubRoutines::object_copy and the various oop_copy
-  // variants, and of the code generated by the inline_native_clone intrinsic.
-  assert(MinObjAlignmentInBytes >= BytesPerLong, "objects misaligned");
-  Copy::conjoint_jlongs_atomic((jlong*)obj(), (jlong*)new_obj_oop,
-                               align_object_size(size) / HeapWordsPerLong);
-  // Clear the header
-  new_obj_oop->init_mark();
-
-  // Store check (mark entire object and let gc sort it out)
-  BarrierSet* bs = Universe::heap()->barrier_set();
-  bs->write_region(MemRegion((HeapWord*)new_obj_oop, size));
+  HeapAccess<>::clone(obj(), new_obj_oop, size);
 
   Handle new_obj(THREAD, new_obj_oop);
   // Caution: this involves a java upcall, so the clone should be
--- a/src/hotspot/share/prims/unsafe.cpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/prims/unsafe.cpp	Mon Nov 20 13:07:44 2017 +0100
@@ -29,6 +29,7 @@
 #include "classfile/vmSymbols.hpp"
 #include "memory/allocation.inline.hpp"
 #include "memory/resourceArea.hpp"
+#include "oops/access.inline.hpp"
 #include "oops/fieldStreams.hpp"
 #include "oops/objArrayOop.inline.hpp"
 #include "oops/oop.inline.hpp"
@@ -45,9 +46,6 @@
 #include "utilities/copy.hpp"
 #include "utilities/dtrace.hpp"
 #include "utilities/macros.hpp"
-#if INCLUDE_ALL_GCS
-#include "gc/g1/g1SATBCardTableModRefBS.hpp"
-#endif // INCLUDE_ALL_GCS
 
 /**
  * Implementation of the jdk.internal.misc.Unsafe class
@@ -100,10 +98,10 @@
   return byte_offset;
 }
 
-static inline void* index_oop_from_field_offset_long(oop p, jlong field_offset) {
+static inline void assert_field_offset_sane(oop p, jlong field_offset) {
+#ifdef ASSERT
   jlong byte_offset = field_offset_to_byte_offset(field_offset);
 
-#ifdef ASSERT
   if (p != NULL) {
     assert(byte_offset >= 0 && byte_offset <= (jlong)MAX_OBJECT_SIZE, "sane offset");
     if (byte_offset == (jint)byte_offset) {
@@ -115,6 +113,11 @@
     assert(byte_offset < p_size, "Unsafe access: offset " INT64_FORMAT " > object's size " INT64_FORMAT, (int64_t)byte_offset, (int64_t)p_size);
   }
 #endif
+}
+
+static inline void* index_oop_from_field_offset_long(oop p, jlong field_offset) {
+  assert_field_offset_sane(p, field_offset);
+  jlong byte_offset = field_offset_to_byte_offset(field_offset);
 
   if (sizeof(char*) == sizeof(jint)) {   // (this constant folds!)
     return (address)p + (jint) byte_offset;
@@ -143,12 +146,12 @@
  */
 class MemoryAccess : StackObj {
   JavaThread* _thread;
-  jobject _obj;
-  jlong _offset;
+  oop _obj;
+  ptrdiff_t _offset;
 
   // Resolves and returns the address of the memory access
   void* addr() {
-    return index_oop_from_field_offset_long(JNIHandles::resolve(_obj), _offset);
+    return index_oop_from_field_offset_long(_obj, _offset);
   }
 
   template <typename T>
@@ -174,252 +177,108 @@
    */
   class GuardUnsafeAccess {
     JavaThread* _thread;
-    bool _active;
 
   public:
-    GuardUnsafeAccess(JavaThread* thread, jobject _obj) : _thread(thread) {
-      if (JNIHandles::resolve(_obj) == NULL) {
-        // native/off-heap access which may raise SIGBUS if accessing
-        // memory mapped file data in a region of the file which has
-        // been truncated and is now invalid
-        _thread->set_doing_unsafe_access(true);
-        _active = true;
-      } else {
-        _active = false;
-      }
+    GuardUnsafeAccess(JavaThread* thread) : _thread(thread) {
+      // native/off-heap access which may raise SIGBUS if accessing
+      // memory mapped file data in a region of the file which has
+      // been truncated and is now invalid
+      _thread->set_doing_unsafe_access(true);
     }
 
     ~GuardUnsafeAccess() {
-      if (_active) {
-        _thread->set_doing_unsafe_access(false);
-      }
+      _thread->set_doing_unsafe_access(false);
     }
   };
 
 public:
   MemoryAccess(JavaThread* thread, jobject obj, jlong offset)
-    : _thread(thread), _obj(obj), _offset(offset) {
+    : _thread(thread), _obj(JNIHandles::resolve(obj)), _offset((ptrdiff_t)offset) {
+    assert_field_offset_sane(_obj, offset);
   }
 
   template <typename T>
   T get() {
-    GuardUnsafeAccess guard(_thread, _obj);
-
-    T* p = (T*)addr();
-
-    T x = normalize_for_read(*p);
-
-    return x;
+    if (oopDesc::is_null(_obj)) {
+      GuardUnsafeAccess guard(_thread);
+      T ret = RawAccess<>::load((T*)addr());
+      return normalize_for_read(ret);
+    } else {
+      T ret = HeapAccess<>::load_at(_obj, _offset);
+      return normalize_for_read(ret);
+    }
   }
 
   template <typename T>
   void put(T x) {
-    GuardUnsafeAccess guard(_thread, _obj);
-
-    T* p = (T*)addr();
-
-    *p = normalize_for_write(x);
+    if (oopDesc::is_null(_obj)) {
+      GuardUnsafeAccess guard(_thread);
+      RawAccess<>::store((T*)addr(), normalize_for_write(x));
+    } else {
+      HeapAccess<>::store_at(_obj, _offset, normalize_for_write(x));
+    }
   }
 
 
   template <typename T>
   T get_volatile() {
-    GuardUnsafeAccess guard(_thread, _obj);
-
-    T* p = (T*)addr();
-
-    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
-      OrderAccess::fence();
+    if (oopDesc::is_null(_obj)) {
+      GuardUnsafeAccess guard(_thread);
+      volatile T ret = RawAccess<MO_SEQ_CST>::load((volatile T*)addr());
+      return normalize_for_read(ret);
+    } else {
+      T ret = HeapAccess<MO_SEQ_CST>::load_at(_obj, _offset);
+      return normalize_for_read(ret);
     }
-
-    T x = OrderAccess::load_acquire((volatile T*)p);
-
-    return normalize_for_read(x);
   }
 
   template <typename T>
   void put_volatile(T x) {
-    GuardUnsafeAccess guard(_thread, _obj);
-
-    T* p = (T*)addr();
-
-    OrderAccess::release_store_fence((volatile T*)p, normalize_for_write(x));
-  }
-
-
-#ifndef SUPPORTS_NATIVE_CX8
-  jlong get_jlong_locked() {
-    GuardUnsafeAccess guard(_thread, _obj);
-
-    MutexLockerEx mu(UnsafeJlong_lock, Mutex::_no_safepoint_check_flag);
-
-    jlong* p = (jlong*)addr();
-
-    jlong x = Atomic::load(p);
-
-    return x;
-  }
-
-  void put_jlong_locked(jlong x) {
-    GuardUnsafeAccess guard(_thread, _obj);
-
-    MutexLockerEx mu(UnsafeJlong_lock, Mutex::_no_safepoint_check_flag);
-
-    jlong* p = (jlong*)addr();
-
-    Atomic::store(normalize_for_write(x),  p);
-  }
-#endif
-};
-
-// Get/PutObject must be special-cased, since it works with handles.
-
-// We could be accessing the referent field in a reference
-// object. If G1 is enabled then we need to register non-null
-// referent with the SATB barrier.
-
-#if INCLUDE_ALL_GCS
-static bool is_java_lang_ref_Reference_access(oop o, jlong offset) {
-  if (offset == java_lang_ref_Reference::referent_offset && o != NULL) {
-    Klass* k = o->klass();
-    if (InstanceKlass::cast(k)->reference_type() != REF_NONE) {
-      assert(InstanceKlass::cast(k)->is_subclass_of(SystemDictionary::Reference_klass()), "sanity");
-      return true;
+    if (oopDesc::is_null(_obj)) {
+      GuardUnsafeAccess guard(_thread);
+      RawAccess<MO_SEQ_CST>::store((volatile T*)addr(), normalize_for_write(x));
+    } else {
+      HeapAccess<MO_SEQ_CST>::store_at(_obj, _offset, normalize_for_write(x));
     }
   }
-  return false;
-}
-#endif
-
-static void ensure_satb_referent_alive(oop o, jlong offset, oop v) {
-#if INCLUDE_ALL_GCS
-  if (UseG1GC && v != NULL && is_java_lang_ref_Reference_access(o, offset)) {
-    G1SATBCardTableModRefBS::enqueue(v);
-  }
-#endif
-}
+};
 
 // These functions allow a null base pointer with an arbitrary address.
 // But if the base pointer is non-null, the offset should make some sense.
 // That is, it should be in the range [0, MAX_OBJECT_SIZE].
 UNSAFE_ENTRY(jobject, Unsafe_GetObject(JNIEnv *env, jobject unsafe, jobject obj, jlong offset)) {
   oop p = JNIHandles::resolve(obj);
-  oop v;
-
-  if (UseCompressedOops) {
-    narrowOop n = *(narrowOop*)index_oop_from_field_offset_long(p, offset);
-    v = oopDesc::decode_heap_oop(n);
-  } else {
-    v = *(oop*)index_oop_from_field_offset_long(p, offset);
-  }
-
-  ensure_satb_referent_alive(p, offset, v);
-
+  assert_field_offset_sane(p, offset);
+  oop v = HeapAccess<ON_UNKNOWN_OOP_REF>::oop_load_at(p, offset);
   return JNIHandles::make_local(env, v);
 } UNSAFE_END
 
 UNSAFE_ENTRY(void, Unsafe_PutObject(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject x_h)) {
   oop x = JNIHandles::resolve(x_h);
   oop p = JNIHandles::resolve(obj);
-
-  if (UseCompressedOops) {
-    oop_store((narrowOop*)index_oop_from_field_offset_long(p, offset), x);
-  } else {
-    oop_store((oop*)index_oop_from_field_offset_long(p, offset), x);
-  }
+  assert_field_offset_sane(p, offset);
+  HeapAccess<ON_UNKNOWN_OOP_REF>::oop_store_at(p, offset, x);
 } UNSAFE_END
 
 UNSAFE_ENTRY(jobject, Unsafe_GetObjectVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset)) {
   oop p = JNIHandles::resolve(obj);
-  void* addr = index_oop_from_field_offset_long(p, offset);
-
-  volatile oop v;
-
-  if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
-    OrderAccess::fence();
-  }
-
-  if (UseCompressedOops) {
-    volatile narrowOop n = *(volatile narrowOop*) addr;
-    (void)const_cast<oop&>(v = oopDesc::decode_heap_oop(n));
-  } else {
-    (void)const_cast<oop&>(v = *(volatile oop*) addr);
-  }
-
-  ensure_satb_referent_alive(p, offset, v);
-
-  OrderAccess::acquire();
+  assert_field_offset_sane(p, offset);
+  oop v = HeapAccess<MO_SEQ_CST | ON_UNKNOWN_OOP_REF>::oop_load_at(p, offset);
   return JNIHandles::make_local(env, v);
 } UNSAFE_END
 
 UNSAFE_ENTRY(void, Unsafe_PutObjectVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject x_h)) {
   oop x = JNIHandles::resolve(x_h);
   oop p = JNIHandles::resolve(obj);
-  void* addr = index_oop_from_field_offset_long(p, offset);
-  OrderAccess::release();
-
-  if (UseCompressedOops) {
-    oop_store((narrowOop*)addr, x);
-  } else {
-    oop_store((oop*)addr, x);
-  }
-
-  OrderAccess::fence();
+  assert_field_offset_sane(p, offset);
+  HeapAccess<MO_SEQ_CST | ON_UNKNOWN_OOP_REF>::oop_store_at(p, offset, x);
 } UNSAFE_END
 
 UNSAFE_ENTRY(jobject, Unsafe_GetUncompressedObject(JNIEnv *env, jobject unsafe, jlong addr)) {
   oop v = *(oop*) (address) addr;
-
   return JNIHandles::make_local(env, v);
 } UNSAFE_END
 
-#ifndef SUPPORTS_NATIVE_CX8
-
-// VM_Version::supports_cx8() is a surrogate for 'supports atomic long memory ops'.
-//
-// On platforms which do not support atomic compare-and-swap of jlong (8 byte)
-// values we have to use a lock-based scheme to enforce atomicity. This has to be
-// applied to all Unsafe operations that set the value of a jlong field. Even so
-// the compareAndSetLong operation will not be atomic with respect to direct stores
-// to the field from Java code. It is important therefore that any Java code that
-// utilizes these Unsafe jlong operations does not perform direct stores. To permit
-// direct loads of the field from Java code we must also use Atomic::store within the
-// locked regions. And for good measure, in case there are direct stores, we also
-// employ Atomic::load within those regions. Note that the field in question must be
-// volatile and so must have atomic load/store accesses applied at the Java level.
-//
-// The locking scheme could utilize a range of strategies for controlling the locking
-// granularity: from a lock per-field through to a single global lock. The latter is
-// the simplest and is used for the current implementation. Note that the Java object
-// that contains the field, can not, in general, be used for locking. To do so can lead
-// to deadlocks as we may introduce locking into what appears to the Java code to be a
-// lock-free path.
-//
-// As all the locked-regions are very short and themselves non-blocking we can treat
-// them as leaf routines and elide safepoint checks (ie we don't perform any thread
-// state transitions even when blocking for the lock). Note that if we do choose to
-// add safepoint checks and thread state transitions, we must ensure that we calculate
-// the address of the field _after_ we have acquired the lock, else the object may have
-// been moved by the GC
-
-UNSAFE_ENTRY(jlong, Unsafe_GetLongVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset)) {
-  if (VM_Version::supports_cx8()) {
-    return MemoryAccess(thread, obj, offset).get_volatile<jlong>();
-  } else {
-    return MemoryAccess(thread, obj, offset).get_jlong_locked();
-  }
-} UNSAFE_END
-
-UNSAFE_ENTRY(void, Unsafe_PutLongVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jlong x)) {
-  if (VM_Version::supports_cx8()) {
-    MemoryAccess(thread, obj, offset).put_volatile<jlong>(x);
-  } else {
-    MemoryAccess(thread, obj, offset).put_jlong_locked(x);
-  }
-} UNSAFE_END
-
-#endif // not SUPPORTS_NATIVE_CX8
-
 UNSAFE_LEAF(jboolean, Unsafe_isBigEndian0(JNIEnv *env, jobject unsafe)) {
 #ifdef VM_LITTLE_ENDIAN
   return false;
@@ -472,13 +331,10 @@
 DEFINE_GETSETOOP_VOLATILE(jshort, Short);
 DEFINE_GETSETOOP_VOLATILE(jchar, Char);
 DEFINE_GETSETOOP_VOLATILE(jint, Int);
+DEFINE_GETSETOOP_VOLATILE(jlong, Long);
 DEFINE_GETSETOOP_VOLATILE(jfloat, Float);
 DEFINE_GETSETOOP_VOLATILE(jdouble, Double);
 
-#ifdef SUPPORTS_NATIVE_CX8
-DEFINE_GETSETOOP_VOLATILE(jlong, Long);
-#endif
-
 #undef DEFINE_GETSETOOP_VOLATILE
 
 UNSAFE_LEAF(void, Unsafe_LoadFence(JNIEnv *env, jobject unsafe)) {
@@ -1001,85 +857,62 @@
   oop x = JNIHandles::resolve(x_h);
   oop e = JNIHandles::resolve(e_h);
   oop p = JNIHandles::resolve(obj);
-  HeapWord* addr = (HeapWord *)index_oop_from_field_offset_long(p, offset);
-  oop res = oopDesc::atomic_compare_exchange_oop(x, addr, e, true);
-  if (res == e) {
-    update_barrier_set((void*)addr, x);
-  }
+  assert_field_offset_sane(p, offset);
+  oop res = HeapAccess<ON_UNKNOWN_OOP_REF>::oop_atomic_cmpxchg_at(x, p, (ptrdiff_t)offset, e);
   return JNIHandles::make_local(env, res);
 } UNSAFE_END
 
 UNSAFE_ENTRY(jint, Unsafe_CompareAndExchangeInt(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jint e, jint x)) {
   oop p = JNIHandles::resolve(obj);
-  jint* addr = (jint *) index_oop_from_field_offset_long(p, offset);
-
-  return (jint)(Atomic::cmpxchg(x, addr, e));
+  if (oopDesc::is_null(p)) {
+    volatile jint* addr = (volatile jint*)index_oop_from_field_offset_long(p, offset);
+    return RawAccess<>::atomic_cmpxchg(x, addr, e);
+  } else {
+    assert_field_offset_sane(p, offset);
+    return HeapAccess<>::atomic_cmpxchg_at(x, p, (ptrdiff_t)offset, e);
+  }
 } UNSAFE_END
 
 UNSAFE_ENTRY(jlong, Unsafe_CompareAndExchangeLong(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jlong e, jlong x)) {
-  Handle p(THREAD, JNIHandles::resolve(obj));
-  jlong* addr = (jlong*)index_oop_from_field_offset_long(p(), offset);
-
-#ifdef SUPPORTS_NATIVE_CX8
-  return (jlong)(Atomic::cmpxchg(x, addr, e));
-#else
-  if (VM_Version::supports_cx8()) {
-    return (jlong)(Atomic::cmpxchg(x, addr, e));
+  oop p = JNIHandles::resolve(obj);
+  if (oopDesc::is_null(p)) {
+    volatile jlong* addr = (volatile jlong*)index_oop_from_field_offset_long(p, offset);
+    return RawAccess<>::atomic_cmpxchg(x, addr, e);
   } else {
-    MutexLockerEx mu(UnsafeJlong_lock, Mutex::_no_safepoint_check_flag);
-
-    jlong val = Atomic::load(addr);
-    if (val == e) {
-      Atomic::store(x, addr);
-    }
-    return val;
+    assert_field_offset_sane(p, offset);
+    return HeapAccess<>::atomic_cmpxchg_at(x, p, (ptrdiff_t)offset, e);
   }
-#endif
 } UNSAFE_END
 
 UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSetObject(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject e_h, jobject x_h)) {
   oop x = JNIHandles::resolve(x_h);
   oop e = JNIHandles::resolve(e_h);
   oop p = JNIHandles::resolve(obj);
-  HeapWord* addr = (HeapWord *)index_oop_from_field_offset_long(p, offset);
-  oop res = oopDesc::atomic_compare_exchange_oop(x, addr, e, true);
-  if (res != e) {
-    return false;
-  }
-
-  update_barrier_set((void*)addr, x);
-
-  return true;
+  assert_field_offset_sane(p, offset);
+  oop ret = HeapAccess<ON_UNKNOWN_OOP_REF>::oop_atomic_cmpxchg_at(x, p, (ptrdiff_t)offset, e);
+  return ret == e;
 } UNSAFE_END
 
 UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSetInt(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jint e, jint x)) {
   oop p = JNIHandles::resolve(obj);
-  jint* addr = (jint *)index_oop_from_field_offset_long(p, offset);
-
-  return (jint)(Atomic::cmpxchg(x, addr, e)) == e;
+  if (oopDesc::is_null(p)) {
+    volatile jint* addr = (volatile jint*)index_oop_from_field_offset_long(p, offset);
+    return RawAccess<>::atomic_cmpxchg(x, addr, e) == e;
+  } else {
+    assert_field_offset_sane(p, offset);
+    return HeapAccess<>::atomic_cmpxchg_at(x, p, (ptrdiff_t)offset, e) == e;
+  }
 } UNSAFE_END
 
 UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSetLong(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jlong e, jlong x)) {
-  Handle p(THREAD, JNIHandles::resolve(obj));
-  jlong* addr = (jlong*)index_oop_from_field_offset_long(p(), offset);
-
-#ifdef SUPPORTS_NATIVE_CX8
-  return (jlong)(Atomic::cmpxchg(x, addr, e)) == e;
-#else
-  if (VM_Version::supports_cx8()) {
-    return (jlong)(Atomic::cmpxchg(x, addr, e)) == e;
+  oop p = JNIHandles::resolve(obj);
+  if (oopDesc::is_null(p)) {
+    volatile jlong* addr = (volatile jlong*)index_oop_from_field_offset_long(p, offset);
+    return RawAccess<>::atomic_cmpxchg(x, addr, e) == e;
   } else {
-    MutexLockerEx mu(UnsafeJlong_lock, Mutex::_no_safepoint_check_flag);
-
-    jlong val = Atomic::load(addr);
-    if (val != e) {
-      return false;
-    }
-
-    Atomic::store(x, addr);
-    return true;
+    assert_field_offset_sane(p, offset);
+    return HeapAccess<>::atomic_cmpxchg_at(x, p, (ptrdiff_t)offset, e) == e;
   }
-#endif
 } UNSAFE_END
 
 UNSAFE_ENTRY(void, Unsafe_Park(JNIEnv *env, jobject unsafe, jboolean isAbsolute, jlong time)) {
--- a/src/hotspot/share/runtime/stubRoutines.cpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/runtime/stubRoutines.cpp	Mon Nov 20 13:07:44 2017 +0100
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "asm/codeBuffer.hpp"
 #include "memory/resourceArea.hpp"
+#include "oops/access.inline.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/timerTrace.hpp"
@@ -377,19 +378,6 @@
 // Default versions of arraycopy functions
 //
 
-static void gen_arraycopy_barrier_pre(oop* dest, size_t count, bool dest_uninitialized) {
-    assert(count != 0, "count should be non-zero");
-    assert(count <= (size_t)max_intx, "count too large");
-    BarrierSet* bs = Universe::heap()->barrier_set();
-    bs->write_ref_array_pre(dest, (int)count, dest_uninitialized);
-}
-
-static void gen_arraycopy_barrier(oop* dest, size_t count) {
-    assert(count != 0, "count should be non-zero");
-    BarrierSet* bs = Universe::heap()->barrier_set();
-    bs->write_ref_array((HeapWord*)dest, count);
-}
-
 JRT_LEAF(void, StubRoutines::jbyte_copy(jbyte* src, jbyte* dest, size_t count))
 #ifndef PRODUCT
   SharedRuntime::_jbyte_array_copy_ctr++;      // Slow-path byte array copy
@@ -423,9 +411,7 @@
   SharedRuntime::_oop_array_copy_ctr++;        // Slow-path oop array copy
 #endif // !PRODUCT
   assert(count != 0, "count should be non-zero");
-  gen_arraycopy_barrier_pre(dest, count, /*dest_uninitialized*/false);
-  Copy::conjoint_oops_atomic(src, dest, count);
-  gen_arraycopy_barrier(dest, count);
+  HeapAccess<>::oop_arraycopy(NULL, NULL, (HeapWord*)src, (HeapWord*)dest, count);
 JRT_END
 
 JRT_LEAF(void, StubRoutines::oop_copy_uninit(oop* src, oop* dest, size_t count))
@@ -433,9 +419,7 @@
   SharedRuntime::_oop_array_copy_ctr++;        // Slow-path oop array copy
 #endif // !PRODUCT
   assert(count != 0, "count should be non-zero");
-  gen_arraycopy_barrier_pre(dest, count, /*dest_uninitialized*/true);
-  Copy::conjoint_oops_atomic(src, dest, count);
-  gen_arraycopy_barrier(dest, count);
+  HeapAccess<ARRAYCOPY_DEST_NOT_INITIALIZED>::oop_arraycopy(NULL, NULL, (HeapWord*)src, (HeapWord*)dest, count);
 JRT_END
 
 JRT_LEAF(void, StubRoutines::arrayof_jbyte_copy(HeapWord* src, HeapWord* dest, size_t count))
@@ -471,9 +455,7 @@
   SharedRuntime::_oop_array_copy_ctr++;        // Slow-path oop array copy
 #endif // !PRODUCT
   assert(count != 0, "count should be non-zero");
-  gen_arraycopy_barrier_pre((oop *) dest, count, /*dest_uninitialized*/false);
-  Copy::arrayof_conjoint_oops(src, dest, count);
-  gen_arraycopy_barrier((oop *) dest, count);
+  HeapAccess<ARRAYCOPY_ARRAYOF>::oop_arraycopy(NULL, NULL, src, dest, count);
 JRT_END
 
 JRT_LEAF(void, StubRoutines::arrayof_oop_copy_uninit(HeapWord* src, HeapWord* dest, size_t count))
@@ -481,9 +463,7 @@
   SharedRuntime::_oop_array_copy_ctr++;        // Slow-path oop array copy
 #endif // !PRODUCT
   assert(count != 0, "count should be non-zero");
-  gen_arraycopy_barrier_pre((oop *) dest, count, /*dest_uninitialized*/true);
-  Copy::arrayof_conjoint_oops(src, dest, count);
-  gen_arraycopy_barrier((oop *) dest, count);
+  HeapAccess<ARRAYCOPY_ARRAYOF | ARRAYCOPY_DEST_NOT_INITIALIZED>::oop_arraycopy(NULL, NULL, src, dest, count);
 JRT_END
 
 address StubRoutines::select_fill_function(BasicType t, bool aligned, const char* &name) {
--- a/src/hotspot/share/runtime/vmStructs.cpp	Mon Nov 20 12:04:13 2017 +0100
+++ b/src/hotspot/share/runtime/vmStructs.cpp	Mon Nov 20 13:07:44 2017 +0100
@@ -228,8 +228,8 @@
                                                                                                                                      \
   volatile_nonstatic_field(oopDesc,            _mark,                                         markOop)                               \
   volatile_nonstatic_field(oopDesc,            _metadata._klass,                              Klass*)                                \
-  volatile_nonstatic_field(oopDesc,            _metadata._compressed_klass,                   narrowOop)                             \
-     static_field(oopDesc,                     _bs,                                           BarrierSet*)                           \
+  volatile_nonstatic_field(oopDesc,            _metadata._compressed_klass,                   narrowKlass)                           \
+  static_field(BarrierSet,                     _bs,                                           BarrierSet*)                           \
   nonstatic_field(ArrayKlass,                  _dimension,                                    int)                                   \
   volatile_nonstatic_field(ArrayKlass,         _higher_dimension,                             Klass*)                                \
   volatile_nonstatic_field(ArrayKlass,         _lower_dimension,                              Klass*)                                \