changeset 6044:2113136690bc

8024921: PPC64 (part 113): Extend Load and Store nodes to know about memory ordering Summary: Add a field to C2 LoadNode and StoreNode classes which indicates whether the load/store should do an acquire/release on platforms which support it. Reviewed-by: kvn
author goetz
date Fri, 15 Nov 2013 11:05:32 -0800
parents eb178e97560c
children ea78de16a4a4
files src/share/vm/opto/generateOptoStub.cpp src/share/vm/opto/graphKit.cpp src/share/vm/opto/graphKit.hpp src/share/vm/opto/idealKit.cpp src/share/vm/opto/idealKit.hpp src/share/vm/opto/library_call.cpp src/share/vm/opto/macro.cpp src/share/vm/opto/matcher.cpp src/share/vm/opto/memnode.cpp src/share/vm/opto/memnode.hpp src/share/vm/opto/mulnode.cpp src/share/vm/opto/parse1.cpp src/share/vm/opto/parse2.cpp src/share/vm/opto/parse3.cpp src/share/vm/opto/parseHelper.cpp src/share/vm/opto/stringopts.cpp src/share/vm/opto/vectornode.hpp
diffstat 17 files changed, 351 insertions(+), 255 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/vm/opto/generateOptoStub.cpp	Thu Nov 07 11:47:11 2013 +0100
+++ b/src/share/vm/opto/generateOptoStub.cpp	Fri Nov 15 11:05:32 2013 -0800
@@ -104,13 +104,12 @@
   //
   Node *adr_sp = basic_plus_adr(top(), thread, in_bytes(JavaThread::last_Java_sp_offset()));
   Node *last_sp = basic_plus_adr(top(), frameptr(), (intptr_t) STACK_BIAS);
-  store_to_memory(NULL, adr_sp, last_sp, T_ADDRESS, NoAlias);
+  store_to_memory(NULL, adr_sp, last_sp, T_ADDRESS, NoAlias, MemNode::unordered);
 
   // Set _thread_in_native
   // The order of stores into TLS is critical!  Setting _thread_in_native MUST
   // be last, because a GC is allowed at any time after setting it and the GC
   // will require last_Java_pc and last_Java_sp.
-  Node* adr_state = basic_plus_adr(top(), thread, in_bytes(JavaThread::thread_state_offset()));
 
   //-----------------------------
   // Compute signature for C call.  Varies from the Java signature!
@@ -225,16 +224,15 @@
   //-----------------------------
 
   // Clear last_Java_sp
-  store_to_memory(NULL, adr_sp, null(), T_ADDRESS, NoAlias);
+  store_to_memory(NULL, adr_sp, null(), T_ADDRESS, NoAlias, MemNode::unordered);
   // Clear last_Java_pc and (optionally)_flags
-  store_to_memory(NULL, adr_last_Java_pc, null(), T_ADDRESS, NoAlias);
+  store_to_memory(NULL, adr_last_Java_pc, null(), T_ADDRESS, NoAlias, MemNode::unordered);
 #if defined(SPARC)
-  store_to_memory(NULL, adr_flags, intcon(0), T_INT, NoAlias);
+  store_to_memory(NULL, adr_flags, intcon(0), T_INT, NoAlias, MemNode::unordered);
 #endif /* defined(SPARC) */
 #if (defined(IA64) && !defined(AIX))
   Node* adr_last_Java_fp = basic_plus_adr(top(), thread, in_bytes(JavaThread::last_Java_fp_offset()));
-  if( os::is_MP() ) insert_mem_bar(Op_MemBarRelease);
-  store_to_memory(NULL, adr_last_Java_fp,    null(),    T_ADDRESS, NoAlias);
+  store_to_memory(NULL, adr_last_Java_fp, null(), T_ADDRESS, NoAlias, MemNode::unordered);
 #endif
 
   // For is-fancy-jump, the C-return value is also the branch target
@@ -242,16 +240,16 @@
   // Runtime call returning oop in TLS?  Fetch it out
   if( pass_tls ) {
     Node* adr = basic_plus_adr(top(), thread, in_bytes(JavaThread::vm_result_offset()));
-    Node* vm_result = make_load(NULL, adr, TypeOopPtr::BOTTOM, T_OBJECT, NoAlias, false);
+    Node* vm_result = make_load(NULL, adr, TypeOopPtr::BOTTOM, T_OBJECT, NoAlias, MemNode::unordered);
     map()->set_req(TypeFunc::Parms, vm_result); // vm_result passed as result
     // clear thread-local-storage(tls)
-    store_to_memory(NULL, adr, null(), T_ADDRESS, NoAlias);
+    store_to_memory(NULL, adr, null(), T_ADDRESS, NoAlias, MemNode::unordered);
   }
 
   //-----------------------------
   // check exception
   Node* adr = basic_plus_adr(top(), thread, in_bytes(Thread::pending_exception_offset()));
-  Node* pending = make_load(NULL, adr, TypeOopPtr::BOTTOM, T_OBJECT, NoAlias, false);
+  Node* pending = make_load(NULL, adr, TypeOopPtr::BOTTOM, T_OBJECT, NoAlias, MemNode::unordered);
 
   Node* exit_memory = reset_memory();
 
--- a/src/share/vm/opto/graphKit.cpp	Thu Nov 07 11:47:11 2013 +0100
+++ b/src/share/vm/opto/graphKit.cpp	Fri Nov 15 11:05:32 2013 -0800
@@ -494,7 +494,7 @@
     // first must access the should_post_on_exceptions_flag in this thread's JavaThread
     Node* jthread = _gvn.transform(new (C) ThreadLocalNode());
     Node* adr = basic_plus_adr(top(), jthread, in_bytes(JavaThread::should_post_on_exceptions_flag_offset()));
-    Node* should_post_flag = make_load(control(), adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw, false);
+    Node* should_post_flag = make_load(control(), adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw, MemNode::unordered);
 
     // Test the should_post_on_exceptions_flag vs. 0
     Node* chk = _gvn.transform( new (C) CmpINode(should_post_flag, intcon(0)) );
@@ -596,7 +596,8 @@
 
       Node *adr = basic_plus_adr(ex_node, ex_node, offset);
       const TypeOopPtr* val_type = TypeOopPtr::make_from_klass(env()->String_klass());
-      Node *store = store_oop_to_object(control(), ex_node, adr, adr_typ, null(), val_type, T_OBJECT);
+      // Conservatively release stores of object references.
+      Node *store = store_oop_to_object(control(), ex_node, adr, adr_typ, null(), val_type, T_OBJECT, MemNode::release);
 
       add_exception_state(make_exception_state(ex_node));
       return;
@@ -1483,16 +1484,16 @@
 // factory methods in "int adr_idx"
 Node* GraphKit::make_load(Node* ctl, Node* adr, const Type* t, BasicType bt,
                           int adr_idx,
-                          bool require_atomic_access) {
+                          MemNode::MemOrd mo, bool require_atomic_access) {
   assert(adr_idx != Compile::AliasIdxTop, "use other make_load factory" );
   const TypePtr* adr_type = NULL; // debug-mode-only argument
   debug_only(adr_type = C->get_adr_type(adr_idx));
   Node* mem = memory(adr_idx);
   Node* ld;
   if (require_atomic_access && bt == T_LONG) {
-    ld = LoadLNode::make_atomic(C, ctl, mem, adr, adr_type, t);
+    ld = LoadLNode::make_atomic(C, ctl, mem, adr, adr_type, t, mo);
   } else {
-    ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt);
+    ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, mo);
   }
   ld = _gvn.transform(ld);
   if ((bt == T_OBJECT) && C->do_escape_analysis() || C->eliminate_boxing()) {
@@ -1504,6 +1505,7 @@
 
 Node* GraphKit::store_to_memory(Node* ctl, Node* adr, Node *val, BasicType bt,
                                 int adr_idx,
+                                MemNode::MemOrd mo,
                                 bool require_atomic_access) {
   assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
   const TypePtr* adr_type = NULL;
@@ -1511,9 +1513,9 @@
   Node *mem = memory(adr_idx);
   Node* st;
   if (require_atomic_access && bt == T_LONG) {
-    st = StoreLNode::make_atomic(C, ctl, mem, adr, adr_type, val);
+    st = StoreLNode::make_atomic(C, ctl, mem, adr, adr_type, val, mo);
   } else {
-    st = StoreNode::make(_gvn, ctl, mem, adr, adr_type, val, bt);
+    st = StoreNode::make(_gvn, ctl, mem, adr, adr_type, val, bt, mo);
   }
   st = _gvn.transform(st);
   set_memory(st, adr_idx);
@@ -1613,7 +1615,8 @@
                           Node* val,
                           const TypeOopPtr* val_type,
                           BasicType bt,
-                          bool use_precise) {
+                          bool use_precise,
+                          MemNode::MemOrd mo) {
   // Transformation of a value which could be NULL pointer (CastPP #NULL)
   // could be delayed during Parse (for example, in adjust_map_after_if()).
   // Execute transformation here to avoid barrier generation in such case.
@@ -1633,7 +1636,7 @@
               NULL /* pre_val */,
               bt);
 
-  Node* store = store_to_memory(control(), adr, val, bt, adr_idx);
+  Node* store = store_to_memory(control(), adr, val, bt, adr_idx, mo);
   post_barrier(control(), store, obj, adr, adr_idx, val, bt, use_precise);
   return store;
 }
@@ -1644,7 +1647,8 @@
                              Node* adr,  // actual adress to store val at
                              const TypePtr* adr_type,
                              Node* val,
-                             BasicType bt) {
+                             BasicType bt,
+                             MemNode::MemOrd mo) {
   Compile::AliasType* at = C->alias_type(adr_type);
   const TypeOopPtr* val_type = NULL;
   if (adr_type->isa_instptr()) {
@@ -1663,7 +1667,7 @@
   if (val_type == NULL) {
     val_type = TypeInstPtr::BOTTOM;
   }
-  return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true);
+  return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true, mo);
 }
 
 
@@ -1707,7 +1711,7 @@
   const Type* elemtype = arytype->elem();
   BasicType elembt = elemtype->array_element_basic_type();
   Node* adr = array_element_address(ary, idx, elembt, arytype->size());
-  Node* ld = make_load(ctl, adr, elemtype, elembt, arytype);
+  Node* ld = make_load(ctl, adr, elemtype, elembt, arytype, MemNode::unordered);
   return ld;
 }
 
@@ -1942,9 +1946,9 @@
 void GraphKit::increment_counter(Node* counter_addr) {
   int adr_type = Compile::AliasIdxRaw;
   Node* ctrl = control();
-  Node* cnt  = make_load(ctrl, counter_addr, TypeInt::INT, T_INT, adr_type);
+  Node* cnt  = make_load(ctrl, counter_addr, TypeInt::INT, T_INT, adr_type, MemNode::unordered);
   Node* incr = _gvn.transform(new (C) AddINode(cnt, _gvn.intcon(1)));
-  store_to_memory( ctrl, counter_addr, incr, T_INT, adr_type );
+  store_to_memory(ctrl, counter_addr, incr, T_INT, adr_type, MemNode::unordered);
 }
 
 
@@ -2525,7 +2529,8 @@
 
   // First load the super-klass's check-offset
   Node *p1 = basic_plus_adr( superklass, superklass, in_bytes(Klass::super_check_offset_offset()) );
-  Node *chk_off = _gvn.transform( new (C) LoadINode( NULL, memory(p1), p1, _gvn.type(p1)->is_ptr() ) );
+  Node *chk_off = _gvn.transform(new (C) LoadINode(NULL, memory(p1), p1, _gvn.type(p1)->is_ptr(),
+                                                   TypeInt::INT, MemNode::unordered));
   int cacheoff_con = in_bytes(Klass::secondary_super_cache_offset());
   bool might_be_cache = (find_int_con(chk_off, cacheoff_con) == cacheoff_con);
 
@@ -3238,7 +3243,7 @@
   }
   constant_value = Klass::_lh_neutral_value;  // put in a known value
   Node* lhp = basic_plus_adr(klass_node, klass_node, in_bytes(Klass::layout_helper_offset()));
-  return make_load(NULL, lhp, TypeInt::INT, T_INT);
+  return make_load(NULL, lhp, TypeInt::INT, T_INT, MemNode::unordered);
 }
 
 // We just put in an allocate/initialize with a big raw-memory effect.
@@ -3773,7 +3778,7 @@
 
   // Smash zero into card
   if( !UseConcMarkSweepGC ) {
-    __ store(__ ctrl(), card_adr, zero, bt, adr_type);
+    __ store(__ ctrl(), card_adr, zero, bt, adr_type, MemNode::release);
   } else {
     // Specialized path for CM store barrier
     __ storeCM(__ ctrl(), card_adr, zero, oop_store, adr_idx, bt, adr_type);
@@ -3870,9 +3875,9 @@
 
         // Now get the buffer location we will log the previous value into and store it
         Node *log_addr = __ AddP(no_base, buffer, next_index);
-        __ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw);
+        __ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw, MemNode::unordered);
         // update the index
-        __ store(__ ctrl(), index_adr, next_index, index_bt, Compile::AliasIdxRaw);
+        __ store(__ ctrl(), index_adr, next_index, index_bt, Compile::AliasIdxRaw, MemNode::unordered);
 
       } __ else_(); {
 
@@ -3912,8 +3917,9 @@
     Node* next_index = _gvn.transform(new (C) SubXNode(index, __ ConX(sizeof(intptr_t))));
     Node* log_addr = __ AddP(no_base, buffer, next_index);
 
-    __ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw);
-    __ store(__ ctrl(), index_adr, next_index, TypeX_X->basic_type(), Compile::AliasIdxRaw);
+    // Order, see storeCM.
+    __ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw, MemNode::unordered);
+    __ store(__ ctrl(), index_adr, next_index, TypeX_X->basic_type(), Compile::AliasIdxRaw, MemNode::unordered);
 
   } __ else_(); {
     __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), "g1_wb_post", card_adr, __ thread());
@@ -4043,7 +4049,7 @@
     int offset_field_idx = C->get_alias_index(offset_field_type);
     return make_load(ctrl,
                      basic_plus_adr(str, str, offset_offset),
-                     TypeInt::INT, T_INT, offset_field_idx);
+                     TypeInt::INT, T_INT, offset_field_idx, MemNode::unordered);
   } else {
     return intcon(0);
   }
@@ -4058,7 +4064,7 @@
     int count_field_idx = C->get_alias_index(count_field_type);
     return make_load(ctrl,
                      basic_plus_adr(str, str, count_offset),
-                     TypeInt::INT, T_INT, count_field_idx);
+                     TypeInt::INT, T_INT, count_field_idx, MemNode::unordered);
   } else {
     return load_array_length(load_String_value(ctrl, str));
   }
@@ -4074,7 +4080,7 @@
                                                    ciTypeArrayKlass::make(T_CHAR), true, 0);
   int value_field_idx = C->get_alias_index(value_field_type);
   Node* load = make_load(ctrl, basic_plus_adr(str, str, value_offset),
-                         value_type, T_OBJECT, value_field_idx);
+                         value_type, T_OBJECT, value_field_idx, MemNode::unordered);
   // String.value field is known to be @Stable.
   if (UseImplicitStableValues) {
     load = cast_array_to_stable(load, value_type);
@@ -4089,7 +4095,7 @@
   const TypePtr* offset_field_type = string_type->add_offset(offset_offset);
   int offset_field_idx = C->get_alias_index(offset_field_type);
   store_to_memory(ctrl, basic_plus_adr(str, offset_offset),
-                  value, T_INT, offset_field_idx);
+                  value, T_INT, offset_field_idx, MemNode::unordered);
 }
 
 void GraphKit::store_String_value(Node* ctrl, Node* str, Node* value) {
@@ -4099,7 +4105,7 @@
   const TypePtr* value_field_type = string_type->add_offset(value_offset);
 
   store_oop_to_object(ctrl, str,  basic_plus_adr(str, value_offset), value_field_type,
-      value, TypeAryPtr::CHARS, T_OBJECT);
+      value, TypeAryPtr::CHARS, T_OBJECT, MemNode::unordered);
 }
 
 void GraphKit::store_String_length(Node* ctrl, Node* str, Node* value) {
@@ -4109,7 +4115,7 @@
   const TypePtr* count_field_type = string_type->add_offset(count_offset);
   int count_field_idx = C->get_alias_index(count_field_type);
   store_to_memory(ctrl, basic_plus_adr(str, count_offset),
-                  value, T_INT, count_field_idx);
+                  value, T_INT, count_field_idx, MemNode::unordered);
 }
 
 Node* GraphKit::cast_array_to_stable(Node* ary, const TypeAryPtr* ary_type) {
--- a/src/share/vm/opto/graphKit.hpp	Thu Nov 07 11:47:11 2013 +0100
+++ b/src/share/vm/opto/graphKit.hpp	Fri Nov 15 11:05:32 2013 -0800
@@ -510,36 +510,50 @@
 
   // Create a LoadNode, reading from the parser's memory state.
   // (Note:  require_atomic_access is useful only with T_LONG.)
+  //
+  // We choose the unordered semantics by default because we have
+  // adapted the `do_put_xxx' and `do_get_xxx' procedures for the case
+  // of volatile fields.
   Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt,
-                  bool require_atomic_access = false) {
+                  MemNode::MemOrd mo, bool require_atomic_access = false) {
     // This version computes alias_index from bottom_type
     return make_load(ctl, adr, t, bt, adr->bottom_type()->is_ptr(),
-                     require_atomic_access);
+                     mo, require_atomic_access);
   }
-  Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, const TypePtr* adr_type, bool require_atomic_access = false) {
+  Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, const TypePtr* adr_type,
+                  MemNode::MemOrd mo, bool require_atomic_access = false) {
     // This version computes alias_index from an address type
     assert(adr_type != NULL, "use other make_load factory");
     return make_load(ctl, adr, t, bt, C->get_alias_index(adr_type),
-                     require_atomic_access);
+                     mo, require_atomic_access);
   }
   // This is the base version which is given an alias index.
-  Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, int adr_idx, bool require_atomic_access = false);
+  Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, int adr_idx,
+                  MemNode::MemOrd mo, bool require_atomic_access = false);
 
   // Create & transform a StoreNode and store the effect into the
   // parser's memory state.
+  //
+  // We must ensure that stores of object references will be visible
+  // only after the object's initialization. So the clients of this
+  // procedure must indicate that the store requires `release'
+  // semantics, if the stored value is an object reference that might
+  // point to a new object and may become externally visible.
   Node* store_to_memory(Node* ctl, Node* adr, Node* val, BasicType bt,
                         const TypePtr* adr_type,
+                        MemNode::MemOrd mo,
                         bool require_atomic_access = false) {
     // This version computes alias_index from an address type
     assert(adr_type != NULL, "use other store_to_memory factory");
     return store_to_memory(ctl, adr, val, bt,
                            C->get_alias_index(adr_type),
-                           require_atomic_access);
+                           mo, require_atomic_access);
   }
   // This is the base version which is given alias index
   // Return the new StoreXNode
   Node* store_to_memory(Node* ctl, Node* adr, Node* val, BasicType bt,
                         int adr_idx,
+                        MemNode::MemOrd,
                         bool require_atomic_access = false);
 
 
@@ -557,40 +571,44 @@
 
   Node* store_oop(Node* ctl,
                   Node* obj,   // containing obj
-                  Node* adr,  // actual adress to store val at
+                  Node* adr,   // actual adress to store val at
                   const TypePtr* adr_type,
                   Node* val,
                   const TypeOopPtr* val_type,
                   BasicType bt,
-                  bool use_precise);
+                  bool use_precise,
+                  MemNode::MemOrd mo);
 
   Node* store_oop_to_object(Node* ctl,
                             Node* obj,   // containing obj
-                            Node* adr,  // actual adress to store val at
+                            Node* adr,   // actual adress to store val at
                             const TypePtr* adr_type,
                             Node* val,
                             const TypeOopPtr* val_type,
-                            BasicType bt) {
-    return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, false);
+                            BasicType bt,
+                            MemNode::MemOrd mo) {
+    return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, false, mo);
   }
 
   Node* store_oop_to_array(Node* ctl,
                            Node* obj,   // containing obj
-                           Node* adr,  // actual adress to store val at
+                           Node* adr,   // actual adress to store val at
                            const TypePtr* adr_type,
                            Node* val,
                            const TypeOopPtr* val_type,
-                           BasicType bt) {
-    return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true);
+                           BasicType bt,
+                           MemNode::MemOrd mo) {
+    return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true, mo);
   }
 
   // Could be an array or object we don't know at compile time (unsafe ref.)
   Node* store_oop_to_unknown(Node* ctl,
                              Node* obj,   // containing obj
-                             Node* adr,  // actual adress to store val at
+                             Node* adr,   // actual adress to store val at
                              const TypePtr* adr_type,
                              Node* val,
-                             BasicType bt);
+                             BasicType bt,
+                             MemNode::MemOrd mo);
 
   // For the few case where the barriers need special help
   void pre_barrier(bool do_load, Node* ctl,
--- a/src/share/vm/opto/idealKit.cpp	Thu Nov 07 11:47:11 2013 +0100
+++ b/src/share/vm/opto/idealKit.cpp	Fri Nov 15 11:05:32 2013 -0800
@@ -359,25 +359,25 @@
   Node* mem = memory(adr_idx);
   Node* ld;
   if (require_atomic_access && bt == T_LONG) {
-    ld = LoadLNode::make_atomic(C, ctl, mem, adr, adr_type, t);
+    ld = LoadLNode::make_atomic(C, ctl, mem, adr, adr_type, t, MemNode::unordered);
   } else {
-    ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt);
+    ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, MemNode::unordered);
   }
   return transform(ld);
 }
 
 Node* IdealKit::store(Node* ctl, Node* adr, Node *val, BasicType bt,
-                                int adr_idx,
-                                bool require_atomic_access) {
-  assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
+                      int adr_idx,
+                      MemNode::MemOrd mo, bool require_atomic_access) {
+  assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory");
   const TypePtr* adr_type = NULL;
   debug_only(adr_type = C->get_adr_type(adr_idx));
   Node *mem = memory(adr_idx);
   Node* st;
   if (require_atomic_access && bt == T_LONG) {
-    st = StoreLNode::make_atomic(C, ctl, mem, adr, adr_type, val);
+    st = StoreLNode::make_atomic(C, ctl, mem, adr, adr_type, val, mo);
   } else {
-    st = StoreNode::make(_gvn, ctl, mem, adr, adr_type, val, bt);
+    st = StoreNode::make(_gvn, ctl, mem, adr, adr_type, val, bt, mo);
   }
   st = transform(st);
   set_memory(st, adr_idx);
--- a/src/share/vm/opto/idealKit.hpp	Thu Nov 07 11:47:11 2013 +0100
+++ b/src/share/vm/opto/idealKit.hpp	Fri Nov 15 11:05:32 2013 -0800
@@ -226,6 +226,7 @@
               Node* val,
               BasicType bt,
               int adr_idx,
+              MemNode::MemOrd mo,
               bool require_atomic_access = false);
 
   // Store a card mark ordered after store_oop
--- a/src/share/vm/opto/library_call.cpp	Thu Nov 07 11:47:11 2013 +0100
+++ b/src/share/vm/opto/library_call.cpp	Fri Nov 15 11:05:32 2013 -0800
@@ -1057,7 +1057,7 @@
   const Type* thread_type  = TypeOopPtr::make_from_klass(thread_klass)->cast_to_ptr_type(TypePtr::NotNull);
   Node* thread = _gvn.transform(new (C) ThreadLocalNode());
   Node* p = basic_plus_adr(top()/*!oop*/, thread, in_bytes(JavaThread::threadObj_offset()));
-  Node* threadObj = make_load(NULL, p, thread_type, T_OBJECT);
+  Node* threadObj = make_load(NULL, p, thread_type, T_OBJECT, MemNode::unordered);
   tls_output = thread;
   return threadObj;
 }
@@ -2640,7 +2640,7 @@
   if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
 
   if (!is_store) {
-    Node* p = make_load(control(), adr, value_type, type, adr_type, is_volatile);
+    Node* p = make_load(control(), adr, value_type, type, adr_type, MemNode::unordered, is_volatile);
     // load value
     switch (type) {
     case T_BOOLEAN:
@@ -2684,13 +2684,14 @@
       break;
     }
 
+    MemNode::MemOrd mo = is_volatile ? MemNode::release : MemNode::unordered;
     if (type != T_OBJECT ) {
-      (void) store_to_memory(control(), adr, val, type, adr_type, is_volatile);
+      (void) store_to_memory(control(), adr, val, type, adr_type, mo, is_volatile);
     } else {
       // Possibly an oop being stored to Java heap or native memory
       if (!TypePtr::NULL_PTR->higher_equal(_gvn.type(heap_base_oop))) {
         // oop to Java heap.
-        (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type);
+        (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo);
       } else {
         // We can't tell at compile time if we are storing in the Java heap or outside
         // of it. So we need to emit code to conditionally do the proper type of
@@ -2702,11 +2703,11 @@
         __ if_then(heap_base_oop, BoolTest::ne, null(), PROB_UNLIKELY(0.999)); {
           // Sync IdealKit and graphKit.
           sync_kit(ideal);
-          Node* st = store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type);
+          Node* st = store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo);
           // Update IdealKit memory.
           __ sync_kit(this);
         } __ else_(); {
-          __ store(__ ctrl(), adr, val, type, alias_type->index(), is_volatile);
+          __ store(__ ctrl(), adr, val, type, alias_type->index(), mo, is_volatile);
         } __ end_if();
         // Final sync IdealKit and GraphKit.
         final_sync(ideal);
@@ -2979,12 +2980,12 @@
       Node *newval_enc = _gvn.transform(new (C) EncodePNode(newval, newval->bottom_type()->make_narrowoop()));
       if (kind == LS_xchg) {
         load_store = _gvn.transform(new (C) GetAndSetNNode(control(), mem, adr,
-                                                              newval_enc, adr_type, value_type->make_narrowoop()));
+                                                           newval_enc, adr_type, value_type->make_narrowoop()));
       } else {
         assert(kind == LS_cmpxchg, "wrong LoadStore operation");
         Node *oldval_enc = _gvn.transform(new (C) EncodePNode(oldval, oldval->bottom_type()->make_narrowoop()));
         load_store = _gvn.transform(new (C) CompareAndSwapNNode(control(), mem, adr,
-                                                                   newval_enc, oldval_enc));
+                                                                newval_enc, oldval_enc));
       }
     } else
 #endif
@@ -3090,9 +3091,9 @@
   const bool require_atomic_access = true;
   Node* store;
   if (type == T_OBJECT) // reference stores need a store barrier.
-    store = store_oop_to_unknown(control(), base, adr, adr_type, val, type);
+    store = store_oop_to_unknown(control(), base, adr, adr_type, val, type, MemNode::release);
   else {
-    store = store_to_memory(control(), adr, val, type, adr_type, require_atomic_access);
+    store = store_to_memory(control(), adr, val, type, adr_type, MemNode::release, require_atomic_access);
   }
   insert_mem_bar(Op_MemBarCPUOrder);
   return true;
@@ -3152,7 +3153,7 @@
     Node* insp = basic_plus_adr(kls, in_bytes(InstanceKlass::init_state_offset()));
     // Use T_BOOLEAN for InstanceKlass::_init_state so the compiler
     // can generate code to load it as unsigned byte.
-    Node* inst = make_load(NULL, insp, TypeInt::UBYTE, T_BOOLEAN);
+    Node* inst = make_load(NULL, insp, TypeInt::UBYTE, T_BOOLEAN, MemNode::unordered);
     Node* bits = intcon(InstanceKlass::fully_initialized);
     test = _gvn.transform(new (C) SubINode(inst, bits));
     // The 'test' is non-zero if we need to take a slow path.
@@ -3176,14 +3177,14 @@
   kls = null_check(kls, T_OBJECT);
   ByteSize offset = TRACE_ID_OFFSET;
   Node* insp = basic_plus_adr(kls, in_bytes(offset));
-  Node* tvalue = make_load(NULL, insp, TypeLong::LONG, T_LONG);
+  Node* tvalue = make_load(NULL, insp, TypeLong::LONG, T_LONG, MemNode::unordered);
   Node* bits = longcon(~0x03l); // ignore bit 0 & 1
   Node* andl = _gvn.transform(new (C) AndLNode(tvalue, bits));
   Node* clsused = longcon(0x01l); // set the class bit
   Node* orl = _gvn.transform(new (C) OrLNode(tvalue, clsused));
 
   const TypePtr *adr_type = _gvn.type(insp)->isa_ptr();
-  store_to_memory(control(), insp, orl, T_LONG, adr_type);
+  store_to_memory(control(), insp, orl, T_LONG, adr_type, MemNode::unordered);
   set_result(andl);
   return true;
 }
@@ -3192,15 +3193,15 @@
   Node* tls_ptr = NULL;
   Node* cur_thr = generate_current_thread(tls_ptr);
   Node* p = basic_plus_adr(top()/*!oop*/, tls_ptr, in_bytes(JavaThread::osthread_offset()));
-  Node* osthread = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS);
+  Node* osthread = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS, MemNode::unordered);
   p = basic_plus_adr(top()/*!oop*/, osthread, in_bytes(OSThread::thread_id_offset()));
 
   Node* threadid = NULL;
   size_t thread_id_size = OSThread::thread_id_size();
   if (thread_id_size == (size_t) BytesPerLong) {
-    threadid = ConvL2I(make_load(control(), p, TypeLong::LONG, T_LONG));
+    threadid = ConvL2I(make_load(control(), p, TypeLong::LONG, T_LONG, MemNode::unordered));
   } else if (thread_id_size == (size_t) BytesPerInt) {
-    threadid = make_load(control(), p, TypeInt::INT, T_INT);
+    threadid = make_load(control(), p, TypeInt::INT, T_INT, MemNode::unordered);
   } else {
     ShouldNotReachHere();
   }
@@ -3275,11 +3276,11 @@
 
   // (b) Interrupt bit on TLS must be false.
   Node* p = basic_plus_adr(top()/*!oop*/, tls_ptr, in_bytes(JavaThread::osthread_offset()));
-  Node* osthread = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS);
+  Node* osthread = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS, MemNode::unordered);
   p = basic_plus_adr(top()/*!oop*/, osthread, in_bytes(OSThread::interrupted_offset()));
 
   // Set the control input on the field _interrupted read to prevent it floating up.
-  Node* int_bit = make_load(control(), p, TypeInt::BOOL, T_INT);
+  Node* int_bit = make_load(control(), p, TypeInt::BOOL, T_INT, MemNode::unordered);
   Node* cmp_bit = _gvn.transform(new (C) CmpINode(int_bit, intcon(0)));
   Node* bol_bit = _gvn.transform(new (C) BoolNode(cmp_bit, BoolTest::ne));
 
@@ -3347,7 +3348,7 @@
 // Given a klass oop, load its java mirror (a java.lang.Class oop).
 Node* LibraryCallKit::load_mirror_from_klass(Node* klass) {
   Node* p = basic_plus_adr(klass, in_bytes(Klass::java_mirror_offset()));
-  return make_load(NULL, p, TypeInstPtr::MIRROR, T_OBJECT);
+  return make_load(NULL, p, TypeInstPtr::MIRROR, T_OBJECT, MemNode::unordered);
 }
 
 //-----------------------load_klass_from_mirror_common-------------------------
@@ -3384,7 +3385,7 @@
   // Branch around if the given klass has the given modifier bit set.
   // Like generate_guard, adds a new path onto the region.
   Node* modp = basic_plus_adr(kls, in_bytes(Klass::access_flags_offset()));
-  Node* mods = make_load(NULL, modp, TypeInt::INT, T_INT);
+  Node* mods = make_load(NULL, modp, TypeInt::INT, T_INT, MemNode::unordered);
   Node* mask = intcon(modifier_mask);
   Node* bits = intcon(modifier_bits);
   Node* mbit = _gvn.transform(new (C) AndINode(mods, mask));
@@ -3501,7 +3502,7 @@
 
   case vmIntrinsics::_getModifiers:
     p = basic_plus_adr(kls, in_bytes(Klass::modifier_flags_offset()));
-    query_value = make_load(NULL, p, TypeInt::INT, T_INT);
+    query_value = make_load(NULL, p, TypeInt::INT, T_INT, MemNode::unordered);
     break;
 
   case vmIntrinsics::_isInterface:
@@ -3559,7 +3560,7 @@
       // Be sure to pin the oop load to the guard edge just created:
       Node* is_array_ctrl = region->in(region->req()-1);
       Node* cma = basic_plus_adr(kls, in_bytes(ArrayKlass::component_mirror_offset()));
-      Node* cmo = make_load(is_array_ctrl, cma, TypeInstPtr::MIRROR, T_OBJECT);
+      Node* cmo = make_load(is_array_ctrl, cma, TypeInstPtr::MIRROR, T_OBJECT, MemNode::unordered);
       phi->add_req(cmo);
     }
     query_value = null();  // non-array case is null
@@ -3567,7 +3568,7 @@
 
   case vmIntrinsics::_getClassAccessFlags:
     p = basic_plus_adr(kls, in_bytes(Klass::access_flags_offset()));
-    query_value = make_load(NULL, p, TypeInt::INT, T_INT);
+    query_value = make_load(NULL, p, TypeInt::INT, T_INT, MemNode::unordered);
     break;
 
   default:
@@ -3933,7 +3934,7 @@
                      vtable_index*vtableEntry::size()) * wordSize +
                      vtableEntry::method_offset_in_bytes();
   Node* entry_addr  = basic_plus_adr(obj_klass, entry_offset);
-  Node* target_call = make_load(NULL, entry_addr, TypePtr::NOTNULL, T_ADDRESS);
+  Node* target_call = make_load(NULL, entry_addr, TypePtr::NOTNULL, T_ADDRESS, MemNode::unordered);
 
   // Compare the target method with the expected method (e.g., Object.hashCode).
   const TypePtr* native_call_addr = TypeMetadataPtr::make(method);
@@ -4059,7 +4060,7 @@
 
   // Get the header out of the object, use LoadMarkNode when available
   Node* header_addr = basic_plus_adr(obj, oopDesc::mark_offset_in_bytes());
-  Node* header = make_load(control(), header_addr, TypeX_X, TypeX_X->basic_type());
+  Node* header = make_load(control(), header_addr, TypeX_X, TypeX_X->basic_type(), MemNode::unordered);
 
   // Test the header to see if it is unlocked.
   Node *lock_mask      = _gvn.MakeConX(markOopDesc::biased_lock_mask_in_place);
@@ -5480,7 +5481,7 @@
         // Store a zero to the immediately preceding jint:
         Node* x1 = _gvn.transform(new(C) AddXNode(start, MakeConX(-bump_bit)));
         Node* p1 = basic_plus_adr(dest, x1);
-        mem = StoreNode::make(_gvn, control(), mem, p1, adr_type, intcon(0), T_INT);
+        mem = StoreNode::make(_gvn, control(), mem, p1, adr_type, intcon(0), T_INT, MemNode::unordered);
         mem = _gvn.transform(mem);
       }
     }
@@ -5530,8 +5531,8 @@
         ((src_off ^ dest_off) & (BytesPerLong-1)) == 0) {
       Node* sptr = basic_plus_adr(src,  src_off);
       Node* dptr = basic_plus_adr(dest, dest_off);
-      Node* sval = make_load(control(), sptr, TypeInt::INT, T_INT, adr_type);
-      store_to_memory(control(), dptr, sval, T_INT, adr_type);
+      Node* sval = make_load(control(), sptr, TypeInt::INT, T_INT, adr_type, MemNode::unordered);
+      store_to_memory(control(), dptr, sval, T_INT, adr_type, MemNode::unordered);
       src_off += BytesPerInt;
       dest_off += BytesPerInt;
     } else {
@@ -5596,7 +5597,7 @@
   // super_check_offset, for the desired klass.
   int sco_offset = in_bytes(Klass::super_check_offset_offset());
   Node* p3 = basic_plus_adr(dest_elem_klass, sco_offset);
-  Node* n3 = new(C) LoadINode(NULL, memory(p3), p3, _gvn.type(p3)->is_ptr());
+  Node* n3 = new(C) LoadINode(NULL, memory(p3), p3, _gvn.type(p3)->is_ptr(), TypeInt::INT, MemNode::unordered);
   Node* check_offset = ConvI2X(_gvn.transform(n3));
   Node* check_value  = dest_elem_klass;
 
@@ -5737,7 +5738,7 @@
   Node* base = makecon(TypeRawPtr::make(StubRoutines::crc_table_addr()));
   Node* offset = _gvn.transform(new (C) LShiftINode(result, intcon(0x2)));
   Node* adr = basic_plus_adr(top(), base, ConvI2X(offset));
-  result = make_load(control(), adr, TypeInt::INT, T_INT);
+  result = make_load(control(), adr, TypeInt::INT, T_INT, MemNode::unordered);
 
   crc = _gvn.transform(new (C) URShiftINode(crc, intcon(8)));
   result = _gvn.transform(new (C) XorINode(crc, result));
@@ -5838,7 +5839,7 @@
   const TypeOopPtr* object_type = TypeOopPtr::make_from_klass(klass);
 
   Node* no_ctrl = NULL;
-  Node* result = make_load(no_ctrl, adr, object_type, T_OBJECT);
+  Node* result = make_load(no_ctrl, adr, object_type, T_OBJECT, MemNode::unordered);
 
   // Use the pre-barrier to record the value in the referent field
   pre_barrier(false /* do_load */,
@@ -5885,7 +5886,7 @@
   const Type *type = TypeOopPtr::make_from_klass(field_klass->as_klass());
 
   // Build the load.
-  Node* loadedField = make_load(NULL, adr, type, bt, adr_type, is_vol);
+  Node* loadedField = make_load(NULL, adr, type, bt, adr_type, MemNode::unordered, is_vol);
   return loadedField;
 }
 
--- a/src/share/vm/opto/macro.cpp	Thu Nov 07 11:47:11 2013 +0100
+++ b/src/share/vm/opto/macro.cpp	Fri Nov 15 11:05:32 2013 -0800
@@ -1084,7 +1084,7 @@
 Node* PhaseMacroExpand::make_load(Node* ctl, Node* mem, Node* base, int offset, const Type* value_type, BasicType bt) {
   Node* adr = basic_plus_adr(base, offset);
   const TypePtr* adr_type = adr->bottom_type()->is_ptr();
-  Node* value = LoadNode::make(_igvn, ctl, mem, adr, adr_type, value_type, bt);
+  Node* value = LoadNode::make(_igvn, ctl, mem, adr, adr_type, value_type, bt, MemNode::unordered);
   transform_later(value);
   return value;
 }
@@ -1092,7 +1092,7 @@
 
 Node* PhaseMacroExpand::make_store(Node* ctl, Node* mem, Node* base, int offset, Node* value, BasicType bt) {
   Node* adr = basic_plus_adr(base, offset);
-  mem = StoreNode::make(_igvn, ctl, mem, adr, NULL, value, bt);
+  mem = StoreNode::make(_igvn, ctl, mem, adr, NULL, value, bt, MemNode::unordered);
   transform_later(mem);
   return mem;
 }
@@ -1272,8 +1272,8 @@
     // Load(-locked) the heap top.
     // See note above concerning the control input when using a TLAB
     Node *old_eden_top = UseTLAB
-      ? new (C) LoadPNode      (ctrl, contended_phi_rawmem, eden_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM)
-      : new (C) LoadPLockedNode(contended_region, contended_phi_rawmem, eden_top_adr);
+      ? new (C) LoadPNode      (ctrl, contended_phi_rawmem, eden_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM, MemNode::unordered)
+      : new (C) LoadPLockedNode(contended_region, contended_phi_rawmem, eden_top_adr, MemNode::acquire);
 
     transform_later(old_eden_top);
     // Add to heap top to get a new heap top
@@ -1320,7 +1320,7 @@
     if (UseTLAB) {
       Node* store_eden_top =
         new (C) StorePNode(needgc_false, contended_phi_rawmem, eden_top_adr,
-                              TypeRawPtr::BOTTOM, new_eden_top);
+                              TypeRawPtr::BOTTOM, new_eden_top, MemNode::unordered);
       transform_later(store_eden_top);
       fast_oop_ctrl = needgc_false; // No contention, so this is the fast path
       fast_oop_rawmem = store_eden_top;
@@ -1700,9 +1700,10 @@
                    _igvn.MakeConX(in_bytes(JavaThread::tlab_pf_top_offset())) );
       transform_later(eden_pf_adr);
 
-      Node *old_pf_wm = new (C) LoadPNode( needgc_false,
+      Node *old_pf_wm = new (C) LoadPNode(needgc_false,
                                    contended_phi_rawmem, eden_pf_adr,
-                                   TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM );
+                                   TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM,
+                                   MemNode::unordered);
       transform_later(old_pf_wm);
 
       // check against new_eden_top
@@ -1726,9 +1727,10 @@
       transform_later(new_pf_wmt );
       new_pf_wmt->set_req(0, need_pf_true);
 
-      Node *store_new_wmt = new (C) StorePNode( need_pf_true,
+      Node *store_new_wmt = new (C) StorePNode(need_pf_true,
                                        contended_phi_rawmem, eden_pf_adr,
-                                       TypeRawPtr::BOTTOM, new_pf_wmt );
+                                       TypeRawPtr::BOTTOM, new_pf_wmt,
+                                       MemNode::unordered);
       transform_later(store_new_wmt);
 
       // adding prefetches
--- a/src/share/vm/opto/matcher.cpp	Thu Nov 07 11:47:11 2013 +0100
+++ b/src/share/vm/opto/matcher.cpp	Fri Nov 15 11:05:32 2013 -0800
@@ -825,16 +825,15 @@
 
   // Compute generic short-offset Loads
 #ifdef _LP64
-  MachNode *spillCP = match_tree(new (C) LoadNNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM));
+  MachNode *spillCP = match_tree(new (C) LoadNNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM,MemNode::unordered));
 #endif
-  MachNode *spillI  = match_tree(new (C) LoadINode(NULL,mem,fp,atp));
-  MachNode *spillL  = match_tree(new (C) LoadLNode(NULL,mem,fp,atp));
-  MachNode *spillF  = match_tree(new (C) LoadFNode(NULL,mem,fp,atp));
-  MachNode *spillD  = match_tree(new (C) LoadDNode(NULL,mem,fp,atp));
-  MachNode *spillP  = match_tree(new (C) LoadPNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM));
+  MachNode *spillI  = match_tree(new (C) LoadINode(NULL,mem,fp,atp,TypeInt::INT,MemNode::unordered));
+  MachNode *spillL  = match_tree(new (C) LoadLNode(NULL,mem,fp,atp,TypeLong::LONG,MemNode::unordered,false));
+  MachNode *spillF  = match_tree(new (C) LoadFNode(NULL,mem,fp,atp,Type::FLOAT,MemNode::unordered));
+  MachNode *spillD  = match_tree(new (C) LoadDNode(NULL,mem,fp,atp,Type::DOUBLE,MemNode::unordered));
+  MachNode *spillP  = match_tree(new (C) LoadPNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM,MemNode::unordered));
   assert(spillI != NULL && spillL != NULL && spillF != NULL &&
          spillD != NULL && spillP != NULL, "");
-
   // Get the ADLC notion of the right regmask, for each basic type.
 #ifdef _LP64
   idealreg2regmask[Op_RegN] = &spillCP->out_RegMask();
--- a/src/share/vm/opto/memnode.cpp	Thu Nov 07 11:47:11 2013 +0100
+++ b/src/share/vm/opto/memnode.cpp	Fri Nov 15 11:05:32 2013 -0800
@@ -907,7 +907,7 @@
 
 //----------------------------LoadNode::make-----------------------------------
 // Polymorphic factory method:
-Node *LoadNode::make( PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt ) {
+Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt, MemOrd mo) {
   Compile* C = gvn.C;
 
   // sanity check the alias category against the created node type
@@ -923,34 +923,34 @@
           rt->isa_oopptr() || is_immutable_value(adr),
           "raw memory operations should have control edge");
   switch (bt) {
-  case T_BOOLEAN: return new (C) LoadUBNode(ctl, mem, adr, adr_type, rt->is_int()    );
-  case T_BYTE:    return new (C) LoadBNode (ctl, mem, adr, adr_type, rt->is_int()    );
-  case T_INT:     return new (C) LoadINode (ctl, mem, adr, adr_type, rt->is_int()    );
-  case T_CHAR:    return new (C) LoadUSNode(ctl, mem, adr, adr_type, rt->is_int()    );
-  case T_SHORT:   return new (C) LoadSNode (ctl, mem, adr, adr_type, rt->is_int()    );
-  case T_LONG:    return new (C) LoadLNode (ctl, mem, adr, adr_type, rt->is_long()   );
-  case T_FLOAT:   return new (C) LoadFNode (ctl, mem, adr, adr_type, rt              );
-  case T_DOUBLE:  return new (C) LoadDNode (ctl, mem, adr, adr_type, rt              );
-  case T_ADDRESS: return new (C) LoadPNode (ctl, mem, adr, adr_type, rt->is_ptr()    );
+  case T_BOOLEAN: return new (C) LoadUBNode(ctl, mem, adr, adr_type, rt->is_int(),  mo);
+  case T_BYTE:    return new (C) LoadBNode (ctl, mem, adr, adr_type, rt->is_int(),  mo);
+  case T_INT:     return new (C) LoadINode (ctl, mem, adr, adr_type, rt->is_int(),  mo);
+  case T_CHAR:    return new (C) LoadUSNode(ctl, mem, adr, adr_type, rt->is_int(),  mo);
+  case T_SHORT:   return new (C) LoadSNode (ctl, mem, adr, adr_type, rt->is_int(),  mo);
+  case T_LONG:    return new (C) LoadLNode (ctl, mem, adr, adr_type, rt->is_long(), mo);
+  case T_FLOAT:   return new (C) LoadFNode (ctl, mem, adr, adr_type, rt,            mo);
+  case T_DOUBLE:  return new (C) LoadDNode (ctl, mem, adr, adr_type, rt,            mo);
+  case T_ADDRESS: return new (C) LoadPNode (ctl, mem, adr, adr_type, rt->is_ptr(),  mo);
   case T_OBJECT:
 #ifdef _LP64
     if (adr->bottom_type()->is_ptr_to_narrowoop()) {
-      Node* load  = gvn.transform(new (C) LoadNNode(ctl, mem, adr, adr_type, rt->make_narrowoop()));
+      Node* load  = gvn.transform(new (C) LoadNNode(ctl, mem, adr, adr_type, rt->make_narrowoop(), mo));
       return new (C) DecodeNNode(load, load->bottom_type()->make_ptr());
     } else
 #endif
     {
       assert(!adr->bottom_type()->is_ptr_to_narrowoop() && !adr->bottom_type()->is_ptr_to_narrowklass(), "should have got back a narrow oop");
-      return new (C) LoadPNode(ctl, mem, adr, adr_type, rt->is_oopptr());
+      return new (C) LoadPNode(ctl, mem, adr, adr_type, rt->is_oopptr(), mo);
     }
   }
   ShouldNotReachHere();
   return (LoadNode*)NULL;
 }
 
-LoadLNode* LoadLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt) {
+LoadLNode* LoadLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo) {
   bool require_atomic = true;
-  return new (C) LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), require_atomic);
+  return new (C) LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), mo, require_atomic);
 }
 
 
@@ -2032,12 +2032,12 @@
 #ifdef _LP64
   if (adr_type->is_ptr_to_narrowklass()) {
     assert(UseCompressedClassPointers, "no compressed klasses");
-    Node* load_klass = gvn.transform(new (C) LoadNKlassNode(ctl, mem, adr, at, tk->make_narrowklass()));
+    Node* load_klass = gvn.transform(new (C) LoadNKlassNode(ctl, mem, adr, at, tk->make_narrowklass(), MemNode::unordered));
     return new (C) DecodeNKlassNode(load_klass, load_klass->bottom_type()->make_ptr());
   }
 #endif
   assert(!adr_type->is_ptr_to_narrowklass() && !adr_type->is_ptr_to_narrowoop(), "should have got back a narrow oop");
-  return new (C) LoadKlassNode(ctl, mem, adr, at, tk);
+  return new (C) LoadKlassNode(ctl, mem, adr, at, tk, MemNode::unordered);
 }
 
 //------------------------------Value------------------------------------------
@@ -2347,45 +2347,46 @@
 //=============================================================================
 //---------------------------StoreNode::make-----------------------------------
 // Polymorphic factory method:
-StoreNode* StoreNode::make( PhaseGVN& gvn, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, BasicType bt ) {
+StoreNode* StoreNode::make(PhaseGVN& gvn, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, BasicType bt, MemOrd mo) {
+  assert((mo == unordered || mo == release), "unexpected");
   Compile* C = gvn.C;
-  assert( C->get_alias_index(adr_type) != Compile::AliasIdxRaw ||
-          ctl != NULL, "raw memory operations should have control edge");
+  assert(C->get_alias_index(adr_type) != Compile::AliasIdxRaw ||
+         ctl != NULL, "raw memory operations should have control edge");
 
   switch (bt) {
   case T_BOOLEAN:
-  case T_BYTE:    return new (C) StoreBNode(ctl, mem, adr, adr_type, val);
-  case T_INT:     return new (C) StoreINode(ctl, mem, adr, adr_type, val);
+  case T_BYTE:    return new (C) StoreBNode(ctl, mem, adr, adr_type, val, mo);
+  case T_INT:     return new (C) StoreINode(ctl, mem, adr, adr_type, val, mo);
   case T_CHAR:
-  case T_SHORT:   return new (C) StoreCNode(ctl, mem, adr, adr_type, val);
-  case T_LONG:    return new (C) StoreLNode(ctl, mem, adr, adr_type, val);
-  case T_FLOAT:   return new (C) StoreFNode(ctl, mem, adr, adr_type, val);
-  case T_DOUBLE:  return new (C) StoreDNode(ctl, mem, adr, adr_type, val);
+  case T_SHORT:   return new (C) StoreCNode(ctl, mem, adr, adr_type, val, mo);
+  case T_LONG:    return new (C) StoreLNode(ctl, mem, adr, adr_type, val, mo);
+  case T_FLOAT:   return new (C) StoreFNode(ctl, mem, adr, adr_type, val, mo);
+  case T_DOUBLE:  return new (C) StoreDNode(ctl, mem, adr, adr_type, val, mo);
   case T_METADATA:
   case T_ADDRESS:
   case T_OBJECT:
 #ifdef _LP64
     if (adr->bottom_type()->is_ptr_to_narrowoop()) {
       val = gvn.transform(new (C) EncodePNode(val, val->bottom_type()->make_narrowoop()));
-      return new (C) StoreNNode(ctl, mem, adr, adr_type, val);
+      return new (C) StoreNNode(ctl, mem, adr, adr_type, val, mo);
     } else if (adr->bottom_type()->is_ptr_to_narrowklass() ||
                (UseCompressedClassPointers && val->bottom_type()->isa_klassptr() &&
                 adr->bottom_type()->isa_rawptr())) {
       val = gvn.transform(new (C) EncodePKlassNode(val, val->bottom_type()->make_narrowklass()));
-      return new (C) StoreNKlassNode(ctl, mem, adr, adr_type, val);
+      return new (C) StoreNKlassNode(ctl, mem, adr, adr_type, val, mo);
     }
 #endif
     {
-      return new (C) StorePNode(ctl, mem, adr, adr_type, val);
+      return new (C) StorePNode(ctl, mem, adr, adr_type, val, mo);
     }
   }
   ShouldNotReachHere();
   return (StoreNode*)NULL;
 }
 
-StoreLNode* StoreLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val) {
+StoreLNode* StoreLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, MemOrd mo) {
   bool require_atomic = true;
-  return new (C) StoreLNode(ctl, mem, adr, adr_type, val, require_atomic);
+  return new (C) StoreLNode(ctl, mem, adr, adr_type, val, mo, require_atomic);
 }
 
 
@@ -2778,12 +2779,12 @@
 
   Node *zero = phase->makecon(TypeLong::ZERO);
   Node *off  = phase->MakeConX(BytesPerLong);
-  mem = new (phase->C) StoreLNode(in(0),mem,adr,atp,zero);
+  mem = new (phase->C) StoreLNode(in(0),mem,adr,atp,zero,MemNode::unordered,false);
   count--;
   while( count-- ) {
     mem = phase->transform(mem);
     adr = phase->transform(new (phase->C) AddPNode(base,adr,off));
-    mem = new (phase->C) StoreLNode(in(0),mem,adr,atp,zero);
+    mem = new (phase->C) StoreLNode(in(0),mem,adr,atp,zero,MemNode::unordered,false);
   }
   return mem;
 }
@@ -2827,7 +2828,7 @@
     Node* adr = new (C) AddPNode(dest, dest, phase->MakeConX(offset));
     adr = phase->transform(adr);
     const TypePtr* atp = TypeRawPtr::BOTTOM;
-    mem = StoreNode::make(*phase, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT);
+    mem = StoreNode::make(*phase, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT, MemNode::unordered);
     mem = phase->transform(mem);
     offset += BytesPerInt;
   }
@@ -2888,7 +2889,7 @@
     Node* adr = new (C) AddPNode(dest, dest, phase->MakeConX(done_offset));
     adr = phase->transform(adr);
     const TypePtr* atp = TypeRawPtr::BOTTOM;
-    mem = StoreNode::make(*phase, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT);
+    mem = StoreNode::make(*phase, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT, MemNode::unordered);
     mem = phase->transform(mem);
     done_offset += BytesPerInt;
   }
@@ -3762,14 +3763,14 @@
       ++new_long;
       off[nst] = offset;
       st[nst++] = StoreNode::make(*phase, ctl, zmem, adr, atp,
-                                  phase->longcon(con), T_LONG);
+                                  phase->longcon(con), T_LONG, MemNode::unordered);
     } else {
       // Omit either if it is a zero.
       if (con0 != 0) {
         ++new_int;
         off[nst]  = offset;
         st[nst++] = StoreNode::make(*phase, ctl, zmem, adr, atp,
-                                    phase->intcon(con0), T_INT);
+                                    phase->intcon(con0), T_INT, MemNode::unordered);
       }
       if (con1 != 0) {
         ++new_int;
@@ -3777,7 +3778,7 @@
         adr = make_raw_address(offset, phase);
         off[nst]  = offset;
         st[nst++] = StoreNode::make(*phase, ctl, zmem, adr, atp,
-                                    phase->intcon(con1), T_INT);
+                                    phase->intcon(con1), T_INT, MemNode::unordered);
       }
     }
 
--- a/src/share/vm/opto/memnode.hpp	Thu Nov 07 11:47:11 2013 +0100
+++ b/src/share/vm/opto/memnode.hpp	Fri Nov 15 11:05:32 2013 -0800
@@ -51,6 +51,10 @@
          ValueIn,               // Value to store
          OopStore               // Preceeding oop store, only in StoreCM
   };
+  typedef enum { unordered = 0,
+                 acquire,       // Load has to acquire or be succeeded by MemBarAcquire.
+                 release        // Store has to release or be preceded by MemBarRelease.
+  } MemOrd;
 protected:
   MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at )
     : Node(c0,c1,c2   ) {
@@ -134,20 +138,32 @@
 //------------------------------LoadNode---------------------------------------
 // Load value; requires Memory and Address
 class LoadNode : public MemNode {
+private:
+  // On platforms with weak memory ordering (e.g., PPC, Ia64) we distinguish
+  // loads that can be reordered, and such requiring acquire semantics to
+  // adhere to the Java specification.  The required behaviour is stored in
+  // this field.
+  const MemOrd _mo;
+
 protected:
-  virtual uint cmp( const Node &n ) const;
+  virtual uint cmp(const Node &n) const;
   virtual uint size_of() const; // Size is bigger
   const Type* const _type;      // What kind of value is loaded?
 public:
 
-  LoadNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *rt )
-    : MemNode(c,mem,adr,at), _type(rt) {
+  LoadNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *rt, MemOrd mo)
+    : MemNode(c,mem,adr,at), _type(rt), _mo(mo) {
     init_class_id(Class_Load);
   }
+  inline bool is_unordered() const { return !is_acquire(); }
+  inline bool is_acquire() const {
+    assert(_mo == unordered || _mo == acquire, "unexpected");
+    return _mo == acquire;
+  }
 
   // Polymorphic factory method:
-  static Node* make( PhaseGVN& gvn, Node *c, Node *mem, Node *adr,
-                     const TypePtr* at, const Type *rt, BasicType bt );
+   static Node* make(PhaseGVN& gvn, Node *c, Node *mem, Node *adr,
+                     const TypePtr* at, const Type *rt, BasicType bt, MemOrd mo);
 
   virtual uint hash()   const;  // Check the type
 
@@ -210,8 +226,8 @@
 // Load a byte (8bits signed) from memory
 class LoadBNode : public LoadNode {
 public:
-  LoadBNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE )
-    : LoadNode(c,mem,adr,at,ti) {}
+  LoadBNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti, MemOrd mo)
+    : LoadNode(c, mem, adr, at, ti, mo) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
@@ -224,8 +240,8 @@
 // Load a unsigned byte (8bits unsigned) from memory
 class LoadUBNode : public LoadNode {
 public:
-  LoadUBNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt* ti = TypeInt::UBYTE )
-    : LoadNode(c, mem, adr, at, ti) {}
+  LoadUBNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt* ti, MemOrd mo)
+    : LoadNode(c, mem, adr, at, ti, mo) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node* Ideal(PhaseGVN *phase, bool can_reshape);
@@ -238,8 +254,8 @@
 // Load an unsigned short/char (16bits unsigned) from memory
 class LoadUSNode : public LoadNode {
 public:
-  LoadUSNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR )
-    : LoadNode(c,mem,adr,at,ti) {}
+  LoadUSNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti, MemOrd mo)
+    : LoadNode(c, mem, adr, at, ti, mo) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
@@ -252,8 +268,8 @@
 // Load a short (16bits signed) from memory
 class LoadSNode : public LoadNode {
 public:
-  LoadSNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT )
-    : LoadNode(c,mem,adr,at,ti) {}
+  LoadSNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti, MemOrd mo)
+    : LoadNode(c, mem, adr, at, ti, mo) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
@@ -266,8 +282,8 @@
 // Load an integer from memory
 class LoadINode : public LoadNode {
 public:
-  LoadINode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::INT )
-    : LoadNode(c,mem,adr,at,ti) {}
+  LoadINode(Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti, MemOrd mo)
+    : LoadNode(c, mem, adr, at, ti, mo) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual int store_Opcode() const { return Op_StoreI; }
@@ -278,8 +294,8 @@
 // Load an array length from the array
 class LoadRangeNode : public LoadINode {
 public:
-  LoadRangeNode( Node *c, Node *mem, Node *adr, const TypeInt *ti = TypeInt::POS )
-    : LoadINode(c,mem,adr,TypeAryPtr::RANGE,ti) {}
+  LoadRangeNode(Node *c, Node *mem, Node *adr, const TypeInt *ti = TypeInt::POS)
+    : LoadINode(c, mem, adr, TypeAryPtr::RANGE, ti, MemNode::unordered) {}
   virtual int Opcode() const;
   virtual const Type *Value( PhaseTransform *phase ) const;
   virtual Node *Identity( PhaseTransform *phase );
@@ -298,18 +314,16 @@
   const bool _require_atomic_access;  // is piecewise load forbidden?
 
 public:
-  LoadLNode( Node *c, Node *mem, Node *adr, const TypePtr* at,
-             const TypeLong *tl = TypeLong::LONG,
-             bool require_atomic_access = false )
-    : LoadNode(c,mem,adr,at,tl)
-    , _require_atomic_access(require_atomic_access)
-  {}
+  LoadLNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeLong *tl,
+            MemOrd mo, bool require_atomic_access = false)
+    : LoadNode(c, mem, adr, at, tl, mo), _require_atomic_access(require_atomic_access) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegL; }
   virtual int store_Opcode() const { return Op_StoreL; }
   virtual BasicType memory_type() const { return T_LONG; }
   bool require_atomic_access() { return _require_atomic_access; }
-  static LoadLNode* make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt);
+  static LoadLNode* make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type,
+                                const Type* rt, MemOrd mo);
 #ifndef PRODUCT
   virtual void dump_spec(outputStream *st) const {
     LoadNode::dump_spec(st);
@@ -322,8 +336,8 @@
 // Load a long from unaligned memory
 class LoadL_unalignedNode : public LoadLNode {
 public:
-  LoadL_unalignedNode( Node *c, Node *mem, Node *adr, const TypePtr* at )
-    : LoadLNode(c,mem,adr,at) {}
+  LoadL_unalignedNode(Node *c, Node *mem, Node *adr, const TypePtr* at, MemOrd mo)
+    : LoadLNode(c, mem, adr, at, TypeLong::LONG, mo) {}
   virtual int Opcode() const;
 };
 
@@ -331,8 +345,8 @@
 // Load a float (64 bits) from memory
 class LoadFNode : public LoadNode {
 public:
-  LoadFNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *t = Type::FLOAT )
-    : LoadNode(c,mem,adr,at,t) {}
+  LoadFNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *t, MemOrd mo)
+    : LoadNode(c, mem, adr, at, t, mo) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegF; }
   virtual int store_Opcode() const { return Op_StoreF; }
@@ -343,8 +357,8 @@
 // Load a double (64 bits) from memory
 class LoadDNode : public LoadNode {
 public:
-  LoadDNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *t = Type::DOUBLE )
-    : LoadNode(c,mem,adr,at,t) {}
+  LoadDNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *t, MemOrd mo)
+    : LoadNode(c, mem, adr, at, t, mo) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegD; }
   virtual int store_Opcode() const { return Op_StoreD; }
@@ -355,8 +369,8 @@
 // Load a double from unaligned memory
 class LoadD_unalignedNode : public LoadDNode {
 public:
-  LoadD_unalignedNode( Node *c, Node *mem, Node *adr, const TypePtr* at )
-    : LoadDNode(c,mem,adr,at) {}
+  LoadD_unalignedNode(Node *c, Node *mem, Node *adr, const TypePtr* at, MemOrd mo)
+    : LoadDNode(c, mem, adr, at, Type::DOUBLE, mo) {}
   virtual int Opcode() const;
 };
 
@@ -364,8 +378,8 @@
 // Load a pointer from memory (either object or array)
 class LoadPNode : public LoadNode {
 public:
-  LoadPNode( Node *c, Node *mem, Node *adr, const TypePtr *at, const TypePtr* t )
-    : LoadNode(c,mem,adr,at,t) {}
+  LoadPNode(Node *c, Node *mem, Node *adr, const TypePtr *at, const TypePtr* t, MemOrd mo)
+    : LoadNode(c, mem, adr, at, t, mo) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegP; }
   virtual int store_Opcode() const { return Op_StoreP; }
@@ -387,8 +401,8 @@
 // Load a narrow oop from memory (either object or array)
 class LoadNNode : public LoadNode {
 public:
-  LoadNNode( Node *c, Node *mem, Node *adr, const TypePtr *at, const Type* t )
-    : LoadNode(c,mem,adr,at,t) {}
+  LoadNNode(Node *c, Node *mem, Node *adr, const TypePtr *at, const Type* t, MemOrd mo)
+    : LoadNode(c, mem, adr, at, t, mo) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegN; }
   virtual int store_Opcode() const { return Op_StoreN; }
@@ -409,8 +423,8 @@
 // Load a Klass from an object
 class LoadKlassNode : public LoadPNode {
 public:
-  LoadKlassNode( Node *c, Node *mem, Node *adr, const TypePtr *at, const TypeKlassPtr *tk )
-    : LoadPNode(c,mem,adr,at,tk) {}
+  LoadKlassNode(Node *c, Node *mem, Node *adr, const TypePtr *at, const TypeKlassPtr *tk, MemOrd mo)
+    : LoadPNode(c, mem, adr, at, tk, mo) {}
   virtual int Opcode() const;
   virtual const Type *Value( PhaseTransform *phase ) const;
   virtual Node *Identity( PhaseTransform *phase );
@@ -425,8 +439,8 @@
 // Load a narrow Klass from an object.
 class LoadNKlassNode : public LoadNNode {
 public:
-  LoadNKlassNode( Node *c, Node *mem, Node *adr, const TypePtr *at, const TypeNarrowKlass *tk )
-    : LoadNNode(c,mem,adr,at,tk) {}
+  LoadNKlassNode(Node *c, Node *mem, Node *adr, const TypePtr *at, const TypeNarrowKlass *tk, MemOrd mo)
+    : LoadNNode(c, mem, adr, at, tk, mo) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegN; }
   virtual int store_Opcode() const { return Op_StoreNKlass; }
@@ -441,6 +455,14 @@
 //------------------------------StoreNode--------------------------------------
 // Store value; requires Store, Address and Value
 class StoreNode : public MemNode {
+private:
+  // On platforms with weak memory ordering (e.g., PPC, Ia64) we distinguish
+  // stores that can be reordered, and such requiring release semantics to
+  // adhere to the Java specification.  The required behaviour is stored in
+  // this field.
+  const MemOrd _mo;
+  // Needed for proper cloning.
+  virtual uint size_of() const { return sizeof(*this); }
 protected:
   virtual uint cmp( const Node &n ) const;
   virtual bool depends_only_on_test() const { return false; }
@@ -449,18 +471,44 @@
   Node *Ideal_sign_extended_input(PhaseGVN *phase, int  num_bits);
 
 public:
-  StoreNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val )
-    : MemNode(c,mem,adr,at,val) {
+  // We must ensure that stores of object references will be visible
+  // only after the object's initialization. So the callers of this
+  // procedure must indicate that the store requires `release'
+  // semantics, if the stored value is an object reference that might
+  // point to a new object and may become externally visible.
+  StoreNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, MemOrd mo)
+    : MemNode(c, mem, adr, at, val), _mo(mo) {
     init_class_id(Class_Store);
   }
-  StoreNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, Node *oop_store )
-    : MemNode(c,mem,adr,at,val,oop_store) {
+  StoreNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, Node *oop_store, MemOrd mo)
+    : MemNode(c, mem, adr, at, val, oop_store), _mo(mo) {
     init_class_id(Class_Store);
   }
 
-  // Polymorphic factory method:
-  static StoreNode* make( PhaseGVN& gvn, Node *c, Node *mem, Node *adr,
-                          const TypePtr* at, Node *val, BasicType bt );
+  inline bool is_unordered() const { return !is_release(); }
+  inline bool is_release() const {
+    assert((_mo == unordered || _mo == release), "unexpected");
+    return _mo == release;
+  }
+
+  // Conservatively release stores of object references in order to
+  // ensure visibility of object initialization.
+  static inline MemOrd release_if_reference(const BasicType t) {
+    const MemOrd mo = (t == T_ARRAY ||
+                       t == T_ADDRESS || // Might be the address of an object reference (`boxing').
+                       t == T_OBJECT) ? release : unordered;
+    return mo;
+  }
+
+  // Polymorphic factory method
+  //
+  // We must ensure that stores of object references will be visible
+  // only after the object's initialization. So the callers of this
+  // procedure must indicate that the store requires `release'
+  // semantics, if the stored value is an object reference that might
+  // point to a new object and may become externally visible.
+  static StoreNode* make(PhaseGVN& gvn, Node *c, Node *mem, Node *adr,
+                         const TypePtr* at, Node *val, BasicType bt, MemOrd mo);
 
   virtual uint hash() const;    // Check the type
 
@@ -491,7 +539,8 @@
 // Store byte to memory
 class StoreBNode : public StoreNode {
 public:
-  StoreBNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+  StoreBNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, MemOrd mo)
+    : StoreNode(c, mem, adr, at, val, mo) {}
   virtual int Opcode() const;
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
   virtual BasicType memory_type() const { return T_BYTE; }
@@ -501,7 +550,8 @@
 // Store char/short to memory
 class StoreCNode : public StoreNode {
 public:
-  StoreCNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+  StoreCNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, MemOrd mo)
+    : StoreNode(c, mem, adr, at, val, mo) {}
   virtual int Opcode() const;
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
   virtual BasicType memory_type() const { return T_CHAR; }
@@ -511,7 +561,8 @@
 // Store int to memory
 class StoreINode : public StoreNode {
 public:
-  StoreINode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+  StoreINode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, MemOrd mo)
+    : StoreNode(c, mem, adr, at, val, mo) {}
   virtual int Opcode() const;
   virtual BasicType memory_type() const { return T_INT; }
 };
@@ -528,15 +579,12 @@
   const bool _require_atomic_access;  // is piecewise store forbidden?
 
 public:
-  StoreLNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val,
-              bool require_atomic_access = false )
-    : StoreNode(c,mem,adr,at,val)
-    , _require_atomic_access(require_atomic_access)
-  {}
+  StoreLNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, MemOrd mo, bool require_atomic_access = false)
+    : StoreNode(c, mem, adr, at, val, mo), _require_atomic_access(require_atomic_access) {}
   virtual int Opcode() const;
   virtual BasicType memory_type() const { return T_LONG; }
   bool require_atomic_access() { return _require_atomic_access; }
-  static StoreLNode* make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val);
+  static StoreLNode* make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, MemOrd mo);
 #ifndef PRODUCT
   virtual void dump_spec(outputStream *st) const {
     StoreNode::dump_spec(st);
@@ -549,7 +597,8 @@
 // Store float to memory
 class StoreFNode : public StoreNode {
 public:
-  StoreFNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+  StoreFNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, MemOrd mo)
+    : StoreNode(c, mem, adr, at, val, mo) {}
   virtual int Opcode() const;
   virtual BasicType memory_type() const { return T_FLOAT; }
 };
@@ -558,7 +607,8 @@
 // Store double to memory
 class StoreDNode : public StoreNode {
 public:
-  StoreDNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+  StoreDNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, MemOrd mo)
+    : StoreNode(c, mem, adr, at, val, mo) {}
   virtual int Opcode() const;
   virtual BasicType memory_type() const { return T_DOUBLE; }
 };
@@ -567,7 +617,8 @@
 // Store pointer to memory
 class StorePNode : public StoreNode {
 public:
-  StorePNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+  StorePNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, MemOrd mo)
+    : StoreNode(c, mem, adr, at, val, mo) {}
   virtual int Opcode() const;
   virtual BasicType memory_type() const { return T_ADDRESS; }
 };
@@ -576,7 +627,8 @@
 // Store narrow oop to memory
 class StoreNNode : public StoreNode {
 public:
-  StoreNNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+  StoreNNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, MemOrd mo)
+    : StoreNode(c, mem, adr, at, val, mo) {}
   virtual int Opcode() const;
   virtual BasicType memory_type() const { return T_NARROWOOP; }
 };
@@ -585,7 +637,8 @@
 // Store narrow klass to memory
 class StoreNKlassNode : public StoreNNode {
 public:
-  StoreNKlassNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNNode(c,mem,adr,at,val) {}
+  StoreNKlassNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, MemOrd mo)
+    : StoreNNode(c, mem, adr, at, val, mo) {}
   virtual int Opcode() const;
   virtual BasicType memory_type() const { return T_NARROWKLASS; }
 };
@@ -606,7 +659,7 @@
 
 public:
   StoreCMNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, Node *oop_store, int oop_alias_idx ) :
-    StoreNode(c,mem,adr,at,val,oop_store),
+    StoreNode(c, mem, adr, at, val, oop_store, MemNode::release),
     _oop_alias_idx(oop_alias_idx) {
     assert(_oop_alias_idx >= Compile::AliasIdxRaw ||
            _oop_alias_idx == Compile::AliasIdxBot && Compile::current()->AliasLevel() == 0,
@@ -626,8 +679,8 @@
 // On PowerPC and friends it's a real load-locked.
 class LoadPLockedNode : public LoadPNode {
 public:
-  LoadPLockedNode( Node *c, Node *mem, Node *adr )
-    : LoadPNode(c,mem,adr,TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM) {}
+  LoadPLockedNode(Node *c, Node *mem, Node *adr, MemOrd mo)
+    : LoadPNode(c, mem, adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM, mo) {}
   virtual int Opcode() const;
   virtual int store_Opcode() const { return Op_StorePConditional; }
   virtual bool depends_only_on_test() const { return true; }
--- a/src/share/vm/opto/mulnode.cpp	Thu Nov 07 11:47:11 2013 +0100
+++ b/src/share/vm/opto/mulnode.cpp	Fri Nov 15 11:05:32 2013 -0800
@@ -485,7 +485,8 @@
       Node *ldus = new (phase->C) LoadUSNode(load->in(MemNode::Control),
                                              load->in(MemNode::Memory),
                                              load->in(MemNode::Address),
-                                             load->adr_type());
+                                             load->adr_type(),
+                                             TypeInt::CHAR, MemNode::unordered);
       ldus = phase->transform(ldus);
       return new (phase->C) AndINode(ldus, phase->intcon(mask & 0xFFFF));
     }
@@ -496,7 +497,8 @@
       Node* ldub = new (phase->C) LoadUBNode(load->in(MemNode::Control),
                                              load->in(MemNode::Memory),
                                              load->in(MemNode::Address),
-                                             load->adr_type());
+                                             load->adr_type(),
+                                             TypeInt::UBYTE, MemNode::unordered);
       ldub = phase->transform(ldub);
       return new (phase->C) AndINode(ldub, phase->intcon(mask));
     }
@@ -931,9 +933,10 @@
              ld->outcnt() == 1 && ld->unique_out() == shl)
       // Replace zero-extension-load with sign-extension-load
       return new (phase->C) LoadSNode( ld->in(MemNode::Control),
-                                ld->in(MemNode::Memory),
-                                ld->in(MemNode::Address),
-                                ld->adr_type());
+                                       ld->in(MemNode::Memory),
+                                       ld->in(MemNode::Address),
+                                       ld->adr_type(), TypeInt::SHORT,
+                                       MemNode::unordered);
   }
 
   // Check for "(byte[i] <<24)>>24" which simply sign-extends
--- a/src/share/vm/opto/parse1.cpp	Thu Nov 07 11:47:11 2013 +0100
+++ b/src/share/vm/opto/parse1.cpp	Fri Nov 15 11:05:32 2013 -0800
@@ -106,24 +106,24 @@
   // Very similar to LoadNode::make, except we handle un-aligned longs and
   // doubles on Sparc.  Intel can handle them just fine directly.
   Node *l;
-  switch( bt ) {                // Signature is flattened
-  case T_INT:     l = new (C) LoadINode( ctl, mem, adr, TypeRawPtr::BOTTOM ); break;
-  case T_FLOAT:   l = new (C) LoadFNode( ctl, mem, adr, TypeRawPtr::BOTTOM ); break;
-  case T_ADDRESS: l = new (C) LoadPNode( ctl, mem, adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM  ); break;
-  case T_OBJECT:  l = new (C) LoadPNode( ctl, mem, adr, TypeRawPtr::BOTTOM, TypeInstPtr::BOTTOM ); break;
+  switch (bt) {                // Signature is flattened
+  case T_INT:     l = new (C) LoadINode(ctl, mem, adr, TypeRawPtr::BOTTOM, TypeInt::INT,        MemNode::unordered); break;
+  case T_FLOAT:   l = new (C) LoadFNode(ctl, mem, adr, TypeRawPtr::BOTTOM, Type::FLOAT,         MemNode::unordered); break;
+  case T_ADDRESS: l = new (C) LoadPNode(ctl, mem, adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM,  MemNode::unordered); break;
+  case T_OBJECT:  l = new (C) LoadPNode(ctl, mem, adr, TypeRawPtr::BOTTOM, TypeInstPtr::BOTTOM, MemNode::unordered); break;
   case T_LONG:
   case T_DOUBLE: {
     // Since arguments are in reverse order, the argument address 'adr'
     // refers to the back half of the long/double.  Recompute adr.
-    adr = basic_plus_adr( local_addrs_base, local_addrs, -(index+1)*wordSize );
-    if( Matcher::misaligned_doubles_ok ) {
+    adr = basic_plus_adr(local_addrs_base, local_addrs, -(index+1)*wordSize);
+    if (Matcher::misaligned_doubles_ok) {
       l = (bt == T_DOUBLE)
-        ? (Node*)new (C) LoadDNode( ctl, mem, adr, TypeRawPtr::BOTTOM )
-        : (Node*)new (C) LoadLNode( ctl, mem, adr, TypeRawPtr::BOTTOM );
+        ? (Node*)new (C) LoadDNode(ctl, mem, adr, TypeRawPtr::BOTTOM, Type::DOUBLE, MemNode::unordered)
+        : (Node*)new (C) LoadLNode(ctl, mem, adr, TypeRawPtr::BOTTOM, TypeLong::LONG, MemNode::unordered);
     } else {
       l = (bt == T_DOUBLE)
-        ? (Node*)new (C) LoadD_unalignedNode( ctl, mem, adr, TypeRawPtr::BOTTOM )
-        : (Node*)new (C) LoadL_unalignedNode( ctl, mem, adr, TypeRawPtr::BOTTOM );
+        ? (Node*)new (C) LoadD_unalignedNode(ctl, mem, adr, TypeRawPtr::BOTTOM, MemNode::unordered)
+        : (Node*)new (C) LoadL_unalignedNode(ctl, mem, adr, TypeRawPtr::BOTTOM, MemNode::unordered);
     }
     break;
   }
@@ -229,7 +229,7 @@
     Node *displaced_hdr = fetch_interpreter_state((index*2) + 1, T_ADDRESS, monitors_addr, osr_buf);
 
 
-    store_to_memory(control(), box, displaced_hdr, T_ADDRESS, Compile::AliasIdxRaw);
+    store_to_memory(control(), box, displaced_hdr, T_ADDRESS, Compile::AliasIdxRaw, MemNode::unordered);
 
     // Build a bogus FastLockNode (no code will be generated) and push the
     // monitor into our debug info.
@@ -1931,7 +1931,7 @@
   Node* klass = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), klass_addr, TypeInstPtr::KLASS) );
 
   Node* access_flags_addr = basic_plus_adr(klass, klass, in_bytes(Klass::access_flags_offset()));
-  Node* access_flags = make_load(NULL, access_flags_addr, TypeInt::INT, T_INT);
+  Node* access_flags = make_load(NULL, access_flags_addr, TypeInt::INT, T_INT, MemNode::unordered);
 
   Node* mask  = _gvn.transform(new (C) AndINode(access_flags, intcon(JVM_ACC_HAS_FINALIZER)));
   Node* check = _gvn.transform(new (C) CmpINode(mask, intcon(0)));
--- a/src/share/vm/opto/parse2.cpp	Thu Nov 07 11:47:11 2013 +0100
+++ b/src/share/vm/opto/parse2.cpp	Fri Nov 15 11:05:32 2013 -0800
@@ -50,7 +50,7 @@
   if (stopped())  return;     // guaranteed null or range check
   dec_sp(2);                  // Pop array and index
   const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type);
-  Node* ld = make_load(control(), adr, elem, elem_type, adr_type);
+  Node* ld = make_load(control(), adr, elem, elem_type, adr_type, MemNode::unordered);
   push(ld);
 }
 
@@ -62,7 +62,7 @@
   Node* val = pop();
   dec_sp(2);                  // Pop array and index
   const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type);
-  store_to_memory(control(), adr, val, elem_type, adr_type);
+  store_to_memory(control(), adr, val, elem_type, adr_type, StoreNode::release_if_reference(elem_type));
 }
 
 
@@ -1720,14 +1720,14 @@
     a = array_addressing(T_LONG, 0);
     if (stopped())  return;     // guaranteed null or range check
     dec_sp(2);                  // Pop array and index
-    push_pair(make_load(control(), a, TypeLong::LONG, T_LONG, TypeAryPtr::LONGS));
+    push_pair(make_load(control(), a, TypeLong::LONG, T_LONG, TypeAryPtr::LONGS, MemNode::unordered));
     break;
   }
   case Bytecodes::_daload: {
     a = array_addressing(T_DOUBLE, 0);
     if (stopped())  return;     // guaranteed null or range check
     dec_sp(2);                  // Pop array and index
-    push_pair(make_load(control(), a, Type::DOUBLE, T_DOUBLE, TypeAryPtr::DOUBLES));
+    push_pair(make_load(control(), a, Type::DOUBLE, T_DOUBLE, TypeAryPtr::DOUBLES, MemNode::unordered));
     break;
   }
   case Bytecodes::_bastore: array_store(T_BYTE);  break;
@@ -1744,7 +1744,7 @@
     a = pop();                  // the array itself
     const TypeOopPtr* elemtype  = _gvn.type(a)->is_aryptr()->elem()->make_oopptr();
     const TypeAryPtr* adr_type = TypeAryPtr::OOPS;
-    Node* store = store_oop_to_array(control(), a, d, adr_type, c, elemtype, T_OBJECT);
+    Node* store = store_oop_to_array(control(), a, d, adr_type, c, elemtype, T_OBJECT, MemNode::release);
     break;
   }
   case Bytecodes::_lastore: {
@@ -1752,7 +1752,7 @@
     if (stopped())  return;     // guaranteed null or range check
     c = pop_pair();
     dec_sp(2);                  // Pop array and index
-    store_to_memory(control(), a, c, T_LONG, TypeAryPtr::LONGS);
+    store_to_memory(control(), a, c, T_LONG, TypeAryPtr::LONGS, MemNode::unordered);
     break;
   }
   case Bytecodes::_dastore: {
@@ -1761,7 +1761,7 @@
     c = pop_pair();
     dec_sp(2);                  // Pop array and index
     c = dstore_rounding(c);
-    store_to_memory(control(), a, c, T_DOUBLE, TypeAryPtr::DOUBLES);
+    store_to_memory(control(), a, c, T_DOUBLE, TypeAryPtr::DOUBLES, MemNode::unordered);
     break;
   }
   case Bytecodes::_getfield:
--- a/src/share/vm/opto/parse3.cpp	Thu Nov 07 11:47:11 2013 +0100
+++ b/src/share/vm/opto/parse3.cpp	Fri Nov 15 11:05:32 2013 -0800
@@ -228,7 +228,9 @@
     type = Type::get_const_basic_type(bt);
   }
   // Build the load.
-  Node* ld = make_load(NULL, adr, type, bt, adr_type, is_vol);
+  //
+  MemNode::MemOrd mo = is_vol ? MemNode::acquire : MemNode::unordered;
+  Node* ld = make_load(NULL, adr, type, bt, adr_type, mo, is_vol);
 
   // Adjust Java stack
   if (type2size[bt] == 1)
@@ -288,6 +290,16 @@
   // Round doubles before storing
   if (bt == T_DOUBLE)  val = dstore_rounding(val);
 
+  // Conservatively release stores of object references.
+  const MemNode::MemOrd mo =
+    is_vol ?
+    // Volatile fields need releasing stores.
+    MemNode::release :
+    // Non-volatile fields also need releasing stores if they hold an
+    // object reference, because the object reference might point to
+    // a freshly created object.
+    StoreNode::release_if_reference(bt);
+
   // Store the value.
   Node* store;
   if (bt == T_OBJECT) {
@@ -297,9 +309,9 @@
     } else {
       field_type = TypeOopPtr::make_from_klass(field->type()->as_klass());
     }
-    store = store_oop_to_object( control(), obj, adr, adr_type, val, field_type, bt);
+    store = store_oop_to_object(control(), obj, adr, adr_type, val, field_type, bt, mo);
   } else {
-    store = store_to_memory( control(), adr, val, bt, adr_type, is_vol );
+    store = store_to_memory(control(), adr, val, bt, adr_type, mo, is_vol);
   }
 
   // If reference is volatile, prevent following volatiles ops from
@@ -414,7 +426,7 @@
       Node*    elem   = expand_multianewarray(array_klass_1, &lengths[1], ndimensions-1, nargs);
       intptr_t offset = header + ((intptr_t)i << LogBytesPerHeapOop);
       Node*    eaddr  = basic_plus_adr(array, offset);
-      store_oop_to_array(control(), array, eaddr, adr_type, elem, elemtype, T_OBJECT);
+      store_oop_to_array(control(), array, eaddr, adr_type, elem, elemtype, T_OBJECT, MemNode::unordered);
     }
   }
   return array;
@@ -503,7 +515,7 @@
       // Fill-in it with values
       for (j = 0; j < ndimensions; j++) {
         Node *dims_elem = array_element_address(dims, intcon(j), T_INT);
-        store_to_memory(control(), dims_elem, length[j], T_INT, TypeAryPtr::INTS);
+        store_to_memory(control(), dims_elem, length[j], T_INT, TypeAryPtr::INTS, MemNode::unordered);
       }
     }
 
--- a/src/share/vm/opto/parseHelper.cpp	Thu Nov 07 11:47:11 2013 +0100
+++ b/src/share/vm/opto/parseHelper.cpp	Fri Nov 15 11:05:32 2013 -0800
@@ -222,7 +222,7 @@
 
   Node* init_thread_offset = _gvn.MakeConX(in_bytes(InstanceKlass::init_thread_offset()));
   Node* adr_node = basic_plus_adr(kls, kls, init_thread_offset);
-  Node* init_thread = make_load(NULL, adr_node, TypeRawPtr::BOTTOM, T_ADDRESS);
+  Node* init_thread = make_load(NULL, adr_node, TypeRawPtr::BOTTOM, T_ADDRESS, MemNode::unordered);
   Node *tst   = Bool( CmpP( init_thread, cur_thread), BoolTest::eq);
   IfNode* iff = create_and_map_if(control(), tst, PROB_ALWAYS, COUNT_UNKNOWN);
   set_control(IfTrue(iff));
@@ -232,7 +232,7 @@
   adr_node = basic_plus_adr(kls, kls, init_state_offset);
   // Use T_BOOLEAN for InstanceKlass::_init_state so the compiler
   // can generate code to load it as unsigned byte.
-  Node* init_state = make_load(NULL, adr_node, TypeInt::UBYTE, T_BOOLEAN);
+  Node* init_state = make_load(NULL, adr_node, TypeInt::UBYTE, T_BOOLEAN, MemNode::unordered);
   Node* being_init = _gvn.intcon(InstanceKlass::being_initialized);
   tst   = Bool( CmpI( init_state, being_init), BoolTest::eq);
   iff = create_and_map_if(control(), tst, PROB_ALWAYS, COUNT_UNKNOWN);
@@ -354,13 +354,13 @@
   Node *counters_node = makecon(adr_type);
   Node* adr_iic_node = basic_plus_adr(counters_node, counters_node,
     MethodCounters::interpreter_invocation_counter_offset_in_bytes());
-  Node* cnt = make_load(ctrl, adr_iic_node, TypeInt::INT, T_INT, adr_type);
+  Node* cnt = make_load(ctrl, adr_iic_node, TypeInt::INT, T_INT, adr_type, MemNode::unordered);
 
   test_counter_against_threshold(cnt, limit);
 
   // Add one to the counter and store
   Node* incr = _gvn.transform(new (C) AddINode(cnt, _gvn.intcon(1)));
-  store_to_memory( ctrl, adr_iic_node, incr, T_INT, adr_type );
+  store_to_memory(ctrl, adr_iic_node, incr, T_INT, adr_type, MemNode::unordered);
 }
 
 //----------------------------method_data_addressing---------------------------
@@ -392,9 +392,9 @@
   Node* adr_node = method_data_addressing(md, data, counter_offset, idx, stride);
 
   const TypePtr* adr_type = _gvn.type(adr_node)->is_ptr();
-  Node* cnt  = make_load(NULL, adr_node, TypeInt::INT, T_INT, adr_type);
+  Node* cnt  = make_load(NULL, adr_node, TypeInt::INT, T_INT, adr_type, MemNode::unordered);
   Node* incr = _gvn.transform(new (C) AddINode(cnt, _gvn.intcon(DataLayout::counter_increment)));
-  store_to_memory(NULL, adr_node, incr, T_INT, adr_type );
+  store_to_memory(NULL, adr_node, incr, T_INT, adr_type, MemNode::unordered);
 }
 
 //--------------------------test_for_osr_md_counter_at-------------------------
@@ -402,7 +402,7 @@
   Node* adr_node = method_data_addressing(md, data, counter_offset);
 
   const TypePtr* adr_type = _gvn.type(adr_node)->is_ptr();
-  Node* cnt  = make_load(NULL, adr_node, TypeInt::INT, T_INT, adr_type);
+  Node* cnt  = make_load(NULL, adr_node, TypeInt::INT, T_INT, adr_type, MemNode::unordered);
 
   test_counter_against_threshold(cnt, limit);
 }
@@ -412,9 +412,9 @@
   Node* adr_node = method_data_addressing(md, data, DataLayout::flags_offset());
 
   const TypePtr* adr_type = _gvn.type(adr_node)->is_ptr();
-  Node* flags = make_load(NULL, adr_node, TypeInt::BYTE, T_BYTE, adr_type);
+  Node* flags = make_load(NULL, adr_node, TypeInt::BYTE, T_BYTE, adr_type, MemNode::unordered);
   Node* incr = _gvn.transform(new (C) OrINode(flags, _gvn.intcon(flag_constant)));
-  store_to_memory(NULL, adr_node, incr, T_BYTE, adr_type);
+  store_to_memory(NULL, adr_node, incr, T_BYTE, adr_type, MemNode::unordered);
 }
 
 //----------------------------profile_taken_branch-----------------------------
--- a/src/share/vm/opto/stringopts.cpp	Thu Nov 07 11:47:11 2013 +0100
+++ b/src/share/vm/opto/stringopts.cpp	Fri Nov 15 11:05:32 2013 -0800
@@ -1122,7 +1122,8 @@
 
   return kit.make_load(NULL, kit.basic_plus_adr(klass_node, field->offset_in_bytes()),
                        type, T_OBJECT,
-                       C->get_alias_index(mirror_type->add_offset(field->offset_in_bytes())));
+                       C->get_alias_index(mirror_type->add_offset(field->offset_in_bytes())),
+                       MemNode::unordered);
 }
 
 Node* PhaseStringOpts::int_stringSize(GraphKit& kit, Node* arg) {
@@ -1314,7 +1315,7 @@
     Node* ch = __ AddI(r, __ intcon('0'));
 
     Node* st = __ store_to_memory(kit.control(), kit.array_element_address(char_array, m1, T_CHAR),
-                                  ch, T_CHAR, char_adr_idx);
+                                  ch, T_CHAR, char_adr_idx, MemNode::unordered);
 
 
     IfNode* iff = kit.create_and_map_if(head, __ Bool(__ CmpI(q, __ intcon(0)), BoolTest::ne),
@@ -1356,7 +1357,7 @@
     } else {
       Node* m1 = __ SubI(charPos, __ intcon(1));
       Node* st = __ store_to_memory(kit.control(), kit.array_element_address(char_array, m1, T_CHAR),
-                                    sign, T_CHAR, char_adr_idx);
+                                    sign, T_CHAR, char_adr_idx, MemNode::unordered);
 
       final_merge->init_req(1, kit.control());
       final_mem->init_req(1, st);
@@ -1387,7 +1388,8 @@
     ciTypeArray* value_array = t->const_oop()->as_type_array();
     for (int e = 0; e < c; e++) {
       __ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR),
-                         __ intcon(value_array->char_at(o + e)), T_CHAR, char_adr_idx);
+                         __ intcon(value_array->char_at(o + e)), T_CHAR, char_adr_idx,
+                         MemNode::unordered);
       start = __ AddI(start, __ intcon(1));
     }
   } else {
@@ -1607,7 +1609,7 @@
         }
         case StringConcat::CharMode: {
           __ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR),
-                             arg, T_CHAR, char_adr_idx);
+                             arg, T_CHAR, char_adr_idx, MemNode::unordered);
           start = __ AddI(start, __ intcon(1));
           break;
         }
--- a/src/share/vm/opto/vectornode.hpp	Thu Nov 07 11:47:11 2013 +0100
+++ b/src/share/vm/opto/vectornode.hpp	Fri Nov 15 11:05:32 2013 -0800
@@ -356,7 +356,7 @@
 class LoadVectorNode : public LoadNode {
  public:
   LoadVectorNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeVect* vt)
-    : LoadNode(c, mem, adr, at, vt) {
+    : LoadNode(c, mem, adr, at, vt, MemNode::unordered) {
     init_class_id(Class_LoadVector);
   }
 
@@ -380,7 +380,7 @@
 class StoreVectorNode : public StoreNode {
  public:
   StoreVectorNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
-    : StoreNode(c, mem, adr, at, val) {
+    : StoreNode(c, mem, adr, at, val, MemNode::unordered) {
     assert(val->is_Vector() || val->is_LoadVector(), "sanity");
     init_class_id(Class_StoreVector);
   }