changeset 32600:4fdefe1ae99b

Merge
author dsamersoff
date Mon, 31 Aug 2015 23:29:02 +0200
parents 084cb21b3975 70b490faa49f
children c5df671276bd
files
diffstat 9 files changed, 341 insertions(+), 71 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/sparc/vm/memset_with_concurrent_readers_sparc.cpp	Mon Aug 31 23:29:02 2015 +0200
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+
+#include "gc/shared/memset_with_concurrent_readers.hpp"
+#include "runtime/prefetch.inline.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+#if INCLUDE_ALL_GCS
+
+// An implementation of memset, for use when there may be concurrent
+// readers of the region being stored into.
+//
+// We can't use the standard library memset if it is implemented using
+// block initializing stores.  Doing so can result in concurrent readers
+// seeing spurious zeros.
+//
+// We can't use the obvious C/C++ for-loop, because the compiler may
+// recognize the idiomatic loop and optimize it into a call to the
+// standard library memset; we've seen exactly this happen with, for
+// example, Solaris Studio 12.3.  Hence the use of inline assembly
+// code, hiding loops from the compiler's optimizer.
+//
+// We don't attempt to use the standard library memset when it is safe
+// to do so.  We could conservatively do so by detecting the presence
+// of block initializing stores (VM_Version::has_blk_init()), but the
+// implementation provided here should be sufficient.
+
+inline void fill_subword(void* start, void* end, int value) {
+  STATIC_ASSERT(BytesPerWord == 8);
+  assert(pointer_delta(end, start, 1) < BytesPerWord, "precondition");
+  // Dispatch on (end - start).
+  void* pc;
+  __asm__ volatile(
+    // offset := (7 - (end - start)) + 3
+    //   3 instructions from rdpc to DISPATCH
+    " sub %[offset], %[end], %[offset]\n\t" // offset := start - end
+    " sllx %[offset], 2, %[offset]\n\t" // scale offset for instruction size of 4
+    " add %[offset], 40, %[offset]\n\t" // offset += 10 * instruction size
+    " rd %pc, %[pc]\n\t"                // dispatch on scaled offset
+    " jmpl %[pc]+%[offset], %g0\n\t"
+    "  nop\n\t"
+    // DISPATCH: no direct reference, but without it the store block may be elided.
+    "1:\n\t"
+    " stb %[value], [%[end]-7]\n\t" // end[-7] = value
+    " stb %[value], [%[end]-6]\n\t"
+    " stb %[value], [%[end]-5]\n\t"
+    " stb %[value], [%[end]-4]\n\t"
+    " stb %[value], [%[end]-3]\n\t"
+    " stb %[value], [%[end]-2]\n\t"
+    " stb %[value], [%[end]-1]\n\t" // end[-1] = value
+    : /* no outputs */
+      [pc] "&=r" (pc)               // temp
+    : [offset] "&+r" (start),
+      [end] "r" (end),
+      [value] "r" (value)
+    : "memory");
+}
+
+void memset_with_concurrent_readers(void* to, int value, size_t size) {
+  Prefetch::write(to, 0);
+  void* end = static_cast<char*>(to) + size;
+  if (size >= BytesPerWord) {
+    // Fill any partial word prefix.
+    uintx* aligned_to = static_cast<uintx*>(align_ptr_up(to, BytesPerWord));
+    fill_subword(to, aligned_to, value);
+
+    // Compute fill word.
+    STATIC_ASSERT(BitsPerByte == 8);
+    STATIC_ASSERT(BitsPerWord == 64);
+    uintx xvalue = value & 0xff;
+    xvalue |= (xvalue << 8);
+    xvalue |= (xvalue << 16);
+    xvalue |= (xvalue << 32);
+
+    uintx* aligned_end = static_cast<uintx*>(align_ptr_down(end, BytesPerWord));
+    assert(aligned_to <= aligned_end, "invariant");
+
+    // for ( ; aligned_to < aligned_end; ++aligned_to) {
+    //   *aligned_to = xvalue;
+    // }
+    uintptr_t temp;
+    __asm__ volatile(
+      // Unroll loop x8.
+      " sub %[aend], %[ato], %[temp]\n\t"
+      " cmp %[temp], 56\n\t"           // cc := (aligned_end - aligned_to) > 7 words
+      " ba %xcc, 2f\n\t"               // goto TEST always
+      "  sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words
+      // LOOP:
+      "1:\n\t"                         // unrolled x8 store loop top
+      " cmp %[temp], %[ato]\n\t"       // cc := limit > (next) aligned_to
+      " stx %[xvalue], [%[ato]-64]\n\t" // store 8 words, aligned_to pre-incremented
+      " stx %[xvalue], [%[ato]-56]\n\t"
+      " stx %[xvalue], [%[ato]-48]\n\t"
+      " stx %[xvalue], [%[ato]-40]\n\t"
+      " stx %[xvalue], [%[ato]-32]\n\t"
+      " stx %[xvalue], [%[ato]-24]\n\t"
+      " stx %[xvalue], [%[ato]-16]\n\t"
+      " stx %[xvalue], [%[ato]-8]\n\t"
+      // TEST:
+      "2:\n\t"
+      " bgu,a %xcc, 1b\n\t"            // goto LOOP if more than 7 words remaining
+      "  add %[ato], 64, %[ato]\n\t"   // aligned_to += 8, for next iteration
+      // Fill remaining < 8 full words.
+      // Dispatch on (aligned_end - aligned_to).
+      // offset := (7 - (aligned_end - aligned_to)) + 3
+      //   3 instructions from rdpc to DISPATCH
+      " sub %[ato], %[aend], %[ato]\n\t" // offset := aligned_to - aligned_end
+      " srax %[ato], 1, %[ato]\n\t"      // scale offset for instruction size of 4
+      " add %[ato], 40, %[ato]\n\t"      // offset += 10 * instruction size
+      " rd %pc, %[temp]\n\t"             // dispatch on scaled offset
+      " jmpl %[temp]+%[ato], %g0\n\t"
+      "  nop\n\t"
+      // DISPATCH: no direct reference, but without it the store block may be elided.
+      "3:\n\t"
+      " stx %[xvalue], [%[aend]-56]\n\t" // aligned_end[-7] = xvalue
+      " stx %[xvalue], [%[aend]-48]\n\t"
+      " stx %[xvalue], [%[aend]-40]\n\t"
+      " stx %[xvalue], [%[aend]-32]\n\t"
+      " stx %[xvalue], [%[aend]-24]\n\t"
+      " stx %[xvalue], [%[aend]-16]\n\t"
+      " stx %[xvalue], [%[aend]-8]\n\t"  // aligned_end[-1] = xvalue
+      : /* no outputs */
+        [temp] "&=r" (temp)
+      : [ato] "&+r" (aligned_to),
+        [aend] "r" (aligned_end),
+        [xvalue] "r" (xvalue)
+      : "cc", "memory");
+    to = aligned_end;           // setup for suffix
+  }
+  // Fill any partial word suffix.  Also the prefix if size < BytesPerWord.
+  fill_subword(to, end, value);
+}
+
+#endif // INCLUDE_ALL_GCS
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Mon Aug 31 21:46:33 2015 +0300
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Mon Aug 31 23:29:02 2015 +0200
@@ -85,27 +85,6 @@
   _supports_cx8 = has_v9();
   _supports_atomic_getset4 = true; // swap instruction
 
-  // There are Fujitsu Sparc64 CPUs which support blk_init as well so
-  // we have to take this check out of the 'is_niagara()' block below.
-  if (has_blk_init()) {
-    // When using CMS or G1, we cannot use memset() in BOT updates
-    // because the sun4v/CMT version in libc_psr uses BIS which
-    // exposes "phantom zeros" to concurrent readers. See 6948537.
-    if (FLAG_IS_DEFAULT(UseMemSetInBOT) && (UseConcMarkSweepGC || UseG1GC)) {
-      FLAG_SET_DEFAULT(UseMemSetInBOT, false);
-    }
-    // Issue a stern warning if the user has explicitly set
-    // UseMemSetInBOT (it is known to cause issues), but allow
-    // use for experimentation and debugging.
-    if (UseConcMarkSweepGC || UseG1GC) {
-      if (UseMemSetInBOT) {
-        assert(!FLAG_IS_DEFAULT(UseMemSetInBOT), "Error");
-        warning("Experimental flag -XX:+UseMemSetInBOT is known to cause instability"
-                " on sun4v; please understand that you are using at your own risk!");
-      }
-    }
-  }
-
   if (is_niagara()) {
     // Indirect branch is the same cost as direct
     if (FLAG_IS_DEFAULT(UseInlineCaches)) {
--- a/hotspot/src/share/vm/gc/g1/g1BlockOffsetTable.inline.hpp	Mon Aug 31 21:46:33 2015 +0300
+++ b/hotspot/src/share/vm/gc/g1/g1BlockOffsetTable.inline.hpp	Mon Aug 31 23:29:02 2015 +0200
@@ -27,6 +27,7 @@
 
 #include "gc/g1/g1BlockOffsetTable.hpp"
 #include "gc/g1/heapRegion.hpp"
+#include "gc/shared/memset_with_concurrent_readers.hpp"
 #include "gc/shared/space.hpp"
 
 inline HeapWord* G1BlockOffsetTable::block_start(const void* addr) {
@@ -68,15 +69,7 @@
   check_index(right, "right index out of range");
   assert(left <= right, "indexes out of order");
   size_t num_cards = right - left + 1;
-  if (UseMemSetInBOT) {
-    memset(&_offset_array[left], offset, num_cards);
-  } else {
-    size_t i = left;
-    const size_t end = i + num_cards;
-    for (; i < end; i++) {
-      _offset_array[i] = offset;
-    }
-  }
+  memset_with_concurrent_readers(&_offset_array[left], offset, num_cards);
 }
 
 // Variant of index_for that does not check the index for validity.
--- a/hotspot/src/share/vm/gc/g1/g1SATBCardTableModRefBS.cpp	Mon Aug 31 21:46:33 2015 +0300
+++ b/hotspot/src/share/vm/gc/g1/g1SATBCardTableModRefBS.cpp	Mon Aug 31 23:29:02 2015 +0200
@@ -27,6 +27,7 @@
 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc/g1/heapRegion.hpp"
 #include "gc/g1/satbQueue.hpp"
+#include "gc/shared/memset_with_concurrent_readers.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/atomic.inline.hpp"
 #include "runtime/mutexLocker.hpp"
@@ -108,15 +109,7 @@
   jbyte *const first = byte_for(mr.start());
   jbyte *const last = byte_after(mr.last());
 
-  // Below we may use an explicit loop instead of memset() because on
-  // certain platforms memset() can give concurrent readers phantom zeros.
-  if (UseMemSetInBOT) {
-    memset(first, g1_young_gen, last - first);
-  } else {
-    for (jbyte* i = first; i < last; i++) {
-      *i = g1_young_gen;
-    }
-  }
+  memset_with_concurrent_readers(first, g1_young_gen, last - first);
 }
 
 #ifndef PRODUCT
--- a/hotspot/src/share/vm/gc/shared/blockOffsetTable.hpp	Mon Aug 31 21:46:33 2015 +0300
+++ b/hotspot/src/share/vm/gc/shared/blockOffsetTable.hpp	Mon Aug 31 23:29:02 2015 +0200
@@ -25,9 +25,12 @@
 #ifndef SHARE_VM_GC_SHARED_BLOCKOFFSETTABLE_HPP
 #define SHARE_VM_GC_SHARED_BLOCKOFFSETTABLE_HPP
 
+#include "gc/shared/memset_with_concurrent_readers.hpp"
 #include "memory/memRegion.hpp"
 #include "memory/virtualspace.hpp"
+#include "runtime/globals.hpp"
 #include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
 
 // The CollectedHeap type requires subtypes to implement a method
 // "block_start".  For some subtypes, notably generational
@@ -126,6 +129,19 @@
   VirtualSpace _vs;
   u_char* _offset_array;          // byte array keeping backwards offsets
 
+  void fill_range(size_t start, size_t num_cards, u_char offset) {
+    void* start_ptr = &_offset_array[start];
+#if INCLUDE_ALL_GCS
+    // If collector is concurrent, special handling may be needed.
+    assert(!UseG1GC, "Shouldn't be here when using G1");
+    if (UseConcMarkSweepGC) {
+      memset_with_concurrent_readers(start_ptr, offset, num_cards);
+      return;
+    }
+#endif // INCLUDE_ALL_GCS
+    memset(start_ptr, offset, num_cards);
+  }
+
  protected:
   // Bounds checking accessors:
   // For performance these have to devolve to array accesses in product builds.
@@ -160,20 +176,7 @@
     assert(left  < right, "Heap addresses out of order");
     size_t num_cards = pointer_delta(right, left) >> LogN_words;
 
-    // Below, we may use an explicit loop instead of memset()
-    // because on certain platforms memset() can give concurrent
-    // readers "out-of-thin-air," phantom zeros; see 6948537.
-    if (UseMemSetInBOT) {
-      memset(&_offset_array[index_for(left)], offset, num_cards);
-    } else {
-      size_t i = index_for(left);
-      const size_t end = i + num_cards;
-      for (; i < end; i++) {
-        // Elided until CR 6977974 is fixed properly.
-        // assert(!reducing || _offset_array[i] >= offset, "Not reducing");
-        _offset_array[i] = offset;
-      }
-    }
+    fill_range(index_for(left), num_cards, offset);
   }
 
   void set_offset_array(size_t left, size_t right, u_char offset, bool reducing = false) {
@@ -182,20 +185,7 @@
     assert(left  <= right, "indexes out of order");
     size_t num_cards = right - left + 1;
 
-    // Below, we may use an explicit loop instead of memset
-    // because on certain platforms memset() can give concurrent
-    // readers "out-of-thin-air," phantom zeros; see 6948537.
-    if (UseMemSetInBOT) {
-      memset(&_offset_array[left], offset, num_cards);
-    } else {
-      size_t i = left;
-      const size_t end = i + num_cards;
-      for (; i < end; i++) {
-        // Elided until CR 6977974 is fixed properly.
-        // assert(!reducing || _offset_array[i] >= offset, "Not reducing");
-        _offset_array[i] = offset;
-      }
-    }
+    fill_range(left, num_cards, offset);
   }
 
   void check_offset_array(size_t index, HeapWord* high, HeapWord* low) const {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc/shared/memset_with_concurrent_readers.cpp	Mon Aug 31 23:29:02 2015 +0200
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+
+#include <string.h>
+#include "gc/shared/memset_with_concurrent_readers.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+#include "utilities/ostream.hpp"
+
+#if INCLUDE_ALL_GCS
+
+// Unit test
+#ifdef ASSERT
+
+static unsigned line_byte(const char* line, size_t i) {
+  return unsigned(line[i]) & 0xFF;
+}
+
+// Verify memset_with_concurrent_readers mimics memset.
+// We don't attempt to verify the concurrent reader case.
+void test_memset_with_concurrent_readers() {
+  const size_t chunk_size = 8 * BytesPerWord;
+  const unsigned chunk_count = 4;
+  const size_t block_size = (chunk_count + 4) * chunk_size;
+  char block[block_size];
+  char clear_block[block_size];
+  char set_block[block_size];
+
+  // block format:
+  // 0: unused leading chunk
+  // 1: chunk written from start index to end of chunk
+  // ... nchunks fully written chunks
+  // N: chunk written from start of chunk to end index
+  // N+1: unused trailing chunk
+
+  const int clear_value = 0;
+  const int set_value = 0xAC;
+
+  memset(clear_block, clear_value, block_size);
+  memset(set_block, set_value, block_size);
+
+  for (unsigned nchunks = 0; nchunks <= chunk_count; ++nchunks) {
+    for (size_t start = 1; start <= chunk_size; ++start) {
+      for (size_t end = 0; end <= chunk_size; ++end) {
+        size_t set_start = chunk_size + start;
+        size_t set_end = (2 + nchunks) * chunk_size + end;
+        size_t set_size = set_end - set_start;
+
+        memset(block, clear_value, block_size);
+        memset_with_concurrent_readers(&block[set_start], set_value, set_size);
+        bool head_clear = !memcmp(clear_block, block, set_start);
+        bool middle_set = !memcmp(set_block, block + set_start, set_size);
+        bool tail_clear = !memcmp(clear_block, block + set_end, block_size - set_end);
+        if (!(head_clear && middle_set && tail_clear)) {
+          tty->print_cr("*** memset_with_concurrent_readers failed: "
+                        "set start " SIZE_FORMAT ", set end " SIZE_FORMAT,
+                        set_start, set_end);
+          for (unsigned chunk = 0; chunk < (block_size / chunk_size); ++chunk) {
+            for (unsigned line = 0; line < (chunk_size / BytesPerWord); ++line) {
+              const char* lp = &block[chunk * chunk_size + line * BytesPerWord];
+              tty->print_cr("%d,%d: %2x %2x  %2x %2x  %2x %2x  %2x %2x",
+                            chunk, line,
+                            line_byte(lp, 0), line_byte(lp, 1),
+                            line_byte(lp, 2), line_byte(lp, 3),
+                            line_byte(lp, 4), line_byte(lp, 5),
+                            line_byte(lp, 6), line_byte(lp, 7));
+            }
+          }
+          assert(head_clear, "leading byte not clear");
+          assert(middle_set, "memset byte not set");
+          assert(tail_clear, "trailing bye not clear");
+        }
+      }
+    }
+  }
+}
+
+#endif // end unit test
+
+#endif // INCLUDE_ALL_GCS
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc/shared/memset_with_concurrent_readers.hpp	Mon Aug 31 23:29:02 2015 +0200
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SRC_SHARE_VM_GC_SHARED_MEMSETWITHCONCURRENTREADERS_HPP
+#define SRC_SHARE_VM_GC_SHARED_MEMSETWITHCONCURRENTREADERS_HPP
+
+#include <stddef.h>
+#include <string.h>
+#include "utilities/macros.hpp"
+
+// Only used by concurrent collectors.
+#if INCLUDE_ALL_GCS
+
+// Fill a block of memory with value, like memset, but with the
+// understanding that there may be concurrent readers of that memory.
+void memset_with_concurrent_readers(void* to, int value, size_t size);
+
+#ifdef TARGET_ARCH_sparc
+
+// SPARC requires special handling.  See SPARC-specific definition.
+
+#else
+// All others just use memset.
+
+inline void memset_with_concurrent_readers(void* to, int value, size_t size) {
+  ::memset(to, value, size);
+}
+
+#endif // End of target dispatch.
+
+#endif // INCLUDE_ALL_GCS
+
+#endif // include guard
--- a/hotspot/src/share/vm/prims/jni.cpp	Mon Aug 31 21:46:33 2015 +0300
+++ b/hotspot/src/share/vm/prims/jni.cpp	Mon Aug 31 23:29:02 2015 +0200
@@ -3869,6 +3869,7 @@
 void TestBufferingOopClosure_test();
 void TestCodeCacheRemSet_test();
 void FreeRegionList_test();
+void test_memset_with_concurrent_readers();
 #endif
 
 void execute_internal_vm_tests() {
@@ -3910,6 +3911,7 @@
     if (UseG1GC) {
       run_unit_test(FreeRegionList_test());
     }
+    run_unit_test(test_memset_with_concurrent_readers());
 #endif
     tty->print_cr("All internal VM tests passed");
   }
--- a/hotspot/src/share/vm/runtime/globals.hpp	Mon Aug 31 21:46:33 2015 +0300
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Mon Aug 31 23:29:02 2015 +0200
@@ -638,10 +638,6 @@
   experimental(bool, AlwaysSafeConstructors, false,                         \
           "Force safe construction, as if all fields are final.")           \
                                                                             \
-  /* Temporary: See 6948537 */                                              \
-  experimental(bool, UseMemSetInBOT, true,                                  \
-          "(Unstable) uses memset in BOT updates in GC code")               \
-                                                                            \
   diagnostic(bool, UnlockDiagnosticVMOptions, trueInDebug,                  \
           "Enable normal processing of flags relating to field diagnostics")\
                                                                             \