changeset 4536:64aaeeee0a10 hs24-b47

Merge
author amurillo
date Fri, 31 May 2013 14:14:30 -0700
parents ba0b94f82e71 d2e053ead2e6
children 6206774b5959
diffstat 13 files changed, 584 insertions(+), 186 deletions(-)
--- a/make/hotspot_version	Wed May 29 13:24:23 2013 -0700
+++ b/make/hotspot_version	Fri May 31 14:14:30 2013 -0700
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=24
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=46
+HS_BUILD_NUMBER=47
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=7
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed May 29 13:24:23 2013 -0700
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Fri May 31 14:14:30 2013 -0700
@@ -1527,27 +1527,29 @@
     __ movptr(elem_klass, elem_klass_addr); // query the object klass
     generate_type_check(elem_klass, ckoff_arg, ckval_arg, temp,
                         &L_store_element, NULL);
-      // (On fall-through, we have failed the element type check.)
+    // (On fall-through, we have failed the element type check.)
     // ======== end loop ========
 
     // It was a real error; we must depend on the caller to finish the job.
     // Register "count" = -1 * number of *remaining* oops, length_arg = *total* oops.
     // Emit GC store barriers for the oops we have copied (length_arg + count),
     // and report their number to the caller.
+    assert_different_registers(to, count, rax);
+    Label L_post_barrier;
     __ addl(count, length_arg);         // transfers = (length - remaining)
     __ movl2ptr(rax, count);            // save the value
-    __ notptr(rax);                     // report (-1^K) to caller
-    __ movptr(to, to_arg);              // reload
-    assert_different_registers(to, count, rax);
-    gen_write_ref_array_post_barrier(to, count);
-    __ jmpb(L_done);
+    __ notptr(rax);                     // report (-1^K) to caller (does not affect flags)
+    __ jccb(Assembler::notZero, L_post_barrier);
+    __ jmp(L_done); // K == 0, nothing was copied, skip post barrier
 
     // Come here on success only.
     __ BIND(L_do_card_marks);
+    __ xorptr(rax, rax);                // return 0 on success
     __ movl2ptr(count, length_arg);
-    __ movptr(to, to_arg);                // reload
+
+    __ BIND(L_post_barrier);
+    __ movptr(to, to_arg);              // reload
     gen_write_ref_array_post_barrier(to, count);
-    __ xorptr(rax, rax);                  // return 0 on success
 
     // Common exit point (success or failure).
     __ BIND(L_done);
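
For orientation, a standalone C++ sketch of the restructured epilogue above (illustrative names, not HotSpot code): both exit paths now share a single post-barrier site, and the barrier is skipped entirely when zero oops were copied. The jccb works because notptr does not affect flags, so it still tests the result of the preceding addl.

    #include <cassert>

    static int barrier_calls = 0;

    // Stand-in for gen_write_ref_array_post_barrier (hypothetical).
    static void post_barrier(void** to, int count) {
      (void)to; (void)count;
      ++barrier_calls;
    }

    // count arrives as -1 * remaining oops; length is the total.
    static int checkcast_epilogue(void** to, int count, int length) {
      int copied = length + count;   // K = total - remaining
      int ret = ~copied;             // caller receives ~K on the failure path
      if (copied != 0) {             // K == 0: nothing copied, skip the barrier
        post_barrier(to, copied);
      }
      return ret;
    }

    int main() {
      assert(checkcast_epilogue(0, 0, 0) == ~0);    // empty copy: no barrier
      assert(barrier_calls == 0);
      assert(checkcast_epilogue(0, -2, 10) == ~8);  // 8 oops copied, then failure
      assert(barrier_calls == 1);
      return 0;
    }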
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed May 29 13:24:23 2013 -0700
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Fri May 31 14:14:30 2013 -0700
@@ -1245,27 +1245,28 @@
   //
   //  Input:
   //     start    - register containing starting address of destination array
-  //     end      - register containing ending address of destination array
+  //     count    - elements count
   //     scratch  - scratch register
   //
   //  The input registers are overwritten.
-  //  The ending address is inclusive.
-  void  gen_write_ref_array_post_barrier(Register start, Register end, Register scratch) {
-    assert_different_registers(start, end, scratch);
+  //
+  void  gen_write_ref_array_post_barrier(Register start, Register count, Register scratch) {
+    assert_different_registers(start, count, scratch);
     BarrierSet* bs = Universe::heap()->barrier_set();
     switch (bs->kind()) {
       case BarrierSet::G1SATBCT:
       case BarrierSet::G1SATBCTLogging:
-
         {
-          __ pusha();                      // push registers (overkill)
-          // must compute element count unless barrier set interface is changed (other platforms supply count)
-          assert_different_registers(start, end, scratch);
-          __ lea(scratch, Address(end, BytesPerHeapOop));
-          __ subptr(scratch, start);               // subtract start to get #bytes
-          __ shrptr(scratch, LogBytesPerHeapOop);  // convert to element count
-          __ mov(c_rarg0, start);
-          __ mov(c_rarg1, scratch);
+          __ pusha();             // push registers (overkill)
+          if (c_rarg0 == count) { // On win64 c_rarg0 == rcx
+            assert_different_registers(c_rarg1, start);
+            __ mov(c_rarg1, count);
+            __ mov(c_rarg0, start);
+          } else {
+            assert_different_registers(c_rarg0, count);
+            __ mov(c_rarg0, start);
+            __ mov(c_rarg1, count);
+          }
           __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2);
           __ popa();
         }
@@ -1277,22 +1278,16 @@
           assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
 
           Label L_loop;
-
-           __ shrptr(start, CardTableModRefBS::card_shift);
-           __ addptr(end, BytesPerHeapOop);
-           __ shrptr(end, CardTableModRefBS::card_shift);
-           __ subptr(end, start); // number of bytes to copy
-
-          intptr_t disp = (intptr_t) ct->byte_map_base;
-          if (Assembler::is_simm32(disp)) {
-            Address cardtable(noreg, noreg, Address::no_scale, disp);
-            __ lea(scratch, cardtable);
-          } else {
-            ExternalAddress cardtable((address)disp);
-            __ lea(scratch, cardtable);
-          }
-
-          const Register count = end; // 'end' register contains bytes count now
+          const Register end = count;
+
+          __ leaq(end, Address(start, count, TIMES_OOP, 0));  // end == start+count*oop_size
+          __ subptr(end, BytesPerHeapOop); // back up to the last oop to make end inclusive
+          __ shrptr(start, CardTableModRefBS::card_shift);
+          __ shrptr(end,   CardTableModRefBS::card_shift);
+          __ subptr(end, start); // end --> cards count
+
+          int64_t disp = (int64_t) ct->byte_map_base;
+          __ mov64(scratch, disp);
           __ addptr(start, scratch);
         __ BIND(L_loop);
           __ movb(Address(start, count, Address::times_1), 0);
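
A hedged sketch of the rewritten card-marking arithmetic, which now takes (start, count) rather than an inclusive end pointer. It assumes 512-byte cards (CardTableModRefBS::card_shift == 9 in HotSpot of this vintage), count >= 1 (the callers above skip the barrier when nothing was copied), and a stand-in byte_map_base:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    static void post_barrier_sketch(uint8_t* byte_map_base,
                                    uintptr_t start, size_t count,
                                    size_t oop_size) {
      const unsigned card_shift = 9;                        // 512-byte cards
      uintptr_t end = start + count * oop_size - oop_size;  // last oop, inclusive
      uintptr_t first_card = start >> card_shift;
      uintptr_t last_card  = end   >> card_shift;
      for (uintptr_t c = first_card; c <= last_card; ++c) {
        byte_map_base[c] = 0;                               // dirty the card
      }
    }

    int main() {
      uint8_t cards[8] = {1, 1, 1, 1, 1, 1, 1, 1};
      // 128 eight-byte oops starting at (toy) address 512 span cards 1 and 2.
      post_barrier_sketch(cards, 512, 128, 8);
      assert(cards[0] == 1 && cards[1] == 0 && cards[2] == 0 && cards[3] == 1);
      return 0;
    }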
@@ -1944,8 +1939,7 @@
 
   __ BIND(L_exit);
     if (is_oop) {
-      __ leaq(end_to, Address(saved_to, dword_count, Address::times_4, -4));
-      gen_write_ref_array_post_barrier(saved_to, end_to, rax);
+      gen_write_ref_array_post_barrier(saved_to, dword_count, rax);
     }
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
@@ -2040,12 +2034,10 @@
     // Copy in multi-bytes chunks
     copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
 
-   __ bind(L_exit);
-     if (is_oop) {
-       Register end_to = rdx;
-       __ leaq(end_to, Address(to, dword_count, Address::times_4, -4));
-       gen_write_ref_array_post_barrier(to, end_to, rax);
-     }
+  __ BIND(L_exit);
+    if (is_oop) {
+      gen_write_ref_array_post_barrier(to, dword_count, rax);
+    }
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
@@ -2083,6 +2075,7 @@
     const Register end_from    = from; // source array end address
     const Register end_to      = rcx;  // destination array end address
     const Register saved_to    = to;
+    const Register saved_count = r11;
     // End pointers are inclusive, and if count is not zero they point
     // to the last unit copied:  end_to[0] := end_from[0]
 
@@ -2100,6 +2093,8 @@
                       // r9 and r10 may be used to save non-volatile registers
     // 'from', 'to' and 'qword_count' are now valid
     if (is_oop) {
+      // Save to and count for store barrier
+      __ movptr(saved_count, qword_count);
       // no registers are destroyed by this call
       gen_write_ref_array_pre_barrier(to, qword_count, dest_uninitialized);
     }
@@ -2132,7 +2127,7 @@
 
     if (is_oop) {
     __ BIND(L_exit);
-      gen_write_ref_array_post_barrier(saved_to, end_to, rax);
+      gen_write_ref_array_post_barrier(saved_to, saved_count, rax);
     }
     restore_arg_regs();
     if (is_oop) {
@@ -2215,8 +2210,7 @@
 
     if (is_oop) {
     __ BIND(L_exit);
-      __ lea(rcx, Address(to, saved_count, Address::times_8, -8));
-      gen_write_ref_array_post_barrier(to, rcx, rax);
+      gen_write_ref_array_post_barrier(to, saved_count, rax);
     }
     restore_arg_regs();
     if (is_oop) {
@@ -2403,20 +2397,20 @@
     // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
     // Emit GC store barriers for the oops we have copied (r14 + rdx),
     // and report their number to the caller.
-    assert_different_registers(rax, r14_length, count, to, end_to, rcx);
-    __ lea(end_to, to_element_addr);
-    __ addptr(end_to, -heapOopSize);      // make an inclusive end pointer
-    gen_write_ref_array_post_barrier(to, end_to, rscratch1);
-    __ movptr(rax, r14_length);           // original oops
-    __ addptr(rax, count);                // K = (original - remaining) oops
-    __ notptr(rax);                       // report (-1^K) to caller
-    __ jmp(L_done);
+    assert_different_registers(rax, r14_length, count, to, end_to, rcx, rscratch1);
+    Label L_post_barrier;
+    __ addptr(r14_length, count);     // K = (original - remaining) oops
+    __ movptr(rax, r14_length);       // save the value
+    __ notptr(rax);                   // report (-1^K) to caller (does not affect flags)
+    __ jccb(Assembler::notZero, L_post_barrier);
+    __ jmp(L_done); // K == 0, nothing was copied, skip post barrier
 
     // Come here on success only.
     __ BIND(L_do_card_marks);
-    __ addptr(end_to, -heapOopSize);         // make an inclusive end pointer
-    gen_write_ref_array_post_barrier(to, end_to, rscratch1);
-    __ xorptr(rax, rax);                  // return 0 on success
+    __ xorptr(rax, rax);              // return 0 on success
+
+    __ BIND(L_post_barrier);
+    gen_write_ref_array_post_barrier(to, r14_length, rscratch1);
 
     // Common exit point (success or failure).
     __ BIND(L_done);
--- a/src/os/linux/vm/os_linux.cpp	Wed May 29 13:24:23 2013 -0700
+++ b/src/os/linux/vm/os_linux.cpp	Fri May 31 14:14:30 2013 -0700
@@ -138,6 +138,7 @@
 Mutex* os::Linux::_createThread_lock = NULL;
 pthread_t os::Linux::_main_thread;
 int os::Linux::_page_size = -1;
+const int os::Linux::_vm_default_page_size = (8 * K);
 bool os::Linux::_is_floating_stack = false;
 bool os::Linux::_is_NPTL = false;
 bool os::Linux::_supports_fast_thread_cpu_time = false;
@@ -4385,6 +4386,15 @@
   Linux::clock_init();
   initial_time_count = os::elapsed_counter();
   pthread_mutex_init(&dl_mutex, NULL);
+
+  // If the page size of the VM is greater than 8K, determine the appropriate
+  // number of initial guard pages.  The user can change this with the
+  // command line arguments, if needed.
+  if (vm_page_size() > (int)Linux::vm_default_page_size()) {
+    StackYellowPages = 1;
+    StackRedPages = 1;
+    StackShadowPages = round_to((StackShadowPages*Linux::vm_default_page_size()), vm_page_size()) / vm_page_size();
+  }
 }
 
 // To install functions for atexit system call
@@ -4438,8 +4448,8 @@
   // Add in 2*BytesPerWord times page size to account for VM stack during
   // class initialization depending on 32 or 64 bit VM.
   os::Linux::min_stack_allowed = MAX2(os::Linux::min_stack_allowed,
-            (size_t)(StackYellowPages+StackRedPages+StackShadowPages+
-                    2*BytesPerWord COMPILER2_PRESENT(+1)) * Linux::page_size());
+            (size_t)(StackYellowPages+StackRedPages+StackShadowPages) * Linux::page_size() +
+                    (2*BytesPerWord COMPILER2_PRESENT(+1)) * Linux::vm_default_page_size());
 
   size_t threadStackSizeInBytes = ThreadStackSize * K;
   if (threadStackSizeInBytes != 0 &&
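
As I read the hunks above, the intent is: on kernels whose base page size exceeds the VM's 8K default, a single yellow and a single red guard page of the larger size suffice, and StackShadowPages is rescaled so the shadow area keeps roughly its original size in bytes. A sketch of the arithmetic, with an assumed StackShadowPages default of 20:

    #include <cassert>
    #include <cstddef>

    // round_to rounds up to a multiple of its alignment, like the HotSpot macro.
    static size_t round_to(size_t x, size_t align) {
      return ((x + align - 1) / align) * align;
    }

    int main() {
      const size_t vm_default_page_size = 8 * 1024;  // the new constant above
      const size_t page_size = 64 * 1024;            // e.g. a 64K-page kernel
      size_t yellow = 2, red = 1, shadow = 20;       // assumed defaults
      if (page_size > vm_default_page_size) {
        yellow = 1;
        red    = 1;
        shadow = round_to(shadow * vm_default_page_size, page_size) / page_size;
      }
      assert(yellow == 1 && red == 1);
      assert(shadow == 3);  // 20 * 8K = 160K, rounded up to 192K = 3 x 64K pages
      return 0;
    }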
--- a/src/os/linux/vm/os_linux.hpp	Wed May 29 13:24:23 2013 -0700
+++ b/src/os/linux/vm/os_linux.hpp	Fri May 31 14:14:30 2013 -0700
@@ -70,6 +70,7 @@
   static pthread_t _main_thread;
   static Mutex* _createThread_lock;
   static int _page_size;
+  static const int _vm_default_page_size;
 
   static julong available_memory();
   static julong physical_memory() { return _physical_memory; }
@@ -116,6 +117,8 @@
   static int page_size(void)                                        { return _page_size; }
   static void set_page_size(int val)                                { _page_size = val; }
 
+  static int vm_default_page_size(void)                             { return _vm_default_page_size; }
+
   static address   ucontext_get_pc(ucontext_t* uc);
   static intptr_t* ucontext_get_sp(ucontext_t* uc);
   static intptr_t* ucontext_get_fp(ucontext_t* uc);
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Wed May 29 13:24:23 2013 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Fri May 31 14:14:30 2013 -0700
@@ -129,7 +129,7 @@
 
   // The alignment used for eden and survivors within the young gen
   // and for boundary between young gen and old gen.
-  size_t intra_heap_alignment() const { return 64 * K; }
+  size_t intra_heap_alignment() const { return 64 * K * HeapWordSize; }
 
   size_t capacity() const;
   size_t used() const;
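
A small check of the units involved; the reading here (not stated in the diff itself) is that callers treat the returned alignment as bytes, while the new 64K-word region size in psParallelCompact.cpp below is declared in heap words, so the HeapWordSize factor makes the two agree on LP64:

    #include <cassert>
    #include <cstddef>

    int main() {
      const size_t K = 1024;
      const size_t HeapWordSize = 8;                      // LP64
      const size_t RegionSizeBytes = ((size_t)1 << 16) * HeapWordSize;
      size_t old_alignment = 64 * K;                      // 65536 bytes
      size_t new_alignment = 64 * K * HeapWordSize;       // 64K words = 512K bytes
      assert(old_alignment <  RegionSizeBytes);           // under-aligned
      assert(new_alignment == RegionSizeBytes);           // one region exactly
      return 0;
    }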
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Wed May 29 13:24:23 2013 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Fri May 31 14:14:30 2013 -0700
@@ -64,13 +64,25 @@
 #include <math.h>
 
 // All sizes are in HeapWords.
-const size_t ParallelCompactData::Log2RegionSize  = 9; // 512 words
+const size_t ParallelCompactData::Log2RegionSize  = 16; // 64K words
 const size_t ParallelCompactData::RegionSize      = (size_t)1 << Log2RegionSize;
 const size_t ParallelCompactData::RegionSizeBytes =
   RegionSize << LogHeapWordSize;
 const size_t ParallelCompactData::RegionSizeOffsetMask = RegionSize - 1;
 const size_t ParallelCompactData::RegionAddrOffsetMask = RegionSizeBytes - 1;
-const size_t ParallelCompactData::RegionAddrMask  = ~RegionAddrOffsetMask;
+const size_t ParallelCompactData::RegionAddrMask       = ~RegionAddrOffsetMask;
+
+const size_t ParallelCompactData::Log2BlockSize   = 7; // 128 words
+const size_t ParallelCompactData::BlockSize       = (size_t)1 << Log2BlockSize;
+const size_t ParallelCompactData::BlockSizeBytes  =
+  BlockSize << LogHeapWordSize;
+const size_t ParallelCompactData::BlockSizeOffsetMask = BlockSize - 1;
+const size_t ParallelCompactData::BlockAddrOffsetMask = BlockSizeBytes - 1;
+const size_t ParallelCompactData::BlockAddrMask       = ~BlockAddrOffsetMask;
+
+const size_t ParallelCompactData::BlocksPerRegion = RegionSize / BlockSize;
+const size_t ParallelCompactData::Log2BlocksPerRegion =
+  Log2RegionSize - Log2BlockSize;
 
 const ParallelCompactData::RegionData::region_sz_t
 ParallelCompactData::RegionData::dc_shift = 27;
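
Worked out on an LP64 build (LogHeapWordSize = 3), the new constants give 512K-byte regions split into 512 blocks of 1K bytes each:

    #include <cassert>
    #include <cstddef>

    int main() {
      const size_t LogHeapWordSize = 3;                       // 8-byte heap words
      const size_t Log2RegionSize = 16, Log2BlockSize = 7;
      const size_t RegionSize = (size_t)1 << Log2RegionSize;  // 64K words
      const size_t BlockSize  = (size_t)1 << Log2BlockSize;   // 128 words
      assert((RegionSize << LogHeapWordSize) == 512 * 1024);  // 512K bytes/region
      assert((BlockSize  << LogHeapWordSize) == 1024);        // 1K bytes/block
      assert(RegionSize / BlockSize == 512);                  // blocks per region
      return 0;
    }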
@@ -379,6 +391,10 @@
   _region_vspace = 0;
   _region_data = 0;
   _region_count = 0;
+
+  _block_vspace = 0;
+  _block_data = 0;
+  _block_count = 0;
 }
 
 bool ParallelCompactData::initialize(MemRegion covered_region)
@@ -392,8 +408,7 @@
   assert((region_size & RegionSizeOffsetMask) == 0,
          "region size not a multiple of RegionSize");
 
-  bool result = initialize_region_data(region_size);
-
+  bool result = initialize_region_data(region_size) && initialize_block_data();
   return result;
 }
 
@@ -438,17 +453,36 @@
   return false;
 }
 
+bool ParallelCompactData::initialize_block_data()
+{
+  assert(_region_count != 0, "region data must be initialized first");
+  const size_t count = _region_count << Log2BlocksPerRegion;
+  _block_vspace = create_vspace(count, sizeof(BlockData));
+  if (_block_vspace != 0) {
+    _block_data = (BlockData*)_block_vspace->reserved_low_addr();
+    _block_count = count;
+    return true;
+  }
+  return false;
+}
+
 void ParallelCompactData::clear()
 {
   memset(_region_data, 0, _region_vspace->committed_size());
+  memset(_block_data, 0, _block_vspace->committed_size());
 }
 
 void ParallelCompactData::clear_range(size_t beg_region, size_t end_region) {
   assert(beg_region <= _region_count, "beg_region out of range");
   assert(end_region <= _region_count, "end_region out of range");
+  assert(RegionSize % BlockSize == 0, "RegionSize not a multiple of BlockSize");
 
   const size_t region_cnt = end_region - beg_region;
   memset(_region_data + beg_region, 0, region_cnt * sizeof(RegionData));
+
+  const size_t beg_block = beg_region * BlocksPerRegion;
+  const size_t block_cnt = region_cnt * BlocksPerRegion;
+  memset(_block_data + beg_block, 0, block_cnt * sizeof(BlockData));
 }
 
 HeapWord* ParallelCompactData::partial_obj_end(size_t region_idx) const
@@ -727,45 +761,44 @@
 
 HeapWord* ParallelCompactData::calc_new_pointer(HeapWord* addr) {
   assert(addr != NULL, "Should detect NULL oop earlier");
-  assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap");
-#ifdef ASSERT
-  if (PSParallelCompact::mark_bitmap()->is_unmarked(addr)) {
-    gclog_or_tty->print_cr("calc_new_pointer:: addr " PTR_FORMAT, addr);
-  }
-#endif
-  assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked");
+  assert(PSParallelCompact::gc_heap()->is_in(addr), "not in heap");
+  assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "not marked");
 
   // Region covering the object.
-  size_t region_index = addr_to_region_idx(addr);
-  const RegionData* const region_ptr = region(region_index);
-  HeapWord* const region_addr = region_align_down(addr);
-
-  assert(addr < region_addr + RegionSize, "Region does not cover object");
-  assert(addr_to_region_ptr(region_addr) == region_ptr, "sanity check");
-
+  RegionData* const region_ptr = addr_to_region_ptr(addr);
   HeapWord* result = region_ptr->destination();
 
-  // If all the data in the region is live, then the new location of the object
-  // can be calculated from the destination of the region plus the offset of the
-  // object in the region.
+  // If the entire Region is live, the new location is region->destination + the
+  // offset of the object within the Region.
+
+  // Run some performance tests to determine if this special case pays off.  It
+  // is worth it for pointers into the dense prefix.  If the optimization to
+  // avoid pointer updates in regions that only point to the dense prefix is
+  // ever implemented, this should be revisited.
   if (region_ptr->data_size() == RegionSize) {
-    result += pointer_delta(addr, region_addr);
-    DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result);)
+    result += region_offset(addr);
     return result;
   }
 
-  // The new location of the object is
-  //    region destination +
-  //    size of the partial object extending onto the region +
-  //    sizes of the live objects in the Region that are to the left of addr
-  const size_t partial_obj_size = region_ptr->partial_obj_size();
-  HeapWord* const search_start = region_addr + partial_obj_size;
+  // Otherwise, the new location is region->destination + block offset + the
+  // number of live words in the Block that are (a) to the left of addr and (b)
+  // due to objects that start in the Block.
+
+  // Fill in the block table if necessary.  This is unsynchronized, so multiple
+  // threads may fill the block table for a region (harmless, since it is
+  // idempotent).
+  if (!region_ptr->blocks_filled()) {
+    PSParallelCompact::fill_blocks(addr_to_region_idx(addr));
+    region_ptr->set_blocks_filled();
+  }
+
+  HeapWord* const search_start = block_align_down(addr);
+  const size_t block_offset = addr_to_block_ptr(addr)->offset();
 
   const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap();
-  size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr));
-
-  result += partial_obj_size + live_to_left;
-  DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result);)
+  const size_t live = bitmap->live_words_in_range(search_start, oop(addr));
+  result += block_offset + live;
+  DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result));
   return result;
 }
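
The block table is what makes the 128x larger regions affordable here: without it, the live_words_in_range() bitmap scan in the not-fully-live case could walk up to a whole region, whereas the precomputed block offsets cap it at one 128-word block. A back-of-envelope comparison:

    #include <cstdio>

    int main() {
      const unsigned old_region_words = 1u << 9;   // previous Log2RegionSize = 9
      const unsigned new_region_words = 1u << 16;  // new Log2RegionSize = 16
      const unsigned block_words      = 1u << 7;   // new Log2BlockSize = 7
      printf("worst-case bitmap scan per lookup: old %u words, "
             "new without blocks %u words, new with blocks %u words\n",
             old_region_words, new_region_words, block_words);
      return 0;
    }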
 
@@ -780,7 +813,7 @@
   return updated_klass;
 }
 
-#ifdef  ASSERT
+#ifdef ASSERT
 void ParallelCompactData::verify_clear(const PSVirtualSpace* vspace)
 {
   const size_t* const beg = (const size_t*)vspace->committed_low_addr();
@@ -793,16 +826,10 @@
 void ParallelCompactData::verify_clear()
 {
   verify_clear(_region_vspace);
+  verify_clear(_block_vspace);
 }
 #endif  // #ifdef ASSERT
 
-#ifdef NOT_PRODUCT
-ParallelCompactData::RegionData* debug_region(size_t region_index) {
-  ParallelCompactData& sd = PSParallelCompact::summary_data();
-  return sd.region(region_index);
-}
-#endif
-
 STWGCTimer          PSParallelCompact::_gc_timer;
 ParallelOldTracer   PSParallelCompact::_gc_tracer;
 elapsedTimer        PSParallelCompact::_accumulated_time;
@@ -1997,11 +2024,6 @@
                                       maximum_heap_compaction);
 }
 
-bool ParallelCompactData::region_contains(size_t region_index, HeapWord* addr) {
-  size_t addr_region_index = addr_to_region_idx(addr);
-  return region_index == addr_region_index;
-}
-
 // This method contains no policy. You should probably
 // be calling invoke() instead.
 bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
@@ -2661,6 +2683,41 @@
   }
 }
 
+#ifdef ASSERT
+// Write a histogram of the number of times the block table was filled for a
+// region.
+void PSParallelCompact::write_block_fill_histogram(outputStream* const out)
+{
+  if (!TraceParallelOldGCCompactionPhase) return;
+
+  typedef ParallelCompactData::RegionData rd_t;
+  ParallelCompactData& sd = summary_data();
+
+  for (unsigned int id = old_space_id; id < last_space_id; ++id) {
+    MutableSpace* const spc = _space_info[id].space();
+    if (spc->bottom() != spc->top()) {
+      const rd_t* const beg = sd.addr_to_region_ptr(spc->bottom());
+      HeapWord* const top_aligned_up = sd.region_align_up(spc->top());
+      const rd_t* const end = sd.addr_to_region_ptr(top_aligned_up);
+
+      size_t histo[5] = { 0, 0, 0, 0, 0 };
+      const size_t histo_len = sizeof(histo) / sizeof(size_t);
+      const size_t region_cnt = pointer_delta(end, beg, sizeof(rd_t));
+
+      for (const rd_t* cur = beg; cur < end; ++cur) {
+        ++histo[MIN2(cur->blocks_filled_count(), histo_len - 1)];
+      }
+      out->print("%u %-4s" SIZE_FORMAT_W(5), id, space_names[id], region_cnt);
+      for (size_t i = 0; i < histo_len; ++i) {
+        out->print(" " SIZE_FORMAT_W(5) " %5.1f%%",
+                   histo[i], 100.0 * histo[i] / region_cnt);
+      }
+      out->cr();
+    }
+  }
+}
+#endif // #ifdef ASSERT
+
 void PSParallelCompact::compact() {
   // trace("5");
   GCTraceTime tm("compaction phase", print_phases(), true, &_gc_timer);
@@ -2702,6 +2759,8 @@
       update_deferred_objects(cm, SpaceId(id));
     }
   }
+
+  DEBUG_ONLY(write_block_fill_histogram(gclog_or_tty));
 }
 
 #ifdef  ASSERT
@@ -3371,6 +3430,57 @@
   } while (true);
 }
 
+void PSParallelCompact::fill_blocks(size_t region_idx)
+{
+  // Fill in the block table elements for the specified region.  Each block
+  // table element holds the number of live words in the region that are to the
+  // left of the first object that starts in the block.  Thus only blocks in
+  // which an object starts need to be filled.
+  //
+  // The algorithm scans the section of the bitmap that corresponds to the
+  // region, keeping a running total of the live words.  When an object start is
+  // found, if it's the first to start in the block that contains it, the
+  // current total is written to the block table element.
+  const size_t Log2BlockSize = ParallelCompactData::Log2BlockSize;
+  const size_t Log2RegionSize = ParallelCompactData::Log2RegionSize;
+  const size_t RegionSize = ParallelCompactData::RegionSize;
+
+  ParallelCompactData& sd = summary_data();
+  const size_t partial_obj_size = sd.region(region_idx)->partial_obj_size();
+  if (partial_obj_size >= RegionSize) {
+    return; // No objects start in this region.
+  }
+
+  // Ensure the first loop iteration decides that the block has changed.
+  size_t cur_block = sd.block_count();
+
+  const ParMarkBitMap* const bitmap = mark_bitmap();
+
+  const size_t Log2BitsPerBlock = Log2BlockSize - LogMinObjAlignment;
+  assert((size_t)1 << Log2BitsPerBlock ==
+         bitmap->words_to_bits(ParallelCompactData::BlockSize), "sanity");
+
+  size_t beg_bit = bitmap->words_to_bits(region_idx << Log2RegionSize);
+  const size_t range_end = beg_bit + bitmap->words_to_bits(RegionSize);
+  size_t live_bits = bitmap->words_to_bits(partial_obj_size);
+  beg_bit = bitmap->find_obj_beg(beg_bit + live_bits, range_end);
+  while (beg_bit < range_end) {
+    const size_t new_block = beg_bit >> Log2BitsPerBlock;
+    if (new_block != cur_block) {
+      cur_block = new_block;
+      sd.block(cur_block)->set_offset(bitmap->bits_to_words(live_bits));
+    }
+
+    const size_t end_bit = bitmap->find_obj_end(beg_bit, range_end);
+    if (end_bit < range_end - 1) {
+      live_bits += end_bit - beg_bit + 1;
+      beg_bit = bitmap->find_obj_beg(end_bit + 1, range_end);
+    } else {
+      return;
+    }
+  }
+}
+
 void
 PSParallelCompact::move_and_update(ParCompactionManager* cm, SpaceId space_id) {
   const MutableSpace* sp = space(space_id);
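
A toy model of the fill loop above, outside HotSpot: objects are (begin, end) word-index pairs within one region, and the block table entry for each block in which an object starts receives the running total of live words seen so far. Purely illustrative; the real code walks the mark bitmap with find_obj_beg()/find_obj_end():

    #include <cstddef>
    #include <cstdio>

    int main() {
      const size_t BlockSize = 128;
      struct Obj { size_t beg, end; };                   // inclusive word indices
      Obj objs[] = { {0, 99}, {130, 299}, {300, 555} };  // toy, region-relative
      size_t block_offset[512] = {};                     // 64K words / 128 = 512 blocks
      size_t cur_block = (size_t)-1;
      size_t live = 0;                                   // running live-word total
      for (const Obj& o : objs) {
        size_t blk = o.beg / BlockSize;
        if (blk != cur_block) {                          // first start in this block
          cur_block = blk;
          block_offset[blk] = live;
        }
        live += o.end - o.beg + 1;
      }
      printf("block 0 -> %zu, block 1 -> %zu, block 2 -> %zu\n",
             block_offset[0], block_offset[1], block_offset[2]);
      return 0;
    }

For this toy layout the output is 0, 100 and 270: the object at word 130 lands in block 1 (words 128..255) with 100 live words to its left, and the object at word 300 in block 2 with 270.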
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Wed May 29 13:24:23 2013 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Fri May 31 14:14:30 2013 -0700
@@ -223,6 +223,17 @@
   // Mask for the bits in a pointer to get the address of the start of a region.
   static const size_t RegionAddrMask;
 
+  static const size_t Log2BlockSize;
+  static const size_t BlockSize;
+  static const size_t BlockSizeBytes;
+
+  static const size_t BlockSizeOffsetMask;
+  static const size_t BlockAddrOffsetMask;
+  static const size_t BlockAddrMask;
+
+  static const size_t BlocksPerRegion;
+  static const size_t Log2BlocksPerRegion;
+
   class RegionData
   {
   public:
@@ -275,6 +286,12 @@
     inline uint destination_count() const;
     inline uint destination_count_raw() const;
 
+    // Whether the block table for this region has been filled.
+    inline bool blocks_filled() const;
+
+    // Number of times the block table was filled.
+    DEBUG_ONLY(inline size_t blocks_filled_count() const;)
+
     // The location of the java heap data that corresponds to this region.
     inline HeapWord* data_location() const;
 
@@ -299,6 +316,7 @@
     void set_partial_obj_size(size_t words)    {
       _partial_obj_size = (region_sz_t) words;
     }
+    inline void set_blocks_filled();
 
     inline void set_destination_count(uint count);
     inline void set_live_obj_size(size_t words);
@@ -331,7 +349,11 @@
     HeapWord*            _partial_obj_addr;
     region_sz_t          _partial_obj_size;
     region_sz_t volatile _dc_and_los;
+    bool                 _blocks_filled;
+
 #ifdef ASSERT
+    size_t               _blocks_filled_count;   // Number of block table fills.
+
     // These enable optimizations that are only partially implemented.  Use
     // debug builds to prevent the code fragments from breaking.
     HeapWord*            _data_location;
@@ -340,11 +362,26 @@
 
 #ifdef ASSERT
    public:
-    uint            _pushed;   // 0 until region is pushed onto a worker's stack
+    uint                 _pushed;   // 0 until region is pushed onto a stack
    private:
 #endif
   };
 
+  // "Blocks" allow shorter sections of the bitmap to be searched.  Each Block
+  // holds an offset, which is the amount of live data in the Region to the left
+  // of the first live object that starts in the Block.
+  class BlockData
+  {
+  public:
+    typedef unsigned short int blk_ofs_t;
+
+    blk_ofs_t offset() const    { return _offset; }
+    void set_offset(size_t val) { _offset = (blk_ofs_t)val; }
+
+  private:
+    blk_ofs_t _offset;
+  };
+
 public:
   ParallelCompactData();
   bool initialize(MemRegion covered_region);
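
One sizing observation (my inference, not asserted by the source): offsets stored in BlockData are live-word counts within one region, so they range over [0, RegionSize), and with RegionSize = 1 << 16 the 16-bit blk_ofs_t above is exactly wide enough:

    #include <climits>
    #include <cstddef>

    typedef unsigned short int blk_ofs_t;             // as in BlockData above
    static const size_t RegionSize = (size_t)1 << 16; // 64K heap words

    static_assert(sizeof(blk_ofs_t) * CHAR_BIT >= 16, "blk_ofs_t too narrow");
    static_assert(RegionSize - 1 <= (blk_ofs_t)-1,    "max in-region offset must fit");

    int main() { return 0; }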
@@ -355,8 +392,9 @@
   inline RegionData* region(size_t region_idx) const;
   inline size_t     region(const RegionData* const region_ptr) const;
 
-  // Returns true if the given address is contained within the region
-  bool region_contains(size_t region_index, HeapWord* addr);
+  size_t block_count() const { return _block_count; }
+  inline BlockData* block(size_t block_idx) const;
+  inline size_t     block(const BlockData* block_ptr) const;
 
   void add_obj(HeapWord* addr, size_t len);
   void add_obj(oop p, size_t len) { add_obj((HeapWord*)p, len); }
@@ -396,11 +434,24 @@
   inline HeapWord*  region_align_up(HeapWord* addr) const;
   inline bool       is_region_aligned(HeapWord* addr) const;
 
+  // Analogous to region_offset() for blocks.
+  size_t     block_offset(const HeapWord* addr) const;
+  size_t     addr_to_block_idx(const HeapWord* addr) const;
+  size_t     addr_to_block_idx(const oop obj) const {
+    return addr_to_block_idx((HeapWord*) obj);
+  }
+  inline BlockData* addr_to_block_ptr(const HeapWord* addr) const;
+  inline HeapWord*  block_to_addr(size_t block) const;
+  inline size_t     region_to_block_idx(size_t region) const;
+
+  inline HeapWord*  block_align_down(HeapWord* addr) const;
+  inline HeapWord*  block_align_up(HeapWord* addr) const;
+  inline bool       is_block_aligned(HeapWord* addr) const;
+
   // Return the address one past the end of the partial object.
   HeapWord* partial_obj_end(size_t region_idx) const;
 
-  // Return the new location of the object p after the
-  // the compaction.
+  // Return the location of the object after compaction.
   HeapWord* calc_new_pointer(HeapWord* addr);
 
   HeapWord* calc_new_pointer(oop p) {
@@ -416,6 +467,7 @@
 #endif  // #ifdef ASSERT
 
 private:
+  bool initialize_block_data();
   bool initialize_region_data(size_t region_size);
   PSVirtualSpace* create_vspace(size_t count, size_t element_size);
 
@@ -428,6 +480,10 @@
   PSVirtualSpace* _region_vspace;
   RegionData*     _region_data;
   size_t          _region_count;
+
+  PSVirtualSpace* _block_vspace;
+  BlockData*      _block_data;
+  size_t          _block_count;
 };
 
 inline uint
@@ -442,6 +498,28 @@
   return destination_count_raw() >> dc_shift;
 }
 
+inline bool
+ParallelCompactData::RegionData::blocks_filled() const
+{
+  return _blocks_filled;
+}
+
+#ifdef ASSERT
+inline size_t
+ParallelCompactData::RegionData::blocks_filled_count() const
+{
+  return _blocks_filled_count;
+}
+#endif // #ifdef ASSERT
+
+inline void
+ParallelCompactData::RegionData::set_blocks_filled()
+{
+  _blocks_filled = true;
+  // Debug builds count the number of times the table was filled.
+  DEBUG_ONLY(Atomic::inc_ptr(&_blocks_filled_count));
+}
+
 inline void
 ParallelCompactData::RegionData::set_destination_count(uint count)
 {
@@ -536,6 +614,12 @@
   return pointer_delta(region_ptr, _region_data, sizeof(RegionData));
 }
 
+inline ParallelCompactData::BlockData*
+ParallelCompactData::block(size_t n) const {
+  assert(n < block_count(), "bad arg");
+  return _block_data + n;
+}
+
 inline size_t
 ParallelCompactData::region_offset(const HeapWord* addr) const
 {
@@ -602,6 +686,63 @@
   return region_offset(addr) == 0;
 }
 
+inline size_t
+ParallelCompactData::block_offset(const HeapWord* addr) const
+{
+  assert(addr >= _region_start, "bad addr");
+  assert(addr <= _region_end, "bad addr");
+  return (size_t(addr) & BlockAddrOffsetMask) >> LogHeapWordSize;
+}
+
+inline size_t
+ParallelCompactData::addr_to_block_idx(const HeapWord* addr) const
+{
+  assert(addr >= _region_start, "bad addr");
+  assert(addr <= _region_end, "bad addr");
+  return pointer_delta(addr, _region_start) >> Log2BlockSize;
+}
+
+inline ParallelCompactData::BlockData*
+ParallelCompactData::addr_to_block_ptr(const HeapWord* addr) const
+{
+  return block(addr_to_block_idx(addr));
+}
+
+inline HeapWord*
+ParallelCompactData::block_to_addr(size_t block) const
+{
+  assert(block < _block_count, "block out of range");
+  return _region_start + (block << Log2BlockSize);
+}
+
+inline size_t
+ParallelCompactData::region_to_block_idx(size_t region) const
+{
+  return region << Log2BlocksPerRegion;
+}
+
+inline HeapWord*
+ParallelCompactData::block_align_down(HeapWord* addr) const
+{
+  assert(addr >= _region_start, "bad addr");
+  assert(addr < _region_end + RegionSize, "bad addr");
+  return (HeapWord*)(size_t(addr) & BlockAddrMask);
+}
+
+inline HeapWord*
+ParallelCompactData::block_align_up(HeapWord* addr) const
+{
+  assert(addr >= _region_start, "bad addr");
+  assert(addr <= _region_end, "bad addr");
+  return block_align_down(addr + BlockSizeOffsetMask);
+}
+
+inline bool
+ParallelCompactData::is_block_aligned(HeapWord* addr) const
+{
+  return block_offset(addr) == 0;
+}
+
 // Abstract closure for use with ParMarkBitMap::iterate(), which will invoke the
 // do_addr() method.
 //
@@ -779,6 +920,7 @@
   // Convenient access to type names.
   typedef ParMarkBitMap::idx_t idx_t;
   typedef ParallelCompactData::RegionData RegionData;
+  typedef ParallelCompactData::BlockData BlockData;
 
   typedef enum {
     perm_space_id, old_space_id, eden_space_id,
@@ -987,6 +1129,8 @@
   // Serial code executed in preparation for the compaction phase.
   static void compact_prologue();
 
+  DEBUG_ONLY(static void write_block_fill_histogram(outputStream* const out);)
+
   // Move objects to new locations.
   static void compact_perm(ParCompactionManager* cm);
   static void compact();
@@ -1158,6 +1302,9 @@
     fill_region(cm, region);
   }
 
+  // Fill in the block table for the specified region.
+  static void fill_blocks(size_t region_idx);
+
   // Update the deferred objects in the space.
   static void update_deferred_objects(ParCompactionManager* cm, SpaceId id);
 
--- a/src/share/vm/opto/loopnode.hpp	Wed May 29 13:24:23 2013 -0700
+++ b/src/share/vm/opto/loopnode.hpp	Fri May 31 14:14:30 2013 -0700
@@ -961,7 +961,7 @@
   // Has use internal to the vector set (ie. not in a phi at the loop head)
   bool has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop );
   // clone "n" for uses that are outside of loop
-  void clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist );
+  int  clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist );
   // clone "n" for special uses that are in the not_peeled region
   void clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
                                           VectorSet& not_peel, Node_List& sink_list, Node_List& worklist );
--- a/src/share/vm/opto/loopopts.cpp	Wed May 29 13:24:23 2013 -0700
+++ b/src/share/vm/opto/loopopts.cpp	Fri May 31 14:14:30 2013 -0700
@@ -1926,8 +1926,8 @@
 
 //------------------------------ clone_for_use_outside_loop -------------------------------------
 // clone "n" for uses that are outside of loop
-void PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
-
+int PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
+  int cloned = 0;
   assert(worklist.size() == 0, "should be empty");
   for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
     Node* use = n->fast_out(j);
@@ -1947,6 +1947,7 @@
     // clone "n" and insert it between the inputs of "n" and the use outside the loop
     Node* n_clone = n->clone();
     _igvn.replace_input_of(use, j, n_clone);
+    cloned++;
     Node* use_c;
     if (!use->is_Phi()) {
       use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
@@ -1964,6 +1965,7 @@
     }
 #endif
   }
+  return cloned;
 }
 
 
@@ -2482,6 +2484,7 @@
 
   // Evacuate nodes in peel region into the not_peeled region if possible
   uint new_phi_cnt = 0;
+  uint cloned_for_outside_use = 0;
   for (i = 0; i < peel_list.size();) {
     Node* n = peel_list.at(i);
 #if !defined(PRODUCT)
@@ -2500,8 +2503,7 @@
           // if not pinned and not a load (which maybe anti-dependent on a store)
           // and not a CMove (Matcher expects only bool->cmove).
           if ( n->in(0) == NULL && !n->is_Load() && !n->is_CMove() ) {
-            clone_for_use_outside_loop( loop, n, worklist );
-
+            cloned_for_outside_use += clone_for_use_outside_loop( loop, n, worklist );
             sink_list.push(n);
             peel     >>= n->_idx; // delete n from peel set.
             not_peel <<= n->_idx; // add n to not_peel set.
@@ -2538,6 +2540,12 @@
     // Inhibit more partial peeling on this loop
     assert(!head->is_partial_peel_loop(), "not partial peeled");
     head->mark_partial_peel_failed();
+    if (cloned_for_outside_use > 0) {
+      // Terminate this round of loop opts because
+      // the graph outside this loop was changed.
+      C->set_major_progress();
+      return true;
+    }
     return false;
   }
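
The shape of the fix, as a standalone sketch (illustrative types, not C2 code): the cloning helper now reports how many nodes it inserted, so a partial peel that is subsequently abandoned can still record that the graph outside the loop changed and request another round of loop optimizations:

    #include <cassert>

    struct Compile { bool major_progress = false; };

    static int clone_for_use_outside_loop(int uses_outside_loop) {
      return uses_outside_loop;         // number of clones added to the graph
    }

    static bool partial_peel(Compile& C) {
      int cloned_for_outside_use = clone_for_use_outside_loop(2);
      bool peel_possible = false;       // suppose peeling is then abandoned
      if (!peel_possible) {
        if (cloned_for_outside_use > 0) {
          C.major_progress = true;      // graph outside this loop was changed
          return true;
        }
        return false;
      }
      return true;
    }

    int main() {
      Compile C;
      assert(partial_peel(C) && C.major_progress);
      return 0;
    }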
 
--- a/src/share/vm/prims/jvmtiImpl.cpp	Wed May 29 13:24:23 2013 -0700
+++ b/src/share/vm/prims/jvmtiImpl.cpp	Fri May 31 14:14:30 2013 -0700
@@ -372,19 +372,14 @@
   case CLEAR_BREAKPOINT:
     _breakpoints->clear_at_safepoint(*_bp);
     break;
-  case CLEAR_ALL_BREAKPOINT:
-    _breakpoints->clearall_at_safepoint();
-    break;
   default:
     assert(false, "Unknown operation");
   }
 }
 
 void VM_ChangeBreakpoints::oops_do(OopClosure* f) {
-  // This operation keeps breakpoints alive
-  if (_breakpoints != NULL) {
-    _breakpoints->oops_do(f);
-  }
+  // The JvmtiBreakpoints in _breakpoints will be visited via
+  // JvmtiExport::oops_do.
   if (_bp != NULL) {
     _bp->oops_do(f);
   }
@@ -445,23 +440,13 @@
   }
 }
 
-void JvmtiBreakpoints::clearall_at_safepoint() {
-  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
-
-  int len = _bps.length();
-  for (int i=0; i<len; i++) {
-    _bps.at(i).clear();
-  }
-  _bps.clear();
-}
-
 int JvmtiBreakpoints::length() { return _bps.length(); }
 
 int JvmtiBreakpoints::set(JvmtiBreakpoint& bp) {
   if ( _bps.find(bp) != -1) {
      return JVMTI_ERROR_DUPLICATE;
   }
-  VM_ChangeBreakpoints set_breakpoint(this,VM_ChangeBreakpoints::SET_BREAKPOINT, &bp);
+  VM_ChangeBreakpoints set_breakpoint(VM_ChangeBreakpoints::SET_BREAKPOINT, &bp);
   VMThread::execute(&set_breakpoint);
   return JVMTI_ERROR_NONE;
 }
@@ -471,7 +456,7 @@
      return JVMTI_ERROR_NOT_FOUND;
   }
 
-  VM_ChangeBreakpoints clear_breakpoint(this,VM_ChangeBreakpoints::CLEAR_BREAKPOINT, &bp);
+  VM_ChangeBreakpoints clear_breakpoint(VM_ChangeBreakpoints::CLEAR_BREAKPOINT, &bp);
   VMThread::execute(&clear_breakpoint);
   return JVMTI_ERROR_NONE;
 }
@@ -502,11 +487,6 @@
   }
 }
 
-void JvmtiBreakpoints::clearall() {
-  VM_ChangeBreakpoints clearall_breakpoint(this,VM_ChangeBreakpoints::CLEAR_ALL_BREAKPOINT);
-  VMThread::execute(&clearall_breakpoint);
-}
-
 //
 // class JvmtiCurrentBreakpoints
 //
--- a/src/share/vm/prims/jvmtiImpl.hpp	Wed May 29 13:24:23 2013 -0700
+++ b/src/share/vm/prims/jvmtiImpl.hpp	Fri May 31 14:14:30 2013 -0700
@@ -200,47 +200,6 @@
 
 ///////////////////////////////////////////////////////////////
 //
-// class VM_ChangeBreakpoints
-// Used by              : JvmtiBreakpoints
-// Used by JVMTI methods: none directly.
-// Note: A Helper class.
-//
-// VM_ChangeBreakpoints implements a VM_Operation for ALL modifications to the JvmtiBreakpoints class.
-//
-
-class VM_ChangeBreakpoints : public VM_Operation {
-private:
-  JvmtiBreakpoints* _breakpoints;
-  int               _operation;
-  JvmtiBreakpoint*  _bp;
-
-public:
-  enum { SET_BREAKPOINT=0, CLEAR_BREAKPOINT=1, CLEAR_ALL_BREAKPOINT=2 };
-
-  VM_ChangeBreakpoints(JvmtiBreakpoints* breakpoints, int operation) {
-    _breakpoints = breakpoints;
-    _bp = NULL;
-    _operation = operation;
-    assert(breakpoints != NULL, "breakpoints != NULL");
-    assert(operation == CLEAR_ALL_BREAKPOINT, "unknown breakpoint operation");
-  }
-  VM_ChangeBreakpoints(JvmtiBreakpoints* breakpoints, int operation, JvmtiBreakpoint *bp) {
-    _breakpoints = breakpoints;
-    _bp = bp;
-    _operation = operation;
-    assert(breakpoints != NULL, "breakpoints != NULL");
-    assert(bp != NULL, "bp != NULL");
-    assert(operation == SET_BREAKPOINT || operation == CLEAR_BREAKPOINT , "unknown breakpoint operation");
-  }
-
-  VMOp_Type type() const { return VMOp_ChangeBreakpoints; }
-  void doit();
-  void oops_do(OopClosure* f);
-};
-
-
-///////////////////////////////////////////////////////////////
-//
 // class JvmtiBreakpoints
 // Used by              : JvmtiCurrentBreakpoints
 // Used by JVMTI methods: none directly
@@ -267,7 +226,6 @@
   friend class VM_ChangeBreakpoints;
   void set_at_safepoint(JvmtiBreakpoint& bp);
   void clear_at_safepoint(JvmtiBreakpoint& bp);
-  void clearall_at_safepoint();
 
   static void do_element(GrowableElement *e);
 
@@ -282,7 +240,6 @@
   int  set(JvmtiBreakpoint& bp);
   int  clear(JvmtiBreakpoint& bp);
   void clearall_in_class_at_safepoint(klassOop klass);
-  void clearall();
   void gc_epilogue();
 };
 
@@ -340,6 +297,40 @@
     return false;
 }
 
+
+///////////////////////////////////////////////////////////////
+//
+// class VM_ChangeBreakpoints
+// Used by              : JvmtiBreakpoints
+// Used by JVMTI methods: none directly.
+// Note: A Helper class.
+//
+// VM_ChangeBreakpoints implements a VM_Operation for ALL modifications to the JvmtiBreakpoints class.
+//
+
+class VM_ChangeBreakpoints : public VM_Operation {
+private:
+  JvmtiBreakpoints* _breakpoints;
+  int               _operation;
+  JvmtiBreakpoint*  _bp;
+
+public:
+  enum { SET_BREAKPOINT=0, CLEAR_BREAKPOINT=1 };
+
+  VM_ChangeBreakpoints(int operation, JvmtiBreakpoint *bp) {
+    JvmtiBreakpoints& current_bps = JvmtiCurrentBreakpoints::get_jvmti_breakpoints();
+    _breakpoints = &current_bps;
+    _bp = bp;
+    _operation = operation;
+    assert(bp != NULL, "bp != NULL");
+  }
+
+  VMOp_Type type() const { return VMOp_ChangeBreakpoints; }
+  void doit();
+  void oops_do(OopClosure* f);
+};
+
+
 ///////////////////////////////////////////////////////////////
 // The get/set local operations must only be done by the VM thread
 // because the interpreter version needs to access oop maps, which can
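
A standalone sketch of the refactored shape (illustrative names, not the HotSpot types): the operation now looks up the global breakpoint list itself instead of taking it from the caller, matching the two-argument construction at the set()/clear() call sites in jvmtiImpl.cpp above:

    #include <cassert>

    struct BreakpointList {
      int count = 0;
      static BreakpointList& current();        // the global list
    };
    BreakpointList& BreakpointList::current() {
      static BreakpointList list;
      return list;
    }

    struct ChangeBreakpointsOp {
      enum { SET_BREAKPOINT = 0, CLEAR_BREAKPOINT = 1 };
      BreakpointList* _list;
      int _op;
      explicit ChangeBreakpointsOp(int op)
          : _list(&BreakpointList::current()), _op(op) {}  // fetched, not passed
      void doit() {
        if (_op == SET_BREAKPOINT) ++_list->count; else --_list->count;
      }
    };

    int main() {
      ChangeBreakpointsOp op(ChangeBreakpointsOp::SET_BREAKPOINT);
      op.doit();   // in HotSpot this runs inside VMThread::execute(&op)
      assert(BreakpointList::current().count == 1);
      return 0;
    }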
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/8010927/Test8010927.java	Fri May 31 14:14:30 2013 -0700
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8010927
+ * @summary Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
+ * @library /testlibrary/whitebox /testlibrary
+ * @build Test8010927
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:. -Xmx64m -XX:NewSize=20971520 -XX:MaxNewSize=32m -XX:-UseTLAB -XX:-UseParNewGC -XX:-UseAdaptiveSizePolicy Test8010927
+ */
+
+import sun.hotspot.WhiteBox;
+import java.lang.reflect.Field;
+import sun.misc.Unsafe;
+
+/**
+ * The test creates uncommitted space between oldgen and young gen
+ * by specifying MaxNewSize bigger than NewSize.
+ * NewSize = 20971520 = (512*4K) * 10 for 4K pages
+ * Then it tries to execute arraycopy() with an element type check
+ * to an array at the end of survivor space, near the unused space.
+ */
+
+public class Test8010927 {
+
+  private static final Unsafe U;
+
+  static {
+    try {
+      Field unsafe = Unsafe.class.getDeclaredField("theUnsafe");
+      unsafe.setAccessible(true);
+      U = (Unsafe) unsafe.get(null);
+    } catch (Exception e) {
+      throw new Error(e);
+    }
+  }
+
+  public static Object[] o;
+
+  public static final boolean debug = Boolean.getBoolean("debug");
+
+  // 2 different object arrays but the same element type
+  static Test8010927[] masterA;
+  static Object[] masterB;
+  static final Test8010927 elem = new Test8010927();
+  static final WhiteBox wb = WhiteBox.getWhiteBox();
+
+  static final int obj_header_size = U.ARRAY_OBJECT_BASE_OFFSET;
+  static final int heap_oop_size = wb.getHeapOopSize();
+  static final int card_size = 512;
+  static final int one_card = (card_size - obj_header_size)/heap_oop_size;
+
+  static final int surv_size = 2112 * 1024;
+
+  // The size is big enough not to fit into survivor space.
+  static final Object[] cache = new Object[(surv_size / card_size)];
+
+  public static void main(String[] args) {
+    masterA = new Test8010927[one_card];
+    masterB = new Object[one_card];
+    for (int i = 0; i < one_card; ++i) {
+      masterA[i] = elem;
+      masterB[i] = elem;
+    }
+
+    // Record the address of cache[], then force a GC to move it to the old gen.
+    long low_limit = wb.getObjectAddress(cache);
+    System.gc();
+    // cache[] is now in the old gen; record its new address.
+    long upper_limit = wb.getObjectAddress(cache);
+    if ((low_limit - upper_limit) > 0) { // subtraction works with unsigned values
+      // OldGen is placed before young gen for ParallelOldGC.
+      upper_limit = low_limit + 21000000l; // +20971520
+    }
+    // Each A[one_card] array is 512 bytes,
+    // so it takes about 40000 allocations to trigger a GC.
+    // cache[] has 8192 elements, so a GC should happen
+    // every 5th iteration.
+    for(long l = 0; l < 20; l++) {
+      fill_heap();
+      if (debug) {
+        System.out.println("test oop_disjoint_arraycopy");
+      }
+      testA_arraycopy();
+      if (debug) {
+        System.out.println("test checkcast_arraycopy");
+      }
+      testB_arraycopy();
+      // Execute arraycopy to the topmost array in young gen
+      if (debug) {
+        int top_index = get_top_address(low_limit, upper_limit);
+        if (top_index >= 0) {
+          long addr = wb.getObjectAddress(cache[top_index]);
+          System.out.println("top_addr: 0x" + Long.toHexString(addr) + ", 0x" + Long.toHexString(addr + 512));
+        }
+      }
+    }
+  }
+  static void fill_heap() {
+    for (int i = 0; i < cache.length; ++i) {
+      o = new Test8010927[one_card];
+      System.arraycopy(masterA, 0, o, 0, masterA.length);
+      cache[i] = o;
+    }
+    for (long j = 0; j < 256; ++j) {
+      o = new Long[10000]; // to trigger GC
+    }
+  }
+  static void testA_arraycopy() {
+    for (int i = 0; i < cache.length; ++i) {
+      System.arraycopy(masterA, 0, cache[i], 0, masterA.length);
+    }
+  }
+  static void testB_arraycopy() {
+    for (int i = 0; i < cache.length; ++i) {
+      System.arraycopy(masterB, 0, cache[i], 0, masterB.length);
+    }
+  }
+  static int get_top_address(long min, long max) {
+    int index = -1;
+    long addr = min;
+    for (int i = 0; i < cache.length; ++i) {
+      long test = wb.getObjectAddress(cache[i]);
+      if (((test - addr) > 0) && ((max - test) > 0)) { // subtraction works with unsigned values
+        addr = test;
+        index = i;
+      }
+    }
+    return index;
+  }
+}