changeset 1784:2d127394260e

6916623: Align object to 16 bytes to use Compressed Oops with java heap up to 64Gb Summary: Added new product ObjectAlignmentInBytes flag to control object alignment. Reviewed-by: twisti, ysr, iveresov
author kvn
date Thu, 27 May 2010 18:01:56 -0700
parents de91a2f25c7e
children 87fc6aca31ab
files agent/src/share/classes/sun/jvm/hotspot/memory/CompactibleFreeListSpace.java agent/src/share/classes/sun/jvm/hotspot/oops/Oop.java agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java src/cpu/sparc/vm/copy_sparc.hpp src/cpu/sparc/vm/sparc.ad src/cpu/x86/vm/assembler_x86.cpp src/cpu/x86/vm/x86_64.ad src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp src/share/vm/gc_interface/collectedHeap.cpp src/share/vm/memory/space.cpp src/share/vm/memory/threadLocalAllocBuffer.inline.hpp src/share/vm/memory/universe.cpp src/share/vm/oops/arrayOop.hpp src/share/vm/oops/oop.hpp src/share/vm/oops/oop.inline.hpp src/share/vm/runtime/arguments.cpp src/share/vm/runtime/globals.hpp src/share/vm/runtime/vmStructs.cpp src/share/vm/utilities/copy.hpp src/share/vm/utilities/globalDefinitions.cpp src/share/vm/utilities/globalDefinitions.hpp
diffstat 29 files changed, 223 insertions(+), 137 deletions(-) [+]
line wrap: on
line diff
--- a/agent/src/share/classes/sun/jvm/hotspot/memory/CompactibleFreeListSpace.java	Thu May 27 09:54:07 2010 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/memory/CompactibleFreeListSpace.java	Thu May 27 18:01:56 2010 -0700
@@ -73,18 +73,11 @@
 
    public CompactibleFreeListSpace(Address addr) {
       super(addr);
-      if ( VM.getVM().isLP64() ) {
-         heapWordSize = 8;
-         IndexSetStart = 1;
-         IndexSetStride = 1;
-      }
-      else {
-         heapWordSize = 4;
-         IndexSetStart = 2;
-         IndexSetStride = 2;
-      }
-
-      IndexSetSize = 257;
+      VM vm = VM.getVM();
+      heapWordSize   = vm.getHeapWordSize();
+      IndexSetStart  = vm.getMinObjAlignmentInBytes() / heapWordSize;
+      IndexSetStride = IndexSetStart;
+      IndexSetSize   = 257;
    }
 
    // Accessing block offset table
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/Oop.java	Thu May 27 09:54:07 2010 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/Oop.java	Thu May 27 18:01:56 2010 -0700
@@ -128,7 +128,7 @@
 
   // Align the object size.
   public static long alignObjectSize(long size) {
-    return VM.getVM().alignUp(size, VM.getVM().getMinObjAlignment());
+    return VM.getVM().alignUp(size, VM.getVM().getMinObjAlignmentInBytes());
   }
 
   // All vm's align longs, so pad out certain offsets.
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Thu May 27 09:54:07 2010 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Thu May 27 18:01:56 2010 -0700
@@ -93,6 +93,7 @@
   /** alignment constants */
   private boolean      isLP64;
   private int          bytesPerLong;
+  private int          objectAlignmentInBytes;
   private int          minObjAlignmentInBytes;
   private int          logMinObjAlignmentInBytes;
   private int          heapWordSize;
@@ -313,9 +314,15 @@
       isLP64 = debugger.getMachineDescription().isLP64();
     }
     bytesPerLong = db.lookupIntConstant("BytesPerLong").intValue();
-    minObjAlignmentInBytes = db.lookupIntConstant("MinObjAlignmentInBytes").intValue();
-    // minObjAlignment = db.lookupIntConstant("MinObjAlignment").intValue();
-    logMinObjAlignmentInBytes = db.lookupIntConstant("LogMinObjAlignmentInBytes").intValue();
+    minObjAlignmentInBytes = getObjectAlignmentInBytes();
+    if (minObjAlignmentInBytes == 8) {
+      logMinObjAlignmentInBytes = 3;
+    } else if (minObjAlignmentInBytes == 16) {
+      logMinObjAlignmentInBytes = 4;
+    } else {
+      throw new RuntimeException("Object alignment " + minObjAlignmentInBytes + " not yet supported");
+    }
+
     heapWordSize = db.lookupIntConstant("HeapWordSize").intValue();
     oopSize  = db.lookupIntConstant("oopSize").intValue();
 
@@ -492,10 +499,6 @@
   }
 
   /** Get minimum object alignment in bytes. */
-  public int getMinObjAlignment() {
-    return minObjAlignmentInBytes;
-  }
-
   public int getMinObjAlignmentInBytes() {
     return minObjAlignmentInBytes;
   }
@@ -754,6 +757,14 @@
     return compressedOopsEnabled.booleanValue();
   }
 
+  public int getObjectAlignmentInBytes() {
+    if (objectAlignmentInBytes == 0) {
+        Flag flag = getCommandLineFlag("ObjectAlignmentInBytes");
+        objectAlignmentInBytes = (flag == null) ? 8 : (int)flag.getIntx();
+    }
+    return objectAlignmentInBytes;
+  }
+
   // returns null, if not available.
   public Flag[] getCommandLineFlags() {
     if (commandLineFlags == null) {
--- a/src/cpu/sparc/vm/copy_sparc.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/cpu/sparc/vm/copy_sparc.hpp	Thu May 27 18:01:56 2010 -0700
@@ -154,7 +154,7 @@
 }
 
 static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
-  assert(MinObjAlignmentInBytes == BytesPerLong, "need alternate implementation");
+  assert(MinObjAlignmentInBytes >= BytesPerLong, "need alternate implementation");
 
    julong* to = (julong*)tohw;
    julong  v  = ((julong)value << 32) | value;
@@ -162,7 +162,7 @@
    // and be equal to 0 on 64-bit platform.
    size_t odd = count % (BytesPerLong / HeapWordSize) ;
 
-   size_t aligned_count = align_object_size(count - odd) / HeapWordsPerLong;
+   size_t aligned_count = align_object_offset(count - odd) / HeapWordsPerLong;
    julong* end = ((julong*)tohw) + aligned_count - 1;
    while (to <= end) {
      DEBUG_ONLY(count -= BytesPerLong / HeapWordSize ;)
--- a/src/cpu/sparc/vm/sparc.ad	Thu May 27 09:54:07 2010 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Thu May 27 18:01:56 2010 -0700
@@ -821,6 +821,10 @@
           !(n->ideal_Opcode()==Op_ConvI2D   && ld_op==Op_LoadF) &&
           !(n->ideal_Opcode()==Op_PrefetchRead  && ld_op==Op_LoadI) &&
           !(n->ideal_Opcode()==Op_PrefetchWrite && ld_op==Op_LoadI) &&
+          !(n->ideal_Opcode()==Op_Load2I    && ld_op==Op_LoadD) &&
+          !(n->ideal_Opcode()==Op_Load4C    && ld_op==Op_LoadD) &&
+          !(n->ideal_Opcode()==Op_Load4S    && ld_op==Op_LoadD) &&
+          !(n->ideal_Opcode()==Op_Load8B    && ld_op==Op_LoadD) &&
           !(n->rule() == loadUB_rule)) {
         verify_oops_warning(n, n->ideal_Opcode(), ld_op);
       }
@@ -832,6 +836,9 @@
           !(n->ideal_Opcode()==Op_StoreI && st_op==Op_StoreF) &&
           !(n->ideal_Opcode()==Op_StoreF && st_op==Op_StoreI) &&
           !(n->ideal_Opcode()==Op_StoreL && st_op==Op_StoreI) &&
+          !(n->ideal_Opcode()==Op_Store2I && st_op==Op_StoreD) &&
+          !(n->ideal_Opcode()==Op_Store4C && st_op==Op_StoreD) &&
+          !(n->ideal_Opcode()==Op_Store8B && st_op==Op_StoreD) &&
           !(n->ideal_Opcode()==Op_StoreD && st_op==Op_StoreI && n->rule() == storeD0_rule)) {
         verify_oops_warning(n, n->ideal_Opcode(), st_op);
       }
--- a/src/cpu/x86/vm/assembler_x86.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Thu May 27 18:01:56 2010 -0700
@@ -8185,9 +8185,14 @@
     assert (Universe::heap() != NULL, "java heap should be initialized");
     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
     if (Universe::narrow_oop_shift() != 0) {
-      assert(Address::times_8 == LogMinObjAlignmentInBytes &&
-             Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
-      movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+      assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+      if (LogMinObjAlignmentInBytes == Address::times_8) {
+        movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+      } else {
+        // OK to use shift since we don't need to preserve flags.
+        shlq(dst, LogMinObjAlignmentInBytes);
+        movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+      }
     } else {
       movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
     }
@@ -8361,31 +8366,43 @@
 }
 
 void  MacroAssembler::decode_heap_oop_not_null(Register r) {
+  // Note: it will change flags
   assert (UseCompressedOops, "should only be used for compressed headers");
   assert (Universe::heap() != NULL, "java heap should be initialized");
   // Cannot assert, unverified entry point counts instructions (see .ad file)
   // vtableStubs also counts instructions in pd_code_size_limit.
   // Also do not verify_oop as this is called by verify_oop.
   if (Universe::narrow_oop_shift() != 0) {
-    assert (Address::times_8 == LogMinObjAlignmentInBytes &&
-            Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
-    // Don't use Shift since it modifies flags.
-    leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
+    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    shlq(r, LogMinObjAlignmentInBytes);
+    if (Universe::narrow_oop_base() != NULL) {
+      addq(r, r12_heapbase);
+    }
   } else {
     assert (Universe::narrow_oop_base() == NULL, "sanity");
   }
 }
 
 void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
+  // Note: it will change flags
   assert (UseCompressedOops, "should only be used for compressed headers");
   assert (Universe::heap() != NULL, "java heap should be initialized");
   // Cannot assert, unverified entry point counts instructions (see .ad file)
   // vtableStubs also counts instructions in pd_code_size_limit.
   // Also do not verify_oop as this is called by verify_oop.
   if (Universe::narrow_oop_shift() != 0) {
-    assert (Address::times_8 == LogMinObjAlignmentInBytes &&
-            Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
-    leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
+    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    if (LogMinObjAlignmentInBytes == Address::times_8) {
+      leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
+    } else {
+      if (dst != src) {
+        movq(dst, src);
+      }
+      shlq(dst, LogMinObjAlignmentInBytes);
+      if (Universe::narrow_oop_base() != NULL) {
+        addq(dst, r12_heapbase);
+      }
+    }
   } else if (dst != src) {
     assert (Universe::narrow_oop_base() == NULL, "sanity");
     movq(dst, src);
--- a/src/cpu/x86/vm/x86_64.ad	Thu May 27 09:54:07 2010 -0700
+++ b/src/cpu/x86/vm/x86_64.ad	Thu May 27 18:01:56 2010 -0700
@@ -1851,29 +1851,24 @@
 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 {
   if (UseCompressedOops) {
-    st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes());
+    st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
     if (Universe::narrow_oop_shift() != 0) {
-      st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
-    }
-    st->print_cr("cmpq    rax, rscratch1\t # Inline cache check");
+      st->print_cr("\tdecode_heap_oop_not_null rscratch1, rscratch1");
+    }
+    st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
   } else {
-    st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
-                 "# Inline cache check", oopDesc::klass_offset_in_bytes());
+    st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
+                 "# Inline cache check");
   }
   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
-  st->print_cr("\tnop");
-  if (!OptoBreakpoint) {
-    st->print_cr("\tnop");
-  }
+  st->print_cr("\tnop\t# nops to align entry point");
 }
 #endif
 
 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 {
   MacroAssembler masm(&cbuf);
-#ifdef ASSERT
   uint code_size = cbuf.code_size();
-#endif
   if (UseCompressedOops) {
     masm.load_klass(rscratch1, j_rarg0);
     masm.cmpptr(rax, rscratch1);
@@ -1884,33 +1879,21 @@
   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 
   /* WARNING these NOPs are critical so that verified entry point is properly
-     aligned for patching by NativeJump::patch_verified_entry() */
-  int nops_cnt = 1;
-  if (!OptoBreakpoint) {
+     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
+  int nops_cnt = 4 - ((cbuf.code_size() - code_size) & 0x3);
+  if (OptoBreakpoint) {
     // Leave space for int3
-     nops_cnt += 1;
+    nops_cnt -= 1;
   }
-  if (UseCompressedOops) {
-    // ??? divisible by 4 is aligned?
-    nops_cnt += 1;
-  }
-  masm.nop(nops_cnt);
-
-  assert(cbuf.code_size() - code_size == size(ra_),
-         "checking code size of inline cache node");
+  nops_cnt &= 0x3; // Do not add nops if code is aligned.
+  if (nops_cnt > 0)
+    masm.nop(nops_cnt);
 }
 
 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 {
-  if (UseCompressedOops) {
-    if (Universe::narrow_oop_shift() == 0) {
-      return OptoBreakpoint ? 15 : 16;
-    } else {
-      return OptoBreakpoint ? 19 : 20;
-    }
-  } else {
-    return OptoBreakpoint ? 11 : 12;
-  }
+  return MachNode::size(ra_); // too many variables; just compute it
+                              // the hard way
 }
 
 
@@ -5127,7 +5110,7 @@
 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 // we can't free r12 even with Universe::narrow_oop_base() == NULL.
 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
-  predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
+  predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));
   constraint(ALLOC_IN_RC(ptr_reg));
   match(AddP (DecodeN reg) off);
 
@@ -7742,10 +7725,11 @@
   ins_pipe(ialu_reg_long);
 %}
 
-instruct decodeHeapOop_not_null(rRegP dst, rRegN src) %{
+instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
   match(Set dst (DecodeN src));
+  effect(KILL cr);
   format %{ "decode_heap_oop_not_null $dst,$src" %}
   ins_encode %{
     Register s = $src$$Register;
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Thu May 27 18:01:56 2010 -0700
@@ -32,6 +32,23 @@
 // highest ranked  free list lock rank
 int CompactibleFreeListSpace::_lockRank = Mutex::leaf + 3;
 
+// Defaults are 0 so things will break badly if incorrectly initialized.
+int CompactibleFreeListSpace::IndexSetStart  = 0;
+int CompactibleFreeListSpace::IndexSetStride = 0;
+
+size_t MinChunkSize = 0;
+
+void CompactibleFreeListSpace::set_cms_values() {
+  // Set CMS global values
+  assert(MinChunkSize == 0, "already set");
+  #define numQuanta(x,y) ((x+y-1)/y)
+  MinChunkSize = numQuanta(sizeof(FreeChunk), MinObjAlignmentInBytes) * MinObjAlignment;
+
+  assert(IndexSetStart == 0 && IndexSetStride == 0, "already set");
+  IndexSetStart  = MinObjAlignment;
+  IndexSetStride = MinObjAlignment;
+}
+
 // Constructor
 CompactibleFreeListSpace::CompactibleFreeListSpace(BlockOffsetSharedArray* bs,
   MemRegion mr, bool use_adaptive_freelists,
@@ -302,7 +319,7 @@
 
 size_t CompactibleFreeListSpace::totalCountInIndexedFreeLists() const {
   size_t count = 0;
-  for (int i = MinChunkSize; i < IndexSetSize; i++) {
+  for (int i = (int)MinChunkSize; i < IndexSetSize; i++) {
     debug_only(
       ssize_t total_list_count = 0;
       for (FreeChunk* fc = _indexedFreeList[i].head(); fc != NULL;
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Thu May 27 18:01:56 2010 -0700
@@ -91,10 +91,10 @@
   enum SomeConstants {
     SmallForLinearAlloc = 16,        // size < this then use _sLAB
     SmallForDictionary  = 257,       // size < this then use _indexedFreeList
-    IndexSetSize        = SmallForDictionary,  // keep this odd-sized
-    IndexSetStart       = MinObjAlignment,
-    IndexSetStride      = MinObjAlignment
+    IndexSetSize        = SmallForDictionary  // keep this odd-sized
   };
+  static int IndexSetStart;
+  static int IndexSetStride;
 
  private:
   enum FitStrategyOptions {
@@ -278,6 +278,9 @@
   HeapWord* nearLargestChunk() const { return _nearLargestChunk; }
   void set_nearLargestChunk(HeapWord* v) { _nearLargestChunk = v; }
 
+  // Set CMS global values
+  static void set_cms_values();
+
   // Return the free chunk at the end of the space.  If no such
   // chunk exists, return NULL.
   FreeChunk* find_chunk_at_end();
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu May 27 18:01:56 2010 -0700
@@ -159,7 +159,7 @@
      CardTableRS* ct, bool use_adaptive_freelists,
      FreeBlockDictionary::DictionaryChoice dictionaryChoice) :
   CardGeneration(rs, initial_byte_size, level, ct),
-  _dilatation_factor(((double)MinChunkSize)/((double)(oopDesc::header_size()))),
+  _dilatation_factor(((double)MinChunkSize)/((double)(CollectedHeap::min_fill_size()))),
   _debug_collection_type(Concurrent_collection_type)
 {
   HeapWord* bottom = (HeapWord*) _virtual_space.low();
@@ -222,7 +222,7 @@
   // promoting generation, we'll instead just use the mimimum
   // object size (which today is a header's worth of space);
   // note that all arithmetic is in units of HeapWords.
-  assert(MinChunkSize >= oopDesc::header_size(), "just checking");
+  assert(MinChunkSize >= CollectedHeap::min_fill_size(), "just checking");
   assert(_dilatation_factor >= 1.0, "from previous assert");
 }
 
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp	Thu May 27 18:01:56 2010 -0700
@@ -133,9 +133,5 @@
   void print_on(outputStream* st);
 };
 
-// Alignment helpers etc.
-#define numQuanta(x,y) ((x+y-1)/y)
-enum AlignmentConstants {
-  MinChunkSize = numQuanta(sizeof(FreeChunk), MinObjAlignmentInBytes) * MinObjAlignment
-};
+extern size_t MinChunkSize;
 
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu May 27 18:01:56 2010 -0700
@@ -3644,7 +3644,7 @@
   do {
     free_words = r->free()/HeapWordSize;
     // If there's too little space, no one can allocate, so we're done.
-    if (free_words < (size_t)oopDesc::header_size()) return;
+    if (free_words < CollectedHeap::min_fill_size()) return;
     // Otherwise, try to claim it.
     block = r->par_allocate(free_words);
   } while (block == NULL);
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu May 27 18:01:56 2010 -0700
@@ -2523,14 +2523,14 @@
   }
   if (ParallelGCThreads > 0) {
     const size_t OverpartitionFactor = 4;
-    const size_t MinChunkSize = 8;
-    const size_t ChunkSize =
+    const size_t MinWorkUnit = 8;
+    const size_t WorkUnit =
       MAX2(_g1->n_regions() / (ParallelGCThreads * OverpartitionFactor),
-           MinChunkSize);
+           MinWorkUnit);
     _collectionSetChooser->prepareForAddMarkedHeapRegionsPar(_g1->n_regions(),
-                                                             ChunkSize);
+                                                             WorkUnit);
     ParKnownGarbageTask parKnownGarbageTask(_collectionSetChooser,
-                                            (int) ChunkSize);
+                                            (int) WorkUnit);
     _g1->workers()->run_task(&parKnownGarbageTask);
 
     assert(_g1->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Thu May 27 18:01:56 2010 -0700
@@ -711,6 +711,7 @@
   // object in the region.
   if (region_ptr->data_size() == RegionSize) {
     result += pointer_delta(addr, region_addr);
+    DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result);)
     return result;
   }
 
@@ -1487,13 +1488,14 @@
       space->set_top_for_allocations();
     }
 
-    size_t obj_len = 8;
+    size_t min_size = CollectedHeap::min_fill_size();
+    size_t obj_len = min_size;
     while (b + obj_len <= t) {
       CollectedHeap::fill_with_object(b, obj_len);
       mark_bitmap()->mark_obj(b, obj_len);
       summary_data().add_obj(b, obj_len);
       b += obj_len;
-      obj_len = (obj_len & 0x18) + 8; // 8 16 24 32 8 16 24 32 ...
+      obj_len = (obj_len & (min_size*3)) + min_size; // 8 16 24 32 8 16 24 32 ...
     }
     if (b < t) {
       // The loop didn't completely fill to t (top); adjust top downward.
@@ -1680,11 +1682,13 @@
     //                          +-------+
 
     // Initially assume case a, c or e will apply.
-    size_t obj_len = (size_t)oopDesc::header_size();
+    size_t obj_len = CollectedHeap::min_fill_size();
     HeapWord* obj_beg = dense_prefix_end - obj_len;
 
 #ifdef  _LP64
-    if (_mark_bitmap.is_obj_end(dense_prefix_bit - 2)) {
+    if (MinObjAlignment > 1) { // object alignment > heap word size
+      // Cases a, c or e.
+    } else if (_mark_bitmap.is_obj_end(dense_prefix_bit - 2)) {
       // Case b above.
       obj_beg = dense_prefix_end - 1;
     } else if (!_mark_bitmap.is_obj_end(dense_prefix_bit - 3) &&
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Thu May 27 18:01:56 2010 -0700
@@ -1414,6 +1414,8 @@
 {
   assert(old_addr >= new_addr || space_id(old_addr) != space_id(new_addr),
          "must move left or to a different space");
+  assert(is_object_aligned((intptr_t)old_addr) && is_object_aligned((intptr_t)new_addr),
+         "checking alignment");
 }
 #endif // ASSERT
 
--- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Thu May 27 18:01:56 2010 -0700
@@ -761,7 +761,7 @@
 
   if (p != NULL) {
     size_t remainder = s->free_in_words();
-    if (remainder < (size_t)oopDesc::header_size() && remainder > 0) {
+    if (remainder < CollectedHeap::min_fill_size() && remainder > 0) {
       s->set_top(s->top() - size);
       p = NULL;
     }
@@ -803,7 +803,7 @@
   HeapWord *p = s->cas_allocate(size);
   if (p != NULL) {
     size_t remainder = pointer_delta(s->end(), p + size);
-    if (remainder < (size_t)oopDesc::header_size() && remainder > 0) {
+    if (remainder < CollectedHeap::min_fill_size() && remainder > 0) {
       if (s->cas_deallocate(p, size)) {
         // We were the last to allocate and created a fragment less than
         // a minimal object.
--- a/src/share/vm/gc_interface/collectedHeap.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/gc_interface/collectedHeap.cpp	Thu May 27 18:01:56 2010 -0700
@@ -239,11 +239,11 @@
 }
 
 size_t CollectedHeap::filler_array_hdr_size() {
-  return size_t(arrayOopDesc::header_size(T_INT));
+  return size_t(align_object_offset(arrayOopDesc::header_size(T_INT))); // align to Long
 }
 
 size_t CollectedHeap::filler_array_min_size() {
-  return align_object_size(filler_array_hdr_size());
+  return align_object_size(filler_array_hdr_size()); // align to MinObjAlignment
 }
 
 size_t CollectedHeap::filler_array_max_size() {
--- a/src/share/vm/memory/space.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/memory/space.cpp	Thu May 27 18:01:56 2010 -0700
@@ -861,9 +861,9 @@
   }
   size = align_object_size(size);
 
-  const size_t min_int_array_size = typeArrayOopDesc::header_size(T_INT);
-  if (size >= min_int_array_size) {
-    size_t length = (size - min_int_array_size) * (HeapWordSize / sizeof(jint));
+  const size_t array_header_size = typeArrayOopDesc::header_size(T_INT);
+  if (size >= (size_t)align_object_size(array_header_size)) {
+    size_t length = (size - array_header_size) * (HeapWordSize / sizeof(jint));
     // allocate uninitialized int array
     typeArrayOop t = (typeArrayOop) allocate(size);
     assert(t != NULL, "allocation should succeed");
@@ -871,7 +871,7 @@
     t->set_klass(Universe::intArrayKlassObj());
     t->set_length((int)length);
   } else {
-    assert((int) size == instanceOopDesc::header_size(),
+    assert(size == CollectedHeap::min_fill_size(),
            "size for smallest fake object doesn't match");
     instanceOop obj = (instanceOop) allocate(size);
     obj->set_mark(markOopDesc::prototype());
--- a/src/share/vm/memory/threadLocalAllocBuffer.inline.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/memory/threadLocalAllocBuffer.inline.hpp	Thu May 27 18:01:56 2010 -0700
@@ -31,7 +31,7 @@
     // Skip mangling the space corresponding to the object header to
     // ensure that the returned space is not considered parsable by
     // any concurrent GC thread.
-    size_t hdr_size = CollectedHeap::min_fill_size();
+    size_t hdr_size = oopDesc::header_size();
     Copy::fill_to_words(obj + hdr_size, size - hdr_size, badHeapWordVal);
 #endif // ASSERT
     // This addition is safe because we know that top is
--- a/src/share/vm/memory/universe.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/memory/universe.cpp	Thu May 27 18:01:56 2010 -0700
@@ -748,7 +748,7 @@
 // 4Gb
 static const uint64_t NarrowOopHeapMax = (uint64_t(max_juint) + 1);
 // 32Gb
-static const uint64_t OopEncodingHeapMax = NarrowOopHeapMax << LogMinObjAlignmentInBytes;
+// OopEncodingHeapMax == NarrowOopHeapMax << LogMinObjAlignmentInBytes;
 
 char* Universe::preferred_heap_base(size_t heap_size, NARROW_OOP_MODE mode) {
   size_t base = 0;
@@ -1261,7 +1261,7 @@
 
   // decide which low-order bits we require to be clear:
   size_t alignSize = MinObjAlignmentInBytes;
-  size_t min_object_size = oopDesc::header_size();
+  size_t min_object_size = CollectedHeap::min_fill_size();
 
   // make an inclusive limit:
   uintptr_t max = (uintptr_t)high_boundary - min_object_size*wordSize;
--- a/src/share/vm/oops/arrayOop.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/oops/arrayOop.hpp	Thu May 27 18:01:56 2010 -0700
@@ -92,7 +92,7 @@
   static int header_size(BasicType type) {
     size_t typesize_in_bytes = header_size_in_bytes();
     return (int)(Universe::element_type_should_be_aligned(type)
-      ? align_object_size(typesize_in_bytes/HeapWordSize)
+      ? align_object_offset(typesize_in_bytes/HeapWordSize)
       : typesize_in_bytes/HeapWordSize);
   }
 
--- a/src/share/vm/oops/oop.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/oops/oop.hpp	Thu May 27 18:01:56 2010 -0700
@@ -149,10 +149,6 @@
   // Need this as public for garbage collection.
   template <class T> T* obj_field_addr(int offset) const;
 
-  // Oop encoding heap max
-  static const uint64_t OopEncodingHeapMax =
-              (uint64_t(max_juint) + 1) << LogMinObjAlignmentInBytes;
-
   static bool is_null(oop obj);
   static bool is_null(narrowOop obj);
 
--- a/src/share/vm/oops/oop.inline.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/oops/oop.inline.hpp	Thu May 27 18:01:56 2010 -0700
@@ -146,8 +146,13 @@
 // offset from the heap base.  Saving the check for null can save instructions
 // in inner GC loops so these are separated.
 
+inline bool check_obj_alignment(oop obj) {
+  return (intptr_t)obj % MinObjAlignmentInBytes == 0;
+}
+
 inline narrowOop oopDesc::encode_heap_oop_not_null(oop v) {
   assert(!is_null(v), "oop value can never be zero");
+  assert(check_obj_alignment(v), "Address not aligned");
   assert(Universe::heap()->is_in_reserved(v), "Address not in heap");
   address base = Universe::narrow_oop_base();
   int    shift = Universe::narrow_oop_shift();
@@ -167,7 +172,9 @@
   assert(!is_null(v), "narrow oop value can never be zero");
   address base = Universe::narrow_oop_base();
   int    shift = Universe::narrow_oop_shift();
-  return (oop)(void*)((uintptr_t)base + ((uintptr_t)v << shift));
+  oop result = (oop)(void*)((uintptr_t)base + ((uintptr_t)v << shift));
+  assert(check_obj_alignment(result), "Address not aligned");
+  return result;
 }
 
 inline oop oopDesc::decode_heap_oop(narrowOop v) {
@@ -522,10 +529,6 @@
   return mark()->has_bias_pattern();
 }
 
-inline bool check_obj_alignment(oop obj) {
-  return (intptr_t)obj % MinObjAlignmentInBytes == 0;
-}
-
 
 // used only for asserts
 inline bool oopDesc::is_oop(bool ignore_mark_word) const {
@@ -600,6 +603,8 @@
 
 // Used by scavengers
 inline void oopDesc::forward_to(oop p) {
+  assert(check_obj_alignment(p),
+         "forwarding to something not aligned");
   assert(Universe::heap()->is_in_reserved(p),
          "forwarding to something not in heap");
   markOop m = markOopDesc::encode_pointer_as_mark(p);
@@ -609,6 +614,8 @@
 
 // Used by parallel scavengers
 inline bool oopDesc::cas_forward_to(oop p, markOop compare) {
+  assert(check_obj_alignment(p),
+         "forwarding to something not aligned");
   assert(Universe::heap()->is_in_reserved(p),
          "forwarding to something not in heap");
   markOop m = markOopDesc::encode_pointer_as_mark(p);
--- a/src/share/vm/runtime/arguments.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/runtime/arguments.cpp	Thu May 27 18:01:56 2010 -0700
@@ -1211,8 +1211,44 @@
 }
 #endif // KERNEL
 
+void set_object_alignment() {
+  // Object alignment.
+  assert(is_power_of_2(ObjectAlignmentInBytes), "ObjectAlignmentInBytes must be power of 2");
+  MinObjAlignmentInBytes     = ObjectAlignmentInBytes;
+  assert(MinObjAlignmentInBytes >= HeapWordsPerLong * HeapWordSize, "ObjectAlignmentInBytes value is too small");
+  MinObjAlignment            = MinObjAlignmentInBytes / HeapWordSize;
+  assert(MinObjAlignmentInBytes == MinObjAlignment * HeapWordSize, "ObjectAlignmentInBytes value is incorrect");
+  MinObjAlignmentInBytesMask = MinObjAlignmentInBytes - 1;
+
+  LogMinObjAlignmentInBytes  = exact_log2(ObjectAlignmentInBytes);
+  LogMinObjAlignment         = LogMinObjAlignmentInBytes - LogHeapWordSize;
+
+  // Oop encoding heap max
+  OopEncodingHeapMax = (uint64_t(max_juint) + 1) << LogMinObjAlignmentInBytes;
+
+#ifndef KERNEL
+  // Set CMS global values
+  CompactibleFreeListSpace::set_cms_values();
+#endif // KERNEL
+}
+
+bool verify_object_alignment() {
+  // Object alignment.
+  if (!is_power_of_2(ObjectAlignmentInBytes)) {
+    jio_fprintf(defaultStream::error_stream(),
+                "error: ObjectAlignmentInBytes=%d must be power of 2", (int)ObjectAlignmentInBytes);
+    return false;
+  }
+  if ((int)ObjectAlignmentInBytes < BytesPerLong) {
+    jio_fprintf(defaultStream::error_stream(),
+                "error: ObjectAlignmentInBytes=%d must be greater or equal %d", (int)ObjectAlignmentInBytes, BytesPerLong);
+    return false;
+  }
+  return true;
+}
+
 inline uintx max_heap_for_compressed_oops() {
-  LP64_ONLY(return oopDesc::OopEncodingHeapMax - MaxPermSize - os::vm_page_size());
+  LP64_ONLY(return OopEncodingHeapMax - MaxPermSize - os::vm_page_size());
   NOT_LP64(ShouldNotReachHere(); return 0);
 }
 
@@ -1776,6 +1812,8 @@
   status = status && verify_interval(TLABWasteTargetPercent,
                                      1, 100, "TLABWasteTargetPercent");
 
+  status = status && verify_object_alignment();
+
   return status;
 }
 
@@ -2848,6 +2886,9 @@
   UseCompressedOops = false;
 #endif
 
+  // Set object alignment values.
+  set_object_alignment();
+
 #ifdef SERIALGC
   force_serial_gc();
 #endif // SERIALGC
--- a/src/share/vm/runtime/globals.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/runtime/globals.hpp	Thu May 27 18:01:56 2010 -0700
@@ -321,6 +321,9 @@
   diagnostic(bool, PrintCompressedOopsMode, false,                          \
             "Print compressed oops base address and encoding mode")         \
                                                                             \
+  lp64_product(intx, ObjectAlignmentInBytes, 8,                             \
+          "Default object alignment in bytes, 8 is minimum")                \
+                                                                            \
   /* UseMembar is theoretically a temp flag used for memory barrier         \
    * removal testing.  It was supposed to be removed before FCS but has     \
    * been re-added (see 6401008) */                                         \
--- a/src/share/vm/runtime/vmStructs.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/runtime/vmStructs.cpp	Thu May 27 18:01:56 2010 -0700
@@ -1328,14 +1328,6 @@
   declare_constant(LogBytesPerWord)                                       \
   declare_constant(BytesPerLong)                                          \
                                                                           \
-  /********************/                                                  \
-  /* Object alignment */                                                  \
-  /********************/                                                  \
-                                                                          \
-  declare_constant(MinObjAlignment)                                       \
-  declare_constant(MinObjAlignmentInBytes)                                \
-  declare_constant(LogMinObjAlignmentInBytes)                             \
-                                                                          \
   /********************************************/                          \
   /* Generation and Space Hierarchy Constants */                          \
   /********************************************/                          \
--- a/src/share/vm/utilities/copy.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/utilities/copy.hpp	Thu May 27 18:01:56 2010 -0700
@@ -51,7 +51,7 @@
 class Copy : AllStatic {
  public:
   // Block copy methods have four attributes.  We don't define all possibilities.
-  //   alignment: aligned according to minimum Java object alignment (MinObjAlignment)
+  //   alignment: aligned to BytesPerLong
   //   arrayof:   arraycopy operation with both operands aligned on the same
   //              boundary as the first element of an array of the copy unit.
   //              This is currently a HeapWord boundary on all platforms, except
@@ -70,7 +70,7 @@
   //     [ '_atomic' ]
   //
   // Except in the arrayof case, whatever the alignment is, we assume we can copy
-  // whole alignment units.  E.g., if MinObjAlignment is 2x word alignment, an odd
+  // whole alignment units.  E.g., if BytesPerLong is 2x word alignment, an odd
   // count may copy an extra word.  In the arrayof case, we are allowed to copy
   // only the number of copy units specified.
 
@@ -305,17 +305,17 @@
   }
   static void assert_params_aligned(HeapWord* from, HeapWord* to) {
 #ifdef ASSERT
-    if (mask_bits((uintptr_t)from, MinObjAlignmentInBytes-1) != 0)
-      basic_fatal("not object aligned");
-    if (mask_bits((uintptr_t)to, MinObjAlignmentInBytes-1) != 0)
-      basic_fatal("not object aligned");
+    if (mask_bits((uintptr_t)from, BytesPerLong-1) != 0)
+      basic_fatal("not long aligned");
+    if (mask_bits((uintptr_t)to, BytesPerLong-1) != 0)
+      basic_fatal("not long aligned");
 #endif
   }
 
   static void assert_params_aligned(HeapWord* to) {
 #ifdef ASSERT
-    if (mask_bits((uintptr_t)to, MinObjAlignmentInBytes-1) != 0)
-      basic_fatal("not object aligned");
+    if (mask_bits((uintptr_t)to, BytesPerLong-1) != 0)
+      basic_fatal("not long aligned");
 #endif
   }
 
--- a/src/share/vm/utilities/globalDefinitions.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/utilities/globalDefinitions.cpp	Thu May 27 18:01:56 2010 -0700
@@ -34,6 +34,18 @@
 int BytesPerHeapOop    = 0;
 int BitsPerHeapOop     = 0;
 
+// Object alignment, in units of HeapWords.
+// Defaults are -1 so things will break badly if incorrectly initialized.
+int MinObjAlignment            = -1;
+int MinObjAlignmentInBytes     = -1;
+int MinObjAlignmentInBytesMask = 0;
+
+int LogMinObjAlignment         = -1;
+int LogMinObjAlignmentInBytes  = -1;
+
+// Oop encoding heap max
+uint64_t OopEncodingHeapMax = 0;
+
 void basic_fatal(const char* msg) {
   fatal(msg);
 }
--- a/src/share/vm/utilities/globalDefinitions.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/utilities/globalDefinitions.hpp	Thu May 27 18:01:56 2010 -0700
@@ -73,6 +73,9 @@
 extern int BytesPerHeapOop;
 extern int BitsPerHeapOop;
 
+// Oop encoding heap max
+extern uint64_t OopEncodingHeapMax;
+
 const int BitsPerJavaInteger = 32;
 const int BitsPerJavaLong    = 64;
 const int BitsPerSize_t      = size_tSize * BitsPerByte;
@@ -292,12 +295,12 @@
 // Minimum is max(BytesPerLong, BytesPerDouble, BytesPerOop) / HeapWordSize, so jlong, jdouble and
 // reference fields can be naturally aligned.
 
-const int MinObjAlignment            = HeapWordsPerLong;
-const int MinObjAlignmentInBytes     = MinObjAlignment * HeapWordSize;
-const int MinObjAlignmentInBytesMask = MinObjAlignmentInBytes - 1;
+extern int MinObjAlignment;
+extern int MinObjAlignmentInBytes;
+extern int MinObjAlignmentInBytesMask;
 
-const int LogMinObjAlignment         = LogHeapWordsPerLong;
-const int LogMinObjAlignmentInBytes  = LogMinObjAlignment + LogHeapWordSize;
+extern int LogMinObjAlignment;
+extern int LogMinObjAlignmentInBytes;
 
 // Machine dependent stuff
 
@@ -332,18 +335,16 @@
   return align_size_up(size, MinObjAlignment);
 }
 
+inline bool is_object_aligned(intptr_t addr) {
+  return addr == align_object_size(addr);
+}
+
 // Pad out certain offsets to jlong alignment, in HeapWord units.
 
-#define align_object_offset_(offset) align_size_up_(offset, HeapWordsPerLong)
-
 inline intptr_t align_object_offset(intptr_t offset) {
   return align_size_up(offset, HeapWordsPerLong);
 }
 
-inline bool is_object_aligned(intptr_t offset) {
-  return offset == align_object_offset(offset);
-}
-
 
 //----------------------------------------------------------------------------------------------------
 // Utility macros for compilers