changeset 1372:e16cca0aa5e1

Merge
author trims
date Thu, 15 Apr 2010 19:08:48 -0700
parents 4c78b7c16824 fc3cd2277dc7
children 25f53b53aaa3
files
diffstat 56 files changed, 751 insertions(+), 391 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -1728,9 +1728,13 @@
       ShouldNotReachHere();
     }
   } else if (code == lir_cmp_l2i) {
+#ifdef _LP64
+    __ lcmp(left->as_register_lo(), right->as_register_lo(), dst->as_register());
+#else
     __ lcmp(left->as_register_hi(),  left->as_register_lo(),
             right->as_register_hi(), right->as_register_lo(),
             dst->as_register());
+#endif
   } else {
     ShouldNotReachHere();
   }
@@ -2849,7 +2853,7 @@
 
 
 void LIR_Assembler::align_backward_branch_target() {
-  __ align(16);
+  __ align(OptoLoopAlignment);
 }
 
 
--- a/src/cpu/sparc/vm/c2_globals_sparc.hpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/cpu/sparc/vm/c2_globals_sparc.hpp	Thu Apr 15 19:08:48 2010 -0700
@@ -60,9 +60,6 @@
 define_pd_global(intx, INTPRESSURE,                  48);  // large register set
 define_pd_global(intx, InteriorEntryAlignment,       16);  // = CodeEntryAlignment
 define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
-// The default setting 16/16 seems to work best.
-// (For _228_jack 16/16 is 2% better than 4/4, 16/4, 32/32, 32/16, or 16/32.)
-define_pd_global(intx, OptoLoopAlignment,            16);  // = 4*wordSize
 define_pd_global(intx, RegisterCostAreaRatio,        12000);
 define_pd_global(bool, UseTLAB,                      true);
 define_pd_global(bool, ResizeTLAB,                   true);
--- a/src/cpu/sparc/vm/globals_sparc.hpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/cpu/sparc/vm/globals_sparc.hpp	Thu Apr 15 19:08:48 2010 -0700
@@ -40,6 +40,9 @@
 define_pd_global(bool, UncommonNullCast,            true);  // Uncommon-trap NULLs past to check cast
 
 define_pd_global(intx, CodeEntryAlignment,    32);
+// The default setting 16/16 seems to work best.
+// (For _228_jack 16/16 is 2% better than 4/4, 16/4, 32/32, 32/16, or 16/32.)
+define_pd_global(intx, OptoLoopAlignment,     16);  // = 4*wordSize
 define_pd_global(intx, InlineFrequencyCount,  50);  // we can use more inlining on the SPARC
 define_pd_global(intx, InlineSmallCode,       1500);
 #ifdef _LP64
--- a/src/cpu/sparc/vm/sparc.ad	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Thu Apr 15 19:08:48 2010 -0700
@@ -471,6 +471,9 @@
 source %{
 #define __ _masm.
 
+// Block initializing store
+#define ASI_BLK_INIT_QUAD_LDD_P    0xE2
+
 // tertiary op of a LoadP or StoreP encoding
 #define REGP_OP true
 
@@ -6147,6 +6150,7 @@
 %}
 
 instruct prefetchw( memory mem ) %{
+  predicate(AllocatePrefetchStyle != 3 );
   match( PrefetchWrite mem );
   ins_cost(MEMORY_REF_COST);
 
@@ -6156,6 +6160,23 @@
   ins_pipe(iload_mem);
 %}
 
+// Use BIS instruction to prefetch.
+instruct prefetchw_bis( memory mem ) %{
+  predicate(AllocatePrefetchStyle == 3);
+  match( PrefetchWrite mem );
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STXA   G0,$mem\t! // Block initializing store" %}
+  ins_encode %{
+     Register base = as_Register($mem$$base);
+     int disp = $mem$$disp;
+     if (disp != 0) {
+       __ add(base, AllocatePrefetchStepSize, base);
+     }
+     __ stxa(G0, base, G0, ASI_BLK_INIT_QUAD_LDD_P);
+  %}
+  ins_pipe(istore_mem_reg);
+%}
 
 //----------Store Instructions-------------------------------------------------
 // Store Byte
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -1148,7 +1148,7 @@
       __ andn(from, 7, from);     // Align address
       __ ldx(from, 0, O3);
       __ inc(from, 8);
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_loop);
       __ ldx(from, 0, O4);
       __ deccc(count, count_dec); // Can we do next iteration after this one?
@@ -1220,7 +1220,7 @@
     //
       __ andn(end_from, 7, end_from);     // Align address
       __ ldx(end_from, 0, O3);
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_loop);
       __ ldx(end_from, -8, O4);
       __ deccc(count, count_dec); // Can we do next iteration after this one?
@@ -1349,7 +1349,7 @@
     __ BIND(L_copy_byte);
       __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
       __ delayed()->nop();
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_copy_byte_loop);
       __ ldub(from, offset, O3);
       __ deccc(count);
@@ -1445,7 +1445,7 @@
                                         L_aligned_copy, L_copy_byte);
     }
     // copy 4 elements (16 bytes) at a time
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_aligned_copy);
       __ dec(end_from, 16);
       __ ldx(end_from, 8, O3);
@@ -1461,7 +1461,7 @@
     __ BIND(L_copy_byte);
       __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
       __ delayed()->nop();
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_copy_byte_loop);
       __ dec(end_from);
       __ dec(end_to);
@@ -1577,7 +1577,7 @@
     __ BIND(L_copy_2_bytes);
       __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
       __ delayed()->nop();
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_copy_2_bytes_loop);
       __ lduh(from, offset, O3);
       __ deccc(count);
@@ -1684,7 +1684,7 @@
                                         L_aligned_copy, L_copy_2_bytes);
     }
     // copy 4 elements (16 bytes) at a time
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_aligned_copy);
       __ dec(end_from, 16);
       __ ldx(end_from, 8, O3);
@@ -1781,7 +1781,7 @@
     // copy with shift 4 elements (16 bytes) at a time
       __ dec(count, 4);   // The cmp at the beginning guaranty count >= 4
 
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_copy_16_bytes);
       __ ldx(from, 4, O4);
       __ deccc(count, 4); // Can we do next iteration after this one?
@@ -1907,7 +1907,7 @@
     // to form 2 aligned 8-bytes chunks to store.
     //
       __ ldx(end_from, -4, O3);
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_copy_16_bytes);
       __ ldx(end_from, -12, O4);
       __ deccc(count, 4);
@@ -1929,7 +1929,7 @@
       __ delayed()->inc(count, 4);
 
     // copy 4 elements (16 bytes) at a time
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_aligned_copy);
       __ dec(end_from, 16);
       __ ldx(end_from, 8, O3);
@@ -2000,6 +2000,27 @@
   //      to:    O1
   //      count: O2 treated as signed
   //
+  // count -= 2;
+  // if ( count >= 0 ) { // >= 2 elements
+  //   if ( count >= 6 ) { // >= 8 elements
+  //     count -= 6; // original count - 8
+  //     do {
+  //       copy_8_elements;
+  //       count -= 8;
+  //     } while ( count >= 0 );
+  //     count += 6;
+  //   }
+  //   if ( count >= 0 ) { // >= 2 elements
+  //     do {
+  //       copy_2_elements;
+  //     } while ( (count=count-2) >= 0 );
+  //   }
+  // }
+  // count += 2;
+  // if ( count != 0 ) { // 1 element left
+  //   copy_1_element;
+  // }
+  //
   void generate_disjoint_long_copy_core(bool aligned) {
     Label L_copy_8_bytes, L_copy_16_bytes, L_exit;
     const Register from    = O0;  // source array address
@@ -2012,7 +2033,39 @@
       __ mov(G0, offset0);   // offset from start of arrays (0)
       __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
       __ delayed()->add(offset0, 8, offset8);
-      __ align(16);
+
+    // Copy in 64-byte chunks
+    Label L_copy_64_bytes;
+    const Register from64 = O3;  // source address
+    const Register to64   = G3;  // destination address
+      __ subcc(count, 6, O3);
+      __ brx(Assembler::negative, false, Assembler::pt, L_copy_16_bytes );
+      __ delayed()->mov(to,   to64);
+      // Now we can use O4(offset0), O5(offset8) as temps
+      __ mov(O3, count);
+      __ mov(from, from64);
+
+      __ align(OptoLoopAlignment);
+    __ BIND(L_copy_64_bytes);
+      for( int off = 0; off < 64; off += 16 ) {
+        __ ldx(from64,  off+0, O4);
+        __ ldx(from64,  off+8, O5);
+        __ stx(O4, to64,  off+0);
+        __ stx(O5, to64,  off+8);
+      }
+      __ deccc(count, 8);
+      __ inc(from64, 64);
+      __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_64_bytes);
+      __ delayed()->inc(to64, 64);
+
+      // Restore O4(offset0), O5(offset8)
+      __ sub(from64, from, offset0);
+      __ inccc(count, 6);
+      __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
+      __ delayed()->add(offset0, 8, offset8);
+
+      // Copy in 16-byte chunks
+      __ align(OptoLoopAlignment);
     __ BIND(L_copy_16_bytes);
       __ ldx(from, offset0, O3);
       __ ldx(from, offset8, G3);
@@ -2023,6 +2076,7 @@
       __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
       __ delayed()->inc(offset8, 16);
 
+      // Copy last 8 bytes
     __ BIND(L_copy_8_bytes);
       __ inccc(count, 2);
       __ brx(Assembler::zero, true, Assembler::pn, L_exit );
@@ -2085,7 +2139,7 @@
       __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_8_bytes );
       __ delayed()->sllx(count, LogBytesPerLong, offset8);
       __ sub(offset8, 8, offset0);
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_copy_16_bytes);
       __ ldx(from, offset8, O2);
       __ ldx(from, offset0, O3);
@@ -2351,7 +2405,7 @@
     //   (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays
     //   (O2 = len; O2 != 0; O2--) --- number of oops *remaining*
     //   G3, G4, G5 --- current oop, oop.klass, oop.klass.super
-    __ align(16);
+    __ align(OptoLoopAlignment);
 
     __ BIND(store_element);
     __ deccc(G1_remain);                // decrement the count
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -86,14 +86,24 @@
     if (FLAG_IS_DEFAULT(InteriorEntryAlignment)) {
       FLAG_SET_DEFAULT(InteriorEntryAlignment, 4);
     }
+    if (is_niagara1_plus()) {
+      if (AllocatePrefetchStyle > 0 && FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
+        // Use BIS instruction for allocation prefetch.
+        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3);
+        if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
+          // Use smaller prefetch distance on N2 with BIS
+          FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64);
+        }
+      }
+      if (AllocatePrefetchStyle != 3 && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
+        // Use different prefetch distance without BIS
+        FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
+      }
+    }
+#endif
     if (FLAG_IS_DEFAULT(OptoLoopAlignment)) {
       FLAG_SET_DEFAULT(OptoLoopAlignment, 4);
     }
-    if (is_niagara1_plus() && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
-      // Use smaller prefetch distance on N2
-      FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
-    }
-#endif
   }
 
   // Use hardware population count instruction if available.
--- a/src/cpu/x86/vm/assembler_x86.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -3365,6 +3365,13 @@
 
 #else // LP64
 
+void Assembler::set_byte_if_not_zero(Register dst) {
+  int enc = prefix_and_encode(dst->encoding(), true);
+  emit_byte(0x0F);
+  emit_byte(0x95);
+  emit_byte(0xE0 | enc);
+}
+
 // 64bit only pieces of the assembler
 // This should only be used by 64bit instructions that can use rip-relative
 // it cannot be used by instructions that want an immediate value.
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -2690,19 +2690,14 @@
   } else {
     assert(code == lir_cmp_l2i, "check");
 #ifdef _LP64
-      Register dest = dst->as_register();
-      __ xorptr(dest, dest);
-      Label high, done;
-      __ cmpptr(left->as_register_lo(), right->as_register_lo());
-      __ jcc(Assembler::equal, done);
-      __ jcc(Assembler::greater, high);
-      __ decrement(dest);
-      __ jmp(done);
-      __ bind(high);
-      __ increment(dest);
-
-      __ bind(done);
-
+    Label done;
+    Register dest = dst->as_register();
+    __ cmpptr(left->as_register_lo(), right->as_register_lo());
+    __ movl(dest, -1);
+    __ jccb(Assembler::less, done);
+    __ set_byte_if_not_zero(dest);
+    __ movzbl(dest, dest);
+    __ bind(done);
 #else
     __ lcmp2int(left->as_register_hi(),
                 left->as_register_lo(),
--- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -781,7 +781,7 @@
 
   // Restore SP from BP if the exception PC is a MethodHandle call site.
   NOT_LP64(__ get_thread(thread);)
-  __ cmpl(Address(thread, JavaThread::is_method_handle_exception_offset()), 0);
+  __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
   __ cmovptr(Assembler::notEqual, rsp, rbp);
 
   // continue at exception handler (return address removed)
--- a/src/cpu/x86/vm/c2_globals_x86.hpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/cpu/x86/vm/c2_globals_x86.hpp	Thu Apr 15 19:08:48 2010 -0700
@@ -80,7 +80,6 @@
 // Ergonomics related flags
 define_pd_global(uint64_t,MaxRAM,                    4ULL*G);
 #endif // AMD64
-define_pd_global(intx, OptoLoopAlignment,            16);
 define_pd_global(intx, RegisterCostAreaRatio,        16000);
 
 // Peephole and CISC spilling both break the graph, and so makes the
--- a/src/cpu/x86/vm/globals_x86.hpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/cpu/x86/vm/globals_x86.hpp	Thu Apr 15 19:08:48 2010 -0700
@@ -45,6 +45,7 @@
 #else
 define_pd_global(intx, CodeEntryAlignment,       16);
 #endif // COMPILER2
+define_pd_global(intx, OptoLoopAlignment,        16);
 define_pd_global(intx, InlineFrequencyCount,     100);
 define_pd_global(intx, InlineSmallCode,          1000);
 
--- a/src/cpu/x86/vm/runtime_x86_32.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/cpu/x86/vm/runtime_x86_32.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -115,8 +115,8 @@
 
   // rax: exception handler for given <exception oop/exception pc>
 
-  // Restore SP from BP if the exception PC is a MethodHandle call.
-  __ cmpl(Address(rcx, JavaThread::is_method_handle_exception_offset()), 0);
+  // Restore SP from BP if the exception PC is a MethodHandle call site.
+  __ cmpl(Address(rcx, JavaThread::is_method_handle_return_offset()), 0);
   __ cmovptr(Assembler::notEqual, rsp, rbp);
 
   // We have a handler in rax, (could be deopt blob)
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -3328,8 +3328,8 @@
 
   // rax: exception handler
 
-  // Restore SP from BP if the exception PC is a MethodHandle call.
-  __ cmpl(Address(r15_thread, JavaThread::is_method_handle_exception_offset()), 0);
+  // Restore SP from BP if the exception PC is a MethodHandle call site.
+  __ cmpl(Address(r15_thread, JavaThread::is_method_handle_return_offset()), 0);
   __ cmovptr(Assembler::notEqual, rsp, rbp);
 
   // We have a handler in rax (could be deopt blob).
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -430,7 +430,7 @@
     __ verify_oop(exception_oop);
 
     // Restore SP from BP if the exception PC is a MethodHandle call site.
-    __ cmpl(Address(thread, JavaThread::is_method_handle_exception_offset()), 0);
+    __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
     __ cmovptr(Assembler::notEqual, rsp, rbp);
 
     // continue at exception handler (return address removed)
@@ -812,7 +812,7 @@
     Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
     // Copy 64-byte chunks
     __ jmpb(L_copy_64_bytes);
-    __ align(16);
+    __ align(OptoLoopAlignment);
   __ BIND(L_copy_64_bytes_loop);
 
     if(UseUnalignedLoadStores) {
@@ -874,7 +874,7 @@
     Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
     // Copy 64-byte chunks
     __ jmpb(L_copy_64_bytes);
-    __ align(16);
+    __ align(OptoLoopAlignment);
   __ BIND(L_copy_64_bytes_loop);
     __ movq(mmx0, Address(from, 0));
     __ movq(mmx1, Address(from, 8));
@@ -1144,7 +1144,7 @@
       __ movl(Address(to, count, sf, 0), rdx);
       __ jmpb(L_copy_8_bytes);
 
-      __ align(16);
+      __ align(OptoLoopAlignment);
       // Move 8 bytes
     __ BIND(L_copy_8_bytes_loop);
       if (UseXMMForArrayCopy) {
@@ -1235,7 +1235,7 @@
       }
     } else {
       __ jmpb(L_copy_8_bytes);
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_copy_8_bytes_loop);
       __ fild_d(Address(from, 0));
       __ fistp_d(Address(from, to_from, Address::times_1));
@@ -1282,7 +1282,7 @@
 
     __ jmpb(L_copy_8_bytes);
 
-    __ align(16);
+    __ align(OptoLoopAlignment);
   __ BIND(L_copy_8_bytes_loop);
     if (VM_Version::supports_mmx()) {
       if (UseXMMForArrayCopy) {
@@ -1454,7 +1454,7 @@
     // Loop control:
     //   for (count = -count; count != 0; count++)
     // Base pointers src, dst are biased by 8*count,to last element.
-    __ align(16);
+    __ align(OptoLoopAlignment);
 
     __ BIND(L_store_element);
     __ movptr(to_element_addr, elem);     // store the oop
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -871,9 +871,8 @@
   }
 
   address generate_fp_mask(const char *stub_name, int64_t mask) {
+    __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", stub_name);
-
-    __ align(16);
     address start = __ pc();
 
     __ emit_data64( mask, relocInfo::none );
@@ -1268,7 +1267,7 @@
                              Label& L_copy_32_bytes, Label& L_copy_8_bytes) {
     DEBUG_ONLY(__ stop("enter at entry label, not here"));
     Label L_loop;
-    __ align(16);
+    __ align(OptoLoopAlignment);
   __ BIND(L_loop);
     if(UseUnalignedLoadStores) {
       __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
@@ -1309,7 +1308,7 @@
                               Label& L_copy_32_bytes, Label& L_copy_8_bytes) {
     DEBUG_ONLY(__ stop("enter at entry label, not here"));
     Label L_loop;
-    __ align(16);
+    __ align(OptoLoopAlignment);
   __ BIND(L_loop);
     if(UseUnalignedLoadStores) {
       __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
@@ -2229,7 +2228,7 @@
     // Loop control:
     //   for (count = -count; count != 0; count++)
     // Base pointers src, dst are biased by 8*(count-1),to last element.
-    __ align(16);
+    __ align(OptoLoopAlignment);
 
     __ BIND(L_store_element);
     __ store_heap_oop(to_element_addr, rax_oop);  // store the oop
--- a/src/cpu/zero/vm/cppInterpreter_zero.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/cpu/zero/vm/cppInterpreter_zero.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -206,7 +206,6 @@
 
   // Update the invocation counter
   if ((UseCompiler || CountCompiledCalls) && !method->is_synchronized()) {
-    thread->set_do_not_unlock();
     InvocationCounter *counter = method->invocation_counter();
     counter->increment();
     if (counter->reached_InvocationLimit()) {
@@ -215,7 +214,6 @@
       if (HAS_PENDING_EXCEPTION)
         goto unwind_and_return;
     }
-    thread->clr_do_not_unlock();
   }
 
   // Lock if necessary
--- a/src/cpu/zero/vm/methodHandles_zero.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/cpu/zero/vm/methodHandles_zero.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -1,6 +1,6 @@
 /*
  * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
- * Copyright 2009 Red Hat, Inc.
+ * Copyright 2009, 2010 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,4 +23,10 @@
  *
  */
 
-// This file is intentionally empty
+#include "incls/_precompiled.incl"
+#include "incls/_methodHandles_zero.cpp.incl"
+
+void MethodHandles::generate_method_handle_stub(MacroAssembler*          masm,
+                                                MethodHandles::EntryKind ek) {
+  ShouldNotCallThis();
+}
--- a/src/cpu/zero/vm/stubRoutines_zero.hpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/cpu/zero/vm/stubRoutines_zero.hpp	Thu Apr 15 19:08:48 2010 -0700
@@ -1,6 +1,6 @@
 /*
  * Copyright 2003-2005 Sun Microsystems, Inc.  All Rights Reserved.
- * Copyright 2007, 2008, 2009 Red Hat, Inc.
+ * Copyright 2007, 2008, 2009, 2010 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -41,6 +41,10 @@
     code_size2 = 0       // if these are too small.  Simply increase
   };                     // them if that happens.
 
+  enum method_handles_platform_dependent_constants {
+    method_handles_adapters_code_size = 0
+  };
+
 #ifdef IA32
   class x86 {
     friend class VMStructs;
--- a/src/os/linux/vm/attachListener_linux.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/os/linux/vm/attachListener_linux.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -192,7 +192,8 @@
     res = ::bind(listener, (struct sockaddr*)&addr, sizeof(addr));
   }
   if (res == -1) {
-    sprintf(path, "%s/.java_pid%d", os::get_temp_directory(), os::current_process_id());
+    snprintf(path, PATH_MAX+1, "%s/.java_pid%d",
+             os::get_temp_directory(), os::current_process_id());
     strcpy(addr.sun_path, path);
     ::unlink(path);
     res = ::bind(listener, (struct sockaddr*)&addr, sizeof(addr));
@@ -460,13 +461,14 @@
   if (init_at_startup() || is_initialized()) {
     return false;               // initialized at startup or already initialized
   }
-  char fn[32];
+  char fn[128];
   sprintf(fn, ".attach_pid%d", os::current_process_id());
   int ret;
   struct stat64 st;
   RESTARTABLE(::stat64(fn, &st), ret);
   if (ret == -1) {
-    sprintf(fn, "/tmp/.attach_pid%d", os::current_process_id());
+    snprintf(fn, sizeof(fn), "%s/.attach_pid%d",
+             os::get_temp_directory(), os::current_process_id());
     RESTARTABLE(::stat64(fn, &st), ret);
   }
   if (ret == 0) {
--- a/src/os/linux/vm/os_linux.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/os/linux/vm/os_linux.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -1522,7 +1522,10 @@
 
 const char* os::dll_file_extension() { return ".so"; }
 
-const char* os::get_temp_directory() { return "/tmp/"; }
+const char* os::get_temp_directory() {
+  const char *prop = Arguments::get_property("java.io.tmpdir");
+  return prop == NULL ? "/tmp" : prop;
+}
 
 static bool file_exists(const char* filename) {
   struct stat statbuf;
@@ -2305,7 +2308,8 @@
   char buf[40];
   int num = Atomic::add(1, &cnt);
 
-  sprintf(buf, "/tmp/hs-vm-%d-%d", os::current_process_id(), num);
+  snprintf(buf, sizeof(buf), "%s/hs-vm-%d-%d",
+           os::get_temp_directory(), os::current_process_id(), num);
   unlink(buf);
 
   int fd = open(buf, O_CREAT | O_RDWR, S_IRWXU);
--- a/src/os/linux/vm/perfMemory_linux.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/os/linux/vm/perfMemory_linux.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -145,11 +145,11 @@
 
   const char* tmpdir = os::get_temp_directory();
   const char* perfdir = PERFDATA_NAME;
-  size_t nbytes = strlen(tmpdir) + strlen(perfdir) + strlen(user) + 2;
+  size_t nbytes = strlen(tmpdir) + strlen(perfdir) + strlen(user) + 3;
   char* dirname = NEW_C_HEAP_ARRAY(char, nbytes);
 
   // construct the path name to user specific tmp directory
-  snprintf(dirname, nbytes, "%s%s_%s", tmpdir, perfdir, user);
+  snprintf(dirname, nbytes, "%s/%s_%s", tmpdir, perfdir, user);
 
   return dirname;
 }
@@ -331,8 +331,9 @@
     }
 
     char* usrdir_name = NEW_C_HEAP_ARRAY(char,
-                              strlen(tmpdirname) + strlen(dentry->d_name) + 1);
+                              strlen(tmpdirname) + strlen(dentry->d_name) + 2);
     strcpy(usrdir_name, tmpdirname);
+    strcat(usrdir_name, "/");
     strcat(usrdir_name, dentry->d_name);
 
     DIR* subdirp = os::opendir(usrdir_name);
--- a/src/os/solaris/vm/attachListener_solaris.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/os/solaris/vm/attachListener_solaris.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -375,7 +375,8 @@
     return -1;
   }
 
-  sprintf(door_path, "%s/.java_pid%d", os::get_temp_directory(), os::current_process_id());
+  snprintf(door_path, sizeof(door_path), "%s/.java_pid%d",
+           os::get_temp_directory(), os::current_process_id());
   RESTARTABLE(::creat(door_path, S_IRUSR | S_IWUSR), fd);
 
   if (fd == -1) {
@@ -591,13 +592,14 @@
   if (init_at_startup() || is_initialized()) {
     return false;               // initialized at startup or already initialized
   }
-  char fn[32];
+  char fn[128];
   sprintf(fn, ".attach_pid%d", os::current_process_id());
   int ret;
   struct stat64 st;
   RESTARTABLE(::stat64(fn, &st), ret);
   if (ret == -1) {
-    sprintf(fn, "/tmp/.attach_pid%d", os::current_process_id());
+    snprintf(fn, sizeof(fn), "%s/.attach_pid%d",
+             os::get_temp_directory(), os::current_process_id());
     RESTARTABLE(::stat64(fn, &st), ret);
   }
   if (ret == 0) {
--- a/src/os/solaris/vm/os_solaris.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/os/solaris/vm/os_solaris.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -676,15 +676,6 @@
 }
 
 
-static char* get_property(char* name, char* buffer, int buffer_size) {
-  if (os::getenv(name, buffer, buffer_size)) {
-    return buffer;
-  }
-  static char empty[] = "";
-  return empty;
-}
-
-
 void os::init_system_properties_values() {
   char arch[12];
   sysinfo(SI_ARCHITECTURE, arch, sizeof(arch));
@@ -1826,7 +1817,10 @@
 
 const char* os::dll_file_extension() { return ".so"; }
 
-const char* os::get_temp_directory() { return "/tmp/"; }
+const char* os::get_temp_directory() {
+  const char *prop = Arguments::get_property("java.io.tmpdir");
+  return prop == NULL ? "/tmp" : prop;
+}
 
 static bool file_exists(const char* filename) {
   struct stat statbuf;
--- a/src/os/solaris/vm/perfMemory_solaris.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/os/solaris/vm/perfMemory_solaris.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -147,11 +147,11 @@
 
   const char* tmpdir = os::get_temp_directory();
   const char* perfdir = PERFDATA_NAME;
-  size_t nbytes = strlen(tmpdir) + strlen(perfdir) + strlen(user) + 2;
+  size_t nbytes = strlen(tmpdir) + strlen(perfdir) + strlen(user) + 3;
   char* dirname = NEW_C_HEAP_ARRAY(char, nbytes);
 
   // construct the path name to user specific tmp directory
-  snprintf(dirname, nbytes, "%s%s_%s", tmpdir, perfdir, user);
+  snprintf(dirname, nbytes, "%s/%s_%s", tmpdir, perfdir, user);
 
   return dirname;
 }
@@ -322,8 +322,9 @@
     }
 
     char* usrdir_name = NEW_C_HEAP_ARRAY(char,
-                              strlen(tmpdirname) + strlen(dentry->d_name) + 1);
+                              strlen(tmpdirname) + strlen(dentry->d_name) + 2);
     strcpy(usrdir_name, tmpdirname);
+    strcat(usrdir_name, "/");
     strcat(usrdir_name, dentry->d_name);
 
     DIR* subdirp = os::opendir(usrdir_name);
--- a/src/os/windows/vm/os_windows.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/os/windows/vm/os_windows.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -998,15 +998,16 @@
 
 const char* os::dll_file_extension() { return ".dll"; }
 
-const char * os::get_temp_directory()
-{
-    static char path_buf[MAX_PATH];
-    if (GetTempPath(MAX_PATH, path_buf)>0)
-      return path_buf;
-    else{
-      path_buf[0]='\0';
-      return path_buf;
-    }
+const char* os::get_temp_directory() {
+  const char *prop = Arguments::get_property("java.io.tmpdir");
+  if (prop != 0) return prop;
+  static char path_buf[MAX_PATH];
+  if (GetTempPath(MAX_PATH, path_buf)>0)
+    return path_buf;
+  else{
+    path_buf[0]='\0';
+    return path_buf;
+  }
 }
 
 static bool file_exists(const char* filename) {
--- a/src/os/windows/vm/perfMemory_windows.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/os/windows/vm/perfMemory_windows.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -149,11 +149,11 @@
 
   const char* tmpdir = os::get_temp_directory();
   const char* perfdir = PERFDATA_NAME;
-  size_t nbytes = strlen(tmpdir) + strlen(perfdir) + strlen(user) + 2;
+  size_t nbytes = strlen(tmpdir) + strlen(perfdir) + strlen(user) + 3;
   char* dirname = NEW_C_HEAP_ARRAY(char, nbytes);
 
   // construct the path name to user specific tmp directory
-  _snprintf(dirname, nbytes, "%s%s_%s", tmpdir, perfdir, user);
+  _snprintf(dirname, nbytes, "%s\\%s_%s", tmpdir, perfdir, user);
 
   return dirname;
 }
@@ -318,8 +318,9 @@
     }
 
     char* usrdir_name = NEW_C_HEAP_ARRAY(char,
-                              strlen(tmpdirname) + strlen(dentry->d_name) + 1);
+                              strlen(tmpdirname) + strlen(dentry->d_name) + 2);
     strcpy(usrdir_name, tmpdirname);
+    strcat(usrdir_name, "\\");
     strcat(usrdir_name, dentry->d_name);
 
     DIR* subdirp = os::opendir(usrdir_name);
--- a/src/share/vm/c1/c1_LinearScan.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/c1/c1_LinearScan.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -2608,6 +2608,46 @@
     } else if (opr->is_double_xmm()) {
       assert(opr->fpu_regnrLo() == opr->fpu_regnrHi(), "assumed in calculation");
       VMReg rname_first  = opr->as_xmm_double_reg()->as_VMReg();
+#  ifdef _LP64
+      first = new LocationValue(Location::new_reg_loc(Location::dbl, rname_first));
+      second = &_int_0_scope_value;
+#  else
+      first = new LocationValue(Location::new_reg_loc(Location::normal, rname_first));
+      // %%% This is probably a waste but we'll keep things as they were for now
+      if (true) {
+        VMReg rname_second = rname_first->next();
+        second = new LocationValue(Location::new_reg_loc(Location::normal, rname_second));
+      }
+#  endif
+#endif
+
+    } else if (opr->is_double_fpu()) {
+      // On SPARC, fpu_regnrLo/fpu_regnrHi represents the two halves of
+      // the double as float registers in the native ordering. On X86,
+      // fpu_regnrLo is a FPU stack slot whose VMReg represents
+      // the low-order word of the double and fpu_regnrLo + 1 is the
+      // name for the other half.  *first and *second must represent the
+      // least and most significant words, respectively.
+
+#ifdef X86
+      // the exact location of fpu stack values is only known
+      // during fpu stack allocation, so the stack allocator object
+      // must be present
+      assert(use_fpu_stack_allocation(), "should not have float stack values without fpu stack allocation (all floats must be SSE2)");
+      assert(_fpu_stack_allocator != NULL, "must be present");
+      opr = _fpu_stack_allocator->to_fpu_stack(opr);
+
+      assert(opr->fpu_regnrLo() == opr->fpu_regnrHi(), "assumed in calculation (only fpu_regnrHi is used)");
+#endif
+#ifdef SPARC
+      assert(opr->fpu_regnrLo() == opr->fpu_regnrHi() + 1, "assumed in calculation (only fpu_regnrHi is used)");
+#endif
+
+      VMReg rname_first = frame_map()->fpu_regname(opr->fpu_regnrHi());
+#ifdef _LP64
+      first = new LocationValue(Location::new_reg_loc(Location::dbl, rname_first));
+      second = &_int_0_scope_value;
+#else
       first = new LocationValue(Location::new_reg_loc(Location::normal, rname_first));
       // %%% This is probably a waste but we'll keep things as they were for now
       if (true) {
@@ -2616,37 +2656,6 @@
       }
 #endif
 
-    } else if (opr->is_double_fpu()) {
-      // On SPARC, fpu_regnrLo/fpu_regnrHi represents the two halves of
-      // the double as float registers in the native ordering. On X86,
-      // fpu_regnrLo is a FPU stack slot whose VMReg represents
-      // the low-order word of the double and fpu_regnrLo + 1 is the
-      // name for the other half.  *first and *second must represent the
-      // least and most significant words, respectively.
-
-#ifdef X86
-      // the exact location of fpu stack values is only known
-      // during fpu stack allocation, so the stack allocator object
-      // must be present
-      assert(use_fpu_stack_allocation(), "should not have float stack values without fpu stack allocation (all floats must be SSE2)");
-      assert(_fpu_stack_allocator != NULL, "must be present");
-      opr = _fpu_stack_allocator->to_fpu_stack(opr);
-
-      assert(opr->fpu_regnrLo() == opr->fpu_regnrHi(), "assumed in calculation (only fpu_regnrHi is used)");
-#endif
-#ifdef SPARC
-      assert(opr->fpu_regnrLo() == opr->fpu_regnrHi() + 1, "assumed in calculation (only fpu_regnrHi is used)");
-#endif
-
-      VMReg rname_first = frame_map()->fpu_regname(opr->fpu_regnrHi());
-
-      first = new LocationValue(Location::new_reg_loc(Location::normal, rname_first));
-      // %%% This is probably a waste but we'll keep things as they were for now
-      if (true) {
-        VMReg rname_second = rname_first->next();
-        second = new LocationValue(Location::new_reg_loc(Location::normal, rname_second));
-      }
-
     } else {
       ShouldNotReachHere();
       first = NULL;
--- a/src/share/vm/ci/ciConstant.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/ci/ciConstant.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -36,7 +36,7 @@
              basictype_to_str(basic_type()));
   switch (basic_type()) {
   case T_BOOLEAN:
-    tty->print("%s", bool_to_str(_value._int == 0));
+    tty->print("%s", bool_to_str(_value._int != 0));
     break;
   case T_CHAR:
   case T_BYTE:
--- a/src/share/vm/classfile/classFileParser.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/classfile/classFileParser.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -2956,8 +2956,8 @@
 #endif
     bool compact_fields   = CompactFields;
     int  allocation_style = FieldsAllocationStyle;
-    if( allocation_style < 0 || allocation_style > 1 ) { // Out of range?
-      assert(false, "0 <= FieldsAllocationStyle <= 1");
+    if( allocation_style < 0 || allocation_style > 2 ) { // Out of range?
+      assert(false, "0 <= FieldsAllocationStyle <= 2");
       allocation_style = 1; // Optimistic
     }
 
@@ -2993,6 +2993,25 @@
     } else if( allocation_style == 1 ) {
       // Fields order: longs/doubles, ints, shorts/chars, bytes, oops
       next_nonstatic_double_offset = next_nonstatic_field_offset;
+    } else if( allocation_style == 2 ) {
+      // Fields allocation: oops fields in super and sub classes are together.
+      if( nonstatic_field_size > 0 && super_klass() != NULL &&
+          super_klass->nonstatic_oop_map_size() > 0 ) {
+        int map_size = super_klass->nonstatic_oop_map_size();
+        OopMapBlock* first_map = super_klass->start_of_nonstatic_oop_maps();
+        OopMapBlock* last_map = first_map + map_size - 1;
+        int next_offset = last_map->offset() + (last_map->count() * heapOopSize);
+        if (next_offset == next_nonstatic_field_offset) {
+          allocation_style = 0;   // allocate oops first
+          next_nonstatic_oop_offset    = next_nonstatic_field_offset;
+          next_nonstatic_double_offset = next_nonstatic_oop_offset +
+                                         (nonstatic_oop_count * heapOopSize);
+        }
+      }
+      if( allocation_style == 2 ) {
+        allocation_style = 1;     // allocate oops last
+        next_nonstatic_double_offset = next_nonstatic_field_offset;
+      }
     } else {
       ShouldNotReachHere();
     }
--- a/src/share/vm/code/codeCache.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/code/codeCache.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2010 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -284,9 +284,11 @@
       cur->print_on(tty, is_live ? "scavenge root" : "dead scavenge root"); tty->cr();
     }
 #endif //PRODUCT
-    if (is_live)
+    if (is_live) {
       // Perform cur->oops_do(f), maybe just once per nmethod.
       f->do_code_blob(cur);
+      cur->fix_oop_relocations();
+    }
   }
 
   // Check for stray marks.
--- a/src/share/vm/compiler/compileBroker.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/compiler/compileBroker.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -1414,9 +1414,14 @@
     intx thread_id = os::current_thread_id();
     for (int try_temp_dir = 1; try_temp_dir >= 0; try_temp_dir--) {
       const char* dir = (try_temp_dir ? os::get_temp_directory() : NULL);
-      if (dir == NULL)  dir = "";
-      sprintf(fileBuf, "%shs_c" UINTX_FORMAT "_pid%u.log",
-              dir, thread_id, os::current_process_id());
+      if (dir == NULL) {
+        jio_snprintf(fileBuf, sizeof(fileBuf), "hs_c" UINTX_FORMAT "_pid%u.log",
+                     thread_id, os::current_process_id());
+      } else {
+        jio_snprintf(fileBuf, sizeof(fileBuf),
+                     "%s%shs_c" UINTX_FORMAT "_pid%u.log", dir,
+                     os::file_separator(), thread_id, os::current_process_id());
+      }
       fp = fopen(fileBuf, "at");
       if (fp != NULL) {
         file = NEW_C_HEAP_ARRAY(char, strlen(fileBuf)+1);
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -297,6 +297,11 @@
   }
 }
 
+// Currently we do not call this at all. Normally we would call it
+// during the concurrent marking / remark phases but we now call
+// the lock-based version instead. But we might want to resurrect this
+// code in the future. So, we'll leave it here, disabled with #if 0.
+#if 0
 MemRegion CMRegionStack::pop() {
   while (true) {
     // Otherwise...
@@ -321,6 +326,41 @@
     // Otherwise, we need to try again.
   }
 }
+#endif // 0
+
+void CMRegionStack::push_with_lock(MemRegion mr) {
+  assert(mr.word_size() > 0, "Precondition");
+  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
+
+  if (isFull()) {
+    _overflow = true;
+    return;
+  }
+
+  _base[_index] = mr;
+  _index += 1;
+}
+
+MemRegion CMRegionStack::pop_with_lock() {
+  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
+
+  while (true) {
+    if (_index == 0) {
+      return MemRegion();
+    }
+    _index -= 1;
+
+    MemRegion mr = _base[_index];
+    if (mr.start() != NULL) {
+      assert(mr.end() != NULL, "invariant");
+      assert(mr.word_size() > 0, "invariant");
+      return mr;
+    } else {
+      // that entry was invalidated... let's skip it
+      assert(mr.end() == NULL, "invariant");
+    }
+  }
+}
 
 bool CMRegionStack::invalidate_entries_into_cset() {
   bool result = false;
@@ -668,24 +708,46 @@
 //
 
 void ConcurrentMark::clearNextBitmap() {
-   guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");
-
-   // clear the mark bitmap (no grey objects to start with).
-   // We need to do this in chunks and offer to yield in between
-   // each chunk.
-   HeapWord* start  = _nextMarkBitMap->startWord();
-   HeapWord* end    = _nextMarkBitMap->endWord();
-   HeapWord* cur    = start;
-   size_t chunkSize = M;
-   while (cur < end) {
-     HeapWord* next = cur + chunkSize;
-     if (next > end)
-       next = end;
-     MemRegion mr(cur,next);
-     _nextMarkBitMap->clearRange(mr);
-     cur = next;
-     do_yield_check();
-   }
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  G1CollectorPolicy* g1p = g1h->g1_policy();
+
+  // Make sure that the concurrent mark thread looks to still be in
+  // the current cycle.
+  guarantee(cmThread()->during_cycle(), "invariant");
+
+  // We are finishing up the current cycle by clearing the next
+  // marking bitmap and getting it ready for the next cycle. During
+  // this time no other cycle can start. So, let's make sure that this
+  // is the case.
+  guarantee(!g1h->mark_in_progress(), "invariant");
+
+  // clear the mark bitmap (no grey objects to start with).
+  // We need to do this in chunks and offer to yield in between
+  // each chunk.
+  HeapWord* start  = _nextMarkBitMap->startWord();
+  HeapWord* end    = _nextMarkBitMap->endWord();
+  HeapWord* cur    = start;
+  size_t chunkSize = M;
+  while (cur < end) {
+    HeapWord* next = cur + chunkSize;
+    if (next > end)
+      next = end;
+    MemRegion mr(cur,next);
+    _nextMarkBitMap->clearRange(mr);
+    cur = next;
+    do_yield_check();
+
+    // Repeat the checks from above. We'll do them as asserts here to
+    // minimize their overhead on the product. However, we'll have
+    // them as guarantees at the beginning / end of the bitmap
+    // clearing to get some checking in the product.
+    assert(cmThread()->during_cycle(), "invariant");
+    assert(!g1h->mark_in_progress(), "invariant");
+  }
+
+  // Repeat the guarantees from above.
+  guarantee(cmThread()->during_cycle(), "invariant");
+  guarantee(!g1h->mark_in_progress(), "invariant");
 }
 
 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
@@ -3363,7 +3425,7 @@
       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
                              _task_id, _cm->region_stack_size());
 
-    MemRegion mr = _cm->region_stack_pop();
+    MemRegion mr = _cm->region_stack_pop_with_lock();
     // it returns MemRegion() if the pop fails
     statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
 
@@ -3384,7 +3446,7 @@
         if (has_aborted())
           mr = MemRegion();
         else {
-          mr = _cm->region_stack_pop();
+          mr = _cm->region_stack_pop_with_lock();
           // it returns MemRegion() if the pop fails
           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
         }
@@ -3417,7 +3479,7 @@
           }
           // Now push the part of the region we didn't scan on the
           // region stack to make sure a task scans it later.
-          _cm->region_stack_push(newRegion);
+          _cm->region_stack_push_with_lock(newRegion);
         }
         // break from while
         mr = MemRegion();
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Thu Apr 15 19:08:48 2010 -0700
@@ -252,9 +252,19 @@
   // with other "push" operations (no pops).
   void push(MemRegion mr);
 
+#if 0
+  // This is currently not used. See the comment in the .cpp file.
+
   // Lock-free; assumes that it will only be called in parallel
   // with other "pop" operations (no pushes).
   MemRegion pop();
+#endif // 0
+
+  // These two are the implementations that use a lock. They can be
+  // called concurrently with each other but they should not be called
+  // concurrently with the lock-free versions (push() / pop()).
+  void push_with_lock(MemRegion mr);
+  MemRegion pop_with_lock();
 
   bool isEmpty()    { return _index == 0; }
   bool isFull()     { return _index == _capacity; }
@@ -540,6 +550,10 @@
 
   // Manipulation of the region stack
   bool region_stack_push(MemRegion mr) {
+    // Currently we only call the lock-free version during evacuation
+    // pauses.
+    assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
+
     _regionStack.push(mr);
     if (_regionStack.overflow()) {
       set_has_overflown();
@@ -547,7 +561,33 @@
     }
     return true;
   }
-  MemRegion region_stack_pop()          { return _regionStack.pop(); }
+#if 0
+  // Currently this is not used. See the comment in the .cpp file.
+  MemRegion region_stack_pop() { return _regionStack.pop(); }
+#endif // 0
+
+  bool region_stack_push_with_lock(MemRegion mr) {
+    // Currently we only call the lock-based version during either
+    // concurrent marking or remark.
+    assert(!SafepointSynchronize::is_at_safepoint() || !concurrent(),
+           "if we are at a safepoint it should be the remark safepoint");
+
+    _regionStack.push_with_lock(mr);
+    if (_regionStack.overflow()) {
+      set_has_overflown();
+      return false;
+    }
+    return true;
+  }
+  MemRegion region_stack_pop_with_lock() {
+    // Currently we only call the lock-based version during either
+    // concurrent marking or remark.
+    assert(!SafepointSynchronize::is_at_safepoint() || !concurrent(),
+           "if we are at a safepoint it should be the remark safepoint");
+
+    return _regionStack.pop_with_lock();
+  }
+
   int region_stack_size()               { return _regionStack.size(); }
   bool region_stack_overflow()          { return _regionStack.overflow(); }
   bool region_stack_empty()             { return _regionStack.isEmpty(); }
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Thu Apr 15 19:08:48 2010 -0700
@@ -42,8 +42,8 @@
 
  private:
   ConcurrentMark*                  _cm;
-  bool                             _started;
-  bool                             _in_progress;
+  volatile bool                    _started;
+  volatile bool                    _in_progress;
 
   void sleepBeforeNextCycle();
 
@@ -67,15 +67,25 @@
   // Counting virtual time so far.
   double vtime_count_accum() { return _vtime_count_accum; }
 
-  ConcurrentMark* cm()                           { return _cm;     }
+  ConcurrentMark* cm()     { return _cm; }
 
-  void            set_started()                  { _started = true;   }
-  void            clear_started()                { _started = false;  }
-  bool            started()                      { return _started;   }
+  void set_started()       { _started = true;  }
+  void clear_started()     { _started = false; }
+  bool started()           { return _started;  }
 
-  void            set_in_progress()              { _in_progress = true;   }
-  void            clear_in_progress()            { _in_progress = false;  }
-  bool            in_progress()                  { return _in_progress;   }
+  void set_in_progress()   { _in_progress = true;  }
+  void clear_in_progress() { _in_progress = false; }
+  bool in_progress()       { return _in_progress;  }
+
+  // This flag returns true from the moment a marking cycle is
+  // initiated (during the initial-mark pause when started() is set)
+  // to the moment when the cycle completes (just after the next
+  // marking bitmap has been cleared and in_progress() is
+  // cleared). While this flag is true we will not start another cycle
+  // so that cycles do not overlap. We cannot use just in_progress()
+  // as the CM thread might take some time to wake up before noticing
+  // that started() is set and set in_progress().
+  bool during_cycle()      { return started() || in_progress(); }
 
   // Yield for GC
   void            yield();
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -902,6 +902,10 @@
 
 void G1CollectedHeap::do_collection(bool full, bool clear_all_soft_refs,
                                     size_t word_size) {
+  if (GC_locker::check_active_before_gc()) {
+    return; // GC is disabled (e.g. JNI GetXXXCritical operation)
+  }
+
   ResourceMark rm;
 
   if (PrintHeapAtGC) {
@@ -916,10 +920,6 @@
   assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint");
   assert(Thread::current() == VMThread::vm_thread(), "should be in vm thread");
 
-  if (GC_locker::is_active()) {
-    return; // GC is disabled (e.g. JNI GetXXXCritical operation)
-  }
-
   {
     IsGCActiveMark x;
 
@@ -2658,6 +2658,10 @@
 
 void
 G1CollectedHeap::do_collection_pause_at_safepoint() {
+  if (GC_locker::check_active_before_gc()) {
+    return; // GC is disabled (e.g. JNI GetXXXCritical operation)
+  }
+
   if (PrintHeapAtGC) {
     Universe::print_heap_before_gc();
   }
@@ -2665,6 +2669,11 @@
   {
     ResourceMark rm;
 
+    // This call will decide whether this pause is an initial-mark
+    // pause. If it is, during_initial_mark_pause() will return true
+    // for the duration of this pause.
+    g1_policy()->decide_on_conc_mark_initiation();
+
     char verbose_str[128];
     sprintf(verbose_str, "GC pause ");
     if (g1_policy()->in_young_gc_mode()) {
@@ -2673,7 +2682,7 @@
       else
         strcat(verbose_str, "(partial)");
     }
-    if (g1_policy()->should_initiate_conc_mark())
+    if (g1_policy()->during_initial_mark_pause())
       strcat(verbose_str, " (initial-mark)");
 
     // if PrintGCDetails is on, we'll print long statistics information
@@ -2697,10 +2706,6 @@
              "young list should be well formed");
     }
 
-    if (GC_locker::is_active()) {
-      return; // GC is disabled (e.g. JNI GetXXXCritical operation)
-    }
-
     bool abandoned = false;
     { // Call to jvmpi::post_class_unload_events must occur outside of active GC
       IsGCActiveMark x;
@@ -2756,7 +2761,7 @@
       _young_list->print();
 #endif // SCAN_ONLY_VERBOSE
 
-      if (g1_policy()->should_initiate_conc_mark()) {
+      if (g1_policy()->during_initial_mark_pause()) {
         concurrent_mark()->checkpointRootsInitialPre();
       }
       save_marks();
@@ -2858,7 +2863,7 @@
       }
 
       if (g1_policy()->in_young_gc_mode() &&
-          g1_policy()->should_initiate_conc_mark()) {
+          g1_policy()->during_initial_mark_pause()) {
         concurrent_mark()->checkpointRootsInitialPost();
         set_marking_started();
         // CAUTION: after the doConcurrentMark() call below,
@@ -2937,6 +2942,9 @@
   // the same region
   assert(r == NULL || !r->is_gc_alloc_region(),
          "shouldn't already be a GC alloc region");
+  assert(r == NULL || !r->isHumongous(),
+         "humongous regions shouldn't be used as GC alloc regions");
+
   HeapWord* original_top = NULL;
   if (r != NULL)
     original_top = r->top();
@@ -3079,12 +3087,17 @@
 
       if (alloc_region->in_collection_set() ||
           alloc_region->top() == alloc_region->end() ||
-          alloc_region->top() == alloc_region->bottom()) {
-        // we will discard the current GC alloc region if it's in the
-        // collection set (it can happen!), if it's already full (no
-        // point in using it), or if it's empty (this means that it
-        // was emptied during a cleanup and it should be on the free
-        // list now).
+          alloc_region->top() == alloc_region->bottom() ||
+          alloc_region->isHumongous()) {
+        // we will discard the current GC alloc region if
+        // * it's in the collection set (it can happen!),
+        // * it's already full (no point in using it),
+        // * it's empty (this means that it was emptied during
+        // a cleanup and it should be on the free list now), or
+        // * it's humongous (this means that it was emptied
+        // during a cleanup and was added to the free list, but
+        // has been subsequently used to allocate a humongous
+        // object that may be less than the region size).
 
         alloc_region = NULL;
       }
@@ -3977,7 +3990,7 @@
     OopsInHeapRegionClosure        *scan_perm_cl;
     OopsInHeapRegionClosure        *scan_so_cl;
 
-    if (_g1h->g1_policy()->should_initiate_conc_mark()) {
+    if (_g1h->g1_policy()->during_initial_mark_pause()) {
       scan_root_cl = &scan_mark_root_cl;
       scan_perm_cl = &scan_mark_perm_cl;
       scan_so_cl   = &scan_mark_heap_rs_cl;
@@ -4140,7 +4153,7 @@
   FilterAndMarkInHeapRegionAndIntoCSClosure scan_and_mark(this, &boc, concurrent_mark());
 
   OopsInHeapRegionClosure *foc;
-  if (g1_policy()->should_initiate_conc_mark())
+  if (g1_policy()->during_initial_mark_pause())
     foc = &scan_and_mark;
   else
     foc = &scan_only;
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -178,8 +178,8 @@
   // so the hack is to do the cast  QQQ FIXME
   _pauses_btwn_concurrent_mark((size_t)G1PausesBtwnConcMark),
   _n_marks_since_last_pause(0),
-  _conc_mark_initiated(false),
-  _should_initiate_conc_mark(false),
+  _initiate_conc_mark_if_possible(false),
+  _during_initial_mark_pause(false),
   _should_revert_to_full_young_gcs(false),
   _last_full_young_gc(false),
 
@@ -198,7 +198,9 @@
   _recorded_survivor_regions(0),
   _recorded_survivor_head(NULL),
   _recorded_survivor_tail(NULL),
-  _survivors_age_table(true)
+  _survivors_age_table(true),
+
+  _gc_overhead_perc(0.0)
 
 {
   // Set up the region size and associated fields. Given that the
@@ -275,6 +277,11 @@
   // calculate_young_list_target_config during initialization
   _max_survivor_regions = G1FixedSurvivorSpaceSize / HeapRegion::GrainBytes;
 
+  assert(GCTimeRatio > 0,
+         "we should have set it to a default value set_g1_gc_flags() "
+         "if a user set it to 0");
+  _gc_overhead_perc = 100.0 * (1.0 / (1.0 + GCTimeRatio));
+
   initialize_all();
 }
 
@@ -786,7 +793,7 @@
                            elapsed_time_ms,
                            calculations,
                            full_young_gcs() ? "full" : "partial",
-                           should_initiate_conc_mark() ? " i-m" : "",
+                           during_initial_mark_pause() ? " i-m" : "",
                            _in_marking_window,
                            _in_marking_window_im);
 #endif // TRACE_CALC_YOUNG_CONFIG
@@ -1033,7 +1040,8 @@
   set_full_young_gcs(true);
   _last_full_young_gc = false;
   _should_revert_to_full_young_gcs = false;
-  _should_initiate_conc_mark = false;
+  clear_initiate_conc_mark_if_possible();
+  clear_during_initial_mark_pause();
   _known_garbage_bytes = 0;
   _known_garbage_ratio = 0.0;
   _in_marking_window = false;
@@ -1179,7 +1187,8 @@
 void G1CollectorPolicy::record_concurrent_mark_init_end_pre(double
                                                    mark_init_elapsed_time_ms) {
   _during_marking = true;
-  _should_initiate_conc_mark = false;
+  assert(!initiate_conc_mark_if_possible(), "we should have cleared it by now");
+  clear_during_initial_mark_pause();
   _cur_mark_stop_world_time_ms = mark_init_elapsed_time_ms;
 }
 
@@ -1250,7 +1259,6 @@
   }
   _n_pauses_at_mark_end = _n_pauses;
   _n_marks_since_last_pause++;
-  _conc_mark_initiated = false;
 }
 
 void
@@ -1446,17 +1454,24 @@
 #endif // PRODUCT
 
   if (in_young_gc_mode()) {
-    last_pause_included_initial_mark = _should_initiate_conc_mark;
+    last_pause_included_initial_mark = during_initial_mark_pause();
     if (last_pause_included_initial_mark)
       record_concurrent_mark_init_end_pre(0.0);
 
     size_t min_used_targ =
       (_g1->capacity() / 100) * InitiatingHeapOccupancyPercent;
 
-    if (cur_used_bytes > min_used_targ) {
-      if (cur_used_bytes <= _prev_collection_pause_used_at_end_bytes) {
-      } else if (!_g1->mark_in_progress() && !_last_full_young_gc) {
-        _should_initiate_conc_mark = true;
+
+    if (!_g1->mark_in_progress() && !_last_full_young_gc) {
+      assert(!last_pause_included_initial_mark, "invariant");
+      if (cur_used_bytes > min_used_targ &&
+          cur_used_bytes > _prev_collection_pause_used_at_end_bytes) {
+        assert(!during_initial_mark_pause(), "we should not see this here");
+
+        // Note: this might have already been set, if during the last
+        // pause we decided to start a cycle but at the beginning of
+        // this pause we decided to postpone it. That's OK.
+        set_initiate_conc_mark_if_possible();
       }
     }
 
@@ -1747,7 +1762,7 @@
 
   bool new_in_marking_window = _in_marking_window;
   bool new_in_marking_window_im = false;
-  if (_should_initiate_conc_mark) {
+  if (during_initial_mark_pause()) {
     new_in_marking_window = true;
     new_in_marking_window_im = true;
   }
@@ -2166,7 +2181,13 @@
   if (predicted_time_ms > _expensive_region_limit_ms) {
     if (!in_young_gc_mode()) {
         set_full_young_gcs(true);
-      _should_initiate_conc_mark = true;
+        // We might want to do something different here. However,
+        // right now we don't support the non-generational G1 mode
+        // (and in fact we are planning to remove the associated code,
+        // see CR 6814390). So, let's leave it as is and this will be
+        // removed some time in the future
+        ShouldNotReachHere();
+        set_during_initial_mark_pause();
     } else
       // no point in doing another partial one
       _should_revert_to_full_young_gcs = true;
@@ -2288,7 +2309,7 @@
 }
 
 size_t G1CollectorPolicy::expansion_amount() {
-  if ((int)(recent_avg_pause_time_ratio() * 100.0) > G1GCPercent) {
+  if ((recent_avg_pause_time_ratio() * 100.0) > _gc_overhead_perc) {
     // We will double the existing space, or take
     // G1ExpandByPercentOfAvailable % of the available expansion
     // space, whichever is smaller, bounded below by a minimum
@@ -2690,6 +2711,50 @@
 #endif
 
 void
+G1CollectorPolicy::decide_on_conc_mark_initiation() {
+  // We are about to decide on whether this pause will be an
+  // initial-mark pause.
+
+  // First, during_initial_mark_pause() should not be already set. We
+  // will set it here if we have to. However, it should be cleared by
+  // the end of the pause (it's only set for the duration of an
+  // initial-mark pause).
+  assert(!during_initial_mark_pause(), "pre-condition");
+
+  if (initiate_conc_mark_if_possible()) {
+    // We had noticed on a previous pause that the heap occupancy has
+    // gone over the initiating threshold and we should start a
+    // concurrent marking cycle. So we might initiate one.
+
+    bool during_cycle = _g1->concurrent_mark()->cmThread()->during_cycle();
+    if (!during_cycle) {
+      // The concurrent marking thread is not "during a cycle", i.e.,
+      // it has completed the last one. So we can go ahead and
+      // initiate a new cycle.
+
+      set_during_initial_mark_pause();
+
+      // And we can now clear initiate_conc_mark_if_possible() as
+      // we've already acted on it.
+      clear_initiate_conc_mark_if_possible();
+    } else {
+      // The concurrent marking thread is still finishing up the
+      // previous cycle. If we start one right now the two cycles
+      // overlap. In particular, the concurrent marking thread might
+      // be in the process of clearing the next marking bitmap (which
+      // we will use for the next cycle if we start one). Starting a
+      // cycle now will be bad given that parts of the marking
+      // information might get cleared by the marking thread. And we
+      // cannot wait for the marking thread to finish the cycle as it
+      // periodically yields while clearing the next marking bitmap
+      // and, if it's in a yield point, it's waiting for us to
+      // finish. So, at this point we will not start a cycle and we'll
+      // let the concurrent marking thread complete the last one.
+    }
+  }
+}
+
+void
 G1CollectorPolicy_BestRegionsFirst::
 record_collection_pause_start(double start_time_sec, size_t start_used) {
   G1CollectorPolicy::record_collection_pause_start(start_time_sec, start_used);
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Thu Apr 15 19:08:48 2010 -0700
@@ -215,6 +215,8 @@
   SurvRateGroup*        _survivor_surv_rate_group;
   // add here any more surv rate groups
 
+  double                _gc_overhead_perc;
+
   bool during_marking() {
     return _during_marking;
   }
@@ -722,11 +724,31 @@
 
   size_t _n_marks_since_last_pause;
 
-  // True iff CM has been initiated.
-  bool _conc_mark_initiated;
+  // At the end of a pause we check the heap occupancy and we decide
+  // whether we will start a marking cycle during the next pause. If
+  // we decide that we want to do that, we will set this parameter to
+  // true. So, this parameter will stay true between the end of a
+  // pause and the beginning of a subsequent pause (not necessarily
+  // the next one, see the comments on the next field) when we decide
+  // that we will indeed start a marking cycle and do the initial-mark
+  // work.
+  volatile bool _initiate_conc_mark_if_possible;
 
-  // True iff CM should be initiated
-  bool _should_initiate_conc_mark;
+  // If initiate_conc_mark_if_possible() is set at the beginning of a
+  // pause, it is a suggestion that the pause should start a marking
+  // cycle by doing the initial-mark work. However, it is possible
+  // that the concurrent marking thread is still finishing up the
+  // previous marking cycle (e.g., clearing the next marking
+  // bitmap). If that is the case we cannot start a new cycle and
+  // we'll have to wait for the concurrent marking thread to finish
+  // what it is doing. In this case we will postpone the marking cycle
+  // initiation decision for the next pause. When we eventually decide
+  // to start a cycle, we will set _during_initial_mark_pause which
+  // will stay true until the end of the initial-mark pause and it's
+  // the condition that indicates that a pause is doing the
+  // initial-mark work.
+  volatile bool _during_initial_mark_pause;
+
   bool _should_revert_to_full_young_gcs;
   bool _last_full_young_gc;
 
@@ -979,9 +1001,21 @@
   // Add "hr" to the CS.
   void add_to_collection_set(HeapRegion* hr);
 
-  bool should_initiate_conc_mark()      { return _should_initiate_conc_mark; }
-  void set_should_initiate_conc_mark()  { _should_initiate_conc_mark = true; }
-  void unset_should_initiate_conc_mark(){ _should_initiate_conc_mark = false; }
+  bool initiate_conc_mark_if_possible()       { return _initiate_conc_mark_if_possible;  }
+  void set_initiate_conc_mark_if_possible()   { _initiate_conc_mark_if_possible = true;  }
+  void clear_initiate_conc_mark_if_possible() { _initiate_conc_mark_if_possible = false; }
+
+  bool during_initial_mark_pause()      { return _during_initial_mark_pause;  }
+  void set_during_initial_mark_pause()  { _during_initial_mark_pause = true;  }
+  void clear_during_initial_mark_pause(){ _during_initial_mark_pause = false; }
+
+  // This is called at the very beginning of an evacuation pause (it
+  // has to be the first thing that the pause does). If
+  // initiate_conc_mark_if_possible() is true, and the concurrent
+  // marking thread has completed its work during the previous cycle,
+  // it will set during_initial_mark_pause() to true so that the pause
+  // does the initial-mark work and starts a marking cycle.
+  void decide_on_conc_mark_initiation();
 
   // If an expansion would be appropriate, because recent GC overhead had
   // exceeded the desired limit, return an amount to expand by.
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu Apr 15 19:08:48 2010 -0700
@@ -40,9 +40,6 @@
   develop(bool, G1Gen, true,                                                \
           "If true, it will enable the generational G1")                    \
                                                                             \
-  develop(intx, G1GCPercent, 10,                                            \
-          "The desired percent time spent on GC")                           \
-                                                                            \
   develop(intx, G1PolicyVerbose, 0,                                         \
           "The verbosity level on G1 policy decisions")                     \
                                                                             \
@@ -270,11 +267,11 @@
   product(uintx, G1HeapRegionSize, 0,                                       \
           "Size of the G1 regions.")                                        \
                                                                             \
-  experimental(bool, G1UseParallelRSetUpdating, false,                      \
+  experimental(bool, G1UseParallelRSetUpdating, true,                       \
           "Enables the parallelization of remembered set updating "         \
           "during evacuation pauses")                                       \
                                                                             \
-  experimental(bool, G1UseParallelRSetScanning, false,                      \
+  experimental(bool, G1UseParallelRSetScanning, true,                       \
           "Enables the parallelization of remembered set scanning "         \
           "during evacuation pauses")                                       \
                                                                             \
--- a/src/share/vm/interpreter/bytecodeInterpreter.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/interpreter/bytecodeInterpreter.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -2328,6 +2328,17 @@
       }
 
       DEFAULT:
+#ifdef ZERO
+          // Some zero configurations use the C++ interpreter as a
+          // fallback interpreter and have support for platform
+          // specific fast bytecodes which aren't supported here, so
+          // redispatch to the equivalent non-fast bytecode when they
+          // are encountered.
+          if (Bytecodes::is_defined((Bytecodes::Code)opcode)) {
+              opcode = (jubyte)Bytecodes::java_code((Bytecodes::Code)opcode);
+              goto opcode_switch;
+          }
+#endif
           fatal2("\t*** Unimplemented opcode: %d = %s\n",
                  opcode, Bytecodes::name((Bytecodes::Code)opcode));
           goto finish;
--- a/src/share/vm/memory/threadLocalAllocBuffer.hpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/memory/threadLocalAllocBuffer.hpp	Thu Apr 15 19:08:48 2010 -0700
@@ -111,7 +111,22 @@
 
   // Allocate size HeapWords. The memory is NOT initialized to zero.
   inline HeapWord* allocate(size_t size);
-  static size_t alignment_reserve()              { return align_object_size(typeArrayOopDesc::header_size(T_INT)); }
+
+  // Reserve space at the end of TLAB
+  static size_t end_reserve() {
+    int reserve_size = typeArrayOopDesc::header_size(T_INT);
+    if (AllocatePrefetchStyle == 3) {
+      // BIS is used to prefetch - we need a space for it.
+      // +1 for rounding up to next cache line +1 to be safe
+      int lines = AllocatePrefetchLines + 2;
+      int step_size = AllocatePrefetchStepSize;
+      int distance = AllocatePrefetchDistance;
+      int prefetch_end = (distance + step_size*lines)/(int)HeapWordSize;
+      reserve_size = MAX2(reserve_size, prefetch_end);
+    }
+    return reserve_size;
+  }
+  static size_t alignment_reserve()              { return align_object_size(end_reserve()); }
   static size_t alignment_reserve_in_bytes()     { return alignment_reserve() * HeapWordSize; }
 
   // Return tlab size or remaining space in eden such that the
--- a/src/share/vm/opto/c2_globals.hpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/opto/c2_globals.hpp	Thu Apr 15 19:08:48 2010 -0700
@@ -52,9 +52,6 @@
           "Code alignment for interior entry points "                       \
           "in generated code (in bytes)")                                   \
                                                                             \
-  product_pd(intx, OptoLoopAlignment,                                       \
-          "Align inner loops to zero relative to this modulus")             \
-                                                                            \
   product(intx, MaxLoopPad, (OptoLoopAlignment-1),                          \
           "Align a loop if padding size in bytes is less or equal to this value") \
                                                                             \
--- a/src/share/vm/opto/doCall.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/opto/doCall.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -714,8 +714,6 @@
 
   // iterate through all entries sequentially
   for (;!handlers.is_done(); handlers.next()) {
-    // Do nothing if turned off
-    if( !DeutschShiffmanExceptions ) break;
     ciExceptionHandler* handler = handlers.handler();
 
     if (handler->is_rethrow()) {
@@ -741,46 +739,26 @@
       return;                   // No more handling to be done here!
     }
 
-    // %%% The following logic replicates make_from_klass_unique.
-    // TO DO:  Replace by a subroutine call.  Then generalize
-    // the type check, as noted in the next "%%%" comment.
+    // Get the handler's klass
+    ciInstanceKlass* klass = handler->catch_klass();
 
-    ciInstanceKlass* klass = handler->catch_klass();
-    if (UseUniqueSubclasses) {
-      // (We use make_from_klass because it respects UseUniqueSubclasses.)
-      const TypeOopPtr* tp = TypeOopPtr::make_from_klass(klass);
-      klass = tp->klass()->as_instance_klass();
+    if (!klass->is_loaded()) {  // klass is not loaded?
+      // fall through into catch_call_exceptions which will emit a
+      // handler with an uncommon trap.
+      break;
     }
 
-    // Get the handler's klass
-    if (!klass->is_loaded())    // klass is not loaded?
-      break;                    // Must call Rethrow!
     if (klass->is_interface())  // should not happen, but...
       break;                    // bail out
-    // See if the loaded exception klass has no subtypes
-    if (klass->has_subklass())
-      break;                    // Cannot easily do precise test ==> Rethrow
 
-    // %%% Now that subclass checking is very fast, we need to rewrite
-    // this section and remove the option "DeutschShiffmanExceptions".
-    // The exception processing chain should be a normal typecase pattern,
-    // with a bailout to the interpreter only in the case of unloaded
-    // classes.  (The bailout should mark the method non-entrant.)
-    // This rewrite should be placed in GraphKit::, not Parse::.
-
-    // Add a dependence; if any subclass added we need to recompile
-    // %%% should use stronger assert_unique_concrete_subtype instead
-    if (!klass->is_final()) {
-      C->dependencies()->assert_leaf_type(klass);
-    }
-
-    // Implement precise test
+    // Check the type of the exception against the catch type
     const TypeKlassPtr *tk = TypeKlassPtr::make(klass);
     Node* con = _gvn.makecon(tk);
-    Node* cmp = _gvn.transform( new (C, 3) CmpPNode(ex_klass_node, con) );
-    Node* bol = _gvn.transform( new (C, 2) BoolNode(cmp, BoolTest::ne) );
-    { BuildCutout unless(this, bol, PROB_LIKELY(0.7f));
-      const TypeInstPtr* tinst = TypeInstPtr::make_exact(TypePtr::NotNull, klass);
+    Node* not_subtype_ctrl = gen_subtype_check(ex_klass_node, con);
+    if (!stopped()) {
+      PreserveJVMState pjvms(this);
+      const TypeInstPtr* tinst = TypeOopPtr::make_from_klass_unique(klass)->cast_to_ptr_type(TypePtr::NotNull)->is_instptr();
+      assert(klass->has_subklass() || tinst->klass_is_exact(), "lost exactness");
       Node* ex_oop = _gvn.transform(new (C, 2) CheckCastPPNode(control(), ex_node, tinst));
       push_ex_oop(ex_oop);      // Push exception oop for handler
 #ifndef PRODUCT
@@ -792,6 +770,7 @@
 #endif
       merge_exception(handler_bci);
     }
+    set_control(not_subtype_ctrl);
 
     // Come here if exception does not match handler.
     // Carry on with more handler checks.
@@ -800,21 +779,6 @@
 
   assert(!stopped(), "you should return if you finish the chain");
 
-  if (remaining == 1) {
-    // Further checks do not matter.
-  }
-
-  if (can_rerun_bytecode()) {
-    // Do not push_ex_oop here!
-    // Re-executing the bytecode will reproduce the throwing condition.
-    bool must_throw = true;
-    uncommon_trap(Deoptimization::Reason_unhandled,
-                  Deoptimization::Action_none,
-                  (ciKlass*)NULL, (const char*)NULL, // default args
-                  must_throw);
-    return;
-  }
-
   // Oops, need to call into the VM to resolve the klasses at runtime.
   // Note:  This call must not deoptimize, since it is not a real at this bci!
   kill_dead_locals();
--- a/src/share/vm/opto/macro.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/opto/macro.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -1487,11 +1487,11 @@
                                         Node*& contended_phi_rawmem,
                                         Node* old_eden_top, Node* new_eden_top,
                                         Node* length) {
+   enum { fall_in_path = 1, pf_path = 2 };
    if( UseTLAB && AllocatePrefetchStyle == 2 ) {
       // Generate prefetch allocation with watermark check.
       // As an allocation hits the watermark, we will prefetch starting
       // at a "distance" away from watermark.
-      enum { fall_in_path = 1, pf_path = 2 };
 
       Node *pf_region = new (C, 3) RegionNode(3);
       Node *pf_phi_rawmem = new (C, 3) PhiNode( pf_region, Type::MEMORY,
@@ -1570,6 +1570,45 @@
       needgc_false = pf_region;
       contended_phi_rawmem = pf_phi_rawmem;
       i_o = pf_phi_abio;
+   } else if( UseTLAB && AllocatePrefetchStyle == 3 ) {
+      // Insert a prefetch for each allocation only on the fast-path
+      Node *pf_region = new (C, 3) RegionNode(3);
+      Node *pf_phi_rawmem = new (C, 3) PhiNode( pf_region, Type::MEMORY,
+                                                TypeRawPtr::BOTTOM );
+
+      // Generate several prefetch instructions only for arrays.
+      uint lines = (length != NULL) ? AllocatePrefetchLines : 1;
+      uint step_size = AllocatePrefetchStepSize;
+      uint distance = AllocatePrefetchDistance;
+
+      // Next cache address.
+      Node *cache_adr = new (C, 4) AddPNode(old_eden_top, old_eden_top,
+                                            _igvn.MakeConX(distance));
+      transform_later(cache_adr);
+      cache_adr = new (C, 2) CastP2XNode(needgc_false, cache_adr);
+      transform_later(cache_adr);
+      Node* mask = _igvn.MakeConX(~(intptr_t)(step_size-1));
+      cache_adr = new (C, 3) AndXNode(cache_adr, mask);
+      transform_later(cache_adr);
+      cache_adr = new (C, 2) CastX2PNode(cache_adr);
+      transform_later(cache_adr);
+
+      // Prefetch
+      Node *prefetch = new (C, 3) PrefetchWriteNode( contended_phi_rawmem, cache_adr );
+      prefetch->set_req(0, needgc_false);
+      transform_later(prefetch);
+      contended_phi_rawmem = prefetch;
+      Node *prefetch_adr;
+      distance = step_size;
+      for ( uint i = 1; i < lines; i++ ) {
+        prefetch_adr = new (C, 4) AddPNode( cache_adr, cache_adr,
+                                            _igvn.MakeConX(distance) );
+        transform_later(prefetch_adr);
+        prefetch = new (C, 3) PrefetchWriteNode( contended_phi_rawmem, prefetch_adr );
+        transform_later(prefetch);
+        distance += step_size;
+        contended_phi_rawmem = prefetch;
+      }
    } else if( AllocatePrefetchStyle > 0 ) {
       // Insert a prefetch for each allocation only on the fast-path
       Node *prefetch_adr;
--- a/src/share/vm/opto/memnode.hpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/opto/memnode.hpp	Thu Apr 15 19:08:48 2010 -0700
@@ -1244,5 +1244,5 @@
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return NotAMachineReg; }
   virtual uint match_edge(uint idx) const { return idx==2; }
-  virtual const Type *bottom_type() const { return Type::ABIO; }
+  virtual const Type *bottom_type() const { return ( AllocatePrefetchStyle == 3 ) ? Type::MEMORY : Type::ABIO; }
 };
--- a/src/share/vm/opto/parse.hpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/opto/parse.hpp	Thu Apr 15 19:08:48 2010 -0700
@@ -551,9 +551,6 @@
   // Also handles exceptions for individual bytecodes.
   void catch_inline_exceptions(SafePointNode* ex_map);
 
-  // Bytecode classifier, helps decide to use uncommon_trap vs. rethrow_C.
-  bool can_rerun_bytecode();
-
   // Merge the given map into correct exceptional exit state.
   // Assumes that there is no applicable local handler.
   void throw_to_exit(SafePointNode* ex_map);
--- a/src/share/vm/opto/parse1.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/opto/parse1.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -798,67 +798,6 @@
   initial_gvn()->transform_no_reclaim(exit);
 }
 
-bool Parse::can_rerun_bytecode() {
-  switch (bc()) {
-  case Bytecodes::_ldc:
-  case Bytecodes::_ldc_w:
-  case Bytecodes::_ldc2_w:
-  case Bytecodes::_getfield:
-  case Bytecodes::_putfield:
-  case Bytecodes::_getstatic:
-  case Bytecodes::_putstatic:
-  case Bytecodes::_arraylength:
-  case Bytecodes::_baload:
-  case Bytecodes::_caload:
-  case Bytecodes::_iaload:
-  case Bytecodes::_saload:
-  case Bytecodes::_faload:
-  case Bytecodes::_aaload:
-  case Bytecodes::_laload:
-  case Bytecodes::_daload:
-  case Bytecodes::_bastore:
-  case Bytecodes::_castore:
-  case Bytecodes::_iastore:
-  case Bytecodes::_sastore:
-  case Bytecodes::_fastore:
-  case Bytecodes::_aastore:
-  case Bytecodes::_lastore:
-  case Bytecodes::_dastore:
-  case Bytecodes::_irem:
-  case Bytecodes::_idiv:
-  case Bytecodes::_lrem:
-  case Bytecodes::_ldiv:
-  case Bytecodes::_frem:
-  case Bytecodes::_fdiv:
-  case Bytecodes::_drem:
-  case Bytecodes::_ddiv:
-  case Bytecodes::_checkcast:
-  case Bytecodes::_instanceof:
-  case Bytecodes::_anewarray:
-  case Bytecodes::_newarray:
-  case Bytecodes::_multianewarray:
-  case Bytecodes::_new:
-  case Bytecodes::_monitorenter:  // can re-run initial null check, only
-  case Bytecodes::_return:
-    return true;
-    break;
-
-  // Don't rerun athrow since it's part of the exception path.
-  case Bytecodes::_athrow:
-  case Bytecodes::_invokestatic:
-  case Bytecodes::_invokedynamic:
-  case Bytecodes::_invokespecial:
-  case Bytecodes::_invokevirtual:
-  case Bytecodes::_invokeinterface:
-    return false;
-    break;
-
-  default:
-    assert(false, "unexpected bytecode produced an exception");
-    return true;
-  }
-}
-
 //---------------------------do_exceptions-------------------------------------
 // Process exceptions arising from the current bytecode.
 // Send caught exceptions to the proper handler within this method.
@@ -872,9 +811,6 @@
     return;
   }
 
-  // Make sure we can classify this bytecode if we need to.
-  debug_only(can_rerun_bytecode());
-
   PreserveJVMState pjvms(this, false);
 
   SafePointNode* ex_map;
--- a/src/share/vm/opto/runtime.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/opto/runtime.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -865,7 +865,7 @@
     thread->set_exception_stack_size(0);
 
     // Check if the exception PC is a MethodHandle call site.
-    thread->set_is_method_handle_exception(nm->is_method_handle_return(pc));
+    thread->set_is_method_handle_return(nm->is_method_handle_return(pc));
   }
 
   // Restore correct return pc.  Was saved above.
--- a/src/share/vm/prims/forte.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/prims/forte.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -55,12 +55,11 @@
 };
 
 
-static void is_decipherable_compiled_frame(frame* fr, RegisterMap* map,
-  bool* is_compiled_p, bool* is_walkable_p);
+static bool is_decipherable_compiled_frame(JavaThread* thread, frame* fr, nmethod* nm);
 static bool is_decipherable_interpreted_frame(JavaThread* thread,
-                                                frame* fr,
-                                                methodOop* method_p,
-                                                int* bci_p);
+                                              frame* fr,
+                                              methodOop* method_p,
+                                              int* bci_p);
 
 
 
@@ -122,41 +121,43 @@
 // Determine if 'fr' is a decipherable compiled frame. We are already
 // assured that fr is for a java nmethod.
 
-static bool is_decipherable_compiled_frame(frame* fr) {
-
-  assert(fr->cb() != NULL && fr->cb()->is_nmethod(), "invariant");
-  nmethod* nm = (nmethod*) fr->cb();
+static bool is_decipherable_compiled_frame(JavaThread* thread, frame* fr, nmethod* nm) {
   assert(nm->is_java_method(), "invariant");
 
-  // First try and find an exact PcDesc
+  if (thread->has_last_Java_frame() && thread->last_Java_pc() == fr->pc()) {
+    // We're stopped at a call into the JVM so look for a PcDesc with
+    // the actual pc reported by the frame.
+    PcDesc* pc_desc = nm->pc_desc_at(fr->pc());
 
-  PcDesc* pc_desc = nm->pc_desc_at(fr->pc());
-
-  // Did we find a useful PcDesc?
-  if (pc_desc != NULL &&
-      pc_desc->scope_decode_offset() == DebugInformationRecorder::serialized_null) {
-
-    address probe_pc = fr->pc() + 1;
-    pc_desc = nm->pc_desc_near(probe_pc);
-
-    // Now do we have a useful PcDesc?
-
+    // Did we find a useful PcDesc?
     if (pc_desc != NULL &&
-        pc_desc->scope_decode_offset() == DebugInformationRecorder::serialized_null) {
-      // No debug information available for this pc
-      // vframeStream would explode if we try and walk the frames.
-      return false;
+        pc_desc->scope_decode_offset() != DebugInformationRecorder::serialized_null) {
+      return true;
     }
-
-    // This PcDesc is useful however we must adjust the frame's pc
-    // so that the vframeStream lookups will use this same pc
-
-    fr->set_pc(pc_desc->real_pc(nm));
   }
 
+  // We're at some random pc in the nmethod so search for the PcDesc
+  // whose pc is greater than the current PC.  It's done this way
+  // because the extra PcDescs that are recorded for improved debug
+  // info record the end of the region covered by the ScopeDesc
+  // instead of the beginning.
+  PcDesc* pc_desc = nm->pc_desc_near(fr->pc() + 1);
+
+  // Now do we have a useful PcDesc?
+  if (pc_desc == NULL ||
+      pc_desc->scope_decode_offset() == DebugInformationRecorder::serialized_null) {
+    // No debug information available for this pc
+    // vframeStream would explode if we try and walk the frames.
+    return false;
+  }
+
+  // This PcDesc is useful however we must adjust the frame's pc
+  // so that the vframeStream lookups will use this same pc
+  fr->set_pc(pc_desc->real_pc(nm));
   return true;
 }
 
+
 // Determine if 'fr' is a walkable interpreted frame. Returns false
 // if it is not. *method_p, and *bci_p are not set when false is
 // returned. *method_p is non-NULL if frame was executing a Java
@@ -166,9 +167,9 @@
 // even if a valid BCI cannot be found.
 
 static bool is_decipherable_interpreted_frame(JavaThread* thread,
-                                                frame* fr,
-                                                methodOop* method_p,
-                                                int* bci_p) {
+                                              frame* fr,
+                                              methodOop* method_p,
+                                              int* bci_p) {
   assert(fr->is_interpreted_frame(), "just checking");
 
   // top frame is an interpreted frame
@@ -323,13 +324,15 @@
       // have a PCDesc that can get us a bci however we did find
       // a method
 
-      if (!is_decipherable_compiled_frame(&candidate)) {
+      if (!is_decipherable_compiled_frame(thread, &candidate, nm)) {
         return false;
       }
 
       // is_decipherable_compiled_frame may modify candidate's pc
       *initial_frame_p = candidate;
 
+      assert(nm->pc_desc_at(candidate.pc()) != NULL, "if it's decipherable then pc must be valid");
+
       return true;
     }
 
--- a/src/share/vm/runtime/arguments.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/runtime/arguments.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -1353,6 +1353,16 @@
       MarkStackSize / K, MarkStackSizeMax / K);
     tty->print_cr("ConcGCThreads: %u", ConcGCThreads);
   }
+
+  if (FLAG_IS_DEFAULT(GCTimeRatio) || GCTimeRatio == 0) {
+    // In G1, we want the default GC overhead goal to be higher than
+    // say in PS. So we set it here to 10%. Otherwise the heap might
+    // be expanded more aggressively than we would like it to. In
+    // fact, even 10% seems to not be high enough in some cases
+    // (especially small GC stress tests whose main activity
+    // is allocation). We might consider increasing it further.
+    FLAG_SET_DEFAULT(GCTimeRatio, 9);
+  }
 }
 
 void Arguments::set_heap_size() {
--- a/src/share/vm/runtime/globals.hpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/runtime/globals.hpp	Thu Apr 15 19:08:48 2010 -0700
@@ -1052,7 +1052,8 @@
           "Use SSE2 MOVDQU instruction for Arraycopy")                      \
                                                                             \
   product(intx, FieldsAllocationStyle, 1,                                   \
-          "0 - type based with oops first, 1 - with oops last")             \
+          "0 - type based with oops first, 1 - with oops last, "            \
+          "2 - oops in super and sub classes are together")                 \
                                                                             \
   product(bool, CompactFields, true,                                        \
           "Allocate nonstatic fields in gaps between previous fields")      \
@@ -2502,10 +2503,6 @@
   notproduct(bool, TraceSpilling, false,                                    \
           "Trace spilling")                                                 \
                                                                             \
-  develop(bool, DeutschShiffmanExceptions, true,                            \
-          "Fast check to find exception handler for precisely typed "       \
-          "exceptions")                                                     \
-                                                                            \
   product(bool, SplitIfBlocks, true,                                        \
           "Clone compares and control flow through merge points to fold "   \
           "some branches")                                                  \
@@ -2711,7 +2708,8 @@
   product(intx,  AllocatePrefetchStyle, 1,                                  \
           "0 = no prefetch, "                                               \
           "1 = prefetch instructions for each allocation, "                 \
-          "2 = use TLAB watermark to gate allocation prefetch")             \
+          "2 = use TLAB watermark to gate allocation prefetch, "            \
+          "3 = use BIS instruction on Sparc for allocation prefetch")       \
                                                                             \
   product(intx,  AllocatePrefetchDistance, -1,                              \
           "Distance to prefetch ahead of allocation pointer")               \
@@ -3114,6 +3112,9 @@
   develop_pd(intx, CodeEntryAlignment,                                      \
           "Code entry alignment for generated code (in bytes)")             \
                                                                             \
+  product_pd(intx, OptoLoopAlignment,                                       \
+          "Align inner loops to zero relative to this modulus")             \
+                                                                            \
   product_pd(uintx, InitialCodeCacheSize,                                   \
           "Initial code cache size (in bytes)")                             \
                                                                             \
--- a/src/share/vm/runtime/mutexLocker.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/runtime/mutexLocker.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -70,6 +70,7 @@
 Monitor* CMark_lock                   = NULL;
 Monitor* ZF_mon                       = NULL;
 Monitor* Cleanup_mon                  = NULL;
+Mutex*   CMRegionStack_lock           = NULL;
 Mutex*   SATB_Q_FL_lock               = NULL;
 Monitor* SATB_Q_CBL_mon               = NULL;
 Mutex*   Shared_SATB_Q_lock           = NULL;
@@ -167,6 +168,7 @@
     def(CMark_lock                 , Monitor, nonleaf,     true ); // coordinate concurrent mark thread
     def(ZF_mon                     , Monitor, leaf,        true );
     def(Cleanup_mon                , Monitor, nonleaf,     true );
+    def(CMRegionStack_lock         , Mutex,   leaf,        true );
     def(SATB_Q_FL_lock             , Mutex  , special,     true );
     def(SATB_Q_CBL_mon             , Monitor, nonleaf,     true );
     def(Shared_SATB_Q_lock         , Mutex,   nonleaf,     true );
--- a/src/share/vm/runtime/mutexLocker.hpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/runtime/mutexLocker.hpp	Thu Apr 15 19:08:48 2010 -0700
@@ -63,6 +63,7 @@
 extern Monitor* CMark_lock;                      // used for concurrent mark thread coordination
 extern Monitor* ZF_mon;                          // used for G1 conc zero-fill.
 extern Monitor* Cleanup_mon;                     // used for G1 conc cleanup.
+extern Mutex*   CMRegionStack_lock;              // used for protecting accesses to the CM region stack
 extern Mutex*   SATB_Q_FL_lock;                  // Protects SATB Q
                                                  // buffer free list.
 extern Monitor* SATB_Q_CBL_mon;                  // Protects SATB Q
--- a/src/share/vm/runtime/sharedRuntime.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/runtime/sharedRuntime.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -259,13 +259,16 @@
 address SharedRuntime::raw_exception_handler_for_return_address(JavaThread* thread, address return_address) {
   assert(frame::verify_return_pc(return_address), "must be a return pc");
 
+  // Reset MethodHandle flag.
+  thread->set_is_method_handle_return(false);
+
   // the fastest case first
   CodeBlob* blob = CodeCache::find_blob(return_address);
   if (blob != NULL && blob->is_nmethod()) {
     nmethod* code = (nmethod*)blob;
     assert(code != NULL, "nmethod must be present");
     // Check if the return address is a MethodHandle call site.
-    thread->set_is_method_handle_exception(code->is_method_handle_return(return_address));
+    thread->set_is_method_handle_return(code->is_method_handle_return(return_address));
     // native nmethods don't have exception handlers
     assert(!code->is_native_method(), "no exception handler");
     assert(code->header_begin() != code->exception_begin(), "no exception handler");
@@ -292,7 +295,7 @@
       nmethod* code = (nmethod*)blob;
       assert(code != NULL, "nmethod must be present");
       // Check if the return address is a MethodHandle call site.
-      thread->set_is_method_handle_exception(code->is_method_handle_return(return_address));
+      thread->set_is_method_handle_return(code->is_method_handle_return(return_address));
       assert(code->header_begin() != code->exception_begin(), "no exception handler");
       return code->exception_begin();
     }
--- a/src/share/vm/runtime/thread.hpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/runtime/thread.hpp	Thu Apr 15 19:08:48 2010 -0700
@@ -772,7 +772,7 @@
   volatile address _exception_pc;                // PC where exception happened
   volatile address _exception_handler_pc;        // PC for handler of exception
   volatile int     _exception_stack_size;        // Size of frame where exception happened
-  volatile int     _is_method_handle_exception;  // True if the current exception PC is at a MethodHandle call.
+  volatile int     _is_method_handle_return;     // true (== 1) if the current exception PC is a MethodHandle call site.
 
   // support for compilation
   bool    _is_compiling;                         // is true if a compilation is active inthis thread (one compilation per thread possible)
@@ -1108,13 +1108,13 @@
   int      exception_stack_size() const          { return _exception_stack_size; }
   address  exception_pc() const                  { return _exception_pc; }
   address  exception_handler_pc() const          { return _exception_handler_pc; }
-  int      is_method_handle_exception() const    { return _is_method_handle_exception; }
+  bool     is_method_handle_return() const       { return _is_method_handle_return == 1; }
 
   void set_exception_oop(oop o)                  { _exception_oop = o; }
   void set_exception_pc(address a)               { _exception_pc = a; }
   void set_exception_handler_pc(address a)       { _exception_handler_pc = a; }
   void set_exception_stack_size(int size)        { _exception_stack_size = size; }
-  void set_is_method_handle_exception(int value) { _is_method_handle_exception = value; }
+  void set_is_method_handle_return(bool value)   { _is_method_handle_return = value ? 1 : 0; }
 
   // Stack overflow support
   inline size_t stack_available(address cur_sp);
@@ -1188,7 +1188,7 @@
   static ByteSize exception_pc_offset()          { return byte_offset_of(JavaThread, _exception_pc        ); }
   static ByteSize exception_handler_pc_offset()  { return byte_offset_of(JavaThread, _exception_handler_pc); }
   static ByteSize exception_stack_size_offset()  { return byte_offset_of(JavaThread, _exception_stack_size); }
-  static ByteSize is_method_handle_exception_offset() { return byte_offset_of(JavaThread, _is_method_handle_exception); }
+  static ByteSize is_method_handle_return_offset() { return byte_offset_of(JavaThread, _is_method_handle_return); }
   static ByteSize stack_guard_state_offset()     { return byte_offset_of(JavaThread, _stack_guard_state   ); }
   static ByteSize suspend_flags_offset()         { return byte_offset_of(JavaThread, _suspend_flags       ); }
 
--- a/src/share/vm/utilities/ostream.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/utilities/ostream.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -363,7 +363,7 @@
   return _log_file != NULL;
 }
 
-static const char* make_log_name(const char* log_name, const char* force_directory, char* buf) {
+static const char* make_log_name(const char* log_name, const char* force_directory) {
   const char* basename = log_name;
   char file_sep = os::file_separator()[0];
   const char* cp;
@@ -374,6 +374,27 @@
   }
   const char* nametail = log_name;
 
+  // Compute buffer length
+  size_t buffer_length;
+  if (force_directory != NULL) {
+    buffer_length = strlen(force_directory) + strlen(os::file_separator()) +
+                    strlen(basename) + 1;
+  } else {
+    buffer_length = strlen(log_name) + 1;
+  }
+
+  const char* star = strchr(basename, '*');
+  int star_pos = (star == NULL) ? -1 : (star - nametail);
+
+  char pid[32];
+  if (star_pos >= 0) {
+    jio_snprintf(pid, sizeof(pid), "%u", os::current_process_id());
+    buffer_length += strlen(pid);
+  }
+
+  // Create big enough buffer.
+  char *buf = NEW_C_HEAP_ARRAY(char, buffer_length);
+
   strcpy(buf, "");
   if (force_directory != NULL) {
     strcat(buf, force_directory);
@@ -381,14 +402,11 @@
     nametail = basename;       // completely skip directory prefix
   }
 
-  const char* star = strchr(basename, '*');
-  int star_pos = (star == NULL) ? -1 : (star - nametail);
-
   if (star_pos >= 0) {
     // convert foo*bar.log to foo123bar.log
     int buf_pos = (int) strlen(buf);
     strncpy(&buf[buf_pos], nametail, star_pos);
-    sprintf(&buf[buf_pos + star_pos], "%u", os::current_process_id());
+    strcpy(&buf[buf_pos + star_pos], pid);
     nametail += star_pos + 1;  // skip prefix and star
   }
 
@@ -399,20 +417,23 @@
 void defaultStream::init_log() {
   // %%% Need a MutexLocker?
   const char* log_name = LogFile != NULL ? LogFile : "hotspot.log";
-  char buf[O_BUFLEN*2];
-  const char* try_name = make_log_name(log_name, NULL, buf);
+  const char* try_name = make_log_name(log_name, NULL);
   fileStream* file = new(ResourceObj::C_HEAP) fileStream(try_name);
   if (!file->is_open()) {
     // Try again to open the file.
     char warnbuf[O_BUFLEN*2];
-    sprintf(warnbuf, "Warning:  Cannot open log file: %s\n", try_name);
+    jio_snprintf(warnbuf, sizeof(warnbuf),
+                 "Warning:  Cannot open log file: %s\n", try_name);
     // Note:  This feature is for maintainer use only.  No need for L10N.
     jio_print(warnbuf);
-    try_name = make_log_name("hs_pid*.log", os::get_temp_directory(), buf);
-    sprintf(warnbuf, "Warning:  Forcing option -XX:LogFile=%s\n", try_name);
+    FREE_C_HEAP_ARRAY(char, try_name);
+    try_name = make_log_name("hs_pid*.log", os::get_temp_directory());
+    jio_snprintf(warnbuf, sizeof(warnbuf),
+                 "Warning:  Forcing option -XX:LogFile=%s\n", try_name);
     jio_print(warnbuf);
     delete file;
     file = new(ResourceObj::C_HEAP) fileStream(try_name);
+    FREE_C_HEAP_ARRAY(char, try_name);
   }
   if (file->is_open()) {
     _log_file = file;
--- a/src/share/vm/utilities/vmError.cpp	Thu Apr 15 19:08:18 2010 -0700
+++ b/src/share/vm/utilities/vmError.cpp	Thu Apr 15 19:08:48 2010 -0700
@@ -807,8 +807,8 @@
       if (fd == -1) {
         // try temp directory
         const char * tmpdir = os::get_temp_directory();
-        jio_snprintf(buffer, sizeof(buffer), "%shs_err_pid%u.log",
-                     (tmpdir ? tmpdir : ""), os::current_process_id());
+        jio_snprintf(buffer, sizeof(buffer), "%s%shs_err_pid%u.log",
+                     tmpdir, os::file_separator(), os::current_process_id());
         fd = open(buffer, O_WRONLY | O_CREAT | O_TRUNC, 0666);
       }