changeset 2180:8c9c9ee30d71

Merge
author kvn
date Thu, 03 Mar 2011 23:31:45 -0800
parents 11303bede852 0ac769a57c64
children 3e2b59ab2d07 5d8f5a6dced7 4cd9add59b1e
files src/share/vm/runtime/arguments.cpp src/share/vm/runtime/globals.hpp
diffstat 34 files changed, 1791 insertions(+), 770 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -395,9 +395,9 @@
 
   int offset = code_offset();
 
-  __ call(Runtime1::entry_for(Runtime1::handle_exception_id), relocInfo::runtime_call_type);
+  __ call(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id), relocInfo::runtime_call_type);
   __ delayed()->nop();
-  debug_only(__ stop("should have gone to the caller");)
+  __ should_not_reach_here();
   assert(code_offset() - offset <= exception_handler_size, "overflow");
   __ end_a_stub();
 
--- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -148,7 +148,7 @@
 
 static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) {
   assert(frame_size_in_bytes == __ total_frame_size_in_bytes(reg_save_size_in_words),
-         " mismatch in calculation");
+         "mismatch in calculation");
   sasm->set_frame_size(frame_size_in_bytes / BytesPerWord);
   int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
   OopMap* oop_map = new OopMap(frame_size_in_slots, 0);
@@ -176,9 +176,8 @@
 
 static OopMap* save_live_registers(StubAssembler* sasm, bool save_fpu_registers = true) {
   assert(frame_size_in_bytes == __ total_frame_size_in_bytes(reg_save_size_in_words),
-         " mismatch in calculation");
+         "mismatch in calculation");
   __ save_frame_c1(frame_size_in_bytes);
-  sasm->set_frame_size(frame_size_in_bytes / BytesPerWord);
 
   // Record volatile registers as callee-save values in an OopMap so their save locations will be
   // propagated to the caller frame's RegisterMap during StackFrameStream construction (needed for
@@ -367,23 +366,7 @@
   switch (id) {
     case forward_exception_id:
       {
-        // we're handling an exception in the context of a compiled
-        // frame.  The registers have been saved in the standard
-        // places.  Perform an exception lookup in the caller and
-        // dispatch to the handler if found.  Otherwise unwind and
-        // dispatch to the callers exception handler.
-
-        oop_maps = new OopMapSet();
-        OopMap* oop_map = generate_oop_map(sasm, true);
-
-        // transfer the pending exception to the exception_oop
-        __ ld_ptr(G2_thread, in_bytes(JavaThread::pending_exception_offset()), Oexception);
-        __ ld_ptr(Oexception, 0, G0);
-        __ st_ptr(G0, G2_thread, in_bytes(JavaThread::pending_exception_offset()));
-        __ add(I7, frame::pc_return_offset, Oissuing_pc);
-
-        generate_handle_exception(sasm, oop_maps, oop_map);
-        __ should_not_reach_here();
+        oop_maps = generate_handle_exception(id, sasm);
       }
       break;
 
@@ -671,15 +654,14 @@
       break;
 
     case handle_exception_id:
-      {
-        __ set_info("handle_exception", dont_gc_arguments);
-        // make a frame and preserve the caller's caller-save registers
+      { __ set_info("handle_exception", dont_gc_arguments);
+        oop_maps = generate_handle_exception(id, sasm);
+      }
+      break;
 
-        oop_maps = new OopMapSet();
-        OopMap* oop_map = save_live_registers(sasm);
-        __ mov(Oexception->after_save(),  Oexception);
-        __ mov(Oissuing_pc->after_save(), Oissuing_pc);
-        generate_handle_exception(sasm, oop_maps, oop_map);
+    case handle_exception_from_callee_id:
+      { __ set_info("handle_exception_from_callee", dont_gc_arguments);
+        oop_maps = generate_handle_exception(id, sasm);
       }
       break;
 
@@ -696,7 +678,7 @@
                         G2_thread, Oissuing_pc->after_save());
         __ verify_not_null_oop(Oexception->after_save());
 
-        // Restore SP from L7 if the exception PC is a MethodHandle call site.
+        // Restore SP from L7 if the exception PC is a method handle call site.
         __ mov(O0, G5);  // Save the target address.
         __ lduw(Address(G2_thread, JavaThread::is_method_handle_return_offset()), L0);
         __ tst(L0);  // Condition codes are preserved over the restore.
@@ -1006,48 +988,89 @@
 }
 
 
-void Runtime1::generate_handle_exception(StubAssembler* sasm, OopMapSet* oop_maps, OopMap* oop_map, bool) {
-  Label no_deopt;
+OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler* sasm) {
+  __ block_comment("generate_handle_exception");
+
+  // Save registers, if required.
+  OopMapSet* oop_maps = new OopMapSet();
+  OopMap* oop_map = NULL;
+  switch (id) {
+  case forward_exception_id:
+    // We're handling an exception in the context of a compiled frame.
+    // The registers have been saved in the standard places.  Perform
+    // an exception lookup in the caller and dispatch to the handler
+    // if found.  Otherwise unwind and dispatch to the callers
+    // exception handler.
+     oop_map = generate_oop_map(sasm, true);
+
+     // transfer the pending exception to the exception_oop
+     __ ld_ptr(G2_thread, in_bytes(JavaThread::pending_exception_offset()), Oexception);
+     __ ld_ptr(Oexception, 0, G0);
+     __ st_ptr(G0, G2_thread, in_bytes(JavaThread::pending_exception_offset()));
+     __ add(I7, frame::pc_return_offset, Oissuing_pc);
+    break;
+  case handle_exception_id:
+    // At this point all registers MAY be live.
+    oop_map = save_live_registers(sasm);
+    __ mov(Oexception->after_save(),  Oexception);
+    __ mov(Oissuing_pc->after_save(), Oissuing_pc);
+    break;
+  case handle_exception_from_callee_id:
+    // At this point all registers except exception oop (Oexception)
+    // and exception pc (Oissuing_pc) are dead.
+    oop_map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);
+    sasm->set_frame_size(frame_size_in_bytes / BytesPerWord);
+    __ save_frame_c1(frame_size_in_bytes);
+    __ mov(Oexception->after_save(),  Oexception);
+    __ mov(Oissuing_pc->after_save(), Oissuing_pc);
+    break;
+  default:  ShouldNotReachHere();
+  }
 
   __ verify_not_null_oop(Oexception);
 
   // save the exception and issuing pc in the thread
-  __ st_ptr(Oexception, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
+  __ st_ptr(Oexception,  G2_thread, in_bytes(JavaThread::exception_oop_offset()));
   __ st_ptr(Oissuing_pc, G2_thread, in_bytes(JavaThread::exception_pc_offset()));
 
-  // save the real return address and use the throwing pc as the return address to lookup (has bci & oop map)
-  __ mov(I7, L0);
+  // use the throwing pc as the return address to lookup (has bci & oop map)
   __ mov(Oissuing_pc, I7);
   __ sub(I7, frame::pc_return_offset, I7);
   int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc));
+  oop_maps->add_gc_map(call_offset, oop_map);
 
   // Note: if nmethod has been deoptimized then regardless of
   // whether it had a handler or not we will deoptimize
   // by entering the deopt blob with a pending exception.
 
-#ifdef ASSERT
-  Label done;
-  __ tst(O0);
-  __ br(Assembler::notZero, false, Assembler::pn, done);
-  __ delayed()->nop();
-  __ stop("should have found address");
-  __ bind(done);
-#endif
+  // Restore the registers that were saved at the beginning, remove
+  // the frame and jump to the exception handler.
+  switch (id) {
+  case forward_exception_id:
+  case handle_exception_id:
+    restore_live_registers(sasm);
+    __ jmp(O0, 0);
+    __ delayed()->restore();
+    break;
+  case handle_exception_from_callee_id:
+    // Restore SP from L7 if the exception PC is a method handle call site.
+    __ mov(O0, G5);  // Save the target address.
+    __ lduw(Address(G2_thread, JavaThread::is_method_handle_return_offset()), L0);
+    __ tst(L0);  // Condition codes are preserved over the restore.
+    __ restore();
 
-  // restore the registers that were saved at the beginning and jump to the exception handler.
-  restore_live_registers(sasm);
+    __ jmp(G5, 0);  // jump to the exception handler
+    __ delayed()->movcc(Assembler::notZero, false, Assembler::icc, L7_mh_SP_save, SP);  // Restore SP if required.
+    break;
+  default:  ShouldNotReachHere();
+  }
 
-  __ jmp(O0, 0);
-  __ delayed()->restore();
-
-  oop_maps->add_gc_map(call_offset, oop_map);
+  return oop_maps;
 }
 
 
 #undef __
 
-#define __ masm->
-
 const char *Runtime1::pd_name_for_address(address entry) {
   return "<unknown function>";
 }
--- a/src/cpu/sparc/vm/methodHandles_sparc.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -417,6 +417,7 @@
 
   // Some handy addresses:
   Address G5_method_fie(    G5_method,        in_bytes(methodOopDesc::from_interpreted_offset()));
+  Address G5_method_fce(    G5_method,        in_bytes(methodOopDesc::from_compiled_offset()));
 
   Address G3_mh_vmtarget(   G3_method_handle, java_dyn_MethodHandle::vmtarget_offset_in_bytes());
 
@@ -444,12 +445,10 @@
   case _raise_exception:
     {
       // Not a real MH entry, but rather shared code for raising an
-      // exception.  Since we use a C2I adapter to set up the
-      // interpreter state, arguments are expected in compiler
-      // argument registers.
+      // exception.  Since we use the compiled entry, arguments are
+      // expected in compiler argument registers.
       assert(raise_exception_method(), "must be set");
-      address c2i_entry = raise_exception_method()->get_c2i_entry();
-      assert(c2i_entry, "method must be linked");
+      assert(raise_exception_method()->from_compiled_entry(), "method must be linked");
 
       __ mov(O5_savedSP, SP);  // Cut the stack back to where the caller started.
 
@@ -468,10 +467,9 @@
       __ delayed()->nop();
 
       __ verify_oop(G5_method);
-      __ jump_to(AddressLiteral(c2i_entry), O3_scratch);
+      __ jump_indirect_to(G5_method_fce, O3_scratch);  // jump to compiled entry
       __ delayed()->nop();
 
-      // If we get here, the Java runtime did not do its job of creating the exception.
       // Do something that is at least causes a valid throw from the interpreter.
       __ bind(L_no_method);
       __ unimplemented("call throw_WrongMethodType_entry");
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -968,19 +968,6 @@
     return start;
   }
 
-  static address disjoint_byte_copy_entry;
-  static address disjoint_short_copy_entry;
-  static address disjoint_int_copy_entry;
-  static address disjoint_long_copy_entry;
-  static address disjoint_oop_copy_entry;
-
-  static address byte_copy_entry;
-  static address short_copy_entry;
-  static address int_copy_entry;
-  static address long_copy_entry;
-  static address oop_copy_entry;
-
-  static address checkcast_copy_entry;
 
   //
   // Verify that a register contains clean 32-bits positive value
@@ -1046,31 +1033,40 @@
   //
   //  The input registers are overwritten.
   //
-  void gen_write_ref_array_pre_barrier(Register addr, Register count) {
+  void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
     BarrierSet* bs = Universe::heap()->barrier_set();
-    if (bs->has_write_ref_pre_barrier()) {
-      assert(bs->has_write_ref_array_pre_opt(),
-             "Else unsupported barrier set.");
-
-      __ save_frame(0);
-      // Save the necessary global regs... will be used after.
-      if (addr->is_global()) {
-        __ mov(addr, L0);
-      }
-      if (count->is_global()) {
-        __ mov(count, L1);
-      }
-      __ mov(addr->after_save(), O0);
-      // Get the count into O1
-      __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
-      __ delayed()->mov(count->after_save(), O1);
-      if (addr->is_global()) {
-        __ mov(L0, addr);
-      }
-      if (count->is_global()) {
-        __ mov(L1, count);
-      }
-      __ restore();
+    switch (bs->kind()) {
+      case BarrierSet::G1SATBCT:
+      case BarrierSet::G1SATBCTLogging:
+        // With G1, don't generate the call if we statically know that the target in uninitialized
+        if (!dest_uninitialized) {
+          __ save_frame(0);
+          // Save the necessary global regs... will be used after.
+          if (addr->is_global()) {
+            __ mov(addr, L0);
+          }
+          if (count->is_global()) {
+            __ mov(count, L1);
+          }
+          __ mov(addr->after_save(), O0);
+          // Get the count into O1
+          __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
+          __ delayed()->mov(count->after_save(), O1);
+          if (addr->is_global()) {
+            __ mov(L0, addr);
+          }
+          if (count->is_global()) {
+            __ mov(L1, count);
+          }
+          __ restore();
+        }
+        break;
+      case BarrierSet::CardTableModRef:
+      case BarrierSet::CardTableExtension:
+      case BarrierSet::ModRef:
+        break;
+      default:
+        ShouldNotReachHere();
     }
   }
   //
@@ -1084,7 +1080,7 @@
   //  The input registers are overwritten.
   //
   void gen_write_ref_array_post_barrier(Register addr, Register count,
-                                   Register tmp) {
+                                        Register tmp) {
     BarrierSet* bs = Universe::heap()->barrier_set();
 
     switch (bs->kind()) {
@@ -1283,7 +1279,7 @@
   //      to:    O1
   //      count: O2 treated as signed
   //
-  address generate_disjoint_byte_copy(bool aligned, const char * name) {
+  address generate_disjoint_byte_copy(bool aligned, address *entry, const char *name) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -1299,9 +1295,11 @@
 
     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
 
-    if (!aligned)  disjoint_byte_copy_entry = __ pc();
-    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
-    if (!aligned)  BLOCK_COMMENT("Entry:");
+    if (entry != NULL) {
+      *entry = __ pc();
+      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      BLOCK_COMMENT("Entry:");
+    }
 
     // for short arrays, just do single element copy
     __ cmp(count, 23); // 16 + 7
@@ -1391,15 +1389,13 @@
   //      to:    O1
   //      count: O2 treated as signed
   //
-  address generate_conjoint_byte_copy(bool aligned, const char * name) {
+  address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
+                                      address *entry, const char *name) {
     // Do reverse copy.
 
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
-    address nooverlap_target = aligned ?
-        StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
-        disjoint_byte_copy_entry;
 
     Label L_skip_alignment, L_align, L_aligned_copy;
     Label L_copy_byte, L_copy_byte_loop, L_exit;
@@ -1412,9 +1408,11 @@
 
     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
 
-    if (!aligned)  byte_copy_entry = __ pc();
-    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
-    if (!aligned)  BLOCK_COMMENT("Entry:");
+    if (entry != NULL) {
+      *entry = __ pc();
+      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      BLOCK_COMMENT("Entry:");
+    }
 
     array_overlap_test(nooverlap_target, 0);
 
@@ -1504,7 +1502,7 @@
   //      to:    O1
   //      count: O2 treated as signed
   //
-  address generate_disjoint_short_copy(bool aligned, const char * name) {
+  address generate_disjoint_short_copy(bool aligned, address *entry, const char * name) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -1520,9 +1518,11 @@
 
     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
 
-    if (!aligned)  disjoint_short_copy_entry = __ pc();
-    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
-    if (!aligned)  BLOCK_COMMENT("Entry:");
+    if (entry != NULL) {
+      *entry = __ pc();
+      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      BLOCK_COMMENT("Entry:");
+    }
 
     // for short arrays, just do single element copy
     __ cmp(count, 11); // 8 + 3  (22 bytes)
@@ -1842,15 +1842,13 @@
   //      to:    O1
   //      count: O2 treated as signed
   //
-  address generate_conjoint_short_copy(bool aligned, const char * name) {
+  address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
+                                       address *entry, const char *name) {
     // Do reverse copy.
 
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
-    address nooverlap_target = aligned ?
-        StubRoutines::arrayof_jshort_disjoint_arraycopy() :
-        disjoint_short_copy_entry;
 
     Label L_skip_alignment, L_skip_alignment2, L_aligned_copy;
     Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;
@@ -1865,9 +1863,11 @@
 
     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
 
-    if (!aligned)  short_copy_entry = __ pc();
-    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
-    if (!aligned)  BLOCK_COMMENT("Entry:");
+    if (entry != NULL) {
+      *entry = __ pc();
+      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      BLOCK_COMMENT("Entry:");
+    }
 
     array_overlap_test(nooverlap_target, 1);
 
@@ -2072,7 +2072,7 @@
   //      to:    O1
   //      count: O2 treated as signed
   //
-  address generate_disjoint_int_copy(bool aligned, const char * name) {
+  address generate_disjoint_int_copy(bool aligned, address *entry, const char *name) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -2080,9 +2080,11 @@
     const Register count = O2;
     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
 
-    if (!aligned)  disjoint_int_copy_entry = __ pc();
-    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
-    if (!aligned)  BLOCK_COMMENT("Entry:");
+    if (entry != NULL) {
+      *entry = __ pc();
+      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      BLOCK_COMMENT("Entry:");
+    }
 
     generate_disjoint_int_copy_core(aligned);
 
@@ -2204,20 +2206,19 @@
   //      to:    O1
   //      count: O2 treated as signed
   //
-  address generate_conjoint_int_copy(bool aligned, const char * name) {
+  address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
+                                     address *entry, const char *name) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
 
-    address nooverlap_target = aligned ?
-        StubRoutines::arrayof_jint_disjoint_arraycopy() :
-        disjoint_int_copy_entry;
-
     assert_clean_int(O2, O3);     // Make sure 'count' is clean int.
 
-    if (!aligned)  int_copy_entry = __ pc();
-    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
-    if (!aligned)  BLOCK_COMMENT("Entry:");
+    if (entry != NULL) {
+      *entry = __ pc();
+      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      BLOCK_COMMENT("Entry:");
+    }
 
     array_overlap_test(nooverlap_target, 2);
 
@@ -2336,16 +2337,18 @@
   //      to:    O1
   //      count: O2 treated as signed
   //
-  address generate_disjoint_long_copy(bool aligned, const char * name) {
+  address generate_disjoint_long_copy(bool aligned, address *entry, const char *name) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
 
     assert_clean_int(O2, O3);     // Make sure 'count' is clean int.
 
-    if (!aligned)  disjoint_long_copy_entry = __ pc();
-    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
-    if (!aligned)  BLOCK_COMMENT("Entry:");
+    if (entry != NULL) {
+      *entry = __ pc();
+      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      BLOCK_COMMENT("Entry:");
+    }
 
     generate_disjoint_long_copy_core(aligned);
 
@@ -2406,19 +2409,21 @@
   //      to:    O1
   //      count: O2 treated as signed
   //
-  address generate_conjoint_long_copy(bool aligned, const char * name) {
+  address generate_conjoint_long_copy(bool aligned, address nooverlap_target,
+                                      address *entry, const char *name) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
 
-    assert(!aligned, "usage");
-    address nooverlap_target = disjoint_long_copy_entry;
+    assert(aligned, "Should always be aligned");
 
     assert_clean_int(O2, O3);     // Make sure 'count' is clean int.
 
-    if (!aligned)  long_copy_entry = __ pc();
-    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
-    if (!aligned)  BLOCK_COMMENT("Entry:");
+    if (entry != NULL) {
+      *entry = __ pc();
+      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      BLOCK_COMMENT("Entry:");
+    }
 
     array_overlap_test(nooverlap_target, 3);
 
@@ -2439,7 +2444,8 @@
   //      to:    O1
   //      count: O2 treated as signed
   //
-  address generate_disjoint_oop_copy(bool aligned, const char * name) {
+  address generate_disjoint_oop_copy(bool aligned, address *entry, const char *name,
+                                     bool dest_uninitialized = false) {
 
     const Register from  = O0;  // source array address
     const Register to    = O1;  // destination array address
@@ -2451,14 +2457,16 @@
 
     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
 
-    if (!aligned)  disjoint_oop_copy_entry = __ pc();
-    // caller can pass a 64-bit byte count here
-    if (!aligned)  BLOCK_COMMENT("Entry:");
+    if (entry != NULL) {
+      *entry = __ pc();
+      // caller can pass a 64-bit byte count here
+      BLOCK_COMMENT("Entry:");
+    }
 
     // save arguments for barrier generation
     __ mov(to, G1);
     __ mov(count, G5);
-    gen_write_ref_array_pre_barrier(G1, G5);
+    gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
   #ifdef _LP64
     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
     if (UseCompressedOops) {
@@ -2487,7 +2495,9 @@
   //      to:    O1
   //      count: O2 treated as signed
   //
-  address generate_conjoint_oop_copy(bool aligned, const char * name) {
+  address generate_conjoint_oop_copy(bool aligned, address nooverlap_target,
+                                     address *entry, const char *name,
+                                     bool dest_uninitialized = false) {
 
     const Register from  = O0;  // source array address
     const Register to    = O1;  // destination array address
@@ -2499,21 +2509,18 @@
 
     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
 
-    if (!aligned)  oop_copy_entry = __ pc();
-    // caller can pass a 64-bit byte count here
-    if (!aligned)  BLOCK_COMMENT("Entry:");
+    if (entry != NULL) {
+      *entry = __ pc();
+      // caller can pass a 64-bit byte count here
+      BLOCK_COMMENT("Entry:");
+    }
+
+    array_overlap_test(nooverlap_target, LogBytesPerHeapOop);
 
     // save arguments for barrier generation
     __ mov(to, G1);
     __ mov(count, G5);
-
-    gen_write_ref_array_pre_barrier(G1, G5);
-
-    address nooverlap_target = aligned ?
-        StubRoutines::arrayof_oop_disjoint_arraycopy() :
-        disjoint_oop_copy_entry;
-
-    array_overlap_test(nooverlap_target, LogBytesPerHeapOop);
+    gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
 
   #ifdef _LP64
     if (UseCompressedOops) {
@@ -2582,7 +2589,7 @@
   //      ckval: O4 (super_klass)
   //      ret:   O0 zero for success; (-1^K) where K is partial transfer count
   //
-  address generate_checkcast_copy(const char* name) {
+  address generate_checkcast_copy(const char *name, address *entry, bool dest_uninitialized = false) {
 
     const Register O0_from   = O0;      // source array address
     const Register O1_to     = O1;      // destination array address
@@ -2600,8 +2607,6 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
 
-    gen_write_ref_array_pre_barrier(O1, O2);
-
 #ifdef ASSERT
     // We sometimes save a frame (see generate_type_check below).
     // If this will cause trouble, let's fail now instead of later.
@@ -2625,9 +2630,12 @@
     }
 #endif //ASSERT
 
-    checkcast_copy_entry = __ pc();
-    // caller can pass a 64-bit byte count here (from generic stub)
-    BLOCK_COMMENT("Entry:");
+    if (entry != NULL) {
+      *entry = __ pc();
+      // caller can pass a 64-bit byte count here (from generic stub)
+      BLOCK_COMMENT("Entry:");
+    }
+    gen_write_ref_array_pre_barrier(O1_to, O2_count, dest_uninitialized);
 
     Label load_element, store_element, do_card_marks, fail, done;
     __ addcc(O2_count, 0, G1_remain);   // initialize loop index, and test it
@@ -2700,7 +2708,11 @@
   // Examines the alignment of the operands and dispatches
   // to a long, int, short, or byte copy loop.
   //
-  address generate_unsafe_copy(const char* name) {
+  address generate_unsafe_copy(const char* name,
+                               address byte_copy_entry,
+                               address short_copy_entry,
+                               address int_copy_entry,
+                               address long_copy_entry) {
 
     const Register O0_from   = O0;      // source array address
     const Register O1_to     = O1;      // destination array address
@@ -2796,8 +2808,13 @@
   //    O0 ==  0  -  success
   //    O0 == -1  -  need to call System.arraycopy
   //
-  address generate_generic_copy(const char *name) {
-
+  address generate_generic_copy(const char *name,
+                                address entry_jbyte_arraycopy,
+                                address entry_jshort_arraycopy,
+                                address entry_jint_arraycopy,
+                                address entry_oop_arraycopy,
+                                address entry_jlong_arraycopy,
+                                address entry_checkcast_arraycopy) {
     Label L_failed, L_objArray;
 
     // Input registers
@@ -2970,15 +2987,15 @@
 
     BLOCK_COMMENT("choose copy loop based on element size");
     __ cmp(G3_elsize, 0);
-    __ br(Assembler::equal,true,Assembler::pt,StubRoutines::_jbyte_arraycopy);
+    __ br(Assembler::equal, true, Assembler::pt, entry_jbyte_arraycopy);
     __ delayed()->signx(length, count); // length
 
     __ cmp(G3_elsize, LogBytesPerShort);
-    __ br(Assembler::equal,true,Assembler::pt,StubRoutines::_jshort_arraycopy);
+    __ br(Assembler::equal, true, Assembler::pt, entry_jshort_arraycopy);
     __ delayed()->signx(length, count); // length
 
     __ cmp(G3_elsize, LogBytesPerInt);
-    __ br(Assembler::equal,true,Assembler::pt,StubRoutines::_jint_arraycopy);
+    __ br(Assembler::equal, true, Assembler::pt, entry_jint_arraycopy);
     __ delayed()->signx(length, count); // length
 #ifdef ASSERT
     { Label L;
@@ -2989,7 +3006,7 @@
       __ bind(L);
     }
 #endif
-    __ br(Assembler::always,false,Assembler::pt,StubRoutines::_jlong_arraycopy);
+    __ br(Assembler::always, false, Assembler::pt, entry_jlong_arraycopy);
     __ delayed()->signx(length, count); // length
 
     // objArrayKlass
@@ -3013,7 +3030,7 @@
     __ add(src, src_pos, from);       // src_addr
     __ add(dst, dst_pos, to);         // dst_addr
   __ BIND(L_plain_copy);
-    __ br(Assembler::always, false, Assembler::pt,StubRoutines::_oop_arraycopy);
+    __ br(Assembler::always, false, Assembler::pt, entry_oop_arraycopy);
     __ delayed()->signx(length, count); // length
 
   __ BIND(L_checkcast_copy);
@@ -3057,7 +3074,7 @@
       __ ld_ptr(G4_dst_klass, ek_offset, O4);   // dest elem klass
       // lduw(O4, sco_offset, O3);              // sco of elem klass
 
-      __ br(Assembler::always, false, Assembler::pt, checkcast_copy_entry);
+      __ br(Assembler::always, false, Assembler::pt, entry_checkcast_arraycopy);
       __ delayed()->lduw(O4, sco_offset, O3);
     }
 
@@ -3068,39 +3085,124 @@
   }
 
   void generate_arraycopy_stubs() {
-
-    // Note:  the disjoint stubs must be generated first, some of
-    //        the conjoint stubs use them.
-    StubRoutines::_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
-    StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
-    StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_copy(false, "jint_disjoint_arraycopy");
-    StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
-    StubRoutines::_oop_disjoint_arraycopy    = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy");
-    StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy");
-    StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
-    StubRoutines::_arrayof_jint_disjoint_arraycopy   = generate_disjoint_int_copy(true, "arrayof_jint_disjoint_arraycopy");
-    StubRoutines::_arrayof_jlong_disjoint_arraycopy  = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy");
-    StubRoutines::_arrayof_oop_disjoint_arraycopy    =  generate_disjoint_oop_copy(true, "arrayof_oop_disjoint_arraycopy");
-
-    StubRoutines::_jbyte_arraycopy  = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
-    StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
-    StubRoutines::_jint_arraycopy   = generate_conjoint_int_copy(false, "jint_arraycopy");
-    StubRoutines::_jlong_arraycopy  = generate_conjoint_long_copy(false, "jlong_arraycopy");
-    StubRoutines::_oop_arraycopy    = generate_conjoint_oop_copy(false, "oop_arraycopy");
-    StubRoutines::_arrayof_jbyte_arraycopy    = generate_conjoint_byte_copy(true, "arrayof_jbyte_arraycopy");
-    StubRoutines::_arrayof_jshort_arraycopy   = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
+    address entry;
+    address entry_jbyte_arraycopy;
+    address entry_jshort_arraycopy;
+    address entry_jint_arraycopy;
+    address entry_oop_arraycopy;
+    address entry_jlong_arraycopy;
+    address entry_checkcast_arraycopy;
+
+    //*** jbyte
+    // Always need aligned and unaligned versions
+    StubRoutines::_jbyte_disjoint_arraycopy         = generate_disjoint_byte_copy(false, &entry,
+                                                                                  "jbyte_disjoint_arraycopy");
+    StubRoutines::_jbyte_arraycopy                  = generate_conjoint_byte_copy(false, entry,
+                                                                                  &entry_jbyte_arraycopy,
+                                                                                  "jbyte_arraycopy");
+    StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry,
+                                                                                  "arrayof_jbyte_disjoint_arraycopy");
+    StubRoutines::_arrayof_jbyte_arraycopy          = generate_conjoint_byte_copy(true, entry, NULL,
+                                                                                  "arrayof_jbyte_arraycopy");
+
+    //*** jshort
+    // Always need aligned and unaligned versions
+    StubRoutines::_jshort_disjoint_arraycopy         = generate_disjoint_short_copy(false, &entry,
+                                                                                    "jshort_disjoint_arraycopy");
+    StubRoutines::_jshort_arraycopy                  = generate_conjoint_short_copy(false, entry,
+                                                                                    &entry_jshort_arraycopy,
+                                                                                    "jshort_arraycopy");
+    StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
+                                                                                    "arrayof_jshort_disjoint_arraycopy");
+    StubRoutines::_arrayof_jshort_arraycopy          = generate_conjoint_short_copy(true, entry, NULL,
+                                                                                    "arrayof_jshort_arraycopy");
+
+    //*** jint
+    // Aligned versions
+    StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
+                                                                                "arrayof_jint_disjoint_arraycopy");
+    StubRoutines::_arrayof_jint_arraycopy          = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
+                                                                                "arrayof_jint_arraycopy");
 #ifdef _LP64
-    // since sizeof(jint) < sizeof(HeapWord), there's a different flavor:
-    StubRoutines::_arrayof_jint_arraycopy     = generate_conjoint_int_copy(true, "arrayof_jint_arraycopy");
-  #else
-    StubRoutines::_arrayof_jint_arraycopy     = StubRoutines::_jint_arraycopy;
+    // In 64 bit we need both aligned and unaligned versions of jint arraycopy.
+    // entry_jint_arraycopy always points to the unaligned version (notice that we overwrite it).
+    StubRoutines::_jint_disjoint_arraycopy         = generate_disjoint_int_copy(false, &entry,
+                                                                                "jint_disjoint_arraycopy");
+    StubRoutines::_jint_arraycopy                  = generate_conjoint_int_copy(false, entry,
+                                                                                &entry_jint_arraycopy,
+                                                                                "jint_arraycopy");
+#else
+    // In 32 bit jints are always HeapWordSize aligned, so always use the aligned version
+    // (in fact in 32bit we always have a pre-loop part even in the aligned version,
+    //  because it uses 64-bit loads/stores, so the aligned flag is actually ignored).
+    StubRoutines::_jint_disjoint_arraycopy = StubRoutines::_arrayof_jint_disjoint_arraycopy;
+    StubRoutines::_jint_arraycopy          = StubRoutines::_arrayof_jint_arraycopy;
 #endif
-    StubRoutines::_arrayof_jlong_arraycopy    = StubRoutines::_jlong_arraycopy;
-    StubRoutines::_arrayof_oop_arraycopy      = StubRoutines::_oop_arraycopy;
-
-    StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
-    StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy");
-    StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy");
+
+
+    //*** jlong
+    // It is always aligned
+    StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry,
+                                                                                  "arrayof_jlong_disjoint_arraycopy");
+    StubRoutines::_arrayof_jlong_arraycopy          = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
+                                                                                  "arrayof_jlong_arraycopy");
+    StubRoutines::_jlong_disjoint_arraycopy         = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
+    StubRoutines::_jlong_arraycopy                  = StubRoutines::_arrayof_jlong_arraycopy;
+
+
+    //*** oops
+    // Aligned versions
+    StubRoutines::_arrayof_oop_disjoint_arraycopy        = generate_disjoint_oop_copy(true, &entry,
+                                                                                      "arrayof_oop_disjoint_arraycopy");
+    StubRoutines::_arrayof_oop_arraycopy                 = generate_conjoint_oop_copy(true, entry, &entry_oop_arraycopy,
+                                                                                      "arrayof_oop_arraycopy");
+    // Aligned versions without pre-barriers
+    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, &entry,
+                                                                                      "arrayof_oop_disjoint_arraycopy_uninit",
+                                                                                      /*dest_uninitialized*/true);
+    StubRoutines::_arrayof_oop_arraycopy_uninit          = generate_conjoint_oop_copy(true, entry, NULL,
+                                                                                      "arrayof_oop_arraycopy_uninit",
+                                                                                      /*dest_uninitialized*/true);
+#ifdef _LP64
+    if (UseCompressedOops) {
+      // With compressed oops we need unaligned versions, notice that we overwrite entry_oop_arraycopy.
+      StubRoutines::_oop_disjoint_arraycopy            = generate_disjoint_oop_copy(false, &entry,
+                                                                                    "oop_disjoint_arraycopy");
+      StubRoutines::_oop_arraycopy                     = generate_conjoint_oop_copy(false, entry, &entry_oop_arraycopy,
+                                                                                    "oop_arraycopy");
+      // Unaligned versions without pre-barriers
+      StubRoutines::_oop_disjoint_arraycopy_uninit     = generate_disjoint_oop_copy(false, &entry,
+                                                                                    "oop_disjoint_arraycopy_uninit",
+                                                                                    /*dest_uninitialized*/true);
+      StubRoutines::_oop_arraycopy_uninit              = generate_conjoint_oop_copy(false, entry, NULL,
+                                                                                    "oop_arraycopy_uninit",
+                                                                                    /*dest_uninitialized*/true);
+    } else
+#endif
+    {
+      // oop arraycopy is always aligned on 32bit and 64bit without compressed oops
+      StubRoutines::_oop_disjoint_arraycopy            = StubRoutines::_arrayof_oop_disjoint_arraycopy;
+      StubRoutines::_oop_arraycopy                     = StubRoutines::_arrayof_oop_arraycopy;
+      StubRoutines::_oop_disjoint_arraycopy_uninit     = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
+      StubRoutines::_oop_arraycopy_uninit              = StubRoutines::_arrayof_oop_arraycopy_uninit;
+    }
+
+    StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
+    StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
+                                                                        /*dest_uninitialized*/true);
+
+    StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy",
+                                                              entry_jbyte_arraycopy,
+                                                              entry_jshort_arraycopy,
+                                                              entry_jint_arraycopy,
+                                                              entry_jlong_arraycopy);
+    StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy",
+                                                               entry_jbyte_arraycopy,
+                                                               entry_jshort_arraycopy,
+                                                               entry_jint_arraycopy,
+                                                               entry_oop_arraycopy,
+                                                               entry_jlong_arraycopy,
+                                                               entry_checkcast_arraycopy);
 
     StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
     StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
@@ -3224,21 +3326,6 @@
 
 }; // end class declaration
 
-
-address StubGenerator::disjoint_byte_copy_entry  = NULL;
-address StubGenerator::disjoint_short_copy_entry = NULL;
-address StubGenerator::disjoint_int_copy_entry   = NULL;
-address StubGenerator::disjoint_long_copy_entry  = NULL;
-address StubGenerator::disjoint_oop_copy_entry   = NULL;
-
-address StubGenerator::byte_copy_entry  = NULL;
-address StubGenerator::short_copy_entry = NULL;
-address StubGenerator::int_copy_entry   = NULL;
-address StubGenerator::long_copy_entry  = NULL;
-address StubGenerator::oop_copy_entry   = NULL;
-
-address StubGenerator::checkcast_copy_entry = NULL;
-
 void StubGenerator_generate(CodeBuffer* code, bool all) {
   StubGenerator g(code, all);
 }
--- a/src/cpu/x86/vm/assembler_x86.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -1601,6 +1601,17 @@
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::movdl(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionMark im(this);
+  emit_byte(0x66);
+  prefix(src, dst);
+  emit_byte(0x0F);
+  emit_byte(0x6E);
+  emit_operand(dst, src);
+}
+
+
 void Assembler::movdqa(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
@@ -2412,7 +2423,10 @@
 }
 
 void Assembler::psrlq(XMMRegister dst, int shift) {
-  // HMM Table D-1 says sse2 or mmx
+  // Shift 64 bit value logically right by specified number of bits.
+  // HMM Table D-1 says sse2 or mmx.
+  // Do not confuse it with psrldq SSE2 instruction which
+  // shifts 128 bit value in xmm register by number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
 
   int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding());
@@ -2423,6 +2437,18 @@
   emit_byte(shift);
 }
 
+void Assembler::psrldq(XMMRegister dst, int shift) {
+  // Shift 128 bit value in xmm register by number of bytes.
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+
+  int encode = prefixq_and_encode(xmm3->encoding(), dst->encoding());
+  emit_byte(0x66);
+  emit_byte(0x0F);
+  emit_byte(0x73);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift);
+}
+
 void Assembler::ptest(XMMRegister dst, Address src) {
   assert(VM_Version::supports_sse4_1(), "");
 
@@ -8567,101 +8593,418 @@
 }
 #endif // _LP64
 
-// IndexOf substring.
-void MacroAssembler::string_indexof(Register str1, Register str2,
-                                    Register cnt1, Register cnt2, Register result,
-                                    XMMRegister vec, Register tmp) {
+// IndexOf for constant substrings with size >= 8 chars
+// which don't need to be loaded through stack.
+void MacroAssembler::string_indexofC8(Register str1, Register str2,
+                                      Register cnt1, Register cnt2,
+                                      int int_cnt2,  Register result,
+                                      XMMRegister vec, Register tmp) {
   assert(UseSSE42Intrinsics, "SSE4.2 is required");
 
-  Label RELOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
-        SCAN_SUBSTR, RET_NOT_FOUND, CLEANUP;
-
-  push(str1); // string addr
-  push(str2); // substr addr
-  push(cnt2); // substr count
-  jmpb(PREP_FOR_SCAN);
-
-  // Substr count saved at sp
-  // Substr saved at sp+1*wordSize
-  // String saved at sp+2*wordSize
-
-  // Reload substr for rescan
-  bind(RELOAD_SUBSTR);
-  movl(cnt2, Address(rsp, 0));
-  movptr(str2, Address(rsp, wordSize));
-  // We came here after the beginninig of the substring was
-  // matched but the rest of it was not so we need to search
-  // again. Start from the next element after the previous match.
-  subptr(str1, result); // Restore counter
-  shrl(str1, 1);
-  addl(cnt1, str1);
-  decrementl(cnt1);
-  lea(str1, Address(result, 2)); // Reload string
-
-  // Load substr
-  bind(PREP_FOR_SCAN);
-  movdqu(vec, Address(str2, 0));
-  addl(cnt1, 8);  // prime the loop
-  subptr(str1, 16);
-
-  // Scan string for substr in 16-byte vectors
-  bind(SCAN_TO_SUBSTR);
-  subl(cnt1, 8);
-  addptr(str1, 16);
-
-  // pcmpestri
+  // This method uses pcmpestri inxtruction with bound registers
   //   inputs:
   //     xmm - substring
   //     rax - substring length (elements count)
-  //     mem - scaned string
+  //     mem - scanned string
   //     rdx - string length (elements count)
   //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
   //   outputs:
   //     rcx - matched index in string
   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
 
-  pcmpestri(vec, Address(str1, 0), 0x0d);
-  jcc(Assembler::above, SCAN_TO_SUBSTR);      // CF == 0 && ZF == 0
-  jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0
-
-  // Fallthrough: found a potential substr
+  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
+        RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
+        MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
+
+  // Note, inline_string_indexOf() generates checks:
+  // if (substr.count > string.count) return -1;
+  // if (substr.count == 0) return 0;
+  assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars");
+
+  // Load substring.
+  movdqu(vec, Address(str2, 0));
+  movl(cnt2, int_cnt2);
+  movptr(result, str1); // string addr
+
+  if (int_cnt2 > 8) {
+    jmpb(SCAN_TO_SUBSTR);
+
+    // Reload substr for rescan, this code
+    // is executed only for large substrings (> 8 chars)
+    bind(RELOAD_SUBSTR);
+    movdqu(vec, Address(str2, 0));
+    negptr(cnt2); // Jumped here with negative cnt2, convert to positive
+
+    bind(RELOAD_STR);
+    // We came here after the beginning of the substring was
+    // matched but the rest of it was not so we need to search
+    // again. Start from the next element after the previous match.
+
+    // cnt2 is number of substring reminding elements and
+    // cnt1 is number of string reminding elements when cmp failed.
+    // Restored cnt1 = cnt1 - cnt2 + int_cnt2
+    subl(cnt1, cnt2);
+    addl(cnt1, int_cnt2);
+    movl(cnt2, int_cnt2); // Now restore cnt2
+
+    decrementl(cnt1);     // Shift to next element
+    cmpl(cnt1, cnt2);
+    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less then substring
+
+    addptr(result, 2);
+
+  } // (int_cnt2 > 8)
+
+  // Scan string for start of substr in 16-byte vectors
+  bind(SCAN_TO_SUBSTR);
+  pcmpestri(vec, Address(result, 0), 0x0d);
+  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
+  subl(cnt1, 8);
+  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
+  cmpl(cnt1, cnt2);
+  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less then substring
+  addptr(result, 16);
+  jmpb(SCAN_TO_SUBSTR);
+
+  // Found a potential substr
+  bind(FOUND_CANDIDATE);
+  // Matched whole vector if first element matched (tmp(rcx) == 0).
+  if (int_cnt2 == 8) {
+    jccb(Assembler::overflow, RET_FOUND);    // OF == 1
+  } else { // int_cnt2 > 8
+    jccb(Assembler::overflow, FOUND_SUBSTR);
+  }
+  // After pcmpestri tmp(rcx) contains matched element index
+  // Compute start addr of substr
+  lea(result, Address(result, tmp, Address::times_2));
 
   // Make sure string is still long enough
   subl(cnt1, tmp);
   cmpl(cnt1, cnt2);
-  jccb(Assembler::negative, RET_NOT_FOUND);
-  // Compute start addr of substr
-  lea(str1, Address(str1, tmp, Address::times_2));
-  movptr(result, str1); // save
-
-  // Compare potential substr
-  addl(cnt1, 8);     // prime the loop
-  addl(cnt2, 8);
-  subptr(str1, 16);
-  subptr(str2, 16);
-
-  // Scan 16-byte vectors of string and substr
-  bind(SCAN_SUBSTR);
-  subl(cnt1, 8);
-  subl(cnt2, 8);
-  addptr(str1, 16);
-  addptr(str2, 16);
-  movdqu(vec, Address(str2, 0));
-  pcmpestri(vec, Address(str1, 0), 0x0d);
-  jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
-  jcc(Assembler::positive, SCAN_SUBSTR);     // SF == 0
-
-  // Compute substr offset
-  subptr(result, Address(rsp, 2*wordSize));
-  shrl(result, 1); // index
-  jmpb(CLEANUP);
+  if (int_cnt2 == 8) {
+    jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
+  } else { // int_cnt2 > 8
+    jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
+  }
+  // Left less then substring.
 
   bind(RET_NOT_FOUND);
   movl(result, -1);
+  jmpb(EXIT);
+
+  if (int_cnt2 > 8) {
+    // This code is optimized for the case when whole substring
+    // is matched if its head is matched.
+    bind(MATCH_SUBSTR_HEAD);
+    pcmpestri(vec, Address(result, 0), 0x0d);
+    // Reload only string if does not match
+    jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0
+
+    Label CONT_SCAN_SUBSTR;
+    // Compare the rest of substring (> 8 chars).
+    bind(FOUND_SUBSTR);
+    // First 8 chars are already matched.
+    negptr(cnt2);
+    addptr(cnt2, 8);
+
+    bind(SCAN_SUBSTR);
+    subl(cnt1, 8);
+    cmpl(cnt2, -8); // Do not read beyond substring
+    jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
+    // Back-up strings to avoid reading beyond substring:
+    // cnt1 = cnt1 - cnt2 + 8
+    addl(cnt1, cnt2); // cnt2 is negative
+    addl(cnt1, 8);
+    movl(cnt2, 8); negptr(cnt2);
+    bind(CONT_SCAN_SUBSTR);
+    if (int_cnt2 < (int)G) {
+      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
+      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
+    } else {
+      // calculate index in register to avoid integer overflow (int_cnt2*2)
+      movl(tmp, int_cnt2);
+      addptr(tmp, cnt2);
+      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
+      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
+    }
+    // Need to reload strings pointers if not matched whole vector
+    jccb(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
+    addptr(cnt2, 8);
+    jccb(Assembler::negative, SCAN_SUBSTR);
+    // Fall through if found full substring
+
+  } // (int_cnt2 > 8)
+
+  bind(RET_FOUND);
+  // Found result if we matched full small substring.
+  // Compute substr offset
+  subptr(result, str1);
+  shrl(result, 1); // index
+  bind(EXIT);
+
+} // string_indexofC8
+
+// Small strings are loaded through stack if they cross page boundary.
+void MacroAssembler::string_indexof(Register str1, Register str2,
+                                    Register cnt1, Register cnt2,
+                                    int int_cnt2,  Register result,
+                                    XMMRegister vec, Register tmp) {
+  assert(UseSSE42Intrinsics, "SSE4.2 is required");
+  //
+  // int_cnt2 is length of small (< 8 chars) constant substring
+  // or (-1) for non constant substring in which case its length
+  // is in cnt2 register.
+  //
+  // Note, inline_string_indexOf() generates checks:
+  // if (substr.count > string.count) return -1;
+  // if (substr.count == 0) return 0;
+  //
+  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");
+
+  // This method uses pcmpestri inxtruction with bound registers
+  //   inputs:
+  //     xmm - substring
+  //     rax - substring length (elements count)
+  //     mem - scanned string
+  //     rdx - string length (elements count)
+  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
+  //   outputs:
+  //     rcx - matched index in string
+  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
+
+  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
+        RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
+        FOUND_CANDIDATE;
+
+  { //========================================================
+    // We don't know where these strings are located
+    // and we can't read beyond them. Load them through stack.
+    Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
+
+    movptr(tmp, rsp); // save old SP
+
+    if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
+      if (int_cnt2 == 1) {  // One char
+        load_unsigned_short(result, Address(str2, 0));
+        movdl(vec, result); // move 32 bits
+      } else if (int_cnt2 == 2) { // Two chars
+        movdl(vec, Address(str2, 0)); // move 32 bits
+      } else if (int_cnt2 == 4) { // Four chars
+        movq(vec, Address(str2, 0));  // move 64 bits
+      } else { // cnt2 = { 3, 5, 6, 7 }
+        // Array header size is 12 bytes in 32-bit VM
+        // + 6 bytes for 3 chars == 18 bytes,
+        // enough space to load vec and shift.
+        assert(HeapWordSize*typeArrayKlass::header_size() >= 12,"sanity");
+        movdqu(vec, Address(str2, (int_cnt2*2)-16));
+        psrldq(vec, 16-(int_cnt2*2));
+      }
+    } else { // not constant substring
+      cmpl(cnt2, 8);
+      jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
+
+      // We can read beyond string if srt+16 does not cross page boundary
+      // since heaps are aligned and mapped by pages.
+      assert(os::vm_page_size() < (int)G, "default page should be small");
+      movl(result, str2); // We need only low 32 bits
+      andl(result, (os::vm_page_size()-1));
+      cmpl(result, (os::vm_page_size()-16));
+      jccb(Assembler::belowEqual, CHECK_STR);
+
+      // Move small strings to stack to allow load 16 bytes into vec.
+      subptr(rsp, 16);
+      int stk_offset = wordSize-2;
+      push(cnt2);
+
+      bind(COPY_SUBSTR);
+      load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
+      movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
+      decrement(cnt2);
+      jccb(Assembler::notZero, COPY_SUBSTR);
+
+      pop(cnt2);
+      movptr(str2, rsp);  // New substring address
+    } // non constant
+
+    bind(CHECK_STR);
+    cmpl(cnt1, 8);
+    jccb(Assembler::aboveEqual, BIG_STRINGS);
+
+    // Check cross page boundary.
+    movl(result, str1); // We need only low 32 bits
+    andl(result, (os::vm_page_size()-1));
+    cmpl(result, (os::vm_page_size()-16));
+    jccb(Assembler::belowEqual, BIG_STRINGS);
+
+    subptr(rsp, 16);
+    int stk_offset = -2;
+    if (int_cnt2 < 0) { // not constant
+      push(cnt2);
+      stk_offset += wordSize;
+    }
+    movl(cnt2, cnt1);
+
+    bind(COPY_STR);
+    load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
+    movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
+    decrement(cnt2);
+    jccb(Assembler::notZero, COPY_STR);
+
+    if (int_cnt2 < 0) { // not constant
+      pop(cnt2);
+    }
+    movptr(str1, rsp);  // New string address
+
+    bind(BIG_STRINGS);
+    // Load substring.
+    if (int_cnt2 < 0) { // -1
+      movdqu(vec, Address(str2, 0));
+      push(cnt2);       // substr count
+      push(str2);       // substr addr
+      push(str1);       // string addr
+    } else {
+      // Small (< 8 chars) constant substrings are loaded already.
+      movl(cnt2, int_cnt2);
+    }
+    push(tmp);  // original SP
+
+  } // Finished loading
+
+  //========================================================
+  // Start search
+  //
+
+  movptr(result, str1); // string addr
+
+  if (int_cnt2  < 0) {  // Only for non constant substring
+    jmpb(SCAN_TO_SUBSTR);
+
+    // SP saved at sp+0
+    // String saved at sp+1*wordSize
+    // Substr saved at sp+2*wordSize
+    // Substr count saved at sp+3*wordSize
+
+    // Reload substr for rescan, this code
+    // is executed only for large substrings (> 8 chars)
+    bind(RELOAD_SUBSTR);
+    movptr(str2, Address(rsp, 2*wordSize));
+    movl(cnt2, Address(rsp, 3*wordSize));
+    movdqu(vec, Address(str2, 0));
+    // We came here after the beginning of the substring was
+    // matched but the rest of it was not so we need to search
+    // again. Start from the next element after the previous match.
+    subptr(str1, result); // Restore counter
+    shrl(str1, 1);
+    addl(cnt1, str1);
+    decrementl(cnt1);   // Shift to next element
+    cmpl(cnt1, cnt2);
+    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less then substring
+
+    addptr(result, 2);
+  } // non constant
+
+  // Scan string for start of substr in 16-byte vectors
+  bind(SCAN_TO_SUBSTR);
+  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
+  pcmpestri(vec, Address(result, 0), 0x0d);
+  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
+  subl(cnt1, 8);
+  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
+  cmpl(cnt1, cnt2);
+  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less then substring
+  addptr(result, 16);
+
+  bind(ADJUST_STR);
+  cmpl(cnt1, 8); // Do not read beyond string
+  jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
+  // Back-up string to avoid reading beyond string.
+  lea(result, Address(result, cnt1, Address::times_2, -16));
+  movl(cnt1, 8);
+  jmpb(SCAN_TO_SUBSTR);
+
+  // Found a potential substr
+  bind(FOUND_CANDIDATE);
+  // After pcmpestri tmp(rcx) contains matched element index
+
+  // Make sure string is still long enough
+  subl(cnt1, tmp);
+  cmpl(cnt1, cnt2);
+  jccb(Assembler::greaterEqual, FOUND_SUBSTR);
+  // Left less then substring.
+
+  bind(RET_NOT_FOUND);
+  movl(result, -1);
+  jmpb(CLEANUP);
+
+  bind(FOUND_SUBSTR);
+  // Compute start addr of substr
+  lea(result, Address(result, tmp, Address::times_2));
+
+  if (int_cnt2 > 0) { // Constant substring
+    // Repeat search for small substring (< 8 chars)
+    // from new point without reloading substring.
+    // Have to check that we don't read beyond string.
+    cmpl(tmp, 8-int_cnt2);
+    jccb(Assembler::greater, ADJUST_STR);
+    // Fall through if matched whole substring.
+  } else { // non constant
+    assert(int_cnt2 == -1, "should be != 0");
+
+    addl(tmp, cnt2);
+    // Found result if we matched whole substring.
+    cmpl(tmp, 8);
+    jccb(Assembler::lessEqual, RET_FOUND);
+
+    // Repeat search for small substring (<= 8 chars)
+    // from new point 'str1' without reloading substring.
+    cmpl(cnt2, 8);
+    // Have to check that we don't read beyond string.
+    jccb(Assembler::lessEqual, ADJUST_STR);
+
+    Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
+    // Compare the rest of substring (> 8 chars).
+    movptr(str1, result);
+
+    cmpl(tmp, cnt2);
+    // First 8 chars are already matched.
+    jccb(Assembler::equal, CHECK_NEXT);
+
+    bind(SCAN_SUBSTR);
+    pcmpestri(vec, Address(str1, 0), 0x0d);
+    // Need to reload strings pointers if not matched whole vector
+    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
+
+    bind(CHECK_NEXT);
+    subl(cnt2, 8);
+    jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
+    addptr(str1, 16);
+    addptr(str2, 16);
+    subl(cnt1, 8);
+    cmpl(cnt2, 8); // Do not read beyond substring
+    jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
+    // Back-up strings to avoid reading beyond substring.
+    lea(str2, Address(str2, cnt2, Address::times_2, -16));
+    lea(str1, Address(str1, cnt2, Address::times_2, -16));
+    subl(cnt1, cnt2);
+    movl(cnt2, 8);
+    addl(cnt1, 8);
+    bind(CONT_SCAN_SUBSTR);
+    movdqu(vec, Address(str2, 0));
+    jmpb(SCAN_SUBSTR);
+
+    bind(RET_FOUND_LONG);
+    movptr(str1, Address(rsp, wordSize));
+  } // non constant
+
+  bind(RET_FOUND);
+  // Compute substr offset
+  subptr(result, str1);
+  shrl(result, 1); // index
 
   bind(CLEANUP);
-  addptr(rsp, 3*wordSize);
-}
+  pop(rsp); // restore SP
+
+} // string_indexof
 
 // Compare strings.
 void MacroAssembler::string_compare(Register str1, Register str2,
--- a/src/cpu/x86/vm/assembler_x86.hpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Thu Mar 03 23:31:45 2011 -0800
@@ -1121,6 +1121,7 @@
 
   void movdl(XMMRegister dst, Register src);
   void movdl(Register dst, XMMRegister src);
+  void movdl(XMMRegister dst, Address src);
 
   // Move Double Quadword
   void movdq(XMMRegister dst, Register src);
@@ -1288,9 +1289,12 @@
   void pshuflw(XMMRegister dst, XMMRegister src, int mode);
   void pshuflw(XMMRegister dst, Address src,     int mode);
 
-  // Shift Right Logical Quadword Immediate
+  // Shift Right by bits Logical Quadword Immediate
   void psrlq(XMMRegister dst, int shift);
 
+  // Shift Right by bytes Logical DoubleQuadword Immediate
+  void psrldq(XMMRegister dst, int shift);
+
   // Logical Compare Double Quadword
   void ptest(XMMRegister dst, XMMRegister src);
   void ptest(XMMRegister dst, Address src);
@@ -2290,10 +2294,22 @@
   void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }
 
   // IndexOf strings.
+  // Small strings are loaded through stack if they cross page boundary.
   void string_indexof(Register str1, Register str2,
-                      Register cnt1, Register cnt2, Register result,
+                      Register cnt1, Register cnt2,
+                      int int_cnt2,  Register result,
                       XMMRegister vec, Register tmp);
 
+  // IndexOf for constant substrings with size >= 8 elements
+  // which don't need to be loaded through stack.
+  void string_indexofC8(Register str1, Register str2,
+                      Register cnt1, Register cnt2,
+                      int int_cnt2,  Register result,
+                      XMMRegister vec, Register tmp);
+
+    // Smallest code: we don't need to load through stack,
+    // check string tail.
+
   // Compare strings.
   void string_compare(Register str1, Register str2,
                       Register cnt1, Register cnt2, Register result,
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -456,10 +456,8 @@
   __ verify_not_null_oop(rax);
 
   // search an exception handler (rax: exception oop, rdx: throwing pc)
-  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_nofpu_id)));
-
-  __ stop("should not reach here");
-
+  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id)));
+  __ should_not_reach_here();
   assert(code_offset() - offset <= exception_handler_size, "overflow");
   __ end_a_stub();
 
--- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -248,11 +248,14 @@
 #ifdef _LP64
   align_dummy_0, align_dummy_1,
 #endif // _LP64
-  dummy1, SLOT2(dummy1H)                                                                    // 0, 4
-  dummy2, SLOT2(dummy2H)                                                                    // 8, 12
-  // Two temps to be used as needed by users of save/restore callee registers
-  temp_2_off, SLOT2(temp_2H_off)                                                            // 16, 20
-  temp_1_off, SLOT2(temp_1H_off)                                                            // 24, 28
+#ifdef _WIN64
+  // Windows always allocates space for it's argument registers (see
+  // frame::arg_reg_save_area_bytes).
+  arg_reg_save_1, arg_reg_save_1H,                                                          // 0, 4
+  arg_reg_save_2, arg_reg_save_2H,                                                          // 8, 12
+  arg_reg_save_3, arg_reg_save_3H,                                                          // 16, 20
+  arg_reg_save_4, arg_reg_save_4H,                                                          // 24, 28
+#endif // _WIN64
   xmm_regs_as_doubles_off,                                                                  // 32
   float_regs_as_doubles_off = xmm_regs_as_doubles_off + xmm_regs_as_doubles_size_in_slots,  // 160
   fpu_state_off = float_regs_as_doubles_off + float_regs_as_doubles_size_in_slots,          // 224
@@ -282,24 +285,7 @@
   rax_off, SLOT2(raxH_off)                                                                  // 480, 484
   saved_rbp_off, SLOT2(saved_rbpH_off)                                                      // 488, 492
   return_off, SLOT2(returnH_off)                                                            // 496, 500
-  reg_save_frame_size,  // As noted: neglects any parameters to runtime                     // 504
-
-#ifdef _WIN64
-  c_rarg0_off = rcx_off,
-#else
-  c_rarg0_off = rdi_off,
-#endif // WIN64
-
-  // equates
-
-  // illegal instruction handler
-  continue_dest_off = temp_1_off,
-
-  // deoptimization equates
-  fp0_off = float_regs_as_doubles_off, // slot for java float/double return value
-  xmm0_off = xmm_regs_as_doubles_off,  // slot for java float/double return value
-  deopt_type = temp_2_off,             // slot for type of deopt in progress
-  ret_type = temp_1_off                // slot for return type
+  reg_save_frame_size   // As noted: neglects any parameters to runtime                     // 504
 };
 
 
@@ -405,11 +391,6 @@
                                    bool save_fpu_registers = true) {
   __ block_comment("save_live_registers");
 
-  // 64bit passes the args in regs to the c++ runtime
-  int frame_size_in_slots = reg_save_frame_size NOT_LP64(+ num_rt_args); // args + thread
-  // frame_size = round_to(frame_size, 4);
-  sasm->set_frame_size(frame_size_in_slots / VMRegImpl::slots_per_word );
-
   __ pusha();         // integer registers
 
   // assert(float_regs_as_doubles_off % 2 == 0, "misaligned offset");
@@ -642,19 +623,58 @@
 }
 
 
-void Runtime1::generate_handle_exception(StubAssembler *sasm, OopMapSet* oop_maps, OopMap* oop_map, bool save_fpu_registers) {
+OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
+  __ block_comment("generate_handle_exception");
+
   // incoming parameters
   const Register exception_oop = rax;
-  const Register exception_pc = rdx;
+  const Register exception_pc  = rdx;
   // other registers used in this stub
-  const Register real_return_addr = rbx;
   const Register thread = NOT_LP64(rdi) LP64_ONLY(r15_thread);
 
-  __ block_comment("generate_handle_exception");
+  // Save registers, if required.
+  OopMapSet* oop_maps = new OopMapSet();
+  OopMap* oop_map = NULL;
+  switch (id) {
+  case forward_exception_id:
+    // We're handling an exception in the context of a compiled frame.
+    // The registers have been saved in the standard places.  Perform
+    // an exception lookup in the caller and dispatch to the handler
+    // if found.  Otherwise unwind and dispatch to the callers
+    // exception handler.
+    oop_map = generate_oop_map(sasm, 1 /*thread*/);
+
+    // load and clear pending exception oop into RAX
+    __ movptr(exception_oop, Address(thread, Thread::pending_exception_offset()));
+    __ movptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD);
+
+    // load issuing PC (the return address for this stub) into rdx
+    __ movptr(exception_pc, Address(rbp, 1*BytesPerWord));
+
+    // make sure that the vm_results are cleared (may be unnecessary)
+    __ movptr(Address(thread, JavaThread::vm_result_offset()),   NULL_WORD);
+    __ movptr(Address(thread, JavaThread::vm_result_2_offset()), NULL_WORD);
+    break;
+  case handle_exception_nofpu_id:
+  case handle_exception_id:
+    // At this point all registers MAY be live.
+    oop_map = save_live_registers(sasm, 1 /*thread*/, id == handle_exception_nofpu_id);
+    break;
+  case handle_exception_from_callee_id: {
+    // At this point all registers except exception oop (RAX) and
+    // exception pc (RDX) are dead.
+    const int frame_size = 2 /*BP, return address*/ NOT_LP64(+ 1 /*thread*/) WIN64_ONLY(+ frame::arg_reg_save_area_bytes / BytesPerWord);
+    oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0);
+    sasm->set_frame_size(frame_size);
+    WIN64_ONLY(__ subq(rsp, frame::arg_reg_save_area_bytes));
+    break;
+  }
+  default:  ShouldNotReachHere();
+  }
 
 #ifdef TIERED
   // C2 can leave the fpu stack dirty
-  if (UseSSE < 2 ) {
+  if (UseSSE < 2) {
     __ empty_FPU_stack();
   }
 #endif // TIERED
@@ -686,11 +706,7 @@
   // save exception oop and issuing pc into JavaThread
   // (exception handler will load it from here)
   __ movptr(Address(thread, JavaThread::exception_oop_offset()), exception_oop);
-  __ movptr(Address(thread, JavaThread::exception_pc_offset()), exception_pc);
-
-  // save real return address (pc that called this stub)
-  __ movptr(real_return_addr, Address(rbp, 1*BytesPerWord));
-  __ movptr(Address(rsp, temp_1_off * VMRegImpl::stack_slot_size), real_return_addr);
+  __ movptr(Address(thread, JavaThread::exception_pc_offset()),  exception_pc);
 
   // patch throwing pc into return address (has bci & oop map)
   __ movptr(Address(rbp, 1*BytesPerWord), exception_pc);
@@ -700,33 +716,41 @@
   int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc));
   oop_maps->add_gc_map(call_offset, oop_map);
 
-  // rax,: handler address
+  // rax: handler address
   //      will be the deopt blob if nmethod was deoptimized while we looked up
   //      handler regardless of whether handler existed in the nmethod.
 
   // only rax, is valid at this time, all other registers have been destroyed by the runtime call
   __ invalidate_registers(false, true, true, true, true, true);
 
-#ifdef ASSERT
-  // Do we have an exception handler in the nmethod?
-  Label done;
-  __ testptr(rax, rax);
-  __ jcc(Assembler::notZero, done);
-  __ stop("no handler found");
-  __ bind(done);
-#endif
-
-  // exception handler found
-  // patch the return address -> the stub will directly return to the exception handler
+  // patch the return address, this stub will directly return to the exception handler
   __ movptr(Address(rbp, 1*BytesPerWord), rax);
 
-  // restore registers
-  restore_live_registers(sasm, save_fpu_registers);
+  switch (id) {
+  case forward_exception_id:
+  case handle_exception_nofpu_id:
+  case handle_exception_id:
+    // Restore the registers that were saved at the beginning.
+    restore_live_registers(sasm, id == handle_exception_nofpu_id);
+    break;
+  case handle_exception_from_callee_id:
+    // WIN64_ONLY: No need to add frame::arg_reg_save_area_bytes to SP
+    // since we do a leave anyway.
 
-  // return to exception handler
-  __ leave();
-  __ ret(0);
+    // Pop the return address since we are possibly changing SP (restoring from BP).
+    __ leave();
+    __ pop(rcx);
 
+    // Restore SP from BP if the exception PC is a method handle call site.
+    NOT_LP64(__ get_thread(thread);)
+    __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
+    __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
+    __ jmp(rcx);  // jump to exception handler
+    break;
+  default:  ShouldNotReachHere();
+  }
+
+  return oop_maps;
 }
 
 
@@ -791,7 +815,7 @@
   // the pop is also necessary to simulate the effect of a ret(0)
   __ pop(exception_pc);
 
-  // Restore SP from BP if the exception PC is a MethodHandle call site.
+  // Restore SP from BP if the exception PC is a method handle call site.
   NOT_LP64(__ get_thread(thread);)
   __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
   __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
@@ -934,7 +958,6 @@
   __ ret(0);
 
   return oop_maps;
-
 }
 
 
@@ -952,35 +975,9 @@
   switch (id) {
     case forward_exception_id:
       {
-        // we're handling an exception in the context of a compiled
-        // frame.  The registers have been saved in the standard
-        // places.  Perform an exception lookup in the caller and
-        // dispatch to the handler if found.  Otherwise unwind and
-        // dispatch to the callers exception handler.
-
-        const Register thread = NOT_LP64(rdi) LP64_ONLY(r15_thread);
-        const Register exception_oop = rax;
-        const Register exception_pc = rdx;
-
-        // load pending exception oop into rax,
-        __ movptr(exception_oop, Address(thread, Thread::pending_exception_offset()));
-        // clear pending exception
-        __ movptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD);
-
-        // load issuing PC (the return address for this stub) into rdx
-        __ movptr(exception_pc, Address(rbp, 1*BytesPerWord));
-
-        // make sure that the vm_results are cleared (may be unnecessary)
-        __ movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
-        __ movptr(Address(thread, JavaThread::vm_result_2_offset()), NULL_WORD);
-
-        // verify that that there is really a valid exception in rax,
-        __ verify_not_null_oop(exception_oop);
-
-        oop_maps = new OopMapSet();
-        OopMap* oop_map = generate_oop_map(sasm, 1);
-        generate_handle_exception(sasm, oop_maps, oop_map);
-        __ stop("should not reach here");
+        oop_maps = generate_handle_exception(id, sasm);
+        __ leave();
+        __ ret(0);
       }
       break;
 
@@ -1315,13 +1312,15 @@
       break;
 
     case handle_exception_nofpu_id:
-      save_fpu_registers = false;
-      // fall through
     case handle_exception_id:
       { StubFrame f(sasm, "handle_exception", dont_gc_arguments);
-        oop_maps = new OopMapSet();
-        OopMap* oop_map = save_live_registers(sasm, 1, save_fpu_registers);
-        generate_handle_exception(sasm, oop_maps, oop_map, save_fpu_registers);
+        oop_maps = generate_handle_exception(id, sasm);
+      }
+      break;
+
+    case handle_exception_from_callee_id:
+      { StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments);
+        oop_maps = generate_handle_exception(id, sasm);
       }
       break;
 
--- a/src/cpu/x86/vm/methodHandles_x86.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/cpu/x86/vm/methodHandles_x86.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -419,6 +419,7 @@
 
   // some handy addresses
   Address rbx_method_fie(     rbx,      methodOopDesc::from_interpreted_offset() );
+  Address rbx_method_fce(     rbx,      methodOopDesc::from_compiled_offset() );
 
   Address rcx_mh_vmtarget(    rcx_recv, java_dyn_MethodHandle::vmtarget_offset_in_bytes() );
   Address rcx_dmh_vmindex(    rcx_recv, sun_dyn_DirectMethodHandle::vmindex_offset_in_bytes() );
@@ -448,12 +449,10 @@
   case _raise_exception:
     {
       // Not a real MH entry, but rather shared code for raising an
-      // exception.  Since we use a C2I adapter to set up the
-      // interpreter state, arguments are expected in compiler
-      // argument registers.
+      // exception.  Since we use the compiled entry, arguments are
+      // expected in compiler argument registers.
       assert(raise_exception_method(), "must be set");
-      address c2i_entry = raise_exception_method()->get_c2i_entry();
-      assert(c2i_entry, "method must be linked");
+      assert(raise_exception_method()->from_compiled_entry(), "method must be linked");
 
       const Register rdi_pc = rax;
       __ pop(rdi_pc);  // caller PC
@@ -472,13 +471,10 @@
       __ jccb(Assembler::zero, L_no_method);
       __ verify_oop(rbx_method);
 
-      // 32-bit: push remaining arguments as if coming from the compiler.
       NOT_LP64(__ push(rarg2_required));
+      __ push(rdi_pc);         // restore caller PC
+      __ jmp(rbx_method_fce);  // jump to compiled entry
 
-      __ push(rdi_pc);  // restore caller PC
-      __ jump(ExternalAddress(c2i_entry));  // do C2I transition
-
-      // If we get here, the Java runtime did not do its job of creating the exception.
       // Do something that is at least causes a valid throw from the interpreter.
       __ bind(L_no_method);
       __ push(rarg2_required);
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -439,10 +439,6 @@
     // Verify that there is really a valid exception in RAX.
     __ verify_oop(exception_oop);
 
-    // Restore SP from BP if the exception PC is a MethodHandle call site.
-    __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
-    __ cmovptr(Assembler::notEqual, rsp, rbp);
-
     // continue at exception handler (return address removed)
     // rax: exception
     // rbx: exception handler
@@ -733,18 +729,19 @@
   //  Input:
   //     start   -  starting address
   //     count   -  element count
-  void  gen_write_ref_array_pre_barrier(Register start, Register count) {
+  void  gen_write_ref_array_pre_barrier(Register start, Register count, bool uninitialized_target) {
     assert_different_registers(start, count);
     BarrierSet* bs = Universe::heap()->barrier_set();
     switch (bs->kind()) {
       case BarrierSet::G1SATBCT:
       case BarrierSet::G1SATBCTLogging:
-        {
-          __ pusha();                      // push registers
-          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre),
-                          start, count);
-          __ popa();
-        }
+        // With G1, don't generate the call if we statically know that the target in uninitialized
+        if (!uninitialized_target) {
+           __ pusha();                      // push registers
+           __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre),
+                           start, count);
+           __ popa();
+         }
         break;
       case BarrierSet::CardTableModRef:
       case BarrierSet::CardTableExtension:
@@ -923,7 +920,8 @@
 
   address generate_disjoint_copy(BasicType t, bool aligned,
                                  Address::ScaleFactor sf,
-                                 address* entry, const char *name) {
+                                 address* entry, const char *name,
+                                 bool dest_uninitialized = false) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -945,16 +943,19 @@
     __ movptr(from , Address(rsp, 12+ 4));
     __ movptr(to   , Address(rsp, 12+ 8));
     __ movl(count, Address(rsp, 12+ 12));
+
+    if (entry != NULL) {
+      *entry = __ pc(); // Entry point from conjoint arraycopy stub.
+      BLOCK_COMMENT("Entry:");
+    }
+
     if (t == T_OBJECT) {
       __ testl(count, count);
       __ jcc(Assembler::zero, L_0_count);
-      gen_write_ref_array_pre_barrier(to, count);
+      gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
       __ mov(saved_to, to);          // save 'to'
     }
 
-    *entry = __ pc(); // Entry point from conjoint arraycopy stub.
-    BLOCK_COMMENT("Entry:");
-
     __ subptr(to, from); // to --> to_from
     __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
     __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
@@ -1085,7 +1086,8 @@
   address generate_conjoint_copy(BasicType t, bool aligned,
                                  Address::ScaleFactor sf,
                                  address nooverlap_target,
-                                 address* entry, const char *name) {
+                                 address* entry, const char *name,
+                                 bool dest_uninitialized = false) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -1108,23 +1110,17 @@
     __ movptr(src  , Address(rsp, 12+ 4));   // from
     __ movptr(dst  , Address(rsp, 12+ 8));   // to
     __ movl2ptr(count, Address(rsp, 12+12)); // count
-    if (t == T_OBJECT) {
-       gen_write_ref_array_pre_barrier(dst, count);
-    }
 
     if (entry != NULL) {
       *entry = __ pc(); // Entry point from generic arraycopy stub.
       BLOCK_COMMENT("Entry:");
     }
 
-    if (t == T_OBJECT) {
-      __ testl(count, count);
-      __ jcc(Assembler::zero, L_0_count);
-    }
+    // nooverlap_target expects arguments in rsi and rdi.
     __ mov(from, src);
     __ mov(to  , dst);
 
-    // arrays overlap test
+    // arrays overlap test: dispatch to disjoint stub if necessary.
     RuntimeAddress nooverlap(nooverlap_target);
     __ cmpptr(dst, src);
     __ lea(end, Address(src, count, sf, 0)); // src + count * elem_size
@@ -1132,6 +1128,12 @@
     __ cmpptr(dst, end);
     __ jump_cc(Assembler::aboveEqual, nooverlap);
 
+    if (t == T_OBJECT) {
+      __ testl(count, count);
+      __ jcc(Assembler::zero, L_0_count);
+      gen_write_ref_array_pre_barrier(dst, count, dest_uninitialized);
+    }
+
     // copy from high to low
     __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
     __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
@@ -1416,7 +1418,7 @@
   //    rax, ==  0  -  success
   //    rax, == -1^K - failure, where K is partial transfer count
   //
-  address generate_checkcast_copy(const char *name, address* entry) {
+  address generate_checkcast_copy(const char *name, address* entry, bool dest_uninitialized = false) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -1451,8 +1453,10 @@
     __ movptr(to,         to_arg);
     __ movl2ptr(length, length_arg);
 
-    *entry = __ pc(); // Entry point from generic arraycopy stub.
-    BLOCK_COMMENT("Entry:");
+    if (entry != NULL) {
+      *entry = __ pc(); // Entry point from generic arraycopy stub.
+      BLOCK_COMMENT("Entry:");
+    }
 
     //---------------------------------------------------------------
     // Assembler stub will be used for this call to arraycopy
@@ -1475,7 +1479,7 @@
     Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes());
 
     // Copy from low to high addresses, indexed from the end of each array.
-    gen_write_ref_array_pre_barrier(to, count);
+    gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
     __ lea(end_from, end_from_addr);
     __ lea(end_to,   end_to_addr);
     assert(length == count, "");        // else fix next line:
@@ -2038,6 +2042,15 @@
         generate_conjoint_copy(T_OBJECT, true, Address::times_ptr,  entry,
                                &entry_oop_arraycopy, "oop_arraycopy");
 
+    StubRoutines::_oop_disjoint_arraycopy_uninit =
+        generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry,
+                               "oop_disjoint_arraycopy_uninit",
+                               /*dest_uninitialized*/true);
+    StubRoutines::_oop_arraycopy_uninit =
+        generate_conjoint_copy(T_OBJECT, true, Address::times_ptr,  entry,
+                               NULL, "oop_arraycopy_uninit",
+                               /*dest_uninitialized*/true);
+
     StubRoutines::_jlong_disjoint_arraycopy =
         generate_disjoint_long_copy(&entry, "jlong_disjoint_arraycopy");
     StubRoutines::_jlong_arraycopy =
@@ -2051,20 +2064,20 @@
     StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
     StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
 
-    StubRoutines::_arrayof_jint_disjoint_arraycopy  =
-        StubRoutines::_jint_disjoint_arraycopy;
-    StubRoutines::_arrayof_oop_disjoint_arraycopy   =
-        StubRoutines::_oop_disjoint_arraycopy;
-    StubRoutines::_arrayof_jlong_disjoint_arraycopy =
-        StubRoutines::_jlong_disjoint_arraycopy;
+    StubRoutines::_arrayof_jint_disjoint_arraycopy       = StubRoutines::_jint_disjoint_arraycopy;
+    StubRoutines::_arrayof_oop_disjoint_arraycopy        = StubRoutines::_oop_disjoint_arraycopy;
+    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit;
+    StubRoutines::_arrayof_jlong_disjoint_arraycopy      = StubRoutines::_jlong_disjoint_arraycopy;
 
-    StubRoutines::_arrayof_jint_arraycopy  = StubRoutines::_jint_arraycopy;
-    StubRoutines::_arrayof_oop_arraycopy   = StubRoutines::_oop_arraycopy;
-    StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;
+    StubRoutines::_arrayof_jint_arraycopy       = StubRoutines::_jint_arraycopy;
+    StubRoutines::_arrayof_oop_arraycopy        = StubRoutines::_oop_arraycopy;
+    StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
+    StubRoutines::_arrayof_jlong_arraycopy      = StubRoutines::_jlong_arraycopy;
 
     StubRoutines::_checkcast_arraycopy =
-        generate_checkcast_copy("checkcast_arraycopy",
-                                  &entry_checkcast_arraycopy);
+        generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
+    StubRoutines::_checkcast_arraycopy_uninit =
+        generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, /*dest_uninitialized*/true);
 
     StubRoutines::_unsafe_arraycopy =
         generate_unsafe_copy("unsafe_arraycopy",
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1057,20 +1057,6 @@
     return start;
   }
 
-  static address disjoint_byte_copy_entry;
-  static address disjoint_short_copy_entry;
-  static address disjoint_int_copy_entry;
-  static address disjoint_long_copy_entry;
-  static address disjoint_oop_copy_entry;
-
-  static address byte_copy_entry;
-  static address short_copy_entry;
-  static address int_copy_entry;
-  static address long_copy_entry;
-  static address oop_copy_entry;
-
-  static address checkcast_copy_entry;
-
   //
   // Verify that a register contains clean 32-bits positive value
   // (high 32-bits are 0) so it could be used in 64-bits shifts.
@@ -1173,34 +1159,35 @@
   // Generate code for an array write pre barrier
   //
   //     addr    -  starting address
-  //     count    -  element count
+  //     count   -  element count
+  //     tmp     - scratch register
   //
   //     Destroy no registers!
   //
-  void  gen_write_ref_array_pre_barrier(Register addr, Register count) {
+  void  gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
     BarrierSet* bs = Universe::heap()->barrier_set();
     switch (bs->kind()) {
       case BarrierSet::G1SATBCT:
       case BarrierSet::G1SATBCTLogging:
-        {
-          __ pusha();                      // push registers
-          if (count == c_rarg0) {
-            if (addr == c_rarg1) {
-              // exactly backwards!!
-              __ xchgptr(c_rarg1, c_rarg0);
-            } else {
-              __ movptr(c_rarg1, count);
-              __ movptr(c_rarg0, addr);
-            }
-
-          } else {
-            __ movptr(c_rarg0, addr);
-            __ movptr(c_rarg1, count);
-          }
-          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
-          __ popa();
+        // With G1, don't generate the call if we statically know that the target in uninitialized
+        if (!dest_uninitialized) {
+           __ pusha();                      // push registers
+           if (count == c_rarg0) {
+             if (addr == c_rarg1) {
+               // exactly backwards!!
+               __ xchgptr(c_rarg1, c_rarg0);
+             } else {
+               __ movptr(c_rarg1, count);
+               __ movptr(c_rarg0, addr);
+             }
+           } else {
+             __ movptr(c_rarg0, addr);
+             __ movptr(c_rarg1, count);
+           }
+           __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
+           __ popa();
         }
-        break;
+         break;
       case BarrierSet::CardTableModRef:
       case BarrierSet::CardTableExtension:
       case BarrierSet::ModRef:
@@ -1379,7 +1366,7 @@
   //   disjoint_byte_copy_entry is set to the no-overlap entry point
   //   used by generate_conjoint_byte_copy().
   //
-  address generate_disjoint_byte_copy(bool aligned, const char *name) {
+  address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -1399,9 +1386,11 @@
     __ enter(); // required for proper stackwalking of RuntimeStub frame
     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
 
-    disjoint_byte_copy_entry = __ pc();
-    BLOCK_COMMENT("Entry:");
-    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+    if (entry != NULL) {
+      *entry = __ pc();
+       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      BLOCK_COMMENT("Entry:");
+    }
 
     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                       // r9 and r10 may be used to save non-volatile registers
@@ -1479,7 +1468,8 @@
   // dwords or qwords that span cache line boundaries will still be loaded
   // and stored atomically.
   //
-  address generate_conjoint_byte_copy(bool aligned, const char *name) {
+  address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
+                                      address* entry, const char *name) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -1494,11 +1484,13 @@
     __ enter(); // required for proper stackwalking of RuntimeStub frame
     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
 
-    byte_copy_entry = __ pc();
-    BLOCK_COMMENT("Entry:");
-    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
-
-    array_overlap_test(disjoint_byte_copy_entry, Address::times_1);
+    if (entry != NULL) {
+      *entry = __ pc();
+      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      BLOCK_COMMENT("Entry:");
+    }
+
+    array_overlap_test(nooverlap_target, Address::times_1);
     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                       // r9 and r10 may be used to save non-volatile registers
 
@@ -1574,7 +1566,7 @@
   //   disjoint_short_copy_entry is set to the no-overlap entry point
   //   used by generate_conjoint_short_copy().
   //
-  address generate_disjoint_short_copy(bool aligned, const char *name) {
+  address generate_disjoint_short_copy(bool aligned, address *entry, const char *name) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -1593,9 +1585,11 @@
     __ enter(); // required for proper stackwalking of RuntimeStub frame
     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
 
-    disjoint_short_copy_entry = __ pc();
-    BLOCK_COMMENT("Entry:");
-    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+    if (entry != NULL) {
+      *entry = __ pc();
+      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      BLOCK_COMMENT("Entry:");
+    }
 
     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                       // r9 and r10 may be used to save non-volatile registers
@@ -1686,7 +1680,8 @@
   // or qwords that span cache line boundaries will still be loaded
   // and stored atomically.
   //
-  address generate_conjoint_short_copy(bool aligned, const char *name) {
+  address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
+                                       address *entry, const char *name) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -1701,11 +1696,13 @@
     __ enter(); // required for proper stackwalking of RuntimeStub frame
     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
 
-    short_copy_entry = __ pc();
-    BLOCK_COMMENT("Entry:");
-    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
-
-    array_overlap_test(disjoint_short_copy_entry, Address::times_2);
+    if (entry != NULL) {
+      *entry = __ pc();
+      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      BLOCK_COMMENT("Entry:");
+    }
+
+    array_overlap_test(nooverlap_target, Address::times_2);
     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                       // r9 and r10 may be used to save non-volatile registers
 
@@ -1773,7 +1770,8 @@
   //   disjoint_int_copy_entry is set to the no-overlap entry point
   //   used by generate_conjoint_int_oop_copy().
   //
-  address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
+  address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
+                                         const char *name, bool dest_uninitialized = false) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -1793,21 +1791,17 @@
     __ enter(); // required for proper stackwalking of RuntimeStub frame
     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
 
-    (is_oop ? disjoint_oop_copy_entry : disjoint_int_copy_entry) = __ pc();
-
-    if (is_oop) {
-      // no registers are destroyed by this call
-      gen_write_ref_array_pre_barrier(/* dest */ c_rarg1, /* count */ c_rarg2);
+    if (entry != NULL) {
+      *entry = __ pc();
+      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      BLOCK_COMMENT("Entry:");
     }
 
-    BLOCK_COMMENT("Entry:");
-    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
-
     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                       // r9 and r10 may be used to save non-volatile registers
-
     if (is_oop) {
       __ movq(saved_to, to);
+      gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
     }
 
     // 'from', 'to' and 'count' are now valid
@@ -1867,7 +1861,9 @@
   // the hardware handle it.  The two dwords within qwords that span
   // cache line boundaries will still be loaded and stored atomicly.
   //
-  address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
+  address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
+                                         address *entry, const char *name,
+                                         bool dest_uninitialized = false) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -1882,20 +1878,21 @@
     __ enter(); // required for proper stackwalking of RuntimeStub frame
     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
 
+    if (entry != NULL) {
+      *entry = __ pc();
+       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      BLOCK_COMMENT("Entry:");
+    }
+
+    array_overlap_test(nooverlap_target, Address::times_4);
+    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
+                      // r9 and r10 may be used to save non-volatile registers
+
     if (is_oop) {
       // no registers are destroyed by this call
-      gen_write_ref_array_pre_barrier(/* dest */ c_rarg1, /* count */ c_rarg2);
+      gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
     }
 
-    (is_oop ? oop_copy_entry : int_copy_entry) = __ pc();
-    BLOCK_COMMENT("Entry:");
-    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
-
-    array_overlap_test(is_oop ? disjoint_oop_copy_entry : disjoint_int_copy_entry,
-                       Address::times_4);
-    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
-                      // r9 and r10 may be used to save non-volatile registers
-
     assert_clean_int(count, rax); // Make sure 'count' is clean int.
     // 'from', 'to' and 'count' are now valid
     __ movptr(dword_count, count);
@@ -1959,7 +1956,8 @@
   //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
   //   no-overlap entry point used by generate_conjoint_long_oop_copy().
   //
-  address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
+  address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
+                                          const char *name, bool dest_uninitialized = false) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -1978,20 +1976,19 @@
     // Save no-overlap entry point for generate_conjoint_long_oop_copy()
     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
 
-    if (is_oop) {
-      disjoint_oop_copy_entry  = __ pc();
-      // no registers are destroyed by this call
-      gen_write_ref_array_pre_barrier(/* dest */ c_rarg1, /* count */ c_rarg2);
-    } else {
-      disjoint_long_copy_entry = __ pc();
+    if (entry != NULL) {
+      *entry = __ pc();
+      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      BLOCK_COMMENT("Entry:");
     }
-    BLOCK_COMMENT("Entry:");
-    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
 
     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                       // r9 and r10 may be used to save non-volatile registers
-
     // 'from', 'to' and 'qword_count' are now valid
+    if (is_oop) {
+      // no registers are destroyed by this call
+      gen_write_ref_array_pre_barrier(to, qword_count, dest_uninitialized);
+    }
 
     // Copy from low to high addresses.  Use 'to' as scratch.
     __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
@@ -2045,7 +2042,9 @@
   //   c_rarg1   - destination array address
   //   c_rarg2   - element count, treated as ssize_t, can be zero
   //
-  address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
+  address generate_conjoint_long_oop_copy(bool aligned, bool is_oop,
+                                          address nooverlap_target, address *entry,
+                                          const char *name, bool dest_uninitialized = false) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -2059,31 +2058,21 @@
     __ enter(); // required for proper stackwalking of RuntimeStub frame
     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
 
-    address disjoint_copy_entry = NULL;
-    if (is_oop) {
-      assert(!UseCompressedOops, "shouldn't be called for compressed oops");
-      disjoint_copy_entry = disjoint_oop_copy_entry;
-      oop_copy_entry  = __ pc();
-      array_overlap_test(disjoint_oop_copy_entry, Address::times_8);
-    } else {
-      disjoint_copy_entry = disjoint_long_copy_entry;
-      long_copy_entry = __ pc();
-      array_overlap_test(disjoint_long_copy_entry, Address::times_8);
+    if (entry != NULL) {
+      *entry = __ pc();
+      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      BLOCK_COMMENT("Entry:");
     }
-    BLOCK_COMMENT("Entry:");
-    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
-
-    array_overlap_test(disjoint_copy_entry, Address::times_8);
+
+    array_overlap_test(nooverlap_target, Address::times_8);
     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                       // r9 and r10 may be used to save non-volatile registers
-
     // 'from', 'to' and 'qword_count' are now valid
-
     if (is_oop) {
       // Save to and count for store barrier
       __ movptr(saved_count, qword_count);
       // No registers are destroyed by this call
-      gen_write_ref_array_pre_barrier(to, saved_count);
+      gen_write_ref_array_pre_barrier(to, saved_count, dest_uninitialized);
     }
 
     __ jmp(L_copy_32_bytes);
@@ -2162,7 +2151,8 @@
   //    rax ==  0  -  success
   //    rax == -1^K - failure, where K is partial transfer count
   //
-  address generate_checkcast_copy(const char *name) {
+  address generate_checkcast_copy(const char *name, address *entry,
+                                  bool dest_uninitialized = false) {
 
     Label L_load_element, L_store_element, L_do_card_marks, L_done;
 
@@ -2216,8 +2206,10 @@
 #endif
 
     // Caller of this entry point must set up the argument registers.
-    checkcast_copy_entry  = __ pc();
-    BLOCK_COMMENT("Entry:");
+    if (entry != NULL) {
+      *entry = __ pc();
+      BLOCK_COMMENT("Entry:");
+    }
 
     // allocate spill slots for r13, r14
     enum {
@@ -2254,7 +2246,7 @@
     Address from_element_addr(end_from, count, TIMES_OOP, 0);
     Address   to_element_addr(end_to,   count, TIMES_OOP, 0);
 
-    gen_write_ref_array_pre_barrier(to, count);
+    gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
 
     // Copy from low to high addresses, indexed from the end of each array.
     __ lea(end_from, end_from_addr);
@@ -2334,7 +2326,9 @@
   // Examines the alignment of the operands and dispatches
   // to a long, int, short, or byte copy loop.
   //
-  address generate_unsafe_copy(const char *name) {
+  address generate_unsafe_copy(const char *name,
+                               address byte_copy_entry, address short_copy_entry,
+                               address int_copy_entry, address long_copy_entry) {
 
     Label L_long_aligned, L_int_aligned, L_short_aligned;
 
@@ -2432,7 +2426,10 @@
   //    rax ==  0  -  success
   //    rax == -1^K - failure, where K is partial transfer count
   //
-  address generate_generic_copy(const char *name) {
+  address generate_generic_copy(const char *name,
+                                address byte_copy_entry, address short_copy_entry,
+                                address int_copy_entry, address long_copy_entry,
+                                address oop_copy_entry, address checkcast_copy_entry) {
 
     Label L_failed, L_failed_0, L_objArray;
     Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs;
@@ -2725,33 +2722,75 @@
   }
 
   void generate_arraycopy_stubs() {
-    // Call the conjoint generation methods immediately after
-    // the disjoint ones so that short branches from the former
-    // to the latter can be generated.
-    StubRoutines::_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
-    StubRoutines::_jbyte_arraycopy           = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
-
-    StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
-    StubRoutines::_jshort_arraycopy          = generate_conjoint_short_copy(false, "jshort_arraycopy");
-
-    StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy");
-    StubRoutines::_jint_arraycopy            = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy");
-
-    StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, false, "jlong_disjoint_arraycopy");
-    StubRoutines::_jlong_arraycopy           = generate_conjoint_long_oop_copy(false, false, "jlong_arraycopy");
+    address entry;
+    address entry_jbyte_arraycopy;
+    address entry_jshort_arraycopy;
+    address entry_jint_arraycopy;
+    address entry_oop_arraycopy;
+    address entry_jlong_arraycopy;
+    address entry_checkcast_arraycopy;
+
+    StubRoutines::_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(false, &entry,
+                                                                           "jbyte_disjoint_arraycopy");
+    StubRoutines::_jbyte_arraycopy           = generate_conjoint_byte_copy(false, entry, &entry_jbyte_arraycopy,
+                                                                           "jbyte_arraycopy");
+
+    StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry,
+                                                                            "jshort_disjoint_arraycopy");
+    StubRoutines::_jshort_arraycopy          = generate_conjoint_short_copy(false, entry, &entry_jshort_arraycopy,
+                                                                            "jshort_arraycopy");
+
+    StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_oop_copy(false, false, &entry,
+                                                                              "jint_disjoint_arraycopy");
+    StubRoutines::_jint_arraycopy            = generate_conjoint_int_oop_copy(false, false, entry,
+                                                                              &entry_jint_arraycopy, "jint_arraycopy");
+
+    StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, false, &entry,
+                                                                               "jlong_disjoint_arraycopy");
+    StubRoutines::_jlong_arraycopy           = generate_conjoint_long_oop_copy(false, false, entry,
+                                                                               &entry_jlong_arraycopy, "jlong_arraycopy");
 
 
     if (UseCompressedOops) {
-      StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_int_oop_copy(false, true, "oop_disjoint_arraycopy");
-      StubRoutines::_oop_arraycopy           = generate_conjoint_int_oop_copy(false, true, "oop_arraycopy");
+      StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_int_oop_copy(false, true, &entry,
+                                                                              "oop_disjoint_arraycopy");
+      StubRoutines::_oop_arraycopy           = generate_conjoint_int_oop_copy(false, true, entry,
+                                                                              &entry_oop_arraycopy, "oop_arraycopy");
+      StubRoutines::_oop_disjoint_arraycopy_uninit  = generate_disjoint_int_oop_copy(false, true, &entry,
+                                                                                     "oop_disjoint_arraycopy_uninit",
+                                                                                     /*dest_uninitialized*/true);
+      StubRoutines::_oop_arraycopy_uninit           = generate_conjoint_int_oop_copy(false, true, entry,
+                                                                                     NULL, "oop_arraycopy_uninit",
+                                                                                     /*dest_uninitialized*/true);
     } else {
-      StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, true, "oop_disjoint_arraycopy");
-      StubRoutines::_oop_arraycopy           = generate_conjoint_long_oop_copy(false, true, "oop_arraycopy");
+      StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, true, &entry,
+                                                                               "oop_disjoint_arraycopy");
+      StubRoutines::_oop_arraycopy           = generate_conjoint_long_oop_copy(false, true, entry,
+                                                                               &entry_oop_arraycopy, "oop_arraycopy");
+      StubRoutines::_oop_disjoint_arraycopy_uninit  = generate_disjoint_long_oop_copy(false, true, &entry,
+                                                                                      "oop_disjoint_arraycopy_uninit",
+                                                                                      /*dest_uninitialized*/true);
+      StubRoutines::_oop_arraycopy_uninit           = generate_conjoint_long_oop_copy(false, true, entry,
+                                                                                      NULL, "oop_arraycopy_uninit",
+                                                                                      /*dest_uninitialized*/true);
     }
 
-    StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
-    StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy");
-    StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy");
+    StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
+    StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
+                                                                        /*dest_uninitialized*/true);
+
+    StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy",
+                                                              entry_jbyte_arraycopy,
+                                                              entry_jshort_arraycopy,
+                                                              entry_jint_arraycopy,
+                                                              entry_jlong_arraycopy);
+    StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy",
+                                                               entry_jbyte_arraycopy,
+                                                               entry_jshort_arraycopy,
+                                                               entry_jint_arraycopy,
+                                                               entry_oop_arraycopy,
+                                                               entry_jlong_arraycopy,
+                                                               entry_checkcast_arraycopy);
 
     StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
     StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
@@ -2776,6 +2815,9 @@
 
     StubRoutines::_arrayof_oop_disjoint_arraycopy    = StubRoutines::_oop_disjoint_arraycopy;
     StubRoutines::_arrayof_oop_arraycopy             = StubRoutines::_oop_arraycopy;
+
+    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit    = StubRoutines::_oop_disjoint_arraycopy_uninit;
+    StubRoutines::_arrayof_oop_arraycopy_uninit             = StubRoutines::_oop_arraycopy_uninit;
   }
 
   void generate_math_stubs() {
@@ -3069,20 +3111,6 @@
   }
 }; // end class declaration
 
-address StubGenerator::disjoint_byte_copy_entry  = NULL;
-address StubGenerator::disjoint_short_copy_entry = NULL;
-address StubGenerator::disjoint_int_copy_entry   = NULL;
-address StubGenerator::disjoint_long_copy_entry  = NULL;
-address StubGenerator::disjoint_oop_copy_entry   = NULL;
-
-address StubGenerator::byte_copy_entry  = NULL;
-address StubGenerator::short_copy_entry = NULL;
-address StubGenerator::int_copy_entry   = NULL;
-address StubGenerator::long_copy_entry  = NULL;
-address StubGenerator::oop_copy_entry   = NULL;
-
-address StubGenerator::checkcast_copy_entry = NULL;
-
 void StubGenerator_generate(CodeBuffer* code, bool all) {
   StubGenerator g(code, all);
 }
--- a/src/cpu/x86/vm/x86_32.ad	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/cpu/x86/vm/x86_32.ad	Thu Mar 03 23:31:45 2011 -0800
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -12658,17 +12658,46 @@
   ins_pipe( pipe_slow );
 %}
 
+// fast search of substring with known size.
+instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
+                            eBXRegI result, regXD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
+  predicate(UseSSE42Intrinsics);
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
+  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
+
+  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
+  ins_encode %{
+    int icnt2 = (int)$int_cnt2$$constant;
+    if (icnt2 >= 8) {
+      // IndexOf for constant substrings with size >= 8 elements
+      // which don't need to be loaded through stack.
+      __ string_indexofC8($str1$$Register, $str2$$Register,
+                          $cnt1$$Register, $cnt2$$Register,
+                          icnt2, $result$$Register,
+                          $vec$$XMMRegister, $tmp$$Register);
+    } else {
+      // Small strings are loaded through stack if they cross page boundary.
+      __ string_indexof($str1$$Register, $str2$$Register,
+                        $cnt1$$Register, $cnt2$$Register,
+                        icnt2, $result$$Register,
+                        $vec$$XMMRegister, $tmp$$Register);
+    }
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
-                        eBXRegI result, regXD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
+                        eBXRegI result, regXD vec, eCXRegI tmp, eFlagsReg cr) %{
   predicate(UseSSE42Intrinsics);
   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
-  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
-
-  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp2, $tmp1" %}
+  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
+
+  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
   ins_encode %{
     __ string_indexof($str1$$Register, $str2$$Register,
-                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
-                      $tmp1$$XMMRegister, $tmp2$$Register);
+                      $cnt1$$Register, $cnt2$$Register,
+                      (-1), $result$$Register,
+                      $vec$$XMMRegister, $tmp$$Register);
   %}
   ins_pipe( pipe_slow );
 %}
--- a/src/cpu/x86/vm/x86_64.ad	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/cpu/x86/vm/x86_64.ad	Thu Mar 03 23:31:45 2011 -0800
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -11598,18 +11598,48 @@
   ins_pipe( pipe_slow );
 %}
 
+// fast search of substring with known size.
+instruct string_indexof_con(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
+                            rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
+%{
+  predicate(UseSSE42Intrinsics);
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
+  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
+
+  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
+  ins_encode %{
+    int icnt2 = (int)$int_cnt2$$constant;
+    if (icnt2 >= 8) {
+      // IndexOf for constant substrings with size >= 8 elements
+      // which don't need to be loaded through stack.
+      __ string_indexofC8($str1$$Register, $str2$$Register,
+                          $cnt1$$Register, $cnt2$$Register,
+                          icnt2, $result$$Register,
+                          $vec$$XMMRegister, $tmp$$Register);
+    } else {
+      // Small strings are loaded through stack if they cross page boundary.
+      __ string_indexof($str1$$Register, $str2$$Register,
+                        $cnt1$$Register, $cnt2$$Register,
+                        icnt2, $result$$Register,
+                        $vec$$XMMRegister, $tmp$$Register);
+    }
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
-                        rbx_RegI result, regD tmp1, rcx_RegI tmp2, rFlagsReg cr)
+                        rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
 %{
   predicate(UseSSE42Intrinsics);
   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
-  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
-
-  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
+  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
+
+  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
   ins_encode %{
     __ string_indexof($str1$$Register, $str2$$Register,
-                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
-                      $tmp1$$XMMRegister, $tmp2$$Register);
+                      $cnt1$$Register, $cnt2$$Register,
+                      (-1), $result$$Register,
+                      $vec$$XMMRegister, $tmp$$Register);
   %}
   ins_pipe( pipe_slow );
 %}
--- a/src/share/tools/hsdis/hsdis-demo.c	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/share/tools/hsdis/hsdis-demo.c	Thu Mar 03 23:31:45 2011 -0800
@@ -22,8 +22,6 @@
  *
  */
 
-#include "precompiled.hpp"
-
 /* hsdis-demo.c -- dump a range of addresses as native instructions
    This demonstrates the protocol required by the HotSpot PrintAssembly option.
 */
--- a/src/share/tools/hsdis/hsdis.c	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/share/tools/hsdis/hsdis.c	Thu Mar 03 23:31:45 2011 -0800
@@ -22,8 +22,6 @@
  *
  */
 
-#include "precompiled.hpp"
-
 /* hsdis.c -- dump a range of addresses as native instructions
    This implements the plugin protocol required by the
    HotSpot PrintAssembly option.
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -3308,22 +3308,23 @@
   Value exception = append_with_bci(new ExceptionObject(), SynchronizationEntryBCI);
   assert(exception->is_pinned(), "must be");
 
+  int bci = SynchronizationEntryBCI;
   if (compilation()->env()->dtrace_method_probes()) {
-    // Report exit from inline methods
+    // Report exit from inline methods.  We don't have a stream here
+    // so pass an explicit bci of SynchronizationEntryBCI.
     Values* args = new Values(1);
-    args->push(append(new Constant(new ObjectConstant(method()))));
-    append(new RuntimeCall(voidType, "dtrace_method_exit", CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), args));
+    args->push(append_with_bci(new Constant(new ObjectConstant(method())), bci));
+    append_with_bci(new RuntimeCall(voidType, "dtrace_method_exit", CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), args), bci);
   }
 
-  int bci = SynchronizationEntryBCI;
   if (lock) {
     assert(state()->locks_size() > 0 && state()->lock_at(state()->locks_size() - 1) == lock, "lock is missing");
     if (!lock->is_linked()) {
-      lock = append_with_bci(lock, -1);
+      lock = append_with_bci(lock, bci);
     }
 
     // exit the monitor in the context of the synchronized method
-    monitorexit(lock, SynchronizationEntryBCI);
+    monitorexit(lock, bci);
 
     // exit the context of the synchronized method
     if (!default_handler) {
--- a/src/share/vm/c1/c1_Runtime1.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/share/vm/c1/c1_Runtime1.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -426,10 +426,9 @@
 // been deoptimized. If that is the case we return the deopt blob
 // unpack_with_exception entry instead. This makes life for the exception blob easier
 // because making that same check and diverting is painful from assembly language.
-//
-
-
 JRT_ENTRY_NO_ASYNC(static address, exception_handler_for_pc_helper(JavaThread* thread, oopDesc* ex, address pc, nmethod*& nm))
+  // Reset method handle flag.
+  thread->set_is_method_handle_return(false);
 
   Handle exception(thread, ex);
   nm = CodeCache::find_nmethod(pc);
@@ -480,11 +479,12 @@
     return SharedRuntime::deopt_blob()->unpack_with_exception_in_tls();
   }
 
-  // ExceptionCache is used only for exceptions at call and not for implicit exceptions
+  // ExceptionCache is used only for exceptions at call sites and not for implicit exceptions
   if (guard_pages_enabled) {
     address fast_continuation = nm->handler_for_exception_and_pc(exception, pc);
     if (fast_continuation != NULL) {
-      if (fast_continuation == ExceptionCache::unwind_handler()) fast_continuation = NULL;
+      // Set flag if return address is a method handle call site.
+      thread->set_is_method_handle_return(nm->is_method_handle_return(pc));
       return fast_continuation;
     }
   }
@@ -522,14 +522,14 @@
     thread->set_exception_pc(pc);
 
     // the exception cache is used only by non-implicit exceptions
-    if (continuation == NULL) {
-      nm->add_handler_for_exception_and_pc(exception, pc, ExceptionCache::unwind_handler());
-    } else {
+    if (continuation != NULL) {
       nm->add_handler_for_exception_and_pc(exception, pc, continuation);
     }
   }
 
   thread->set_vm_result(exception());
+  // Set flag if return address is a method handle call site.
+  thread->set_is_method_handle_return(nm->is_method_handle_return(pc));
 
   if (TraceExceptions) {
     ttyLocker ttyl;
@@ -542,20 +542,19 @@
 JRT_END
 
 // Enter this method from compiled code only if there is a Java exception handler
-// in the method handling the exception
+// in the method handling the exception.
 // We are entering here from exception stub. We don't do a normal VM transition here.
 // We do it in a helper. This is so we can check to see if the nmethod we have just
 // searched for an exception handler has been deoptimized in the meantime.
-address  Runtime1::exception_handler_for_pc(JavaThread* thread) {
+address Runtime1::exception_handler_for_pc(JavaThread* thread) {
   oop exception = thread->exception_oop();
   address pc = thread->exception_pc();
   // Still in Java mode
-  debug_only(ResetNoHandleMark rnhm);
+  DEBUG_ONLY(ResetNoHandleMark rnhm);
   nmethod* nm = NULL;
   address continuation = NULL;
   {
     // Enter VM mode by calling the helper
-
     ResetNoHandleMark rnhm;
     continuation = exception_handler_for_pc_helper(thread, exception, pc, nm);
   }
@@ -563,11 +562,11 @@
 
   // Now check to see if the nmethod we were called from is now deoptimized.
   // If so we must return to the deopt blob and deoptimize the nmethod
-
   if (nm != NULL && caller_is_deopted()) {
     continuation = SharedRuntime::deopt_blob()->unpack_with_exception_in_tls();
   }
 
+  assert(continuation != NULL, "no handler found");
   return continuation;
 }
 
--- a/src/share/vm/c1/c1_Runtime1.hpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/share/vm/c1/c1_Runtime1.hpp	Thu Mar 03 23:31:45 2011 -0800
@@ -54,6 +54,7 @@
   stub(new_multi_array)              \
   stub(handle_exception_nofpu)         /* optimized version that does not preserve fpu registers */ \
   stub(handle_exception)             \
+  stub(handle_exception_from_callee) \
   stub(throw_array_store_exception)  \
   stub(throw_class_cast_exception)   \
   stub(throw_incompatible_class_change_error)   \
@@ -116,11 +117,11 @@
   static const char* _blob_names[];
 
   // stub generation
-  static void generate_blob_for(BufferBlob* blob, StubID id);
-  static OopMapSet* generate_code_for(StubID id, StubAssembler* masm);
+  static void       generate_blob_for(BufferBlob* blob, StubID id);
+  static OopMapSet* generate_code_for(StubID id, StubAssembler* sasm);
   static OopMapSet* generate_exception_throw(StubAssembler* sasm, address target, bool has_argument);
-  static void generate_handle_exception(StubAssembler *sasm, OopMapSet* oop_maps, OopMap* oop_map, bool ignore_fpu_registers = false);
-  static void generate_unwind_exception(StubAssembler *sasm);
+  static OopMapSet* generate_handle_exception(StubID id, StubAssembler* sasm);
+  static void       generate_unwind_exception(StubAssembler *sasm);
   static OopMapSet* generate_patching(StubAssembler* sasm, address target);
 
   static OopMapSet* generate_stub_call(StubAssembler* sasm, Register result, address entry,
--- a/src/share/vm/classfile/classLoader.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/share/vm/classfile/classLoader.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -1332,7 +1332,7 @@
       }
 
       if (_compile_the_world_counter >= CompileTheWorldStartAt) {
-        if (k.is_null() || (exception_occurred && !CompileTheWorldIgnoreInitErrors)) {
+        if (k.is_null() || exception_occurred) {
           // If something went wrong (e.g. ExceptionInInitializerError) we skip this class
           tty->print_cr("CompileTheWorld (%d) : Skipping %s", _compile_the_world_counter, buffer);
         } else {
--- a/src/share/vm/code/nmethod.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/share/vm/code/nmethod.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -190,15 +190,10 @@
 } nmethod_stats;
 #endif //PRODUCT
 
+
 //---------------------------------------------------------------------------------
 
 
-// The _unwind_handler is a special marker address, which says that
-// for given exception oop and address, the frame should be removed
-// as the tuple cannot be caught in the nmethod
-address ExceptionCache::_unwind_handler = (address) -1;
-
-
 ExceptionCache::ExceptionCache(Handle exception, address pc, address handler) {
   assert(pc != NULL, "Must be non null");
   assert(exception.not_null(), "Must be non null");
--- a/src/share/vm/code/nmethod.hpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/share/vm/code/nmethod.hpp	Thu Mar 03 23:31:45 2011 -0800
@@ -34,7 +34,6 @@
 class ExceptionCache : public CHeapObj {
   friend class VMStructs;
  private:
-  static address _unwind_handler;
   enum { cache_size = 16 };
   klassOop _exception_type;
   address  _pc[cache_size];
@@ -62,8 +61,6 @@
   bool    match_exception_with_space(Handle exception) ;
   address test_address(address addr);
   bool    add_address_and_handler(address addr, address handler) ;
-
-  static address unwind_handler() { return _unwind_handler; }
 };
 
 
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Thu Mar 03 23:31:45 2011 -0800
@@ -83,11 +83,15 @@
   }
 
   template <class T> void write_ref_array_pre_work(T* dst, int count);
-  virtual void write_ref_array_pre(oop* dst, int count) {
-    write_ref_array_pre_work(dst, count);
+  virtual void write_ref_array_pre(oop* dst, int count, bool dest_uninitialized) {
+    if (!dest_uninitialized) {
+      write_ref_array_pre_work(dst, count);
+    }
   }
-  virtual void write_ref_array_pre(narrowOop* dst, int count) {
-    write_ref_array_pre_work(dst, count);
+  virtual void write_ref_array_pre(narrowOop* dst, int count, bool dest_uninitialized) {
+    if (!dest_uninitialized) {
+      write_ref_array_pre_work(dst, count);
+    }
   }
 };
 
--- a/src/share/vm/memory/barrierSet.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/share/vm/memory/barrierSet.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -35,9 +35,9 @@
                    start,            count);
 #endif
   if (UseCompressedOops) {
-    Universe::heap()->barrier_set()->write_ref_array_pre((narrowOop*)start, (int)count);
+    Universe::heap()->barrier_set()->write_ref_array_pre((narrowOop*)start, (int)count, false);
   } else {
-    Universe::heap()->barrier_set()->write_ref_array_pre(      (oop*)start, (int)count);
+    Universe::heap()->barrier_set()->write_ref_array_pre(      (oop*)start, (int)count, false);
   }
 }
 
--- a/src/share/vm/memory/barrierSet.hpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/share/vm/memory/barrierSet.hpp	Thu Mar 03 23:31:45 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -44,6 +44,10 @@
     Uninit
   };
 
+  enum Flags {
+    None                = 0,
+    TargetUninitialized = 1
+  };
 protected:
   int _max_covered_regions;
   Name _kind;
@@ -128,8 +132,10 @@
   virtual void read_prim_array(MemRegion mr) = 0;
 
   // Below length is the # array elements being written
-  virtual void write_ref_array_pre(      oop* dst, int length) {}
-  virtual void write_ref_array_pre(narrowOop* dst, int length) {}
+  virtual void write_ref_array_pre(oop* dst, int length,
+                                   bool dest_uninitialized = false) {}
+  virtual void write_ref_array_pre(narrowOop* dst, int length,
+                                   bool dest_uninitialized = false) {}
   // Below count is the # array elements being written, starting
   // at the address "start", which may not necessarily be HeapWord-aligned
   inline void write_ref_array(HeapWord* start, size_t count);
--- a/src/share/vm/opto/library_call.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/share/vm/opto/library_call.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -97,7 +97,7 @@
                              RegionNode* region);
   Node* generate_current_thread(Node* &tls_output);
   address basictype2arraycopy(BasicType t, Node *src_offset, Node *dest_offset,
-                              bool disjoint_bases, const char* &name);
+                              bool disjoint_bases, const char* &name, bool dest_uninitialized);
   Node* load_mirror_from_klass(Node* klass);
   Node* load_klass_from_mirror_common(Node* mirror, bool never_see_null,
                                       int nargs,
@@ -212,26 +212,26 @@
                                 AllocateNode* alloc,
                                 Node* src,  Node* src_offset,
                                 Node* dest, Node* dest_offset,
-                                Node* dest_size);
+                                Node* dest_size, bool dest_uninitialized);
   void generate_slow_arraycopy(const TypePtr* adr_type,
                                Node* src,  Node* src_offset,
                                Node* dest, Node* dest_offset,
-                               Node* copy_length);
+                               Node* copy_length, bool dest_uninitialized);
   Node* generate_checkcast_arraycopy(const TypePtr* adr_type,
                                      Node* dest_elem_klass,
                                      Node* src,  Node* src_offset,
                                      Node* dest, Node* dest_offset,
-                                     Node* copy_length);
+                                     Node* copy_length, bool dest_uninitialized);
   Node* generate_generic_arraycopy(const TypePtr* adr_type,
                                    Node* src,  Node* src_offset,
                                    Node* dest, Node* dest_offset,
-                                   Node* copy_length);
+                                   Node* copy_length, bool dest_uninitialized);
   void generate_unchecked_arraycopy(const TypePtr* adr_type,
                                     BasicType basic_elem_type,
                                     bool disjoint_bases,
                                     Node* src,  Node* src_offset,
                                     Node* dest, Node* dest_offset,
-                                    Node* copy_length);
+                                    Node* copy_length, bool dest_uninitialized);
   bool inline_unsafe_CAS(BasicType type);
   bool inline_unsafe_ordered_store(BasicType type);
   bool inline_fp_conversions(vmIntrinsics::ID id);
@@ -1193,7 +1193,7 @@
   Node* result;
   // Disable the use of pcmpestri until it can be guaranteed that
   // the load doesn't cross into the uncommited space.
-  if (false && Matcher::has_match_rule(Op_StrIndexOf) &&
+  if (Matcher::has_match_rule(Op_StrIndexOf) &&
       UseSSE42Intrinsics) {
     // Generate SSE4.2 version of indexOf
     // We currently only have match rules that use SSE4.2
@@ -1211,14 +1211,14 @@
       return true;
     }
 
+    ciInstanceKlass* str_klass = env()->String_klass();
+    const TypeOopPtr* string_type = TypeOopPtr::make_from_klass(str_klass);
+
     // Make the merge point
-    RegionNode* result_rgn = new (C, 3) RegionNode(3);
-    Node*       result_phi = new (C, 3) PhiNode(result_rgn, TypeInt::INT);
+    RegionNode* result_rgn = new (C, 4) RegionNode(4);
+    Node*       result_phi = new (C, 4) PhiNode(result_rgn, TypeInt::INT);
     Node* no_ctrl  = NULL;
 
-    ciInstanceKlass* klass = env()->String_klass();
-    const TypeOopPtr* string_type = TypeOopPtr::make_from_klass(klass);
-
     // Get counts for string and substr
     Node* source_cnta = basic_plus_adr(receiver, receiver, count_offset);
     Node* source_cnt  = make_load(no_ctrl, source_cnta, TypeInt::INT, T_INT, string_type->add_offset(count_offset));
@@ -1236,6 +1236,17 @@
     }
 
     if (!stopped()) {
+      // Check for substr count == 0
+      cmp = _gvn.transform( new(C, 3) CmpINode(substr_cnt, intcon(0)) );
+      bol = _gvn.transform( new(C, 2) BoolNode(cmp, BoolTest::eq) );
+      Node* if_zero = generate_slow_guard(bol, NULL);
+      if (if_zero != NULL) {
+        result_phi->init_req(3, intcon(0));
+        result_rgn->init_req(3, if_zero);
+      }
+    }
+
+    if (!stopped()) {
       result = make_string_method_node(Op_StrIndexOf, receiver, source_cnt, argument, substr_cnt);
       result_phi->init_req(1, result);
       result_rgn->init_req(1, control());
@@ -1244,8 +1255,8 @@
     record_for_igvn(result_rgn);
     result = _gvn.transform(result_phi);
 
-  } else { //Use LibraryCallKit::string_indexOf
-    // don't intrinsify is argument isn't a constant string.
+  } else { // Use LibraryCallKit::string_indexOf
+    // don't intrinsify if argument isn't a constant string.
     if (!argument->is_Con()) {
      return false;
     }
@@ -1281,7 +1292,7 @@
     // No null check on the argument is needed since it's a constant String oop.
     _sp -= 2;
     if (stopped()) {
-     return true;
+      return true;
     }
 
     // The null string as a pattern always returns 0 (match at beginning of string)
@@ -4081,7 +4092,8 @@
   const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
   bool disjoint_bases = true;
   generate_unchecked_arraycopy(raw_adr_type, T_LONG, disjoint_bases,
-                               src, NULL, dest, NULL, countx);
+                               src, NULL, dest, NULL, countx,
+                               /*dest_uninitialized*/true);
 
   // If necessary, emit some card marks afterwards.  (Non-arrays only.)
   if (card_mark) {
@@ -4295,7 +4307,7 @@
 // Note:  The condition "disjoint" applies also for overlapping copies
 // where an descending copy is permitted (i.e., dest_offset <= src_offset).
 static address
-select_arraycopy_function(BasicType t, bool aligned, bool disjoint, const char* &name) {
+select_arraycopy_function(BasicType t, bool aligned, bool disjoint, const char* &name, bool dest_uninitialized) {
   int selector =
     (aligned  ? COPYFUNC_ALIGNED  : COPYFUNC_UNALIGNED) +
     (disjoint ? COPYFUNC_DISJOINT : COPYFUNC_CONJOINT);
@@ -4304,6 +4316,10 @@
   name = #xxx_arraycopy; \
   return StubRoutines::xxx_arraycopy(); }
 
+#define RETURN_STUB_PARM(xxx_arraycopy, parm) {           \
+  name = #xxx_arraycopy; \
+  return StubRoutines::xxx_arraycopy(parm); }
+
   switch (t) {
   case T_BYTE:
   case T_BOOLEAN:
@@ -4340,10 +4356,10 @@
   case T_ARRAY:
   case T_OBJECT:
     switch (selector) {
-    case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB(oop_arraycopy);
-    case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED:    RETURN_STUB(arrayof_oop_arraycopy);
-    case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB(oop_disjoint_arraycopy);
-    case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED:    RETURN_STUB(arrayof_oop_disjoint_arraycopy);
+    case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB_PARM(oop_arraycopy, dest_uninitialized);
+    case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED:    RETURN_STUB_PARM(arrayof_oop_arraycopy, dest_uninitialized);
+    case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB_PARM(oop_disjoint_arraycopy, dest_uninitialized);
+    case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED:    RETURN_STUB_PARM(arrayof_oop_disjoint_arraycopy, dest_uninitialized);
     }
   default:
     ShouldNotReachHere();
@@ -4351,6 +4367,7 @@
   }
 
 #undef RETURN_STUB
+#undef RETURN_STUB_PARM
 }
 
 //------------------------------basictype2arraycopy----------------------------
@@ -4358,7 +4375,8 @@
                                             Node* src_offset,
                                             Node* dest_offset,
                                             bool disjoint_bases,
-                                            const char* &name) {
+                                            const char* &name,
+                                            bool dest_uninitialized) {
   const TypeInt* src_offset_inttype  = gvn().find_int_type(src_offset);;
   const TypeInt* dest_offset_inttype = gvn().find_int_type(dest_offset);;
 
@@ -4384,7 +4402,7 @@
     disjoint = true;
   }
 
-  return select_arraycopy_function(t, aligned, disjoint, name);
+  return select_arraycopy_function(t, aligned, disjoint, name, dest_uninitialized);
 }
 
 
@@ -4440,7 +4458,8 @@
     // The component types are not the same or are not recognized.  Punt.
     // (But, avoid the native method wrapper to JVM_ArrayCopy.)
     generate_slow_arraycopy(TypePtr::BOTTOM,
-                            src, src_offset, dest, dest_offset, length);
+                            src, src_offset, dest, dest_offset, length,
+                            /*dest_uninitialized*/false);
     return true;
   }
 
@@ -4553,7 +4572,7 @@
 
   Node* original_dest      = dest;
   AllocateArrayNode* alloc = NULL;  // used for zeroing, if needed
-  bool  must_clear_dest    = false;
+  bool  dest_uninitialized = false;
 
   // See if this is the initialization of a newly-allocated array.
   // If so, we will take responsibility here for initializing it to zero.
@@ -4576,12 +4595,14 @@
     adr_type = TypeRawPtr::BOTTOM;  // all initializations are into raw memory
     // From this point on, every exit path is responsible for
     // initializing any non-copied parts of the object to zero.
-    must_clear_dest = true;
+    // Also, if this flag is set we make sure that arraycopy interacts properly
+    // with G1, eliding pre-barriers. See CR 6627983.
+    dest_uninitialized = true;
   } else {
     // No zeroing elimination here.
     alloc             = NULL;
     //original_dest   = dest;
-    //must_clear_dest = false;
+    //dest_uninitialized = false;
   }
 
   // Results are placed here:
@@ -4613,10 +4634,10 @@
   Node* checked_value   = NULL;
 
   if (basic_elem_type == T_CONFLICT) {
-    assert(!must_clear_dest, "");
+    assert(!dest_uninitialized, "");
     Node* cv = generate_generic_arraycopy(adr_type,
                                           src, src_offset, dest, dest_offset,
-                                          copy_length);
+                                          copy_length, dest_uninitialized);
     if (cv == NULL)  cv = intcon(-1);  // failure (no stub available)
     checked_control = control();
     checked_i_o     = i_o();
@@ -4636,7 +4657,7 @@
     }
 
     // copy_length is 0.
-    if (!stopped() && must_clear_dest) {
+    if (!stopped() && dest_uninitialized) {
       Node* dest_length = alloc->in(AllocateNode::ALength);
       if (_gvn.eqv_uncast(copy_length, dest_length)
           || _gvn.find_int_con(dest_length, 1) <= 0) {
@@ -4662,7 +4683,7 @@
     result_memory->init_req(zero_path, memory(adr_type));
   }
 
-  if (!stopped() && must_clear_dest) {
+  if (!stopped() && dest_uninitialized) {
     // We have to initialize the *uncopied* part of the array to zero.
     // The copy destination is the slice dest[off..off+len].  The other slices
     // are dest_head = dest[0..off] and dest_tail = dest[off+len..dest.length].
@@ -4698,7 +4719,7 @@
       { PreserveJVMState pjvms(this);
         didit = generate_block_arraycopy(adr_type, basic_elem_type, alloc,
                                          src, src_offset, dest, dest_offset,
-                                         dest_size);
+                                         dest_size, dest_uninitialized);
         if (didit) {
           // Present the results of the block-copying fast call.
           result_region->init_req(bcopy_path, control());
@@ -4774,7 +4795,7 @@
       Node* cv = generate_checkcast_arraycopy(adr_type,
                                               dest_elem_klass,
                                               src, src_offset, dest, dest_offset,
-                                              ConvI2X(copy_length));
+                                              ConvI2X(copy_length), dest_uninitialized);
       if (cv == NULL)  cv = intcon(-1);  // failure (no stub available)
       checked_control = control();
       checked_i_o     = i_o();
@@ -4797,7 +4818,7 @@
     PreserveJVMState pjvms(this);
     generate_unchecked_arraycopy(adr_type, copy_type, disjoint_bases,
                                  src, src_offset, dest, dest_offset,
-                                 ConvI2X(copy_length));
+                                 ConvI2X(copy_length), dest_uninitialized);
 
     // Present the results of the fast call.
     result_region->init_req(fast_path, control());
@@ -4876,7 +4897,7 @@
     set_memory(slow_mem, adr_type);
     set_i_o(slow_i_o);
 
-    if (must_clear_dest) {
+    if (dest_uninitialized) {
       generate_clear_array(adr_type, dest, basic_elem_type,
                            intcon(0), NULL,
                            alloc->in(AllocateNode::AllocSize));
@@ -4884,7 +4905,7 @@
 
     generate_slow_arraycopy(adr_type,
                             src, src_offset, dest, dest_offset,
-                            copy_length);
+                            copy_length, /*dest_uninitialized*/false);
 
     result_region->init_req(slow_call_path, control());
     result_i_o   ->init_req(slow_call_path, i_o());
@@ -5128,7 +5149,7 @@
                                          AllocateNode* alloc,
                                          Node* src,  Node* src_offset,
                                          Node* dest, Node* dest_offset,
-                                         Node* dest_size) {
+                                         Node* dest_size, bool dest_uninitialized) {
   // See if there is an advantage from block transfer.
   int scale = exact_log2(type2aelembytes(basic_elem_type));
   if (scale >= LogBytesPerLong)
@@ -5173,7 +5194,7 @@
 
   bool disjoint_bases = true;   // since alloc != NULL
   generate_unchecked_arraycopy(adr_type, T_LONG, disjoint_bases,
-                               sptr, NULL, dptr, NULL, countx);
+                               sptr, NULL, dptr, NULL, countx, dest_uninitialized);
 
   return true;
 }
@@ -5186,7 +5207,8 @@
 LibraryCallKit::generate_slow_arraycopy(const TypePtr* adr_type,
                                         Node* src,  Node* src_offset,
                                         Node* dest, Node* dest_offset,
-                                        Node* copy_length) {
+                                        Node* copy_length, bool dest_uninitialized) {
+  assert(!dest_uninitialized, "Invariant");
   Node* call = make_runtime_call(RC_NO_LEAF | RC_UNCOMMON,
                                  OptoRuntime::slow_arraycopy_Type(),
                                  OptoRuntime::slow_arraycopy_Java(),
@@ -5204,10 +5226,10 @@
                                              Node* dest_elem_klass,
                                              Node* src,  Node* src_offset,
                                              Node* dest, Node* dest_offset,
-                                             Node* copy_length) {
+                                             Node* copy_length, bool dest_uninitialized) {
   if (stopped())  return NULL;
 
-  address copyfunc_addr = StubRoutines::checkcast_arraycopy();
+  address copyfunc_addr = StubRoutines::checkcast_arraycopy(dest_uninitialized);
   if (copyfunc_addr == NULL) { // Stub was not generated, go slow path.
     return NULL;
   }
@@ -5245,9 +5267,9 @@
 LibraryCallKit::generate_generic_arraycopy(const TypePtr* adr_type,
                                            Node* src,  Node* src_offset,
                                            Node* dest, Node* dest_offset,
-                                           Node* copy_length) {
+                                           Node* copy_length, bool dest_uninitialized) {
+  assert(!dest_uninitialized, "Invariant");
   if (stopped())  return NULL;
-
   address copyfunc_addr = StubRoutines::generic_arraycopy();
   if (copyfunc_addr == NULL) { // Stub was not generated, go slow path.
     return NULL;
@@ -5268,7 +5290,7 @@
                                              bool disjoint_bases,
                                              Node* src,  Node* src_offset,
                                              Node* dest, Node* dest_offset,
-                                             Node* copy_length) {
+                                             Node* copy_length, bool dest_uninitialized) {
   if (stopped())  return;               // nothing to do
 
   Node* src_start  = src;
@@ -5283,7 +5305,7 @@
   const char* copyfunc_name = "arraycopy";
   address     copyfunc_addr =
       basictype2arraycopy(basic_elem_type, src_offset, dest_offset,
-                          disjoint_bases, copyfunc_name);
+                          disjoint_bases, copyfunc_name, dest_uninitialized);
 
   // Call it.  Note that the count_ix value is not scaled to a byte-size.
   make_runtime_call(RC_LEAF|RC_NO_FP,
--- a/src/share/vm/opto/memnode.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/share/vm/opto/memnode.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1559,21 +1559,24 @@
             phase->C->has_unsafe_access(),
             "Field accesses must be precise" );
     // For oop loads, we expect the _type to be precise
-    if (OptimizeStringConcat && klass == phase->C->env()->String_klass() &&
+    if (klass == phase->C->env()->String_klass() &&
         adr->is_AddP() && off != Type::OffsetBot) {
-      // For constant Strings treat the fields as compile time constants.
+      // For constant Strings treat the final fields as compile time constants.
       Node* base = adr->in(AddPNode::Base);
       const TypeOopPtr* t = phase->type(base)->isa_oopptr();
       if (t != NULL && t->singleton()) {
-        ciObject* string = t->const_oop();
-        ciConstant constant = string->as_instance()->field_value_by_offset(off);
-        if (constant.basic_type() == T_INT) {
-          return TypeInt::make(constant.as_int());
-        } else if (constant.basic_type() == T_ARRAY) {
-          if (adr->bottom_type()->is_ptr_to_narrowoop()) {
-            return TypeNarrowOop::make_from_constant(constant.as_object());
-          } else {
-            return TypeOopPtr::make_from_constant(constant.as_object());
+        ciField* field = phase->C->env()->String_klass()->get_field_by_offset(off, false);
+        if (field != NULL && field->is_final()) {
+          ciObject* string = t->const_oop();
+          ciConstant constant = string->as_instance()->field_value(field);
+          if (constant.basic_type() == T_INT) {
+            return TypeInt::make(constant.as_int());
+          } else if (constant.basic_type() == T_ARRAY) {
+            if (adr->bottom_type()->is_ptr_to_narrowoop()) {
+              return TypeNarrowOop::make_from_constant(constant.as_object());
+            } else {
+              return TypeOopPtr::make_from_constant(constant.as_object());
+            }
           }
         }
       }
@@ -4077,6 +4080,7 @@
     n = base_memory();
     assert(Node::in_dump()
            || n == NULL || n->bottom_type() == Type::TOP
+           || n->adr_type() == NULL // address is TOP
            || n->adr_type() == TypePtr::BOTTOM
            || n->adr_type() == TypeRawPtr::BOTTOM
            || Compile::current()->AliasLevel() == 0,
--- a/src/share/vm/prims/unsafe.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/share/vm/prims/unsafe.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -110,6 +110,8 @@
 
 inline void* index_oop_from_field_offset_long(oop p, jlong field_offset) {
   jlong byte_offset = field_offset_to_byte_offset(field_offset);
+  // Don't allow unsafe to be used to read or write the header word of oops
+  assert(p == NULL || field_offset >= oopDesc::header_size(), "offset must be outside of header");
 #ifdef ASSERT
   if (p != NULL) {
     assert(byte_offset >= 0 && byte_offset <= (jlong)MAX_OBJECT_SIZE, "sane offset");
--- a/src/share/vm/runtime/arguments.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/share/vm/runtime/arguments.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -2798,10 +2798,6 @@
   if (!FLAG_IS_DEFAULT(OptoLoopAlignment) && FLAG_IS_DEFAULT(MaxLoopPad)) {
     FLAG_SET_DEFAULT(MaxLoopPad, OptoLoopAlignment-1);
   }
-  // Temporary disable bulk zeroing reduction with G1. See CR 6627983.
-  if (UseG1GC) {
-    FLAG_SET_DEFAULT(ReduceBulkZeroing, false);
-  }
 #endif
 
   // If we are running in a headless jre, force java.awt.headless property
--- a/src/share/vm/runtime/globals.hpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/share/vm/runtime/globals.hpp	Thu Mar 03 23:31:45 2011 -0800
@@ -2610,9 +2610,6 @@
   develop(bool, CompileTheWorldPreloadClasses, true,                        \
           "Preload all classes used by a class before start loading")       \
                                                                             \
-  notproduct(bool, CompileTheWorldIgnoreInitErrors, false,                  \
-          "Compile all methods although class initializer failed")          \
-                                                                            \
   notproduct(intx, CompileTheWorldSafepointInterval, 100,                   \
           "Force a safepoint every n compiles so sweeper can keep up")      \
                                                                             \
--- a/src/share/vm/runtime/sharedRuntime.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/share/vm/runtime/sharedRuntime.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -431,25 +431,24 @@
 // previous frame depending on the return address.
 
 address SharedRuntime::raw_exception_handler_for_return_address(JavaThread* thread, address return_address) {
-  assert(frame::verify_return_pc(return_address), "must be a return pc");
-
-  // Reset MethodHandle flag.
+  assert(frame::verify_return_pc(return_address), err_msg("must be a return address: " INTPTR_FORMAT, return_address));
+
+  // Reset method handle flag.
   thread->set_is_method_handle_return(false);
 
-  // the fastest case first
+  // The fastest case first
   CodeBlob* blob = CodeCache::find_blob(return_address);
-  if (blob != NULL && blob->is_nmethod()) {
-    nmethod* code = (nmethod*)blob;
-    assert(code != NULL, "nmethod must be present");
-    // Check if the return address is a MethodHandle call site.
-    thread->set_is_method_handle_return(code->is_method_handle_return(return_address));
+  nmethod* nm = (blob != NULL) ? blob->as_nmethod_or_null() : NULL;
+  if (nm != NULL) {
+    // Set flag if return address is a method handle call site.
+    thread->set_is_method_handle_return(nm->is_method_handle_return(return_address));
     // native nmethods don't have exception handlers
-    assert(!code->is_native_method(), "no exception handler");
-    assert(code->header_begin() != code->exception_begin(), "no exception handler");
-    if (code->is_deopt_pc(return_address)) {
+    assert(!nm->is_native_method(), "no exception handler");
+    assert(nm->header_begin() != nm->exception_begin(), "no exception handler");
+    if (nm->is_deopt_pc(return_address)) {
       return SharedRuntime::deopt_blob()->unpack_with_exception();
     } else {
-      return code->exception_begin();
+      return nm->exception_begin();
     }
   }
 
@@ -462,22 +461,9 @@
     return Interpreter::rethrow_exception_entry();
   }
 
-  // Compiled code
-  if (CodeCache::contains(return_address)) {
-    CodeBlob* blob = CodeCache::find_blob(return_address);
-    if (blob->is_nmethod()) {
-      nmethod* code = (nmethod*)blob;
-      assert(code != NULL, "nmethod must be present");
-      // Check if the return address is a MethodHandle call site.
-      thread->set_is_method_handle_return(code->is_method_handle_return(return_address));
-      assert(code->header_begin() != code->exception_begin(), "no exception handler");
-      return code->exception_begin();
-    }
-    if (blob->is_runtime_stub()) {
-      ShouldNotReachHere();   // callers are responsible for skipping runtime stub frames
-    }
-  }
+  guarantee(blob == NULL || !blob->is_runtime_stub(), "caller should have skipped stub");
   guarantee(!VtableStubs::contains(return_address), "NULL exceptions in vtables should have been handled already!");
+
 #ifndef PRODUCT
   { ResourceMark rm;
     tty->print_cr("No exception handler found for exception at " INTPTR_FORMAT " - potential problems:", return_address);
@@ -485,6 +471,7 @@
     tty->print_cr("b) other problem");
   }
 #endif // PRODUCT
+
   ShouldNotReachHere();
   return NULL;
 }
--- a/src/share/vm/runtime/stubRoutines.cpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/share/vm/runtime/stubRoutines.cpp	Thu Mar 03 23:31:45 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -80,30 +80,36 @@
 jint    StubRoutines::_fpu_subnormal_bias2[3]                   = { 0, 0, 0 };
 
 // Compiled code entry points default values
-// The dafault functions don't have separate disjoint versions.
+// The default functions don't have separate disjoint versions.
 address StubRoutines::_jbyte_arraycopy          = CAST_FROM_FN_PTR(address, StubRoutines::jbyte_copy);
 address StubRoutines::_jshort_arraycopy         = CAST_FROM_FN_PTR(address, StubRoutines::jshort_copy);
 address StubRoutines::_jint_arraycopy           = CAST_FROM_FN_PTR(address, StubRoutines::jint_copy);
 address StubRoutines::_jlong_arraycopy          = CAST_FROM_FN_PTR(address, StubRoutines::jlong_copy);
 address StubRoutines::_oop_arraycopy            = CAST_FROM_FN_PTR(address, StubRoutines::oop_copy);
+address StubRoutines::_oop_arraycopy_uninit     = CAST_FROM_FN_PTR(address, StubRoutines::oop_copy_uninit);
 address StubRoutines::_jbyte_disjoint_arraycopy          = CAST_FROM_FN_PTR(address, StubRoutines::jbyte_copy);
 address StubRoutines::_jshort_disjoint_arraycopy         = CAST_FROM_FN_PTR(address, StubRoutines::jshort_copy);
 address StubRoutines::_jint_disjoint_arraycopy           = CAST_FROM_FN_PTR(address, StubRoutines::jint_copy);
 address StubRoutines::_jlong_disjoint_arraycopy          = CAST_FROM_FN_PTR(address, StubRoutines::jlong_copy);
 address StubRoutines::_oop_disjoint_arraycopy            = CAST_FROM_FN_PTR(address, StubRoutines::oop_copy);
+address StubRoutines::_oop_disjoint_arraycopy_uninit     = CAST_FROM_FN_PTR(address, StubRoutines::oop_copy_uninit);
 
 address StubRoutines::_arrayof_jbyte_arraycopy  = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jbyte_copy);
 address StubRoutines::_arrayof_jshort_arraycopy = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jshort_copy);
 address StubRoutines::_arrayof_jint_arraycopy   = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jint_copy);
 address StubRoutines::_arrayof_jlong_arraycopy  = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jlong_copy);
 address StubRoutines::_arrayof_oop_arraycopy    = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_oop_copy);
+address StubRoutines::_arrayof_oop_arraycopy_uninit      = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_oop_copy_uninit);
 address StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jbyte_copy);
 address StubRoutines::_arrayof_jshort_disjoint_arraycopy = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jshort_copy);
 address StubRoutines::_arrayof_jint_disjoint_arraycopy   = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jint_copy);
 address StubRoutines::_arrayof_jlong_disjoint_arraycopy  = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jlong_copy);
-address StubRoutines::_arrayof_oop_disjoint_arraycopy  = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_oop_copy);
+address StubRoutines::_arrayof_oop_disjoint_arraycopy    = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_oop_copy);
+address StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit  = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_oop_copy_uninit);
+
 
 address StubRoutines::_checkcast_arraycopy               = NULL;
+address StubRoutines::_checkcast_arraycopy_uninit        = NULL;
 address StubRoutines::_unsafe_arraycopy                  = NULL;
 address StubRoutines::_generic_arraycopy                 = NULL;
 
@@ -282,12 +288,12 @@
 // Default versions of arraycopy functions
 //
 
-static void gen_arraycopy_barrier_pre(oop* dest, size_t count) {
+static void gen_arraycopy_barrier_pre(oop* dest, size_t count, bool dest_uninitialized) {
     assert(count != 0, "count should be non-zero");
     assert(count <= (size_t)max_intx, "count too large");
     BarrierSet* bs = Universe::heap()->barrier_set();
     assert(bs->has_write_ref_array_pre_opt(), "Must have pre-barrier opt");
-    bs->write_ref_array_pre(dest, (int)count);
+    bs->write_ref_array_pre(dest, (int)count, dest_uninitialized);
 }
 
 static void gen_arraycopy_barrier(oop* dest, size_t count) {
@@ -330,7 +336,17 @@
   SharedRuntime::_oop_array_copy_ctr++;        // Slow-path oop array copy
 #endif // !PRODUCT
   assert(count != 0, "count should be non-zero");
-  gen_arraycopy_barrier_pre(dest, count);
+  gen_arraycopy_barrier_pre(dest, count, /*dest_uninitialized*/false);
+  Copy::conjoint_oops_atomic(src, dest, count);
+  gen_arraycopy_barrier(dest, count);
+JRT_END
+
+JRT_LEAF(void, StubRoutines::oop_copy_uninit(oop* src, oop* dest, size_t count))
+#ifndef PRODUCT
+  SharedRuntime::_oop_array_copy_ctr++;        // Slow-path oop array copy
+#endif // !PRODUCT
+  assert(count != 0, "count should be non-zero");
+  gen_arraycopy_barrier_pre(dest, count, /*dest_uninitialized*/true);
   Copy::conjoint_oops_atomic(src, dest, count);
   gen_arraycopy_barrier(dest, count);
 JRT_END
@@ -368,11 +384,20 @@
   SharedRuntime::_oop_array_copy_ctr++;        // Slow-path oop array copy
 #endif // !PRODUCT
   assert(count != 0, "count should be non-zero");
-  gen_arraycopy_barrier_pre((oop *) dest, count);
+  gen_arraycopy_barrier_pre((oop *) dest, count, /*dest_uninitialized*/false);
   Copy::arrayof_conjoint_oops(src, dest, count);
   gen_arraycopy_barrier((oop *) dest, count);
 JRT_END
 
+JRT_LEAF(void, StubRoutines::arrayof_oop_copy_uninit(HeapWord* src, HeapWord* dest, size_t count))
+#ifndef PRODUCT
+  SharedRuntime::_oop_array_copy_ctr++;        // Slow-path oop array copy
+#endif // !PRODUCT
+  assert(count != 0, "count should be non-zero");
+  gen_arraycopy_barrier_pre((oop *) dest, count, /*dest_uninitialized*/true);
+  Copy::arrayof_conjoint_oops(src, dest, count);
+  gen_arraycopy_barrier((oop *) dest, count);
+JRT_END
 
 address StubRoutines::select_fill_function(BasicType t, bool aligned, const char* &name) {
 #define RETURN_STUB(xxx_fill) { \
--- a/src/share/vm/runtime/stubRoutines.hpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/share/vm/runtime/stubRoutines.hpp	Thu Mar 03 23:31:45 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -164,12 +164,12 @@
   static address _jshort_arraycopy;
   static address _jint_arraycopy;
   static address _jlong_arraycopy;
-  static address _oop_arraycopy;
+  static address _oop_arraycopy, _oop_arraycopy_uninit;
   static address _jbyte_disjoint_arraycopy;
   static address _jshort_disjoint_arraycopy;
   static address _jint_disjoint_arraycopy;
   static address _jlong_disjoint_arraycopy;
-  static address _oop_disjoint_arraycopy;
+  static address _oop_disjoint_arraycopy, _oop_disjoint_arraycopy_uninit;
 
   // arraycopy operands aligned on zero'th element boundary
   // These are identical to the ones aligned aligned on an
@@ -179,15 +179,15 @@
   static address _arrayof_jshort_arraycopy;
   static address _arrayof_jint_arraycopy;
   static address _arrayof_jlong_arraycopy;
-  static address _arrayof_oop_arraycopy;
+  static address _arrayof_oop_arraycopy, _arrayof_oop_arraycopy_uninit;
   static address _arrayof_jbyte_disjoint_arraycopy;
   static address _arrayof_jshort_disjoint_arraycopy;
   static address _arrayof_jint_disjoint_arraycopy;
   static address _arrayof_jlong_disjoint_arraycopy;
-  static address _arrayof_oop_disjoint_arraycopy;
+  static address _arrayof_oop_disjoint_arraycopy, _arrayof_oop_disjoint_arraycopy_uninit;
 
   // these are recommended but optional:
-  static address _checkcast_arraycopy;
+  static address _checkcast_arraycopy, _checkcast_arraycopy_uninit;
   static address _unsafe_arraycopy;
   static address _generic_arraycopy;
 
@@ -286,26 +286,36 @@
   static address jshort_arraycopy() { return _jshort_arraycopy; }
   static address jint_arraycopy()   { return _jint_arraycopy; }
   static address jlong_arraycopy()  { return _jlong_arraycopy; }
-  static address oop_arraycopy()    { return _oop_arraycopy; }
+  static address oop_arraycopy(bool dest_uninitialized = false) {
+    return dest_uninitialized ? _oop_arraycopy_uninit : _oop_arraycopy;
+  }
   static address jbyte_disjoint_arraycopy()  { return _jbyte_disjoint_arraycopy; }
   static address jshort_disjoint_arraycopy() { return _jshort_disjoint_arraycopy; }
   static address jint_disjoint_arraycopy()   { return _jint_disjoint_arraycopy; }
   static address jlong_disjoint_arraycopy()  { return _jlong_disjoint_arraycopy; }
-  static address oop_disjoint_arraycopy()    { return _oop_disjoint_arraycopy; }
+  static address oop_disjoint_arraycopy(bool dest_uninitialized = false) {
+    return dest_uninitialized ?  _oop_disjoint_arraycopy_uninit : _oop_disjoint_arraycopy;
+  }
 
   static address arrayof_jbyte_arraycopy()  { return _arrayof_jbyte_arraycopy; }
   static address arrayof_jshort_arraycopy() { return _arrayof_jshort_arraycopy; }
   static address arrayof_jint_arraycopy()   { return _arrayof_jint_arraycopy; }
   static address arrayof_jlong_arraycopy()  { return _arrayof_jlong_arraycopy; }
-  static address arrayof_oop_arraycopy()    { return _arrayof_oop_arraycopy; }
+  static address arrayof_oop_arraycopy(bool dest_uninitialized = false) {
+    return dest_uninitialized ? _arrayof_oop_arraycopy_uninit : _arrayof_oop_arraycopy;
+  }
 
   static address arrayof_jbyte_disjoint_arraycopy()  { return _arrayof_jbyte_disjoint_arraycopy; }
   static address arrayof_jshort_disjoint_arraycopy() { return _arrayof_jshort_disjoint_arraycopy; }
   static address arrayof_jint_disjoint_arraycopy()   { return _arrayof_jint_disjoint_arraycopy; }
   static address arrayof_jlong_disjoint_arraycopy()  { return _arrayof_jlong_disjoint_arraycopy; }
-  static address arrayof_oop_disjoint_arraycopy()    { return _arrayof_oop_disjoint_arraycopy; }
+  static address arrayof_oop_disjoint_arraycopy(bool dest_uninitialized = false) {
+    return dest_uninitialized ? _arrayof_oop_disjoint_arraycopy_uninit : _arrayof_oop_disjoint_arraycopy;
+  }
 
-  static address checkcast_arraycopy()     { return _checkcast_arraycopy; }
+  static address checkcast_arraycopy(bool dest_uninitialized = false) {
+    return dest_uninitialized ? _checkcast_arraycopy_uninit : _checkcast_arraycopy;
+  }
   static address unsafe_arraycopy()        { return _unsafe_arraycopy; }
   static address generic_arraycopy()       { return _generic_arraycopy; }
 
@@ -352,17 +362,19 @@
   // Default versions of the above arraycopy functions for platforms which do
   // not have specialized versions
   //
-  static void jbyte_copy (jbyte*  src, jbyte*  dest, size_t count);
-  static void jshort_copy(jshort* src, jshort* dest, size_t count);
-  static void jint_copy  (jint*   src, jint*   dest, size_t count);
-  static void jlong_copy (jlong*  src, jlong*  dest, size_t count);
-  static void oop_copy   (oop*    src, oop*    dest, size_t count);
+  static void jbyte_copy     (jbyte*  src, jbyte*  dest, size_t count);
+  static void jshort_copy    (jshort* src, jshort* dest, size_t count);
+  static void jint_copy      (jint*   src, jint*   dest, size_t count);
+  static void jlong_copy     (jlong*  src, jlong*  dest, size_t count);
+  static void oop_copy       (oop*    src, oop*    dest, size_t count);
+  static void oop_copy_uninit(oop*    src, oop*    dest, size_t count);
 
-  static void arrayof_jbyte_copy (HeapWord* src, HeapWord* dest, size_t count);
-  static void arrayof_jshort_copy(HeapWord* src, HeapWord* dest, size_t count);
-  static void arrayof_jint_copy  (HeapWord* src, HeapWord* dest, size_t count);
-  static void arrayof_jlong_copy (HeapWord* src, HeapWord* dest, size_t count);
-  static void arrayof_oop_copy   (HeapWord* src, HeapWord* dest, size_t count);
+  static void arrayof_jbyte_copy     (HeapWord* src, HeapWord* dest, size_t count);
+  static void arrayof_jshort_copy    (HeapWord* src, HeapWord* dest, size_t count);
+  static void arrayof_jint_copy      (HeapWord* src, HeapWord* dest, size_t count);
+  static void arrayof_jlong_copy     (HeapWord* src, HeapWord* dest, size_t count);
+  static void arrayof_oop_copy       (HeapWord* src, HeapWord* dest, size_t count);
+  static void arrayof_oop_copy_uninit(HeapWord* src, HeapWord* dest, size_t count);
 };
 
 #endif // SHARE_VM_RUNTIME_STUBROUTINES_HPP
--- a/src/share/vm/utilities/macros.hpp	Thu Mar 03 21:02:56 2011 -0800
+++ b/src/share/vm/utilities/macros.hpp	Thu Mar 03 23:31:45 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -161,6 +161,14 @@
 #define NOT_WINDOWS(code) code
 #endif
 
+#ifdef _WIN64
+#define WIN64_ONLY(code) code
+#define NOT_WIN64(code)
+#else
+#define WIN64_ONLY(code)
+#define NOT_WIN64(code) code
+#endif
+
 #if defined(IA32) || defined(AMD64)
 #define X86
 #define X86_ONLY(code) code
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6942326/Test.java	Thu Mar 03 23:31:45 2011 -0800
@@ -0,0 +1,409 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6942326
+ * @summary x86 code in string_indexof() could read beyond reserved heap space
+ *
+ * @run main/othervm/timeout=300 -Xmx32m -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:CompileCommand=exclude,Test,main -XX:CompileCommand=exclude,Test,test_varsub_indexof -XX:CompileCommand=exclude,Test,test_varstr_indexof -XX:CompileCommand=exclude,Test,test_missub_indexof -XX:CompileCommand=exclude,Test,test_consub_indexof -XX:CompileCommand=exclude,Test,test_conmis_indexof -XX:CompileCommand=exclude,Test,test_subcon Test
+ *
+ */
+
+public class Test {
+
+    static String[] strings = new String[1024];
+    private static final int ITERATIONS = 100000;
+
+    public static void main(String[] args) {
+
+        long start_total = System.currentTimeMillis();
+
+        // search variable size substring in string (33 chars).
+        String a = " 1111111111111xx1111111111111xx11y"; // +1 to execute a.substring(1) first
+        String b =  "1111111111111xx1111111111111xx11y";
+        test_varsub_indexof(a, b);
+
+        // search variable size substring in string (32 chars).
+        a = " 1111111111111xx1111111111111xx1y";
+        b =  "1111111111111xx1111111111111xx1y";
+        test_varsub_indexof(a, b);
+
+        // search variable size substring in string (17 chars).
+        a = " 1111111111111xx1y";
+        b =  "1111111111111xx1y";
+        test_varsub_indexof(a, b);
+
+        // search variable size substring in string (16 chars).
+        a = " 111111111111xx1y";
+        b =  "111111111111xx1y";
+        test_varsub_indexof(a, b);
+
+        // search variable size substring in string (8 chars).
+        a = " 1111xx1y";
+        b =  "1111xx1y";
+        test_varsub_indexof(a, b);
+
+        // search variable size substring in string (7 chars).
+        a = " 111xx1y";
+        b =  "111xx1y";
+        test_varsub_indexof(a, b);
+
+
+
+        // search substring (17 chars) in variable size string.
+        a =                 "1111111111111xx1x";
+        b = " 1111111111111xx1111111111111xx1x"; // +1 to execute b.substring(1) first
+        test_varstr_indexof(a, b);
+
+        // search substring (16 chars) in variable size string.
+        a =                  "111111111111xx1x";
+        b = " 1111111111111xx1111111111111xx1x";
+        test_varstr_indexof(a, b);
+
+        // search substring (9 chars) in variable size string.
+        a =                         "11111xx1x";
+        b = " 1111111111111xx1111111111111xx1x";
+        test_varstr_indexof(a, b);
+
+        // search substring (8 chars) in variable size string.
+        a =                          "1111xx1x";
+        b = " 1111111111111xx1111111111111xx1x";
+        test_varstr_indexof(a, b);
+
+        // search substring (4 chars) in variable size string.
+        a =                              "xx1x";
+        b = " 1111111111111xx1111111111111xx1x";
+        test_varstr_indexof(a, b);
+
+        // search substring (3 chars) in variable size string.
+        a =                               "x1x";
+        b = " 1111111111111xx1111111111111xx1x";
+        test_varstr_indexof(a, b);
+
+        // search substring (2 chars) in variable size string.
+        a =                                "1y";
+        b = " 1111111111111xx1111111111111xx1y";
+        test_varstr_indexof(a, b);
+
+
+
+        // search non matching variable size substring in string (33 chars).
+        a = " 1111111111111xx1111111111111xx11z"; // +1 to execute a.substring(1) first
+        b =  "1111111111111xx1111111111111xx11y";
+        test_missub_indexof(a, b);
+
+        // search non matching variable size substring in string (32 chars).
+        a = " 1111111111111xx1111111111111xx1z";
+        b =  "1111111111111xx1111111111111xx1y";
+        test_missub_indexof(a, b);
+
+        // search non matching variable size substring in string (17 chars).
+        a = " 1111111111111xx1z";
+        b =  "1111111111111xx1y";
+        test_missub_indexof(a, b);
+
+        // search non matching variable size substring in string (16 chars).
+        a = " 111111111111xx1z";
+        b =  "111111111111xx1y";
+        test_missub_indexof(a, b);
+
+        // search non matching variable size substring in string (8 chars).
+        a = " 1111xx1z";
+        b =  "1111xx1y";
+        test_missub_indexof(a, b);
+
+        // search non matching variable size substring in string (7 chars).
+        a = " 111xx1z";
+        b =  "111xx1y";
+        test_missub_indexof(a, b);
+
+
+
+        // Testing constant substring search in variable size string.
+
+        // search constant substring (17 chars).
+        b = " 1111111111111xx1111111111111xx1x"; // +1 to execute b.substring(1) first
+        TestCon tc = new TestCon17();
+        test_consub_indexof(tc, b);
+
+        // search constant substring (16 chars).
+        b = " 1111111111111xx1111111111111xx1x";
+        tc = new TestCon16();
+        test_consub_indexof(tc, b);
+
+        // search constant substring (9 chars).
+        b = " 1111111111111xx1111111111111xx1x";
+        tc = new TestCon9();
+        test_consub_indexof(tc, b);
+
+        // search constant substring (8 chars).
+        b = " 1111111111111xx1111111111111xx1x";
+        tc = new TestCon8();
+        test_consub_indexof(tc, b);
+
+        // search constant substring (4 chars).
+        b = " 1111111111111xx1111111111111xx1x";
+        tc = new TestCon4();
+        test_consub_indexof(tc, b);
+
+        // search constant substring (3 chars).
+        b = " 1111111111111xx1111111111111xx1x";
+        tc = new TestCon3();
+        test_consub_indexof(tc, b);
+
+        // search constant substring (2 chars).
+        b = " 1111111111111xx1111111111111xx1y";
+        tc = new TestCon2();
+        test_consub_indexof(tc, b);
+
+        // search constant substring (1 chars).
+        b = " 1111111111111xx1111111111111xx1y";
+        tc = new TestCon1();
+        test_consub_indexof(tc, b);
+
+
+        // search non matching constant substring (17 chars).
+        b = " 1111111111111xx1111111111111xx1z"; // +1 to execute b.substring(1) first
+        tc = new TestCon17();
+        test_conmis_indexof(tc, b);
+
+        // search non matching constant substring (16 chars).
+        b = " 1111111111111xx1111111111111xx1z";
+        tc = new TestCon16();
+        test_conmis_indexof(tc, b);
+
+        // search non matching constant substring (9 chars).
+        b = " 1111111111111xx1111111111111xx1z";
+        tc = new TestCon9();
+        test_conmis_indexof(tc, b);
+
+        // search non matching constant substring (8 chars).
+        b = " 1111111111111xx1111111111111xx1z";
+        tc = new TestCon8();
+        test_conmis_indexof(tc, b);
+
+        // search non matching constant substring (4 chars).
+        b = " 1111111111111xx1111111111111xx1z";
+        tc = new TestCon4();
+        test_conmis_indexof(tc, b);
+
+        // search non matching constant substring (3 chars).
+        b = " 1111111111111xx1111111111111xx1z";
+        tc = new TestCon3();
+        test_conmis_indexof(tc, b);
+
+        // search non matching constant substring (2 chars).
+        b = " 1111111111111xx1111111111111xx1z";
+        tc = new TestCon2();
+        test_conmis_indexof(tc, b);
+
+        // search non matching constant substring (1 chars).
+        b = " 1111111111111xx1111111111111xx1z";
+        tc = new TestCon1();
+        test_conmis_indexof(tc, b);
+
+        long end_total = System.currentTimeMillis();
+        System.out.println("End run time: " + (end_total - start_total));
+
+    }
+
+    public static long test_init(String a, String b) {
+        for (int i = 0; i < 512; i++) {
+            strings[i * 2] = new String(b.toCharArray());
+            strings[i * 2 + 1] = new String(a.toCharArray());
+        }
+        System.out.print(a.length() + " " + b.length() + " ");
+        return System.currentTimeMillis();
+    }
+
+    public static void test_end(String a, String b, int v, int expected, long start) {
+        long end = System.currentTimeMillis();
+        int res = (v/ITERATIONS);
+        System.out.print(" " + res);
+        System.out.println(" time:" + (end - start));
+        if (res != expected) {
+            System.out.println("wrong indexOf result: " + res + ", expected " + expected);
+            System.out.println("\"" + b + "\".indexOf(\"" + a + "\")");
+            System.exit(97);
+        }
+    }
+
+    public static int test_subvar() {
+        int s = 0;
+        int v = 0;
+        for (int i = 0; i < ITERATIONS; i++) {
+            v += strings[s].indexOf(strings[s + 1]);
+            s += 2;
+            if (s >= strings.length) s = 0;
+        }
+        return v;
+    }
+
+    public static void test_varsub_indexof(String a, String b) {
+        System.out.println("Start search variable size substring in string (" + b.length() + " chars)");
+        long start_it = System.currentTimeMillis();
+        int limit = 1; // last a.length() == 1
+        while (a.length() > limit) {
+            a = a.substring(1);
+            long start = test_init(a, b);
+            int v = test_subvar();
+            test_end(a, b, v, (b.length() - a.length()), start);
+        }
+        long end_it = System.currentTimeMillis();
+        System.out.println("End search variable size substring in string (" + b.length() + " chars), time: " + (end_it - start_it));
+    }
+
+    public static void test_varstr_indexof(String a, String b) {
+        System.out.println("Start search substring (" + a.length() + " chars) in variable size string");
+        long start_it = System.currentTimeMillis();
+        int limit = a.length();
+        while (b.length() > limit) {
+            b = b.substring(1);
+            long start = test_init(a, b);
+            int v = test_subvar();
+            test_end(a, b, v, (b.length() - a.length()), start);
+        }
+        long end_it = System.currentTimeMillis();
+        System.out.println("End search substring (" + a.length() + " chars) in variable size string, time: " + (end_it - start_it));
+    }
+
+    public static void test_missub_indexof(String a, String b) {
+        System.out.println("Start search non matching variable size substring in string (" + b.length() + " chars)");
+        long start_it = System.currentTimeMillis();
+        int limit = 1; // last a.length() == 1
+        while (a.length() > limit) {
+            a = a.substring(1);
+            long start = test_init(a, b);
+            int v = test_subvar();
+            test_end(a, b, v, (-1), start);
+        }
+        long end_it = System.currentTimeMillis();
+        System.out.println("End search non matching variable size substring in string (" + b.length() + " chars), time: " + (end_it - start_it));
+    }
+
+
+
+    public static void test_consub_indexof(TestCon tc, String b) {
+        System.out.println("Start search constant substring (" + tc.constr().length() + " chars)");
+        long start_it = System.currentTimeMillis();
+        int limit = tc.constr().length();
+        while (b.length() > limit) {
+            b = b.substring(1);
+            long start = test_init(tc.constr(), b);
+            int v = test_subcon(tc);
+            test_end(tc.constr(), b, v, (b.length() - tc.constr().length()), start);
+        }
+        long end_it = System.currentTimeMillis();
+        System.out.println("End search constant substring (" + tc.constr().length() + " chars), time: " + (end_it - start_it));
+    }
+
+    public static void test_conmis_indexof(TestCon tc, String b) {
+        System.out.println("Start search non matching constant substring (" + tc.constr().length() + " chars)");
+        long start_it = System.currentTimeMillis();
+        int limit = tc.constr().length();
+        while (b.length() > limit) {
+            b = b.substring(1);
+            long start = test_init(tc.constr(), b);
+            int v = test_subcon(tc);
+            test_end(tc.constr(), b, v, (-1), start);
+        }
+        long end_it = System.currentTimeMillis();
+        System.out.println("End search non matching constant substring (" + tc.constr().length() + " chars), time: " + (end_it - start_it));
+    }
+
+    public static int test_subcon(TestCon tc) {
+        int s = 0;
+        int v = 0;
+        for (int i = 0; i < ITERATIONS; i++) {
+            v += tc.indexOf(strings[s]);
+            s += 2;
+            if (s >= strings.length) s = 0;
+        }
+        return v;
+    }
+
+    private interface TestCon {
+        public String constr();
+        public int indexOf(String str);
+    }
+
+    // search constant substring (17 chars).
+    private final static class TestCon17 implements TestCon {
+        private static final String constr = "1111111111111xx1x";
+        public String constr() { return constr; }
+        public int indexOf(String str) { return str.indexOf(constr); }
+    }
+
+    // search constant substring (16 chars).
+    private final static class TestCon16 implements TestCon {
+        private static final String constr = "111111111111xx1x";
+        public String constr() { return constr; }
+        public int indexOf(String str) { return str.indexOf(constr); }
+    }
+
+    // search constant substring (9 chars).
+    private final static class TestCon9 implements TestCon {
+        private static final String constr = "11111xx1x";
+        public String constr() { return constr; }
+        public int indexOf(String str) { return str.indexOf(constr); }
+    }
+
+    // search constant substring (8 chars).
+    private final static class TestCon8 implements TestCon {
+        private static final String constr = "1111xx1x";
+        public String constr() { return constr; }
+        public int indexOf(String str) { return str.indexOf(constr); }
+    }
+
+    // search constant substring (4 chars).
+    private final static class TestCon4 implements TestCon {
+        private static final String constr = "xx1x";
+        public String constr() { return constr; }
+        public int indexOf(String str) { return str.indexOf(constr); }
+    }
+
+    // search constant substring (3 chars).
+    private final static class TestCon3 implements TestCon {
+        private static final String constr = "x1x";
+        public String constr() { return constr; }
+        public int indexOf(String str) { return str.indexOf(constr); }
+    }
+
+    // search constant substring (2 chars).
+    private final static class TestCon2 implements TestCon {
+        private static final String constr = "1y";
+        public String constr() { return constr; }
+        public int indexOf(String str) { return str.indexOf(constr); }
+    }
+
+
+    // search constant substring (1 chars).
+    private final static class TestCon1 implements TestCon {
+        private static final String constr = "y";
+        public String constr() { return constr; }
+        public int indexOf(String str) { return str.indexOf(constr); }
+    }
+}
+