changeset 911:16314a31b961

Merge
author trims
date Thu, 13 Aug 2009 17:59:05 -0700
parents f753dffae23e 10d8c0d0d60e
children ac59d4e6dae5
files
diffstat 88 files changed, 2656 insertions(+), 922 deletions(-) [+]
line wrap: on
line diff
--- a/agent/make/saenv.sh	Thu Aug 13 17:47:43 2009 -0700
+++ b/agent/make/saenv.sh	Thu Aug 13 17:59:05 2009 -0700
@@ -48,6 +48,8 @@
      CPU=i386
    fi
 else
+   LD_AUDIT_32=$STARTDIR/../src/os/solaris/proc/`uname -p`/libsaproc_audit.so
+   export LD_AUDIT_32
    SA_LIBPATH=$STARTDIR/../src/os/solaris/proc/`uname -p`:$STARTDIR/solaris/`uname -p`
    OPTIONS="-Dsa.library.path=$SA_LIBPATH -Dsun.jvm.hotspot.debugger.useProcDebugger"
    CPU=sparc
--- a/agent/make/saenv64.sh	Thu Aug 13 17:47:43 2009 -0700
+++ b/agent/make/saenv64.sh	Thu Aug 13 17:59:05 2009 -0700
@@ -43,6 +43,8 @@
   fi
 fi
 
+LD_AUDIT_64=$STARTDIR/../src/os/solaris/proc/$CPU/libsaproc_audit.so
+export LD_AUDIT_64
 SA_LIBPATH=$STARTDIR/../src/os/solaris/proc/$CPU:$STARTDIR/solaris/$CPU
 
 OPTIONS="-Dsa.library.path=$SA_LIBPATH -Dsun.jvm.hotspot.debugger.useProcDebugger"
--- a/agent/src/os/solaris/proc/Makefile	Thu Aug 13 17:47:43 2009 -0700
+++ b/agent/src/os/solaris/proc/Makefile	Thu Aug 13 17:59:05 2009 -0700
@@ -56,24 +56,28 @@
 	@javah -classpath $(CLASSES_DIR) -jni sun.jvm.hotspot.debugger.proc.ProcDebuggerLocal
 	CC -G -KPIC -I${JAVA_HOME}/include -I${JAVA_HOME}/include/solaris saproc.cpp \
            -M mapfile -o $@/libsaproc.so -ldemangle
+	CC -o $@/libsaproc_audit.so -G -Kpic -z defs saproc_audit.cpp -lmapmalloc -ldl -lc 
 
 amd64:: javahomecheck
 	$(MKDIRS) $@
 	@javah -classpath $(CLASSES_DIR) -jni sun.jvm.hotspot.debugger.proc.ProcDebuggerLocal
 	CC -G -KPIC -xarch=amd64 -I${JAVA_HOME}/include -I${JAVA_HOME}/include/solaris saproc.cpp \
            -M mapfile -o $@/libsaproc.so -ldemangle
+	CC -xarch=amd64 -o $@/libsaproc_audit.so -G -Kpic -z defs saproc_audit.cpp -lmapmalloc -ldl -lc 
 
 sparc:: javahomecheck
 	$(MKDIRS) $@
 	@javah -classpath $(CLASSES_DIR) -jni sun.jvm.hotspot.debugger.proc.ProcDebuggerLocal
 	CC -G -KPIC -xarch=v8  -I${JAVA_HOME}/include -I${JAVA_HOME}/include/solaris saproc.cpp \
            -M mapfile -o $@/libsaproc.so -ldemangle
+	CC -xarch=v8 -o $@/libsaproc_audit.so -G -Kpic -z defs saproc_audit.cpp -lmapmalloc -ldl -lc 
 
 sparcv9:: javahomecheck
 	$(MKDIRS) $@
 	@javah -classpath $(CLASSES_DIR) -jni sun.jvm.hotspot.debugger.proc.ProcDebuggerLocal
 	CC -G -KPIC -xarch=v9 -I${JAVA_HOME}/include -I${JAVA_HOME}/include/solaris saproc.cpp \
            -M mapfile -o $@/libsaproc.so -ldemangle
+	CC -xarch=v9 -o $@/libsaproc_audit.so -G -Kpic -z defs saproc_audit.cpp -lmapmalloc -ldl -lc 
 
 clean::
 	$(RM) -rf sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal.h
--- a/agent/src/os/solaris/proc/mapfile	Thu Aug 13 17:47:43 2009 -0700
+++ b/agent/src/os/solaris/proc/mapfile	Thu Aug 13 17:59:05 2009 -0700
@@ -45,6 +45,8 @@
 		Java_sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal_resume0;
 		Java_sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal_suspend0;
 		Java_sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal_writeBytesToProcess0;
+                # this is needed by saproc_audit.c to redirect opens in libproc.so
+                libsaproc_open;
 local:
 	*;
 };
--- a/agent/src/os/solaris/proc/saproc.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/agent/src/os/solaris/proc/saproc.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -214,49 +214,58 @@
   }
 }
 
-static int find_file_hook(const char * name, int elf_checksum) {
-  init_alt_root();
+// This function is a complete substitute for the open system call
+// since it's also used to override open calls from libproc to
+// implement as a pathmap style facility for the SA.  If libproc
+// starts using other interfaces then this might have to extended to
+// cover other calls.
+extern "C" int libsaproc_open(const char * name, int oflag, ...) {
+  if (oflag == O_RDONLY) {
+    init_alt_root();
 
-  if (_libsaproc_debug) {
-    printf("libsaproc DEBUG: find_file_hook %s 0x%x\n", name, elf_checksum);
-  }
-
-  if (alt_root_len > 0) {
-    int fd = -1;
-    char alt_path[PATH_MAX+1];
-
-    strcpy(alt_path, alt_root);
-    strcat(alt_path, name);
-    fd = open(alt_path, O_RDONLY);
-    if (fd >= 0) {
-      if (_libsaproc_debug) {
-        printf("libsaproc DEBUG: find_file_hook substituted %s\n", alt_path);
-      }
-      return fd;
+    if (_libsaproc_debug) {
+      printf("libsaproc DEBUG: libsaproc_open %s\n", name);
     }
 
-    if (strrchr(name, '/')) {
+    if (alt_root_len > 0) {
+      int fd = -1;
+      char alt_path[PATH_MAX+1];
+
       strcpy(alt_path, alt_root);
-      strcat(alt_path, strrchr(name, '/'));
+      strcat(alt_path, name);
       fd = open(alt_path, O_RDONLY);
       if (fd >= 0) {
         if (_libsaproc_debug) {
-          printf("libsaproc DEBUG: find_file_hook substituted %s\n", alt_path);
+          printf("libsaproc DEBUG: libsaproc_open substituted %s\n", alt_path);
         }
         return fd;
       }
+
+      if (strrchr(name, '/')) {
+        strcpy(alt_path, alt_root);
+        strcat(alt_path, strrchr(name, '/'));
+        fd = open(alt_path, O_RDONLY);
+        if (fd >= 0) {
+          if (_libsaproc_debug) {
+            printf("libsaproc DEBUG: libsaproc_open substituted %s\n", alt_path);
+          }
+          return fd;
+        }
+      }
     }
   }
-  return -1;
+
+  {
+    mode_t mode;
+    va_list ap;
+    va_start(ap, oflag);
+    mode = va_arg(ap, mode_t);
+    va_end(ap);
+
+    return open(name, oflag, mode);
+  }
 }
 
-static int pathmap_open(const char* name) {
-  int fd = open(name, O_RDONLY);
-  if (fd < 0) {
-    fd = find_file_hook(name, 0);
-  }
-  return fd;
-}
 
 static void * pathmap_dlopen(const char * name, int mode) {
   init_alt_root();
@@ -608,7 +617,7 @@
   print_debug("looking for %s\n", classes_jsa);
 
   // open the classes[_g].jsa
-  int fd = pathmap_open(classes_jsa);
+  int fd = libsaproc_open(classes_jsa, O_RDONLY);
   if (fd < 0) {
     char errMsg[ERR_MSG_SIZE];
     sprintf(errMsg, "can't open shared archive file %s", classes_jsa);
@@ -1209,8 +1218,6 @@
   return res;
 }
 
-typedef int (*find_file_hook_t)(const char *, int elf_checksum);
-
 /*
  * Class:       sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal
  * Method:      initIDs
@@ -1230,16 +1237,6 @@
   if (libproc_handle == 0)
      THROW_NEW_DEBUGGER_EXCEPTION("can't load libproc.so, if you are using Solaris 5.7 or below, copy libproc.so from 5.8!");
 
-  // If possible, set shared object find file hook.
-  void (*set_hook)(find_file_hook_t) = (void(*)(find_file_hook_t))dlsym(libproc_handle, "Pset_find_file_hook");
-  if (set_hook) {
-    // we found find file hook symbol, set up our hook function.
-    set_hook(find_file_hook);
-  } else if (getenv(SA_ALTROOT)) {
-    printf("libsaproc WARNING: %s set, but can't set file hook. " \
-           "Did you use right version of libproc.so?\n", SA_ALTROOT);
-  }
-
   p_ps_prochandle_ID = env->GetFieldID(clazz, "p_ps_prochandle", "J");
   CHECK_EXCEPTION;
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/agent/src/os/solaris/proc/saproc_audit.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include <link.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <varargs.h>
+
+// This class sets up an interposer on open calls from libproc.so to
+// support a pathmap facility in the SA.
+
+static uintptr_t* libproc_cookie;
+static uintptr_t* libc_cookie;
+static uintptr_t* libsaproc_cookie;
+
+
+uint_t
+la_version(uint_t version)
+{
+  return (LAV_CURRENT);
+}
+
+
+uint_t
+la_objopen(Link_map * lmp, Lmid_t lmid, uintptr_t * cookie)
+{
+  if (strstr(lmp->l_name, "/libproc.so") != NULL) {
+    libproc_cookie = cookie;
+    return LA_FLG_BINDFROM;
+  }
+  if (strstr(lmp->l_name, "/libc.so") != NULL) {
+    libc_cookie = cookie;
+    return LA_FLG_BINDTO;
+  }
+  if (strstr(lmp->l_name, "/libsaproc.so") != NULL) {
+    libsaproc_cookie = cookie;
+    return LA_FLG_BINDTO | LA_FLG_BINDFROM;
+  }
+  return 0;
+}
+
+
+#if     defined(_LP64)
+uintptr_t
+la_symbind64(Elf64_Sym *symp, uint_t symndx, uintptr_t *refcook,
+             uintptr_t *defcook, uint_t *sb_flags, const char *sym_name)
+#else
+uintptr_t
+la_symbind32(Elf32_Sym *symp, uint_t symndx, uintptr_t *refcook,
+             uintptr_t *defcook, uint_t *sb_flags)
+#endif
+{
+#if     !defined(_LP64)
+  const char      *sym_name = (const char *)symp->st_name;
+#endif
+  if (strcmp(sym_name, "open") == 0 && refcook == libproc_cookie) {
+    // redirect all open calls from libproc.so through libsaproc_open which will
+    // try the alternate library locations first.
+    void* handle = dlmopen(LM_ID_BASE, "libsaproc.so", RTLD_NOLOAD);
+    if (handle == NULL) {
+      fprintf(stderr, "libsaproc_audit.so: didn't find libsaproc.so during linking\n");
+    } else {
+      uintptr_t libsaproc_open = (uintptr_t)dlsym(handle, "libsaproc_open");
+      if (libsaproc_open == 0) {
+        fprintf(stderr, "libsaproc_audit.so: didn't find libsaproc_open during linking\n");
+      } else {
+        return libsaproc_open;
+      }
+    }
+  }
+  return symp->st_value;
+}
--- a/agent/src/share/classes/sun/jvm/hotspot/code/DebugInfoReadStream.java	Thu Aug 13 17:47:43 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/DebugInfoReadStream.java	Thu Aug 13 17:59:05 2009 -0700
@@ -81,8 +81,4 @@
     Assert.that(false, "should not reach here");
     return null;
   }
-
-  public int readBCI() {
-    return readInt() + InvocationEntryBCI;
-  }
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/code/PCDesc.java	Thu Aug 13 17:47:43 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/PCDesc.java	Thu Aug 13 17:59:05 2009 -0700
@@ -82,6 +82,7 @@
       tty.print(" ");
       sd.getMethod().printValueOn(tty);
       tty.print("  @" + sd.getBCI());
+      tty.print("  reexecute=" + sd.getReexecute());
       tty.println();
     }
   }
--- a/agent/src/share/classes/sun/jvm/hotspot/code/ScopeDesc.java	Thu Aug 13 17:47:43 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/ScopeDesc.java	Thu Aug 13 17:59:05 2009 -0700
@@ -41,6 +41,7 @@
   private NMethod code;
   private Method  method;
   private int     bci;
+  private boolean reexecute;
   /** Decoding offsets */
   private int     decodeOffset;
   private int     senderDecodeOffset;
@@ -61,7 +62,7 @@
 
     senderDecodeOffset = stream.readInt();
     method = (Method) VM.getVM().getObjectHeap().newOop(stream.readOopHandle());
-    bci    = stream.readBCI();
+    setBCIAndReexecute(stream.readInt());
     // Decode offsets for body and sender
     localsDecodeOffset      = stream.readInt();
     expressionsDecodeOffset = stream.readInt();
@@ -78,7 +79,7 @@
 
     senderDecodeOffset = stream.readInt();
     method = (Method) VM.getVM().getObjectHeap().newOop(stream.readOopHandle());
-    bci    = stream.readBCI();
+    setBCIAndReexecute(stream.readInt());
     // Decode offsets for body and sender
     localsDecodeOffset      = stream.readInt();
     expressionsDecodeOffset = stream.readInt();
@@ -88,6 +89,7 @@
   public NMethod getNMethod() { return code; }
   public Method getMethod() { return method; }
   public int    getBCI()    { return bci;    }
+  public boolean getReexecute() {return reexecute;}
 
   /** Returns a List&lt;ScopeValue&gt; */
   public List getLocals() {
@@ -150,6 +152,7 @@
     tty.print("ScopeDesc for ");
     method.printValueOn(tty);
     tty.println(" @bci " + bci);
+    tty.println(" reexecute: " + reexecute);
   }
 
   // FIXME: add more accessors
@@ -157,6 +160,11 @@
   //--------------------------------------------------------------------------------
   // Internals only below this point
   //
+  private void setBCIAndReexecute(int combination) {
+    int InvocationEntryBci = VM.getVM().getInvocationEntryBCI();
+    bci = (combination >> 1) + InvocationEntryBci;
+    reexecute = (combination & 1)==1 ? true : false;
+  }
 
   private DebugInfoReadStream streamAt(int decodeOffset) {
     return new DebugInfoReadStream(code, decodeOffset, objects);
--- a/agent/src/share/classes/sun/jvm/hotspot/memory/CompactibleFreeListSpace.java	Thu Aug 13 17:47:43 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/memory/CompactibleFreeListSpace.java	Thu Aug 13 17:59:05 2009 -0700
@@ -176,19 +176,6 @@
 
       for (; cur.lessThan(limit);) {
          Address klassOop = cur.getAddressAt(addressSize);
-         // FIXME: need to do a better job here.
-         // can I use bitMap here?
-         if (klassOop == null) {
-            //Find the object size using Printezis bits and skip over
-            System.err.println("Finding object size using Printezis bits and skipping over...");
-            long size = collector().blockSizeUsingPrintezisBits(cur);
-            if (size == -1) {
-              System.err.println("Printezis bits not set...");
-              break;
-            }
-            cur = cur.addOffsetTo(adjustObjectSizeInBytes(size));
-         }
-
          if (FreeChunk.indicatesFreeChunk(cur)) {
             if (! cur.equals(regionStart)) {
                res.add(new MemRegion(regionStart, cur));
@@ -200,12 +187,21 @@
             }
             // note that fc.size() gives chunk size in heap words
             cur = cur.addOffsetTo(chunkSize * addressSize);
-            System.err.println("Free chunk in CMS heap, size="+chunkSize * addressSize);
             regionStart = cur;
          } else if (klassOop != null) {
             Oop obj = heap.newOop(cur.addOffsetToAsOopHandle(0));
             long objectSize = obj.getObjectSize();
             cur = cur.addOffsetTo(adjustObjectSizeInBytes(objectSize));
+         } else {
+            // FIXME: need to do a better job here.
+            // can I use bitMap here?
+            //Find the object size using Printezis bits and skip over
+            long size = collector().blockSizeUsingPrintezisBits(cur);
+            if (size == -1) {
+              System.err.println("Printezis bits not set...");
+              break;
+            }
+            cur = cur.addOffsetTo(adjustObjectSizeInBytes(size));
          }
       }
       return res;
--- a/agent/src/share/classes/sun/jvm/hotspot/memory/FreeChunk.java	Thu Aug 13 17:47:43 2009 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/memory/FreeChunk.java	Thu Aug 13 17:59:05 2009 -0700
@@ -63,7 +63,7 @@
 
    public long size() {
       if (VM.getVM().isCompressedOopsEnabled()) {
-        Mark mark = new Mark(sizeField.getValue(addr));
+        Mark mark = new Mark(addr.addOffsetTo(sizeField.getOffset()));
         return mark.getSize();
       } else {
         Address size = sizeField.getValue(addr);
@@ -83,7 +83,7 @@
 
    public boolean isFree() {
       if (VM.getVM().isCompressedOopsEnabled()) {
-        Mark mark = new Mark(sizeField.getValue(addr));
+        Mark mark = new Mark(addr.addOffsetTo(sizeField.getOffset()));
         return mark.isCmsFreeChunk();
       } else {
         Address prev = prevField.getValue(addr);
--- a/make/jprt.properties	Thu Aug 13 17:47:43 2009 -0700
+++ b/make/jprt.properties	Thu Aug 13 17:59:05 2009 -0700
@@ -306,7 +306,6 @@
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_ParallelGC, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_ParNewGC, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_CMS, \
-    ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_G1, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_ParOldGC, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_default, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_SerialGC, \
--- a/src/cpu/x86/vm/assembler_x86.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -8335,15 +8335,13 @@
   // Cannot assert, unverified entry point counts instructions (see .ad file)
   // vtableStubs also counts instructions in pd_code_size_limit.
   // Also do not verify_oop as this is called by verify_oop.
-  if (Universe::narrow_oop_base() == NULL) {
-    if (Universe::narrow_oop_shift() != 0) {
-      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-      shlq(r, LogMinObjAlignmentInBytes);
-    }
+  if (Universe::narrow_oop_shift() != 0) {
+    assert (Address::times_8 == LogMinObjAlignmentInBytes &&
+            Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
+    // Don't use Shift since it modifies flags.
+    leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
   } else {
-      assert (Address::times_8 == LogMinObjAlignmentInBytes &&
-              Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
-    leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
+    assert (Universe::narrow_oop_base() == NULL, "sanity");
   }
 }
 
@@ -8358,6 +8356,7 @@
             Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
     leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
   } else if (dst != src) {
+    assert (Universe::narrow_oop_base() == NULL, "sanity");
     movq(dst, src);
   }
 }
--- a/src/os/solaris/vm/os_solaris.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/os/solaris/vm/os_solaris.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -1643,7 +1643,8 @@
 inline hrtime_t getTimeNanos() {
   if (VM_Version::supports_cx8()) {
     const hrtime_t now = gethrtime();
-    const hrtime_t prev = max_hrtime;
+    // Use atomic long load since 32-bit x86 uses 2 registers to keep long.
+    const hrtime_t prev = Atomic::load((volatile jlong*)&max_hrtime);
     if (now <= prev)  return prev;   // same or retrograde time;
     const hrtime_t obsv = Atomic::cmpxchg(now, (volatile jlong*)&max_hrtime, prev);
     assert(obsv >= prev, "invariant");   // Monotonicity
--- a/src/os/windows/vm/os_windows.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/os/windows/vm/os_windows.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -616,12 +616,13 @@
 }
 
 julong os::win32::available_memory() {
-  // FIXME: GlobalMemoryStatus() may return incorrect value if total memory
-  // is larger than 4GB
-  MEMORYSTATUS ms;
-  GlobalMemoryStatus(&ms);
-
-  return (julong)ms.dwAvailPhys;
+  // Use GlobalMemoryStatusEx() because GlobalMemoryStatus() may return incorrect
+  // value if total memory is larger than 4GB
+  MEMORYSTATUSEX ms;
+  ms.dwLength = sizeof(ms);
+  GlobalMemoryStatusEx(&ms);
+
+  return (julong)ms.ullAvailPhys;
 }
 
 julong os::physical_memory() {
@@ -1579,16 +1580,17 @@
   st->print("Memory:");
   st->print(" %dk page", os::vm_page_size()>>10);
 
-  // FIXME: GlobalMemoryStatus() may return incorrect value if total memory
-  // is larger than 4GB
-  MEMORYSTATUS ms;
-  GlobalMemoryStatus(&ms);
+  // Use GlobalMemoryStatusEx() because GlobalMemoryStatus() may return incorrect
+  // value if total memory is larger than 4GB
+  MEMORYSTATUSEX ms;
+  ms.dwLength = sizeof(ms);
+  GlobalMemoryStatusEx(&ms);
 
   st->print(", physical %uk", os::physical_memory() >> 10);
   st->print("(%uk free)", os::available_memory() >> 10);
 
-  st->print(", swap %uk", ms.dwTotalPageFile >> 10);
-  st->print("(%uk free)", ms.dwAvailPageFile >> 10);
+  st->print(", swap %uk", ms.ullTotalPageFile >> 10);
+  st->print("(%uk free)", ms.ullAvailPageFile >> 10);
   st->cr();
 }
 
@@ -3135,11 +3137,13 @@
   _processor_level = si.wProcessorLevel;
   _processor_count = si.dwNumberOfProcessors;
 
-  MEMORYSTATUS ms;
+  MEMORYSTATUSEX ms;
+  ms.dwLength = sizeof(ms);
+
   // also returns dwAvailPhys (free physical memory bytes), dwTotalVirtual, dwAvailVirtual,
   // dwMemoryLoad (% of memory in use)
-  GlobalMemoryStatus(&ms);
-  _physical_memory = ms.dwTotalPhys;
+  GlobalMemoryStatusEx(&ms);
+  _physical_memory = ms.ullTotalPhys;
 
   OSVERSIONINFO oi;
   oi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
--- a/src/os_cpu/solaris_sparc/vm/atomic_solaris_sparc.inline.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/os_cpu/solaris_sparc/vm/atomic_solaris_sparc.inline.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -46,6 +46,8 @@
 inline void Atomic::dec_ptr(volatile intptr_t* dest) { (void)add_ptr(-1, dest); }
 inline void Atomic::dec_ptr(volatile void*     dest) { (void)add_ptr(-1, dest); }
 
+inline jlong Atomic::load(volatile jlong* src) { return *src; }
+
 #ifdef _GNU_SOURCE
 
 inline jint     Atomic::add    (jint     add_value, volatile jint*     dest) {
--- a/src/os_cpu/solaris_x86/vm/atomic_solaris_x86.inline.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/os_cpu/solaris_x86/vm/atomic_solaris_x86.inline.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -99,6 +99,8 @@
   return (void*)_Atomic_cmpxchg_long((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value, (int) os::is_MP());
 }
 
+inline jlong Atomic::load(volatile jlong* src) { return *src; }
+
 #else // !AMD64
 
 inline intptr_t Atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest) {
@@ -131,6 +133,15 @@
 inline void*    Atomic::cmpxchg_ptr(void*    exchange_value, volatile void*     dest, void*    compare_value) {
   return (void*)cmpxchg((jint)exchange_value, (volatile jint*)dest, (jint)compare_value);
 }
+
+extern "C" void _Atomic_load_long(volatile jlong* src, volatile jlong* dst);
+
+inline jlong Atomic::load(volatile jlong* src) {
+  volatile jlong dest;
+  _Atomic_load_long(src, &dest);
+  return dest;
+}
+
 #endif // AMD64
 
 #ifdef _GNU_SOURCE
--- a/src/os_cpu/solaris_x86/vm/solaris_x86_32.il	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/os_cpu/solaris_x86/vm/solaris_x86_32.il	Thu Aug 13 17:59:05 2009 -0700
@@ -97,6 +97,15 @@
       popl     %ebx
       .end
 
+  // Support for void Atomic::load(volatile jlong* src, volatile jlong* dest).
+      .inline _Atomic_load_long,2
+      movl     0(%esp), %eax   // src
+      fildll    (%eax)
+      movl     4(%esp), %eax   // dest
+      fistpll   (%eax)
+      .end
+
+
   // Support for OrderAccess::acquire()
       .inline _OrderAccess_acquire,0
       movl     0(%esp), %eax
--- a/src/share/vm/c1/c1_IR.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/c1/c1_IR.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -208,6 +208,15 @@
   return scope->caller_bci();
 }
 
+bool IRScopeDebugInfo::should_reexecute() {
+  ciMethod* cur_method = scope()->method();
+  int       cur_bci    = bci();
+  if (cur_method != NULL && cur_bci != SynchronizationEntryBCI) {
+    Bytecodes::Code code = cur_method->java_code_at_bci(cur_bci);
+    return Interpreter::bytecode_should_reexecute(code);
+  } else
+    return false;
+}
 
 
 // Implementation of CodeEmitInfo
@@ -253,7 +262,7 @@
 void CodeEmitInfo::record_debug_info(DebugInformationRecorder* recorder, int pc_offset) {
   // record the safepoint before recording the debug info for enclosing scopes
   recorder->add_safepoint(pc_offset, _oop_map->deep_copy());
-  _scope_debug_info->record_debug_info(recorder, pc_offset);
+  _scope_debug_info->record_debug_info(recorder, pc_offset, true/*topmost*/);
   recorder->end_safepoint(pc_offset);
 }
 
--- a/src/share/vm/c1/c1_IR.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/c1/c1_IR.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -239,15 +239,20 @@
   GrowableArray<MonitorValue*>* monitors()    { return _monitors;    }
   IRScopeDebugInfo*             caller()      { return _caller;      }
 
-  void record_debug_info(DebugInformationRecorder* recorder, int pc_offset) {
+  //Whether we should reexecute this bytecode for deopt
+  bool should_reexecute();
+
+  void record_debug_info(DebugInformationRecorder* recorder, int pc_offset, bool topmost) {
     if (caller() != NULL) {
       // Order is significant:  Must record caller first.
-      caller()->record_debug_info(recorder, pc_offset);
+      caller()->record_debug_info(recorder, pc_offset, false/*topmost*/);
     }
     DebugToken* locvals = recorder->create_scope_values(locals());
     DebugToken* expvals = recorder->create_scope_values(expressions());
     DebugToken* monvals = recorder->create_monitor_values(monitors());
-    recorder->describe_scope(pc_offset, scope()->method(), bci(), locvals, expvals, monvals);
+    // reexecute allowed only for the topmost frame
+    bool      reexecute = topmost ? should_reexecute() : false;
+    recorder->describe_scope(pc_offset, scope()->method(), bci(), reexecute, locvals, expvals, monvals);
   }
 };
 
--- a/src/share/vm/c1/c1_LIRAssembler.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/c1/c1_LIRAssembler.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -379,7 +379,8 @@
     ValueStack* s = nth_oldest(vstack, n, s_bci);
     if (s == NULL)  break;
     IRScope* scope = s->scope();
-    debug_info->describe_scope(pc_offset, scope->method(), s_bci);
+    //Always pass false for reexecute since these ScopeDescs are never used for deopt
+    debug_info->describe_scope(pc_offset, scope->method(), s_bci, false/*reexecute*/);
   }
 
   debug_info->end_non_safepoint(pc_offset);
--- a/src/share/vm/ci/ciObjectFactory.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/ci/ciObjectFactory.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -219,24 +219,27 @@
   ASSERT_IN_VM;
 
 #ifdef ASSERT
-  oop last = NULL;
-  for (int j = 0; j< _ci_objects->length(); j++) {
-    oop o = _ci_objects->at(j)->get_oop();
-    assert(last < o, "out of order");
-    last = o;
+  if (CIObjectFactoryVerify) {
+    oop last = NULL;
+    for (int j = 0; j< _ci_objects->length(); j++) {
+      oop o = _ci_objects->at(j)->get_oop();
+      assert(last < o, "out of order");
+      last = o;
+    }
   }
 #endif // ASSERT
   int len = _ci_objects->length();
   int index = find(key, _ci_objects);
 #ifdef ASSERT
-  for (int i=0; i<_ci_objects->length(); i++) {
-    if (_ci_objects->at(i)->get_oop() == key) {
-      assert(index == i, " bad lookup");
+  if (CIObjectFactoryVerify) {
+    for (int i=0; i<_ci_objects->length(); i++) {
+      if (_ci_objects->at(i)->get_oop() == key) {
+        assert(index == i, " bad lookup");
+      }
     }
   }
 #endif
   if (!is_found_at(index, key, _ci_objects)) {
-
     // Check in the non-perm area before putting it in the list.
     NonPermObject* &bucket = find_non_perm(key);
     if (bucket != NULL) {
@@ -539,11 +542,13 @@
     objects->at_put(index, obj);
   }
 #ifdef ASSERT
-  oop last = NULL;
-  for (int j = 0; j< objects->length(); j++) {
-    oop o = objects->at(j)->get_oop();
-    assert(last < o, "out of order");
-    last = o;
+  if (CIObjectFactoryVerify) {
+    oop last = NULL;
+    for (int j = 0; j< objects->length(); j++) {
+      oop o = objects->at(j)->get_oop();
+      assert(last < o, "out of order");
+      last = o;
+    }
   }
 #endif // ASSERT
 }
--- a/src/share/vm/classfile/classFileParser.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/classfile/classFileParser.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -547,7 +547,6 @@
                                                  int length,
                                                  Handle class_loader,
                                                  Handle protection_domain,
-                                                 PerfTraceTime* vmtimer,
                                                  symbolHandle class_name,
                                                  TRAPS) {
   ClassFileStream* cfs = stream();
@@ -575,13 +574,11 @@
       guarantee_property(unresolved_klass->byte_at(0) != JVM_SIGNATURE_ARRAY,
                          "Bad interface name in class file %s", CHECK_(nullHandle));
 
-      vmtimer->suspend();  // do not count recursive loading twice
       // Call resolve_super so classcircularity is checked
       klassOop k = SystemDictionary::resolve_super_or_fail(class_name,
                     unresolved_klass, class_loader, protection_domain,
                     false, CHECK_(nullHandle));
       interf = KlassHandle(THREAD, k);
-      vmtimer->resume();
 
       if (LinkWellKnownClasses)  // my super type is well known to me
         cp->klass_at_put(interface_index, interf()); // eagerly resolve
@@ -2558,7 +2555,15 @@
 
   ClassFileStream* cfs = stream();
   // Timing
-  PerfTraceTime vmtimer(ClassLoader::perf_accumulated_time());
+  assert(THREAD->is_Java_thread(), "must be a JavaThread");
+  JavaThread* jt = (JavaThread*) THREAD;
+
+  PerfClassTraceTime ctimer(ClassLoader::perf_class_parse_time(),
+                            ClassLoader::perf_class_parse_selftime(),
+                            NULL,
+                            jt->get_thread_stat()->perf_recursion_counts_addr(),
+                            jt->get_thread_stat()->perf_timers_addr(),
+                            PerfClassTraceTime::PARSE_CLASS);
 
   _has_finalizer = _has_empty_finalizer = _has_vanilla_constructor = false;
 
@@ -2738,7 +2743,7 @@
     if (itfs_len == 0) {
       local_interfaces = objArrayHandle(THREAD, Universe::the_empty_system_obj_array());
     } else {
-      local_interfaces = parse_interfaces(cp, itfs_len, class_loader, protection_domain, &vmtimer, _class_name, CHECK_(nullHandle));
+      local_interfaces = parse_interfaces(cp, itfs_len, class_loader, protection_domain, _class_name, CHECK_(nullHandle));
     }
 
     // Fields (offsets are filled in later)
@@ -2782,6 +2787,7 @@
                                                            protection_domain,
                                                            true,
                                                            CHECK_(nullHandle));
+
       KlassHandle kh (THREAD, k);
       super_klass = instanceKlassHandle(THREAD, kh());
       if (LinkWellKnownClasses)  // my super class is well known to me
--- a/src/share/vm/classfile/classFileParser.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/classfile/classFileParser.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -61,7 +61,6 @@
                                   int length,
                                   Handle class_loader,
                                   Handle protection_domain,
-                                  PerfTraceTime* vmtimer,
                                   symbolHandle class_name,
                                   TRAPS);
 
--- a/src/share/vm/classfile/classLoader.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/classfile/classLoader.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -48,9 +48,26 @@
 PerfCounter*    ClassLoader::_perf_accumulated_time = NULL;
 PerfCounter*    ClassLoader::_perf_classes_inited = NULL;
 PerfCounter*    ClassLoader::_perf_class_init_time = NULL;
+PerfCounter*    ClassLoader::_perf_class_init_selftime = NULL;
+PerfCounter*    ClassLoader::_perf_classes_verified = NULL;
 PerfCounter*    ClassLoader::_perf_class_verify_time = NULL;
+PerfCounter*    ClassLoader::_perf_class_verify_selftime = NULL;
 PerfCounter*    ClassLoader::_perf_classes_linked = NULL;
 PerfCounter*    ClassLoader::_perf_class_link_time = NULL;
+PerfCounter*    ClassLoader::_perf_class_link_selftime = NULL;
+PerfCounter*    ClassLoader::_perf_class_parse_time = NULL;
+PerfCounter*    ClassLoader::_perf_class_parse_selftime = NULL;
+PerfCounter*    ClassLoader::_perf_sys_class_lookup_time = NULL;
+PerfCounter*    ClassLoader::_perf_shared_classload_time = NULL;
+PerfCounter*    ClassLoader::_perf_sys_classload_time = NULL;
+PerfCounter*    ClassLoader::_perf_app_classload_time = NULL;
+PerfCounter*    ClassLoader::_perf_app_classload_selftime = NULL;
+PerfCounter*    ClassLoader::_perf_app_classload_count = NULL;
+PerfCounter*    ClassLoader::_perf_define_appclasses = NULL;
+PerfCounter*    ClassLoader::_perf_define_appclass_time = NULL;
+PerfCounter*    ClassLoader::_perf_define_appclass_selftime = NULL;
+PerfCounter*    ClassLoader::_perf_app_classfile_bytes_read = NULL;
+PerfCounter*    ClassLoader::_perf_sys_classfile_bytes_read = NULL;
 PerfCounter*    ClassLoader::_sync_systemLoaderLockContentionRate = NULL;
 PerfCounter*    ClassLoader::_sync_nonSystemLoaderLockContentionRate = NULL;
 PerfCounter*    ClassLoader::_sync_JVMFindLoadedClassLockFreeCounter = NULL;
@@ -152,6 +169,9 @@
       hpi::close(file_handle);
       // construct ClassFileStream
       if (num_read == (size_t)st.st_size) {
+        if (UsePerfData) {
+          ClassLoader::perf_sys_classfile_bytes_read()->inc(num_read);
+        }
         return new ClassFileStream(buffer, st.st_size, _dir);    // Resource allocated
       }
     }
@@ -198,6 +218,9 @@
       buffer     = NEW_RESOURCE_ARRAY(u1, filesize);
       if (!(*ReadEntry)(_zip, entry, buffer, filename)) return NULL;
   }
+  if (UsePerfData) {
+    ClassLoader::perf_sys_classfile_bytes_read()->inc(filesize);
+  }
   // return result
   return new ClassFileStream(buffer, filesize, _zip_name);    // Resource allocated
 }
@@ -825,7 +848,9 @@
   ClassFileStream* stream = NULL;
   int classpath_index = 0;
   {
-    PerfTraceTime vmtimer(perf_accumulated_time());
+    PerfClassTraceTime vmtimer(perf_sys_class_lookup_time(),
+                               ((JavaThread*) THREAD)->get_thread_stat()->perf_timers_addr(),
+                               PerfClassTraceTime::CLASS_LOAD);
     ClassPathEntry* e = _first_entry;
     while (e != NULL) {
       stream = e->open_stream(name);
@@ -890,11 +915,29 @@
     // jvmstat performance counters
     NEWPERFTICKCOUNTER(_perf_accumulated_time, SUN_CLS, "time");
     NEWPERFTICKCOUNTER(_perf_class_init_time, SUN_CLS, "classInitTime");
+    NEWPERFTICKCOUNTER(_perf_class_init_selftime, SUN_CLS, "classInitTime.self");
     NEWPERFTICKCOUNTER(_perf_class_verify_time, SUN_CLS, "classVerifyTime");
+    NEWPERFTICKCOUNTER(_perf_class_verify_selftime, SUN_CLS, "classVerifyTime.self");
     NEWPERFTICKCOUNTER(_perf_class_link_time, SUN_CLS, "classLinkedTime");
-
+    NEWPERFTICKCOUNTER(_perf_class_link_selftime, SUN_CLS, "classLinkedTime.self");
     NEWPERFEVENTCOUNTER(_perf_classes_inited, SUN_CLS, "initializedClasses");
     NEWPERFEVENTCOUNTER(_perf_classes_linked, SUN_CLS, "linkedClasses");
+    NEWPERFEVENTCOUNTER(_perf_classes_verified, SUN_CLS, "verifiedClasses");
+
+    NEWPERFTICKCOUNTER(_perf_class_parse_time, SUN_CLS, "parseClassTime");
+    NEWPERFTICKCOUNTER(_perf_class_parse_selftime, SUN_CLS, "parseClassTime.self");
+    NEWPERFTICKCOUNTER(_perf_sys_class_lookup_time, SUN_CLS, "lookupSysClassTime");
+    NEWPERFTICKCOUNTER(_perf_shared_classload_time, SUN_CLS, "sharedClassLoadTime");
+    NEWPERFTICKCOUNTER(_perf_sys_classload_time, SUN_CLS, "sysClassLoadTime");
+    NEWPERFTICKCOUNTER(_perf_app_classload_time, SUN_CLS, "appClassLoadTime");
+    NEWPERFTICKCOUNTER(_perf_app_classload_selftime, SUN_CLS, "appClassLoadTime.self");
+    NEWPERFEVENTCOUNTER(_perf_app_classload_count, SUN_CLS, "appClassLoadCount");
+    NEWPERFTICKCOUNTER(_perf_define_appclasses, SUN_CLS, "defineAppClasses");
+    NEWPERFTICKCOUNTER(_perf_define_appclass_time, SUN_CLS, "defineAppClassTime");
+    NEWPERFTICKCOUNTER(_perf_define_appclass_selftime, SUN_CLS, "defineAppClassTime.self");
+    NEWPERFBYTECOUNTER(_perf_app_classfile_bytes_read, SUN_CLS, "appClassBytes");
+    NEWPERFBYTECOUNTER(_perf_sys_classfile_bytes_read, SUN_CLS, "sysClassBytes");
+
 
     // The following performance counters are added for measuring the impact
     // of the bug fix of 6365597. They are mainly focused on finding out
--- a/src/share/vm/classfile/classLoader.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/classfile/classLoader.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -149,9 +149,26 @@
   static PerfCounter* _perf_accumulated_time;
   static PerfCounter* _perf_classes_inited;
   static PerfCounter* _perf_class_init_time;
+  static PerfCounter* _perf_class_init_selftime;
+  static PerfCounter* _perf_classes_verified;
   static PerfCounter* _perf_class_verify_time;
+  static PerfCounter* _perf_class_verify_selftime;
   static PerfCounter* _perf_classes_linked;
   static PerfCounter* _perf_class_link_time;
+  static PerfCounter* _perf_class_link_selftime;
+  static PerfCounter* _perf_class_parse_time;
+  static PerfCounter* _perf_class_parse_selftime;
+  static PerfCounter* _perf_sys_class_lookup_time;
+  static PerfCounter* _perf_shared_classload_time;
+  static PerfCounter* _perf_sys_classload_time;
+  static PerfCounter* _perf_app_classload_time;
+  static PerfCounter* _perf_app_classload_selftime;
+  static PerfCounter* _perf_app_classload_count;
+  static PerfCounter* _perf_define_appclasses;
+  static PerfCounter* _perf_define_appclass_time;
+  static PerfCounter* _perf_define_appclass_selftime;
+  static PerfCounter* _perf_app_classfile_bytes_read;
+  static PerfCounter* _perf_sys_classfile_bytes_read;
 
   static PerfCounter* _sync_systemLoaderLockContentionRate;
   static PerfCounter* _sync_nonSystemLoaderLockContentionRate;
@@ -196,12 +213,29 @@
   static void print_bootclasspath();
 
   // Timing
-  static PerfCounter* perf_accumulated_time()  { return _perf_accumulated_time; }
-  static PerfCounter* perf_classes_inited()    { return _perf_classes_inited; }
-  static PerfCounter* perf_class_init_time()   { return _perf_class_init_time; }
-  static PerfCounter* perf_class_verify_time() { return _perf_class_verify_time; }
-  static PerfCounter* perf_classes_linked()    { return _perf_classes_linked; }
-  static PerfCounter* perf_class_link_time() { return _perf_class_link_time; }
+  static PerfCounter* perf_accumulated_time()         { return _perf_accumulated_time; }
+  static PerfCounter* perf_classes_inited()           { return _perf_classes_inited; }
+  static PerfCounter* perf_class_init_time()          { return _perf_class_init_time; }
+  static PerfCounter* perf_class_init_selftime()      { return _perf_class_init_selftime; }
+  static PerfCounter* perf_classes_verified()         { return _perf_classes_verified; }
+  static PerfCounter* perf_class_verify_time()        { return _perf_class_verify_time; }
+  static PerfCounter* perf_class_verify_selftime()    { return _perf_class_verify_selftime; }
+  static PerfCounter* perf_classes_linked()           { return _perf_classes_linked; }
+  static PerfCounter* perf_class_link_time()          { return _perf_class_link_time; }
+  static PerfCounter* perf_class_link_selftime()      { return _perf_class_link_selftime; }
+  static PerfCounter* perf_class_parse_time()         { return _perf_class_parse_time; }
+  static PerfCounter* perf_class_parse_selftime()     { return _perf_class_parse_selftime; }
+  static PerfCounter* perf_sys_class_lookup_time()    { return _perf_sys_class_lookup_time; }
+  static PerfCounter* perf_shared_classload_time()    { return _perf_shared_classload_time; }
+  static PerfCounter* perf_sys_classload_time()       { return _perf_sys_classload_time; }
+  static PerfCounter* perf_app_classload_time()       { return _perf_app_classload_time; }
+  static PerfCounter* perf_app_classload_selftime()   { return _perf_app_classload_selftime; }
+  static PerfCounter* perf_app_classload_count()      { return _perf_app_classload_count; }
+  static PerfCounter* perf_define_appclasses()        { return _perf_define_appclasses; }
+  static PerfCounter* perf_define_appclass_time()     { return _perf_define_appclass_time; }
+  static PerfCounter* perf_define_appclass_selftime() { return _perf_define_appclass_selftime; }
+  static PerfCounter* perf_app_classfile_bytes_read() { return _perf_app_classfile_bytes_read; }
+  static PerfCounter* perf_sys_classfile_bytes_read() { return _perf_sys_classfile_bytes_read; }
 
   // Record how often system loader lock object is contended
   static PerfCounter* sync_systemLoaderLockContentionRate() {
@@ -307,3 +341,118 @@
   static int  compile_the_world_counter() { return _compile_the_world_counter; }
 #endif //PRODUCT
 };
+
+// PerfClassTraceTime is used to measure time for class loading related events.
+// This class tracks cumulative time and exclusive time for specific event types.
+// During the execution of one event, other event types (e.g. class loading and
+// resolution) as well as recursive calls of the same event type could happen.
+// Only one elapsed timer (cumulative) and one thread-local self timer (exclusive)
+// (i.e. only one event type) are active at a time even multiple PerfClassTraceTime
+// instances have been created as multiple events are happening.
+class PerfClassTraceTime {
+  public:
+    enum {
+       CLASS_LOAD   = 0,
+       PARSE_CLASS  = 1,
+       CLASS_LINK   = 2,
+       CLASS_VERIFY = 3,
+       CLASS_CLINIT = 4,
+       DEFINE_CLASS = 5,
+       EVENT_TYPE_COUNT = 6
+    };
+  protected:
+    // _t tracks time from initialization to destruction of this timer instance
+    // including time for all other event types, and recursive calls of this type.
+    // When a timer is called recursively, the elapsedTimer _t would not be used.
+    elapsedTimer     _t;
+    PerfLongCounter* _timep;
+    PerfLongCounter* _selftimep;
+    PerfLongCounter* _eventp;
+    // pointer to thread-local recursion counter and timer array
+    // The thread_local timers track cumulative time for specific event types
+    // exclusive of time for other event types, but including recursive calls
+    // of the same type.
+    int*             _recursion_counters;
+    elapsedTimer*    _timers;
+    int              _event_type;
+    int              _prev_active_event;
+
+  public:
+
+    inline PerfClassTraceTime(PerfLongCounter* timep,     /* counter incremented with inclusive time */
+                              PerfLongCounter* selftimep, /* counter incremented with exclusive time */
+                              PerfLongCounter* eventp,    /* event counter */
+                              int* recursion_counters,    /* thread-local recursion counter array */
+                              elapsedTimer* timers,       /* thread-local timer array */
+                              int type                    /* event type */ ) :
+        _timep(timep), _selftimep(selftimep), _eventp(eventp), _recursion_counters(recursion_counters), _timers(timers), _event_type(type) {
+      initialize();
+    }
+
+    inline PerfClassTraceTime(PerfLongCounter* timep,     /* counter incremented with inclusive time */
+                              elapsedTimer* timers,       /* thread-local timer array */
+                              int type                    /* event type */ ) :
+        _timep(timep), _selftimep(NULL), _eventp(NULL), _recursion_counters(NULL), _timers(timers), _event_type(type) {
+      initialize();
+    }
+
+    void initialize() {
+      if (!UsePerfData) return;
+
+      if (_eventp != NULL) {
+        // increment the event counter
+        _eventp->inc();
+      }
+
+      // stop the current active thread-local timer to measure inclusive time
+      _prev_active_event = -1;
+      for (int i=0; i < EVENT_TYPE_COUNT; i++) {
+         if (_timers[i].is_active()) {
+           assert(_prev_active_event == -1, "should have only one active timer");
+           _prev_active_event = i;
+           _timers[i].stop();
+         }
+      }
+
+      if (_recursion_counters == NULL || (_recursion_counters[_event_type])++ == 0) {
+        // start the inclusive timer if not recursively called
+        _t.start();
+      }
+
+      // start thread-local timer of the given event type
+      if (!_timers[_event_type].is_active()) {
+        _timers[_event_type].start();
+      }
+    }
+
+    inline void suspend() { _t.stop(); _timers[_event_type].stop(); }
+    inline void resume()  { _t.start(); _timers[_event_type].start(); }
+
+    ~PerfClassTraceTime() {
+      if (!UsePerfData) return;
+
+      // stop the thread-local timer as the event completes
+      // and resume the thread-local timer of the event next on the stack
+      _timers[_event_type].stop();
+      jlong selftime = _timers[_event_type].ticks();
+
+      if (_prev_active_event >= 0) {
+        _timers[_prev_active_event].start();
+      }
+
+      if (_recursion_counters != NULL && --(_recursion_counters[_event_type]) > 0) return;
+
+      // increment the counters only on the leaf call
+      _t.stop();
+      _timep->inc(_t.ticks());
+      if (_selftimep != NULL) {
+        _selftimep->inc(selftime);
+      }
+      // add all class loading related event selftime to the accumulated time counter
+      ClassLoader::perf_accumulated_time()->inc(selftime);
+
+      // reset the timer
+      _timers[_event_type].reset();
+    }
+};
+
--- a/src/share/vm/classfile/javaClasses.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/classfile/javaClasses.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -1229,10 +1229,13 @@
 
     // Compiled java method case.
     if (decode_offset != 0) {
+      bool dummy_reexecute = false;
       DebugInfoReadStream stream(nm, decode_offset);
       decode_offset = stream.read_int();
       method = (methodOop)nm->oop_at(stream.read_int());
-      bci = stream.read_bci();
+      //fill_in_stack_trace does not need the reexecute information which is designed
+      //for the deopt to reexecute
+      bci = stream.read_bci_and_reexecute(dummy_reexecute);
     } else {
       if (fr.is_first_frame()) break;
       address pc = fr.pc();
--- a/src/share/vm/classfile/systemDictionary.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/classfile/systemDictionary.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -1306,13 +1306,18 @@
 instanceKlassHandle SystemDictionary::load_instance_class(symbolHandle class_name, Handle class_loader, TRAPS) {
   instanceKlassHandle nh = instanceKlassHandle(); // null Handle
   if (class_loader.is_null()) {
+
     // Search the shared system dictionary for classes preloaded into the
     // shared spaces.
     instanceKlassHandle k;
-    k = load_shared_class(class_name, class_loader, THREAD);
+    {
+      PerfTraceTime vmtimer(ClassLoader::perf_shared_classload_time());
+      k = load_shared_class(class_name, class_loader, THREAD);
+    }
 
     if (k.is_null()) {
       // Use VM class loader
+      PerfTraceTime vmtimer(ClassLoader::perf_sys_classload_time());
       k = ClassLoader::load_classfile(class_name, CHECK_(nh));
     }
 
@@ -1334,6 +1339,16 @@
     // Use user specified class loader to load class. Call loadClass operation on class_loader.
     ResourceMark rm(THREAD);
 
+    assert(THREAD->is_Java_thread(), "must be a JavaThread");
+    JavaThread* jt = (JavaThread*) THREAD;
+
+    PerfClassTraceTime vmtimer(ClassLoader::perf_app_classload_time(),
+                               ClassLoader::perf_app_classload_selftime(),
+                               ClassLoader::perf_app_classload_count(),
+                               jt->get_thread_stat()->perf_recursion_counts_addr(),
+                               jt->get_thread_stat()->perf_timers_addr(),
+                               PerfClassTraceTime::CLASS_LOAD);
+
     Handle s = java_lang_String::create_from_symbol(class_name, CHECK_(nh));
     // Translate to external class name format, i.e., convert '/' chars to '.'
     Handle string = java_lang_String::externalize_classname(s, CHECK_(nh));
--- a/src/share/vm/code/debugInfo.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/code/debugInfo.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -255,7 +255,8 @@
   ScopeValue* read_object_value();
   ScopeValue* get_cached_object();
   // BCI encoding is mostly unsigned, but -1 is a distinguished value
-  int read_bci() { return read_int() + InvocationEntryBci; }
+  // Decoding based on encoding: bci = InvocationEntryBci + read_int()/2; reexecute = read_int()%2 == 1 ? true : false;
+  int read_bci_and_reexecute(bool& reexecute) { int i = read_int(); reexecute = (i & 1) ? true : false; return (i >> 1) + InvocationEntryBci; }
 };
 
 // DebugInfoWriteStream specializes CompressedWriteStream for
@@ -268,5 +269,6 @@
  public:
   DebugInfoWriteStream(DebugInformationRecorder* recorder, int initial_size);
   void write_handle(jobject h);
-  void write_bci(int bci) { write_int(bci - InvocationEntryBci); }
+  //Encoding bci and reexecute into one word as (bci - InvocationEntryBci)*2 + reexecute
+  void write_bci_and_reexecute(int bci, bool reexecute) { write_int(((bci - InvocationEntryBci) << 1) + (reexecute ? 1 : 0)); }
 };
--- a/src/share/vm/code/debugInfoRec.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/code/debugInfoRec.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -280,6 +280,7 @@
 void DebugInformationRecorder::describe_scope(int         pc_offset,
                                               ciMethod*   method,
                                               int         bci,
+                                              bool        reexecute,
                                               DebugToken* locals,
                                               DebugToken* expressions,
                                               DebugToken* monitors) {
@@ -297,7 +298,7 @@
   // serialize scope
   jobject method_enc = (method == NULL)? NULL: method->encoding();
   stream()->write_int(oop_recorder()->find_index(method_enc));
-  stream()->write_bci(bci);
+  stream()->write_bci_and_reexecute(bci, reexecute);
   assert(method == NULL ||
          (method->is_native() && bci == 0) ||
          (!method->is_native() && 0 <= bci && bci < method->code_size()) ||
--- a/src/share/vm/code/debugInfoRec.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/code/debugInfoRec.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -87,6 +87,7 @@
   void describe_scope(int         pc_offset,
                       ciMethod*   method,
                       int         bci,
+                      bool        reexecute,
                       DebugToken* locals      = NULL,
                       DebugToken* expressions = NULL,
                       DebugToken* monitors    = NULL);
--- a/src/share/vm/code/scopeDesc.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/code/scopeDesc.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -46,6 +46,7 @@
   _decode_offset = parent->_sender_decode_offset;
   _objects       = parent->_objects;
   decode_body();
+  assert(_reexecute == false, "reexecute not allowed");
 }
 
 
@@ -56,6 +57,7 @@
     _sender_decode_offset = DebugInformationRecorder::serialized_null;
     _method = methodHandle(_code->method());
     _bci = InvocationEntryBci;
+    _reexecute = false;
     _locals_decode_offset = DebugInformationRecorder::serialized_null;
     _expressions_decode_offset = DebugInformationRecorder::serialized_null;
     _monitors_decode_offset = DebugInformationRecorder::serialized_null;
@@ -65,7 +67,8 @@
 
     _sender_decode_offset = stream->read_int();
     _method = methodHandle((methodOop) stream->read_oop());
-    _bci    = stream->read_bci();
+    _bci    = stream->read_bci_and_reexecute(_reexecute);
+
     // decode offsets for body and sender
     _locals_decode_offset      = stream->read_int();
     _expressions_decode_offset = stream->read_int();
@@ -170,6 +173,7 @@
     st->print("ScopeDesc[%d]@" PTR_FORMAT " ", _decode_offset, _code->instructions_begin());
     st->print_cr(" offset:     %d",    _decode_offset);
     st->print_cr(" bci:        %d",    bci());
+    st->print_cr(" reexecute:  %s",    should_reexecute() ? "true" : "false");
     st->print_cr(" locals:     %d",    _locals_decode_offset);
     st->print_cr(" stack:      %d",    _expressions_decode_offset);
     st->print_cr(" monitor:    %d",    _monitors_decode_offset);
--- a/src/share/vm/code/scopeDesc.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/code/scopeDesc.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -39,7 +39,8 @@
     DebugInfoReadStream buffer(code, pc_desc->scope_decode_offset());
     int ignore_sender = buffer.read_int();
     _method           = methodOop(buffer.read_oop());
-    _bci              = buffer.read_bci();
+    bool dummy_reexecute; //only methodOop and bci are needed!
+    _bci              = buffer.read_bci_and_reexecute(dummy_reexecute);
   }
 
   methodOop method() { return _method; }
@@ -60,8 +61,9 @@
   ScopeDesc(const nmethod* code, int decode_offset);
 
   // JVM state
-  methodHandle method() const { return _method; }
-  int          bci()    const { return _bci;    }
+  methodHandle method()   const { return _method; }
+  int          bci()      const { return _bci;    }
+  bool should_reexecute() const { return _reexecute; }
 
   GrowableArray<ScopeValue*>*   locals();
   GrowableArray<ScopeValue*>*   expressions();
@@ -86,6 +88,7 @@
   // JVM state
   methodHandle  _method;
   int           _bci;
+  bool          _reexecute;
 
   // Decoding offsets
   int _decode_offset;
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -25,11 +25,21 @@
 #include "incls/_precompiled.incl"
 #include "incls/_concurrentG1Refine.cpp.incl"
 
+// Possible sizes for the card counts cache: odd primes that roughly double in size.
+// (See jvmtiTagMap.cpp).
+int ConcurrentG1Refine::_cc_cache_sizes[] = {
+        16381,    32771,    76831,    150001,   307261,
+       614563,  1228891,  2457733,   4915219,  9830479,
+     19660831, 39321619, 78643219, 157286461,       -1
+  };
+
 ConcurrentG1Refine::ConcurrentG1Refine() :
-  _card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL),
+  _card_counts(NULL), _card_epochs(NULL),
+  _n_card_counts(0), _max_n_card_counts(0),
+  _cache_size_index(0), _expand_card_counts(false),
   _hot_cache(NULL),
   _def_use_cache(false), _use_cache(false),
-  _n_periods(0), _total_cards(0), _total_travs(0),
+  _n_periods(0),
   _threads(NULL), _n_threads(0)
 {
   if (G1ConcRefine) {
@@ -57,32 +67,51 @@
 }
 
 void ConcurrentG1Refine::init() {
-  if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
-    G1CollectedHeap* g1h = G1CollectedHeap::heap();
-    _n_card_counts =
-      (unsigned) (g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift);
-    _card_counts = NEW_C_HEAP_ARRAY(unsigned char, _n_card_counts);
-    for (size_t i = 0; i < _n_card_counts; i++) _card_counts[i] = 0;
-    ModRefBarrierSet* bs = g1h->mr_bs();
+  if (G1ConcRSLogCacheSize > 0) {
+    _g1h = G1CollectedHeap::heap();
+    _max_n_card_counts =
+      (unsigned) (_g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift);
+
+    size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1;
+    guarantee(_max_n_card_counts < max_card_num, "card_num representation");
+
+    int desired = _max_n_card_counts / InitialCacheFraction;
+    for (_cache_size_index = 0;
+              _cc_cache_sizes[_cache_size_index] >= 0; _cache_size_index++) {
+      if (_cc_cache_sizes[_cache_size_index] >= desired) break;
+    }
+    _cache_size_index = MAX2(0, (_cache_size_index - 1));
+
+    int initial_size = _cc_cache_sizes[_cache_size_index];
+    if (initial_size < 0) initial_size = _max_n_card_counts;
+
+    // Make sure we don't go bigger than we will ever need
+    _n_card_counts = MIN2((unsigned) initial_size, _max_n_card_counts);
+
+    _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
+    _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);
+
+    Copy::fill_to_bytes(&_card_counts[0],
+                        _n_card_counts * sizeof(CardCountCacheEntry));
+    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
+
+    ModRefBarrierSet* bs = _g1h->mr_bs();
     guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
-    CardTableModRefBS* ctbs = (CardTableModRefBS*)bs;
-    _ct_bot = ctbs->byte_for_const(g1h->reserved_region().start());
-    if (G1ConcRSCountTraversals) {
-      _cur_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256);
-      _cum_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256);
-      for (int i = 0; i < 256; i++) {
-        _cur_card_count_histo[i] = 0;
-        _cum_card_count_histo[i] = 0;
-      }
-    }
-  }
-  if (G1ConcRSLogCacheSize > 0) {
+    _ct_bs = (CardTableModRefBS*)bs;
+    _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start());
+
     _def_use_cache = true;
     _use_cache = true;
     _hot_cache_size = (1 << G1ConcRSLogCacheSize);
     _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
     _n_hot = 0;
     _hot_cache_idx = 0;
+
+    // For refining the cards in the hot cache in parallel
+    int n_workers = (ParallelGCThreads > 0 ?
+                        _g1h->workers()->total_workers() : 1);
+    _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers);
+    _hot_cache_par_claimed_idx = 0;
   }
 }
 
@@ -95,15 +124,11 @@
 }
 
 ConcurrentG1Refine::~ConcurrentG1Refine() {
-  if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
+  if (G1ConcRSLogCacheSize > 0) {
     assert(_card_counts != NULL, "Logic");
-    FREE_C_HEAP_ARRAY(unsigned char, _card_counts);
-    assert(_cur_card_count_histo != NULL, "Logic");
-    FREE_C_HEAP_ARRAY(unsigned, _cur_card_count_histo);
-    assert(_cum_card_count_histo != NULL, "Logic");
-    FREE_C_HEAP_ARRAY(unsigned, _cum_card_count_histo);
-  }
-  if (G1ConcRSLogCacheSize > 0) {
+    FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
+    assert(_card_epochs != NULL, "Logic");
+    FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
     assert(_hot_cache != NULL, "Logic");
     FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
   }
@@ -123,165 +148,232 @@
   }
 }
 
-
-int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) {
-  size_t card_num = (card_ptr - _ct_bot);
-  guarantee(0 <= card_num && card_num < _n_card_counts, "Bounds");
-  unsigned char cnt = _card_counts[card_num];
-  if (cnt < 255) _card_counts[card_num]++;
-  return cnt;
-  _total_travs++;
+bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) {
+  HeapWord* start = _ct_bs->addr_for(card_ptr);
+  HeapRegion* r = _g1h->heap_region_containing(start);
+  if (r != NULL && r->is_young()) {
+    return true;
+  }
+  // This card is not associated with a heap region
+  // so can't be young.
+  return false;
 }
 
-jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr) {
-  int count = add_card_count(card_ptr);
-  // Count previously unvisited cards.
-  if (count == 0) _total_cards++;
-  // We'll assume a traversal unless we store it in the cache.
+jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) {
+  unsigned new_card_num = ptr_2_card_num(card_ptr);
+  unsigned bucket = hash(new_card_num);
+  assert(0 <= bucket && bucket < _n_card_counts, "Bounds");
+
+  CardCountCacheEntry* count_ptr = &_card_counts[bucket];
+  CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket];
+
+  // We have to construct a new entry if we haven't updated the counts
+  // during the current period, or if the count was updated for a
+  // different card number.
+  unsigned int new_epoch = (unsigned int) _n_periods;
+  julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch);
+
+  while (true) {
+    // Fetch the previous epoch value
+    julong prev_epoch_entry = epoch_ptr->_value;
+    julong cas_res;
+
+    if (extract_epoch(prev_epoch_entry) != new_epoch) {
+      // This entry has not yet been updated during this period.
+      // Note: we update the epoch value atomically to ensure
+      // that there is only one winner that updates the cached
+      // card_ptr value even though all the refine threads share
+      // the same epoch value.
+
+      cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
+                                         (volatile jlong*)&epoch_ptr->_value,
+                                         (jlong) prev_epoch_entry);
+
+      if (cas_res == prev_epoch_entry) {
+        // We have successfully won the race to update the
+        // epoch and card_num value. Make it look like the
+        // count and eviction count were previously cleared.
+        count_ptr->_count = 1;
+        count_ptr->_evict_count = 0;
+        *count = 0;
+        // We can defer the processing of card_ptr
+        *defer = true;
+        return card_ptr;
+      }
+      // We did not win the race to update the epoch field, so some other
+      // thread must have done it. The value that gets returned by CAS
+      // should be the new epoch value.
+      assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch");
+      // We could 'continue' here or just re-read the previous epoch value
+      prev_epoch_entry = epoch_ptr->_value;
+    }
+
+    // The epoch entry for card_ptr has been updated during this period.
+    unsigned old_card_num = extract_card_num(prev_epoch_entry);
+
+    // The card count that will be returned to caller
+    *count = count_ptr->_count;
+
+    // Are we updating the count for the same card?
+    if (new_card_num == old_card_num) {
+      // Same card - just update the count. We could have more than one
+      // thread racing to update count for the current card. It should be
+      // OK not to use a CAS as the only penalty should be some missed
+      // increments of the count which delays identifying the card as "hot".
+
+      if (*count < max_jubyte) count_ptr->_count++;
+      // We can defer the processing of card_ptr
+      *defer = true;
+      return card_ptr;
+    }
+
+    // Different card - evict old card info
+    if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++;
+    if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) {
+      // Trigger a resize the next time we clear
+      _expand_card_counts = true;
+    }
+
+    cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
+                                       (volatile jlong*)&epoch_ptr->_value,
+                                       (jlong) prev_epoch_entry);
+
+    if (cas_res == prev_epoch_entry) {
+      // We successfully updated the card num value in the epoch entry
+      count_ptr->_count = 0; // initialize counter for new card num
+
+      // Even though the region containg the card at old_card_num was not
+      // in the young list when old_card_num was recorded in the epoch
+      // cache it could have been added to the free list and subsequently
+      // added to the young list in the intervening time. If the evicted
+      // card is in a young region just return the card_ptr and the evicted
+      // card will not be cleaned. See CR 6817995.
+
+      jbyte* old_card_ptr = card_num_2_ptr(old_card_num);
+      if (is_young_card(old_card_ptr)) {
+        *count = 0;
+        // We can defer the processing of card_ptr
+        *defer = true;
+        return card_ptr;
+      }
+
+      // We do not want to defer processing of card_ptr in this case
+      // (we need to refine old_card_ptr and card_ptr)
+      *defer = false;
+      return old_card_ptr;
+    }
+    // Someone else beat us - try again.
+  }
+}
+
+jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) {
+  int count;
+  jbyte* cached_ptr = add_card_count(card_ptr, &count, defer);
+  assert(cached_ptr != NULL, "bad cached card ptr");
+  assert(!is_young_card(cached_ptr), "shouldn't get a card in young region");
+
+  // The card pointer we obtained from card count cache is not hot
+  // so do not store it in the cache; return it for immediate
+  // refining.
   if (count < G1ConcRSHotCardLimit) {
-    _total_travs++;
-    return card_ptr;
+    return cached_ptr;
   }
-  // Otherwise, it's hot.
+
+  // Otherwise, the pointer we got from the _card_counts is hot.
   jbyte* res = NULL;
   MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
   if (_n_hot == _hot_cache_size) {
-    _total_travs++;
     res = _hot_cache[_hot_cache_idx];
     _n_hot--;
   }
   // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
-  _hot_cache[_hot_cache_idx] = card_ptr;
+  _hot_cache[_hot_cache_idx] = cached_ptr;
   _hot_cache_idx++;
   if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
   _n_hot++;
+
+  if (res != NULL) {
+    // Even though the region containg res was not in the young list
+    // when it was recorded in the hot cache it could have been added
+    // to the free list and subsequently added to the young list in
+    // the intervening time. If res is in a young region, return NULL
+    // so that res is not cleaned. See CR 6817995.
+
+    if (is_young_card(res)) {
+      res = NULL;
+    }
+  }
+
   return res;
 }
 
-
 void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) {
   assert(!use_cache(), "cache should be disabled");
-  int start_ind = _hot_cache_idx-1;
-  for (int i = 0; i < _n_hot; i++) {
-    int ind = start_ind - i;
-    if (ind < 0) ind = ind + _hot_cache_size;
-    jbyte* entry = _hot_cache[ind];
-    if (entry != NULL) {
-      g1rs->concurrentRefineOneCard(entry, worker_i);
+  int start_idx;
+
+  while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once
+    int end_idx = start_idx + _hot_cache_par_chunk_size;
+
+    if (start_idx ==
+        Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) {
+      // The current worker has successfully claimed the chunk [start_idx..end_idx)
+      end_idx = MIN2(end_idx, _n_hot);
+      for (int i = start_idx; i < end_idx; i++) {
+        jbyte* entry = _hot_cache[i];
+        if (entry != NULL) {
+          g1rs->concurrentRefineOneCard(entry, worker_i);
+        }
+      }
     }
   }
-  _n_hot = 0;
-  _hot_cache_idx = 0;
+}
+
+void ConcurrentG1Refine::expand_card_count_cache() {
+  if (_n_card_counts < _max_n_card_counts) {
+    int new_idx = _cache_size_index+1;
+    int new_size = _cc_cache_sizes[new_idx];
+    if (new_size < 0) new_size = _max_n_card_counts;
+
+    // Make sure we don't go bigger than we will ever need
+    new_size = MIN2((unsigned) new_size, _max_n_card_counts);
+
+    // Expand the card count and card epoch tables
+    if (new_size > (int)_n_card_counts) {
+      // We can just free and allocate a new array as we're
+      // not interested in preserving the contents
+      assert(_card_counts != NULL, "Logic!");
+      assert(_card_epochs != NULL, "Logic!");
+      FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
+      FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
+      _n_card_counts = new_size;
+      _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
+      _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);
+      _cache_size_index = new_idx;
+    }
+  }
 }
 
 void ConcurrentG1Refine::clear_and_record_card_counts() {
-  if (G1ConcRSLogCacheSize == 0 && !G1ConcRSCountTraversals) return;
+  if (G1ConcRSLogCacheSize == 0) return;
+
+#ifndef PRODUCT
+  double start = os::elapsedTime();
+#endif
+
+  if (_expand_card_counts) {
+    expand_card_count_cache();
+    _expand_card_counts = false;
+    // Only need to clear the epochs.
+    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
+  }
+
+  int this_epoch = (int) _n_periods;
+  assert((this_epoch+1) <= max_jint, "to many periods");
+  // Update epoch
   _n_periods++;
-  if (G1ConcRSCountTraversals) {
-    for (size_t i = 0; i < _n_card_counts; i++) {
-      unsigned char bucket = _card_counts[i];
-      _cur_card_count_histo[bucket]++;
-      _card_counts[i] = 0;
-    }
-    gclog_or_tty->print_cr("Card counts:");
-    for (int i = 0; i < 256; i++) {
-      if (_cur_card_count_histo[i] > 0) {
-        gclog_or_tty->print_cr("  %3d: %9d", i, _cur_card_count_histo[i]);
-        _cum_card_count_histo[i] += _cur_card_count_histo[i];
-        _cur_card_count_histo[i] = 0;
-      }
-    }
-  } else {
-    assert(G1ConcRSLogCacheSize > 0, "Logic");
-    Copy::fill_to_words((HeapWord*)(&_card_counts[0]),
-                        _n_card_counts / HeapWordSize);
-  }
+
+#ifndef PRODUCT
+  double elapsed = os::elapsedTime() - start;
+  _g1h->g1_policy()->record_cc_clear_time(elapsed * 1000.0);
+#endif
 }
-
-void
-ConcurrentG1Refine::
-print_card_count_histo_range(unsigned* histo, int from, int to,
-                             float& cum_card_pct,
-                             float& cum_travs_pct) {
-  unsigned cards = 0;
-  unsigned travs = 0;
-  guarantee(to <= 256, "Precondition");
-  for (int i = from; i < to-1; i++) {
-    cards += histo[i];
-    travs += histo[i] * i;
-  }
-  if (to == 256) {
-    unsigned histo_card_sum = 0;
-    unsigned histo_trav_sum = 0;
-    for (int i = 1; i < 255; i++) {
-      histo_trav_sum += histo[i] * i;
-    }
-    cards += histo[255];
-    // correct traversals for the last one.
-    unsigned travs_255 = (unsigned) (_total_travs - histo_trav_sum);
-    travs += travs_255;
-
-  } else {
-    cards += histo[to-1];
-    travs += histo[to-1] * (to-1);
-  }
-  float fperiods = (float)_n_periods;
-  float f_tot_cards = (float)_total_cards/fperiods;
-  float f_tot_travs = (float)_total_travs/fperiods;
-  if (cards > 0) {
-    float fcards = (float)cards/fperiods;
-    float ftravs = (float)travs/fperiods;
-    if (to == 256) {
-      gclog_or_tty->print(" %4d-       %10.2f%10.2f", from, fcards, ftravs);
-    } else {
-      gclog_or_tty->print(" %4d-%4d   %10.2f%10.2f", from, to-1, fcards, ftravs);
-    }
-    float pct_cards = fcards*100.0/f_tot_cards;
-    cum_card_pct += pct_cards;
-    float pct_travs = ftravs*100.0/f_tot_travs;
-    cum_travs_pct += pct_travs;
-    gclog_or_tty->print_cr("%10.2f%10.2f%10.2f%10.2f",
-                  pct_cards, cum_card_pct,
-                  pct_travs, cum_travs_pct);
-  }
-}
-
-void ConcurrentG1Refine::print_final_card_counts() {
-  if (!G1ConcRSCountTraversals) return;
-
-  gclog_or_tty->print_cr("Did %d total traversals of %d distinct cards.",
-                _total_travs, _total_cards);
-  float fperiods = (float)_n_periods;
-  gclog_or_tty->print_cr("  This is an average of %8.2f traversals, %8.2f cards, "
-                "per collection.", (float)_total_travs/fperiods,
-                (float)_total_cards/fperiods);
-  gclog_or_tty->print_cr("  This is an average of %8.2f traversals/distinct "
-                "dirty card.\n",
-                _total_cards > 0 ?
-                (float)_total_travs/(float)_total_cards : 0.0);
-
-
-  gclog_or_tty->print_cr("Histogram:\n\n%10s   %10s%10s%10s%10s%10s%10s",
-                "range", "# cards", "# travs", "% cards", "(cum)",
-                "% travs", "(cum)");
-  gclog_or_tty->print_cr("------------------------------------------------------------"
-                "-------------");
-  float cum_cards_pct = 0.0;
-  float cum_travs_pct = 0.0;
-  for (int i = 1; i < 10; i++) {
-    print_card_count_histo_range(_cum_card_count_histo, i, i+1,
-                                 cum_cards_pct, cum_travs_pct);
-  }
-  for (int i = 10; i < 100; i += 10) {
-    print_card_count_histo_range(_cum_card_count_histo, i, i+10,
-                                 cum_cards_pct, cum_travs_pct);
-  }
-  print_card_count_histo_range(_cum_card_count_histo, 100, 150,
-                               cum_cards_pct, cum_travs_pct);
-  print_card_count_histo_range(_cum_card_count_histo, 150, 200,
-                               cum_cards_pct, cum_travs_pct);
-  print_card_count_histo_range(_cum_card_count_histo, 150, 255,
-                               cum_cards_pct, cum_travs_pct);
-  print_card_count_histo_range(_cum_card_count_histo, 255, 256,
-                               cum_cards_pct, cum_travs_pct);
-}
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -29,29 +29,117 @@
 class ConcurrentG1Refine: public CHeapObj {
   ConcurrentG1RefineThread** _threads;
   int _n_threads;
+
   // The cache for card refinement.
-  bool     _use_cache;
-  bool     _def_use_cache;
-  size_t _n_periods;
-  size_t _total_cards;
-  size_t _total_travs;
+  bool   _use_cache;
+  bool   _def_use_cache;
 
-  unsigned char*  _card_counts;
+  size_t _n_periods;    // Used as clearing epoch
+
+  // An evicting cache of the number of times each card
+  // is accessed. Reduces, but does not eliminate, the amount
+  // of duplicated processing of dirty cards.
+
+  enum SomePrivateConstants {
+    epoch_bits           = 32,
+    card_num_shift       = epoch_bits,
+    epoch_mask           = AllBits,
+    card_num_mask        = AllBits,
+
+    // The initial cache size is approximately this fraction
+    // of a maximal cache (i.e. the size needed for all cards
+    // in the heap)
+    InitialCacheFraction = 512
+  };
+
+  const static julong card_num_mask_in_place =
+                        (julong) card_num_mask << card_num_shift;
+
+  typedef struct {
+    julong _value;      // |  card_num   |  epoch   |
+  } CardEpochCacheEntry;
+
+  julong make_epoch_entry(unsigned int card_num, unsigned int epoch) {
+    assert(0 <= card_num && card_num < _max_n_card_counts, "Bounds");
+    assert(0 <= epoch && epoch <= _n_periods, "must be");
+
+    return ((julong) card_num << card_num_shift) | epoch;
+  }
+
+  unsigned int extract_epoch(julong v) {
+    return (v & epoch_mask);
+  }
+
+  unsigned int extract_card_num(julong v) {
+    return (v & card_num_mask_in_place) >> card_num_shift;
+  }
+
+  typedef struct {
+    unsigned char _count;
+    unsigned char _evict_count;
+  } CardCountCacheEntry;
+
+  CardCountCacheEntry* _card_counts;
+  CardEpochCacheEntry* _card_epochs;
+
+  // The current number of buckets in the card count cache
   unsigned _n_card_counts;
+
+  // The max number of buckets required for the number of
+  // cards for the entire reserved heap
+  unsigned _max_n_card_counts;
+
+  // Possible sizes of the cache: odd primes that roughly double in size.
+  // (See jvmtiTagMap.cpp).
+  static int _cc_cache_sizes[];
+
+  // The index in _cc_cache_sizes corresponding to the size of
+  // _card_counts.
+  int _cache_size_index;
+
+  bool _expand_card_counts;
+
   const jbyte* _ct_bot;
-  unsigned* _cur_card_count_histo;
-  unsigned* _cum_card_count_histo;
-  jbyte**  _hot_cache;
-  int      _hot_cache_size;
-  int      _n_hot;
-  int      _hot_cache_idx;
+
+  jbyte**      _hot_cache;
+  int          _hot_cache_size;
+  int          _n_hot;
+  int          _hot_cache_idx;
+
+  int          _hot_cache_par_chunk_size;
+  volatile int _hot_cache_par_claimed_idx;
+
+  // Needed to workaround 6817995
+  CardTableModRefBS* _ct_bs;
+  G1CollectedHeap*   _g1h;
+
+  // Expands the array that holds the card counts to the next size up
+  void expand_card_count_cache();
+
+  // hash a given key (index of card_ptr) with the specified size
+  static unsigned int hash(size_t key, int size) {
+    return (unsigned int) key % size;
+  }
+
+  // hash a given key (index of card_ptr)
+  unsigned int hash(size_t key) {
+    return hash(key, _n_card_counts);
+  }
+
+  unsigned ptr_2_card_num(jbyte* card_ptr) {
+    return (unsigned) (card_ptr - _ct_bot);
+  }
+
+  jbyte* card_num_2_ptr(unsigned card_num) {
+    return (jbyte*) (_ct_bot + card_num);
+  }
 
   // Returns the count of this card after incrementing it.
-  int add_card_count(jbyte* card_ptr);
+  jbyte* add_card_count(jbyte* card_ptr, int* count, bool* defer);
 
-  void print_card_count_histo_range(unsigned* histo, int from, int to,
-                                    float& cum_card_pct,
-                                    float& cum_travs_pct);
+  // Returns true if this card is in a young region
+  bool is_young_card(jbyte* card_ptr);
+
  public:
   ConcurrentG1Refine();
   ~ConcurrentG1Refine();
@@ -65,11 +153,16 @@
   // If this is the first entry for the slot, writes into the cache and
   // returns NULL.  If it causes an eviction, returns the evicted pointer.
   // Otherwise, its a cache hit, and returns NULL.
-  jbyte* cache_insert(jbyte* card_ptr);
+  jbyte* cache_insert(jbyte* card_ptr, bool* defer);
 
   // Process the cached entries.
   void clean_up_cache(int worker_i, G1RemSet* g1rs);
 
+  // Set up for parallel processing of the cards in the hot cache
+  void clear_hot_cache_claimed_index() {
+    _hot_cache_par_claimed_idx = 0;
+  }
+
   // Discard entries in the hot cache.
   void clear_hot_cache() {
     _hot_cache_idx = 0; _n_hot = 0;
@@ -84,7 +177,6 @@
   }
 
   void clear_and_record_card_counts();
-  void print_final_card_counts();
 
   static size_t thread_num();
 };
--- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -104,17 +104,17 @@
     double start_vtime_sec; // only used when G1SmoothConcRefine is on
     int prev_buffer_num; // only used when G1SmoothConcRefine is on
     // This thread activation threshold
-    int threshold = DCQBarrierProcessCompletedThreshold * _worker_id;
+    int threshold = G1UpdateBufferQueueProcessingThreshold * _worker_id;
     // Next thread activation threshold
-    int next_threshold = threshold + DCQBarrierProcessCompletedThreshold;
-    int deactivation_threshold = MAX2<int>(threshold - DCQBarrierProcessCompletedThreshold / 2, 0);
+    int next_threshold = threshold + G1UpdateBufferQueueProcessingThreshold;
+    int deactivation_threshold = MAX2<int>(threshold - G1UpdateBufferQueueProcessingThreshold / 2, 0);
 
     if (G1SmoothConcRefine) {
       lower_limit = 0;
       start_vtime_sec = os::elapsedVTime();
       prev_buffer_num = (int) dcqs.completed_buffers_num();
     } else {
-      lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now.
+      lower_limit = G1UpdateBufferQueueProcessingThreshold / 4; // For now.
     }
     while (dcqs.apply_closure_to_completed_buffer(_worker_id + _worker_id_offset, lower_limit)) {
       double end_vtime_sec;
--- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -80,8 +80,8 @@
                                    int max_completed_queue,
                                    Mutex* lock, PtrQueueSet* fl_owner) {
   PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue, fl_owner);
-  set_buffer_size(DCQBarrierQueueBufferSize);
-  set_process_completed_threshold(DCQBarrierProcessCompletedThreshold);
+  set_buffer_size(G1UpdateBufferSize);
+  set_process_completed_threshold(G1UpdateBufferQueueProcessingThreshold);
 
   _shared_dirty_card_queue.set_lock(lock);
   _free_ids = new FreeIdSet((int) num_par_ids(), _cbl_mon);
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -1591,7 +1591,7 @@
 
   JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
                                                 DirtyCardQ_FL_lock,
-                                                G1DirtyCardQueueMax,
+                                                G1UpdateBufferQueueMaxLength,
                                                 Shared_DirtyCardQ_lock);
 
   if (G1DeferredRSUpdate) {
@@ -1637,6 +1637,9 @@
 
 void G1CollectedHeap::iterate_dirty_card_closure(bool concurrent,
                                                  int worker_i) {
+  // Clean cards in the hot card cache
+  concurrent_g1_refine()->clean_up_cache(worker_i, g1_rem_set());
+
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   int n_completed_buffers = 0;
   while (dcqs.apply_closure_to_completed_buffer(worker_i, 0, true)) {
@@ -1645,9 +1648,6 @@
   g1_policy()->record_update_rs_processed_buffers(worker_i,
                                                   (double) n_completed_buffers);
   dcqs.clear_n_completed_buffers();
-  // Finish up the queue...
-  if (worker_i == 0) concurrent_g1_refine()->clean_up_cache(worker_i,
-                                                            g1_rem_set());
   assert(!dcqs.completed_buffers_exist_dirty(), "Completed buffers exist!");
 }
 
@@ -2414,8 +2414,6 @@
 }
 
 void G1CollectedHeap::print_tracing_info() const {
-  concurrent_g1_refine()->print_final_card_counts();
-
   // We'll overload this to mean "trace GC pause statistics."
   if (TraceGen0Time || TraceGen1Time) {
     // The "G1CollectorPolicy" is keeping track of these stats, so delegate
@@ -2845,6 +2843,11 @@
   if (PrintHeapAtGC) {
     Universe::print_heap_after_gc();
   }
+  if (G1SummarizeRSetStats &&
+      (G1SummarizeRSetStatsPeriod > 0) &&
+      (total_collections() % G1SummarizeRSetStatsPeriod == 0)) {
+    g1_rem_set()->print_summary_info();
+  }
 }
 
 void G1CollectedHeap::set_gc_alloc_region(int purpose, HeapRegion* r) {
@@ -4106,6 +4109,8 @@
 
   g1_rem_set()->prepare_for_oops_into_collection_set_do();
   concurrent_g1_refine()->set_use_cache(false);
+  concurrent_g1_refine()->clear_hot_cache_claimed_index();
+
   int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1);
   set_par_threads(n_workers);
   G1ParTask g1_par_task(this, n_workers, _task_queues);
@@ -4138,6 +4143,7 @@
   }
   g1_rem_set()->cleanup_after_oops_into_collection_set_do();
 
+  concurrent_g1_refine()->clear_hot_cache();
   concurrent_g1_refine()->set_use_cache(true);
 
   finalize_for_evac_failure();
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -94,7 +94,14 @@
   _summary(new Summary()),
   _abandoned_summary(new AbandonedSummary()),
 
+#ifndef PRODUCT
   _cur_clear_ct_time_ms(0.0),
+  _min_clear_cc_time_ms(-1.0),
+  _max_clear_cc_time_ms(-1.0),
+  _cur_clear_cc_time_ms(0.0),
+  _cum_clear_cc_time_ms(0.0),
+  _num_cc_clears(0L),
+#endif
 
   _region_num_young(0),
   _region_num_tenured(0),
@@ -1648,6 +1655,15 @@
         print_stats(1, "Object Copying", obj_copy_time);
       }
     }
+#ifndef PRODUCT
+    print_stats(1, "Cur Clear CC", _cur_clear_cc_time_ms);
+    print_stats(1, "Cum Clear CC", _cum_clear_cc_time_ms);
+    print_stats(1, "Min Clear CC", _min_clear_cc_time_ms);
+    print_stats(1, "Max Clear CC", _max_clear_cc_time_ms);
+    if (_num_cc_clears > 0) {
+      print_stats(1, "Avg Clear CC", _cum_clear_cc_time_ms / ((double)_num_cc_clears));
+    }
+#endif
     print_stats(1, "Other", other_time_ms);
     for (int i = 0; i < _aux_num; ++i) {
       if (_cur_aux_times_set[i]) {
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -112,7 +112,6 @@
     return 8*M;
   }
 
-
   double _cur_collection_start_sec;
   size_t _cur_collection_pause_used_at_start_bytes;
   size_t _cur_collection_pause_used_regions_at_start;
@@ -122,6 +121,15 @@
   double _cur_clear_ct_time_ms;
   bool   _satb_drain_time_set;
 
+#ifndef PRODUCT
+  // Card Table Count Cache stats
+  double _min_clear_cc_time_ms;         // min
+  double _max_clear_cc_time_ms;         // max
+  double _cur_clear_cc_time_ms;         // clearing time during current pause
+  double _cum_clear_cc_time_ms;         // cummulative clearing time
+  jlong  _num_cc_clears;                // number of times the card count cache has been cleared
+#endif
+
   double _cur_CH_strong_roots_end_sec;
   double _cur_CH_strong_roots_dur_ms;
   double _cur_G1_strong_roots_end_sec;
@@ -931,6 +939,18 @@
     _cur_aux_times_ms[i] += ms;
   }
 
+#ifndef PRODUCT
+  void record_cc_clear_time(double ms) {
+    if (_min_clear_cc_time_ms < 0.0 || ms <= _min_clear_cc_time_ms)
+      _min_clear_cc_time_ms = ms;
+    if (_max_clear_cc_time_ms < 0.0 || ms >= _max_clear_cc_time_ms)
+      _max_clear_cc_time_ms = ms;
+    _cur_clear_cc_time_ms = ms;
+    _cum_clear_cc_time_ms += ms;
+    _num_cc_clears++;
+  }
+#endif
+
   // Record the fact that "bytes" bytes allocated in a region.
   void record_before_bytes(size_t bytes);
   void record_after_bytes(size_t bytes);
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -676,6 +676,55 @@
 
 static IntHistogram out_of_histo(50, 50);
 
+void HRInto_G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i) {
+  // Construct the region representing the card.
+  HeapWord* start = _ct_bs->addr_for(card_ptr);
+  // And find the region containing it.
+  HeapRegion* r = _g1->heap_region_containing(start);
+  assert(r != NULL, "unexpected null");
+
+  HeapWord* end   = _ct_bs->addr_for(card_ptr + 1);
+  MemRegion dirtyRegion(start, end);
+
+#if CARD_REPEAT_HISTO
+  init_ct_freq_table(_g1->g1_reserved_obj_bytes());
+  ct_freq_note_card(_ct_bs->index_for(start));
+#endif
+
+  UpdateRSOopClosure update_rs_oop_cl(this, worker_i);
+  update_rs_oop_cl.set_from(r);
+  FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r, &update_rs_oop_cl);
+
+  // Undirty the card.
+  *card_ptr = CardTableModRefBS::clean_card_val();
+  // We must complete this write before we do any of the reads below.
+  OrderAccess::storeload();
+  // And process it, being careful of unallocated portions of TLAB's.
+  HeapWord* stop_point =
+    r->oops_on_card_seq_iterate_careful(dirtyRegion,
+                                        &filter_then_update_rs_oop_cl);
+  // If stop_point is non-null, then we encountered an unallocated region
+  // (perhaps the unfilled portion of a TLAB.)  For now, we'll dirty the
+  // card and re-enqueue: if we put off the card until a GC pause, then the
+  // unallocated portion will be filled in.  Alternatively, we might try
+  // the full complexity of the technique used in "regular" precleaning.
+  if (stop_point != NULL) {
+    // The card might have gotten re-dirtied and re-enqueued while we
+    // worked.  (In fact, it's pretty likely.)
+    if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
+      *card_ptr = CardTableModRefBS::dirty_card_val();
+      MutexLockerEx x(Shared_DirtyCardQ_lock,
+                      Mutex::_no_safepoint_check_flag);
+      DirtyCardQueue* sdcq =
+        JavaThread::dirty_card_queue_set().shared_dirty_card_queue();
+      sdcq->enqueue(card_ptr);
+    }
+  } else {
+    out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region());
+    _conc_refine_cards++;
+  }
+}
+
 void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {
   // If the card is no longer dirty, nothing to do.
   if (*card_ptr != CardTableModRefBS::dirty_card_val()) return;
@@ -716,61 +765,63 @@
     return;
   }
 
-  // Should we defer it?
+  // Should we defer processing the card?
+  //
+  // Previously the result from the insert_cache call would be
+  // either card_ptr (implying that card_ptr was currently "cold"),
+  // null (meaning we had inserted the card ptr into the "hot"
+  // cache, which had some headroom), or a "hot" card ptr
+  // extracted from the "hot" cache.
+  //
+  // Now that the _card_counts cache in the ConcurrentG1Refine
+  // instance is an evicting hash table, the result we get back
+  // could be from evicting the card ptr in an already occupied
+  // bucket (in which case we have replaced the card ptr in the
+  // bucket with card_ptr and "defer" is set to false). To avoid
+  // having a data structure (updates to which would need a lock)
+  // to hold these unprocessed dirty cards, we need to immediately
+  // process card_ptr. The actions needed to be taken on return
+  // from cache_insert are summarized in the following table:
+  //
+  // res      defer   action
+  // --------------------------------------------------------------
+  // null     false   card evicted from _card_counts & replaced with
+  //                  card_ptr; evicted ptr added to hot cache.
+  //                  No need to process res; immediately process card_ptr
+  //
+  // null     true    card not evicted from _card_counts; card_ptr added
+  //                  to hot cache.
+  //                  Nothing to do.
+  //
+  // non-null false   card evicted from _card_counts & replaced with
+  //                  card_ptr; evicted ptr is currently "cold" or
+  //                  caused an eviction from the hot cache.
+  //                  Immediately process res; process card_ptr.
+  //
+  // non-null true    card not evicted from _card_counts; card_ptr is
+  //                  currently cold, or caused an eviction from hot
+  //                  cache.
+  //                  Immediately process res; no need to process card_ptr.
+
+  jbyte* res = card_ptr;
+  bool defer = false;
   if (_cg1r->use_cache()) {
-    card_ptr = _cg1r->cache_insert(card_ptr);
-    // If it was not an eviction, nothing to do.
-    if (card_ptr == NULL) return;
-
-    // OK, we have to reset the card start, region, etc.
-    start = _ct_bs->addr_for(card_ptr);
-    r = _g1->heap_region_containing(start);
-    if (r == NULL) {
-      guarantee(_g1->is_in_permanent(start), "Or else where?");
-      return;  // Not in the G1 heap (might be in perm, for example.)
+    jbyte* res = _cg1r->cache_insert(card_ptr, &defer);
+    if (res != NULL && (res != card_ptr || defer)) {
+      start = _ct_bs->addr_for(res);
+      r = _g1->heap_region_containing(start);
+      if (r == NULL) {
+        assert(_g1->is_in_permanent(start), "Or else where?");
+      } else {
+        guarantee(!r->is_young(), "It was evicted in the current minor cycle.");
+        // Process card pointer we get back from the hot card cache
+        concurrentRefineOneCard_impl(res, worker_i);
+      }
     }
-    guarantee(!r->is_young(), "It was evicted in the current minor cycle.");
   }
 
-  HeapWord* end   = _ct_bs->addr_for(card_ptr + 1);
-  MemRegion dirtyRegion(start, end);
-
-#if CARD_REPEAT_HISTO
-  init_ct_freq_table(_g1->g1_reserved_obj_bytes());
-  ct_freq_note_card(_ct_bs->index_for(start));
-#endif
-
-  UpdateRSOopClosure update_rs_oop_cl(this, worker_i);
-  update_rs_oop_cl.set_from(r);
-  FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r, &update_rs_oop_cl);
-
-  // Undirty the card.
-  *card_ptr = CardTableModRefBS::clean_card_val();
-  // We must complete this write before we do any of the reads below.
-  OrderAccess::storeload();
-  // And process it, being careful of unallocated portions of TLAB's.
-  HeapWord* stop_point =
-    r->oops_on_card_seq_iterate_careful(dirtyRegion,
-                                        &filter_then_update_rs_oop_cl);
-  // If stop_point is non-null, then we encountered an unallocated region
-  // (perhaps the unfilled portion of a TLAB.)  For now, we'll dirty the
-  // card and re-enqueue: if we put off the card until a GC pause, then the
-  // unallocated portion will be filled in.  Alternatively, we might try
-  // the full complexity of the technique used in "regular" precleaning.
-  if (stop_point != NULL) {
-    // The card might have gotten re-dirtied and re-enqueued while we
-    // worked.  (In fact, it's pretty likely.)
-    if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
-      *card_ptr = CardTableModRefBS::dirty_card_val();
-      MutexLockerEx x(Shared_DirtyCardQ_lock,
-                      Mutex::_no_safepoint_check_flag);
-      DirtyCardQueue* sdcq =
-        JavaThread::dirty_card_queue_set().shared_dirty_card_queue();
-      sdcq->enqueue(card_ptr);
-    }
-  } else {
-    out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region());
-    _conc_refine_cards++;
+  if (!defer) {
+    concurrentRefineOneCard_impl(card_ptr, worker_i);
   }
 }
 
--- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -157,6 +157,10 @@
     }
   }
 
+  // The routine that performs the actual work of refining a dirty
+  // card.
+  void concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i);
+
 protected:
   template <class T> void write_ref_nv(HeapRegion* from, T* p);
   template <class T> void par_write_ref_nv(HeapRegion* from, T* p, int tid);
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -34,7 +34,7 @@
   product(intx, G1ConfidencePercent, 50,                                    \
           "Confidence level for MMU/pause predictions")                     \
                                                                             \
-  develop(intx, G1MarkingOverheadPercent, 0,                                   \
+  develop(intx, G1MarkingOverheadPercent, 0,                                \
           "Overhead of concurrent marking")                                 \
                                                                             \
   develop(bool, G1AccountConcurrentOverhead, false,                         \
@@ -47,7 +47,7 @@
   develop(bool, G1Gen, true,                                                \
           "If true, it will enable the generational G1")                    \
                                                                             \
-  develop(intx, G1GCPercent, 10,                                                \
+  develop(intx, G1GCPercent, 10,                                            \
           "The desired percent time spent on GC")                           \
                                                                             \
   develop(intx, G1PolicyVerbose, 0,                                         \
@@ -74,6 +74,12 @@
   diagnostic(bool, G1SummarizeRSetStats, false,                             \
           "Summarize remembered set processing info")                       \
                                                                             \
+  diagnostic(intx, G1SummarizeRSetStatsPeriod, 0,                           \
+          "The period (in number of GCs) at which we will generate "        \
+          "update buffer processing info "                                  \
+          "(0 means do not periodically generate this info); "              \
+          "it also requires -XX:+G1SummarizeRSetStats")                     \
+                                                                            \
   diagnostic(bool, G1SummarizeZFStats, false,                               \
           "Summarize zero-filling info")                                    \
                                                                             \
@@ -167,17 +173,20 @@
   develop(bool, G1DisablePostBarrier, false,                                \
           "Disable generation of post-barrier (i.e., RS barrier)   ")       \
                                                                             \
-  product(intx, G1DirtyCardQueueMax, 30,                                    \
-          "Maximum number of completed RS buffers before mutator threads "  \
-          "start processing them.")                                         \
+  product(intx, G1UpdateBufferSize, 256,                                    \
+          "Size of an update buffer")                                       \
+                                                                            \
+  product(intx, G1UpdateBufferQueueProcessingThreshold, 5,                  \
+          "Number of enqueued update buffers that will "                    \
+          "trigger concurrent processing")                                  \
+                                                                            \
+  product(intx, G1UpdateBufferQueueMaxLength, 30,                           \
+          "Maximum number of enqueued update buffers before mutator "       \
+          "threads start processing new ones instead of enqueueing them")   \
                                                                             \
   develop(intx, G1ConcRSLogCacheSize, 10,                                   \
           "Log base 2 of the length of conc RS hot-card cache.")            \
                                                                             \
-  develop(bool, G1ConcRSCountTraversals, false,                             \
-          "If true, gather data about the number of times CR traverses "    \
-          "cards ")                                                         \
-                                                                            \
   develop(intx, G1ConcRSHotCardLimit, 4,                                    \
           "The threshold that defines (>=) a hot card.")                    \
                                                                             \
@@ -251,6 +260,10 @@
                                                                             \
   product(uintx, G1ParallelRSetThreads, 0,                                  \
           "If non-0 is the number of parallel rem set update threads, "     \
-          "otherwise the value is determined ergonomically.")
+          "otherwise the value is determined ergonomically.")               \
+                                                                            \
+  develop(intx, G1CardCountCacheExpandThreshold, 16,                        \
+          "Expand the card count cache if the number of collisions for "    \
+          "a particular entry exceeds this value.")
 
 G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
--- a/src/share/vm/gc_implementation/includeDB_gc_g1	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/gc_implementation/includeDB_gc_g1	Thu Aug 13 17:59:05 2009 -0700
@@ -45,11 +45,14 @@
 concurrentG1Refine.cpp			concurrentG1RefineThread.hpp
 concurrentG1Refine.cpp			copy.hpp
 concurrentG1Refine.cpp			g1CollectedHeap.inline.hpp
+concurrentG1Refine.cpp                  g1CollectorPolicy.hpp
 concurrentG1Refine.cpp			g1RemSet.hpp
 concurrentG1Refine.cpp			space.inline.hpp
+concurrentG1Refine.cpp                  heapRegionSeq.inline.hpp
 
 concurrentG1Refine.hpp			globalDefinitions.hpp
 concurrentG1Refine.hpp			allocation.hpp
+concurrentG1Refine.hpp                  cardTableModRefBS.hpp
 concurrentG1Refine.hpp			thread.hpp
 
 concurrentG1RefineThread.cpp		concurrentG1Refine.hpp
--- a/src/share/vm/includeDB_core	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/includeDB_core	Thu Aug 13 17:59:05 2009 -0700
@@ -872,6 +872,7 @@
 classFileParser.cpp                     symbolOop.hpp
 classFileParser.cpp                     symbolTable.hpp
 classFileParser.cpp                     systemDictionary.hpp
+classFileParser.cpp                     threadService.hpp
 classFileParser.cpp                     timer.hpp
 classFileParser.cpp                     universe.inline.hpp
 classFileParser.cpp                     verificationType.hpp
@@ -924,6 +925,7 @@
 classLoader.cpp                         symbolOop.hpp
 classLoader.cpp                         systemDictionary.hpp
 classLoader.cpp                         threadCritical.hpp
+classLoader.cpp                         threadService.hpp
 classLoader.cpp                         timer.hpp
 classLoader.cpp                         universe.inline.hpp
 classLoader.cpp                         vmSymbols.hpp
@@ -4019,6 +4021,7 @@
 systemDictionary.cpp                    resolutionErrors.hpp
 systemDictionary.cpp                    signature.hpp
 systemDictionary.cpp                    systemDictionary.hpp
+systemDictionary.cpp                    threadService.hpp
 systemDictionary.cpp                    typeArrayKlass.hpp
 systemDictionary.cpp                    vmSymbols.hpp
 
--- a/src/share/vm/interpreter/abstractInterpreter.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/interpreter/abstractInterpreter.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -122,11 +122,15 @@
   static int        size_top_interpreter_activation(methodOop method);
 
   // Deoptimization support
-  static address    continuation_for(methodOop method,
-                                     address bcp,
-                                     int callee_parameters,
-                                     bool is_top_frame,
-                                     bool& use_next_mdp);
+  // Compute the entry address for continuation after
+  static address deopt_continue_after_entry(methodOop method,
+                                            address bcp,
+                                            int callee_parameters,
+                                            bool is_top_frame);
+  // Compute the entry address for reexecution
+  static address deopt_reexecute_entry(methodOop method, address bcp);
+  // Deoptimization should reexecute this bytecode
+  static bool    bytecode_should_reexecute(Bytecodes::Code code);
 
   // share implementation of size_activation and layout_activation:
   static int        size_activation(methodOop method,
--- a/src/share/vm/interpreter/interpreter.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/interpreter/interpreter.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -284,76 +284,19 @@
 //------------------------------------------------------------------------------------------------------------------------
 // Deoptimization support
 
-// If deoptimization happens, this method returns the point where to continue in
-// interpreter. For calls (invokexxxx, newxxxx) the continuation is at next
-// bci and the top of stack is in eax/edx/FPU tos.
-// For putfield/getfield, put/getstatic, the continuation is at the same
-// bci and the TOS is on stack.
-
-// Note: deopt_entry(type, 0) means reexecute bytecode
-//       deopt_entry(type, length) means continue at next bytecode
-
-address AbstractInterpreter::continuation_for(methodOop method, address bcp, int callee_parameters, bool is_top_frame, bool& use_next_mdp) {
+// If deoptimization happens, this function returns the point of next bytecode to continue execution
+address AbstractInterpreter::deopt_continue_after_entry(methodOop method, address bcp, int callee_parameters, bool is_top_frame) {
   assert(method->contains(bcp), "just checkin'");
   Bytecodes::Code code   = Bytecodes::java_code_at(bcp);
+  assert(!Interpreter::bytecode_should_reexecute(code), "should not reexecute");
   int             bci    = method->bci_from(bcp);
   int             length = -1; // initial value for debugging
   // compute continuation length
   length = Bytecodes::length_at(bcp);
   // compute result type
   BasicType type = T_ILLEGAL;
-  // when continuing after a compiler safepoint, re-execute the bytecode
-  // (an invoke is continued after the safepoint)
-  use_next_mdp = true;
+
   switch (code) {
-    case Bytecodes::_lookupswitch:
-    case Bytecodes::_tableswitch:
-    case Bytecodes::_fast_binaryswitch:
-    case Bytecodes::_fast_linearswitch:
-    // recompute condtional expression folded into _if<cond>
-    case Bytecodes::_lcmp      :
-    case Bytecodes::_fcmpl     :
-    case Bytecodes::_fcmpg     :
-    case Bytecodes::_dcmpl     :
-    case Bytecodes::_dcmpg     :
-    case Bytecodes::_ifnull    :
-    case Bytecodes::_ifnonnull :
-    case Bytecodes::_goto      :
-    case Bytecodes::_goto_w    :
-    case Bytecodes::_ifeq      :
-    case Bytecodes::_ifne      :
-    case Bytecodes::_iflt      :
-    case Bytecodes::_ifge      :
-    case Bytecodes::_ifgt      :
-    case Bytecodes::_ifle      :
-    case Bytecodes::_if_icmpeq :
-    case Bytecodes::_if_icmpne :
-    case Bytecodes::_if_icmplt :
-    case Bytecodes::_if_icmpge :
-    case Bytecodes::_if_icmpgt :
-    case Bytecodes::_if_icmple :
-    case Bytecodes::_if_acmpeq :
-    case Bytecodes::_if_acmpne :
-    // special cases
-    case Bytecodes::_getfield  :
-    case Bytecodes::_putfield  :
-    case Bytecodes::_getstatic :
-    case Bytecodes::_putstatic :
-    case Bytecodes::_aastore   :
-      // reexecute the operation and TOS value is on stack
-      assert(is_top_frame, "must be top frame");
-      use_next_mdp = false;
-      return Interpreter::deopt_entry(vtos, 0);
-      break;
-
-#ifdef COMPILER1
-    case Bytecodes::_athrow    :
-      assert(is_top_frame, "must be top frame");
-      use_next_mdp = false;
-      return Interpreter::rethrow_exception_entry();
-      break;
-#endif /* COMPILER1 */
-
     case Bytecodes::_invokevirtual  :
     case Bytecodes::_invokespecial  :
     case Bytecodes::_invokestatic   :
@@ -392,6 +335,70 @@
     : Interpreter::return_entry(as_TosState(type), length);
 }
 
+// If deoptimization happens, this function returns the point where the interpreter reexecutes
+// the bytecode.
+// Note: Bytecodes::_athrow is a special case in that it does not return
+//       Interpreter::deopt_entry(vtos, 0) like others
+address AbstractInterpreter::deopt_reexecute_entry(methodOop method, address bcp) {
+  assert(method->contains(bcp), "just checkin'");
+  Bytecodes::Code code   = Bytecodes::java_code_at(bcp);
+#ifdef COMPILER1
+  if(code == Bytecodes::_athrow ) {
+    return Interpreter::rethrow_exception_entry();
+  }
+#endif /* COMPILER1 */
+  return Interpreter::deopt_entry(vtos, 0);
+}
+
+// If deoptimization happens, the interpreter should reexecute these bytecodes.
+// This function mainly helps the compilers to set up the reexecute bit.
+bool AbstractInterpreter::bytecode_should_reexecute(Bytecodes::Code code) {
+  switch (code) {
+    case Bytecodes::_lookupswitch:
+    case Bytecodes::_tableswitch:
+    case Bytecodes::_fast_binaryswitch:
+    case Bytecodes::_fast_linearswitch:
+    // recompute condtional expression folded into _if<cond>
+    case Bytecodes::_lcmp      :
+    case Bytecodes::_fcmpl     :
+    case Bytecodes::_fcmpg     :
+    case Bytecodes::_dcmpl     :
+    case Bytecodes::_dcmpg     :
+    case Bytecodes::_ifnull    :
+    case Bytecodes::_ifnonnull :
+    case Bytecodes::_goto      :
+    case Bytecodes::_goto_w    :
+    case Bytecodes::_ifeq      :
+    case Bytecodes::_ifne      :
+    case Bytecodes::_iflt      :
+    case Bytecodes::_ifge      :
+    case Bytecodes::_ifgt      :
+    case Bytecodes::_ifle      :
+    case Bytecodes::_if_icmpeq :
+    case Bytecodes::_if_icmpne :
+    case Bytecodes::_if_icmplt :
+    case Bytecodes::_if_icmpge :
+    case Bytecodes::_if_icmpgt :
+    case Bytecodes::_if_icmple :
+    case Bytecodes::_if_acmpeq :
+    case Bytecodes::_if_acmpne :
+    // special cases
+    case Bytecodes::_getfield  :
+    case Bytecodes::_putfield  :
+    case Bytecodes::_getstatic :
+    case Bytecodes::_putstatic :
+    case Bytecodes::_aastore   :
+#ifdef COMPILER1
+    //special case of reexecution
+    case Bytecodes::_athrow    :
+#endif
+      return true;
+
+    default:
+      return false;
+  }
+}
+
 void AbstractInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
   // Quick & dirty stack overflow checking: bang the stack & handle trap.
   // Note that we do the banging after the frame is setup, since the exception
--- a/src/share/vm/interpreter/templateInterpreter.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/interpreter/templateInterpreter.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -605,28 +605,41 @@
   }
 }
 
-// If deoptimization happens, this method returns the point where to continue in
-// interpreter. For calls (invokexxxx, newxxxx) the continuation is at next
-// bci and the top of stack is in eax/edx/FPU tos.
-// For putfield/getfield, put/getstatic, the continuation is at the same
-// bci and the TOS is on stack.
+//------------------------------------------------------------------------------------------------------------------------
+// Deoptimization support
 
-// Note: deopt_entry(type, 0) means reexecute bytecode
-//       deopt_entry(type, length) means continue at next bytecode
+// If deoptimization happens, this function returns the point of next bytecode to continue execution
+address TemplateInterpreter::deopt_continue_after_entry(methodOop method, address bcp, int callee_parameters, bool is_top_frame) {
+  return AbstractInterpreter::deopt_continue_after_entry(method, bcp, callee_parameters, is_top_frame);
+}
 
-address TemplateInterpreter::continuation_for(methodOop method, address bcp, int callee_parameters, bool is_top_frame, bool& use_next_mdp) {
+// If deoptimization happens, this function returns the point where the interpreter reexecutes
+// the bytecode.
+// Note: Bytecodes::_athrow (C1 only) and Bytecodes::_return are the special cases
+//       that do not return "Interpreter::deopt_entry(vtos, 0)"
+address TemplateInterpreter::deopt_reexecute_entry(methodOop method, address bcp) {
   assert(method->contains(bcp), "just checkin'");
   Bytecodes::Code code   = Bytecodes::java_code_at(bcp);
   if (code == Bytecodes::_return) {
-      // This is used for deopt during registration of finalizers
-      // during Object.<init>.  We simply need to resume execution at
-      // the standard return vtos bytecode to pop the frame normally.
-      // reexecuting the real bytecode would cause double registration
-      // of the finalizable object.
-      assert(is_top_frame, "must be on top");
-      return _normal_table.entry(Bytecodes::_return).entry(vtos);
+    // This is used for deopt during registration of finalizers
+    // during Object.<init>.  We simply need to resume execution at
+    // the standard return vtos bytecode to pop the frame normally.
+    // reexecuting the real bytecode would cause double registration
+    // of the finalizable object.
+    return _normal_table.entry(Bytecodes::_return).entry(vtos);
   } else {
-    return AbstractInterpreter::continuation_for(method, bcp, callee_parameters, is_top_frame, use_next_mdp);
+    return AbstractInterpreter::deopt_reexecute_entry(method, bcp);
+  }
+}
+
+// If deoptimization happens, the interpreter should reexecute this bytecode.
+// This function mainly helps the compilers to set up the reexecute bit.
+bool TemplateInterpreter::bytecode_should_reexecute(Bytecodes::Code code) {
+  if (code == Bytecodes::_return) {
+    //Yes, we consider Bytecodes::_return as a special case of reexecution
+    return true;
+  } else {
+    return AbstractInterpreter::bytecode_should_reexecute(code);
   }
 }
 
--- a/src/share/vm/interpreter/templateInterpreter.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/interpreter/templateInterpreter.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -171,11 +171,15 @@
   static void       ignore_safepoints();                        // ignores safepoints
 
   // Deoptimization support
-  static address    continuation_for(methodOop method,
-                                     address bcp,
-                                     int callee_parameters,
-                                     bool is_top_frame,
-                                     bool& use_next_mdp);
+  // Compute the entry address for continuation after
+  static address deopt_continue_after_entry(methodOop method,
+                                            address bcp,
+                                            int callee_parameters,
+                                            bool is_top_frame);
+  // Deoptimization should reexecute this bytecode
+  static bool    bytecode_should_reexecute(Bytecodes::Code code);
+  // Compute the address for reexecution
+  static address deopt_reexecute_entry(methodOop method, address bcp);
 
 #include "incls/_templateInterpreter_pd.hpp.incl"
 
--- a/src/share/vm/memory/cardTableModRefBS.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/memory/cardTableModRefBS.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -253,8 +253,16 @@
     }
 #endif
     // The guard page is always committed and should not be committed over.
-    HeapWord* const new_end_for_commit = MIN2(new_end_aligned,
-                                              _guard_region.start());
+    // "guarded" is used for assertion checking below and recalls the fact
+    // that the would-be end of the new committed region would have
+    // penetrated the guard page.
+    HeapWord* new_end_for_commit = new_end_aligned;
+
+    DEBUG_ONLY(bool guarded = false;)
+    if (new_end_for_commit > _guard_region.start()) {
+      new_end_for_commit = _guard_region.start();
+      DEBUG_ONLY(guarded = true;)
+    }
 
     if (new_end_for_commit > cur_committed.end()) {
       // Must commit new pages.
@@ -302,7 +310,7 @@
     // not the aligned up expanded region.
     // jbyte* const end = byte_after(new_region.last());
     jbyte* const end = (jbyte*) new_end_for_commit;
-    assert((end >= byte_after(new_region.last())) || collided,
+    assert((end >= byte_after(new_region.last())) || collided || guarded,
       "Expect to be beyond new region unless impacting another region");
     // do nothing if we resized downward.
 #ifdef ASSERT
--- a/src/share/vm/memory/genCollectedHeap.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/memory/genCollectedHeap.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -482,6 +482,10 @@
     for (int i = starting_level; i <= max_level; i++) {
       if (_gens[i]->should_collect(full, size, is_tlab)) {
         if (i == n_gens() - 1) {  // a major collection is to happen
+          if (!complete) {
+            // The full_collections increment was missed above.
+            increment_total_full_collections();
+          }
           pre_full_gc_dump();    // do any pre full gc dumps
         }
         // Timer for individual generations. Last argument is false: no CR
--- a/src/share/vm/memory/serialize.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/memory/serialize.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -51,7 +51,7 @@
   soc->do_tag(arrayOopDesc::base_offset_in_bytes(T_BYTE));
   soc->do_tag(sizeof(constantPoolOopDesc));
   soc->do_tag(sizeof(constantPoolCacheOopDesc));
-  soc->do_tag(objArrayOopDesc::base_offset_in_bytes(T_BYTE));
+  soc->do_tag(objArrayOopDesc::base_offset_in_bytes());
   soc->do_tag(typeArrayOopDesc::base_offset_in_bytes(T_BYTE));
   soc->do_tag(sizeof(symbolOopDesc));
   soc->do_tag(sizeof(klassOopDesc));
--- a/src/share/vm/oops/arrayKlass.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/oops/arrayKlass.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -140,6 +140,7 @@
     THROW_0(vmSymbols::java_lang_NegativeArraySizeException());
   }
   if (length > arrayOopDesc::max_array_length(T_ARRAY)) {
+    report_java_out_of_memory("Requested array size exceeds VM limit");
     THROW_OOP_0(Universe::out_of_memory_error_array_size());
   }
   int size = objArrayOopDesc::object_size(length);
--- a/src/share/vm/oops/instanceKlass.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/oops/instanceKlass.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -158,9 +158,6 @@
   // timer handles recursion
   assert(THREAD->is_Java_thread(), "non-JavaThread in link_class_impl");
   JavaThread* jt = (JavaThread*)THREAD;
-  PerfTraceTimedEvent vmtimer(ClassLoader::perf_class_link_time(),
-                        ClassLoader::perf_classes_linked(),
-                        jt->get_thread_stat()->class_link_recursion_count_addr());
 
   // link super class before linking this class
   instanceKlassHandle super(THREAD, this_oop->super());
@@ -194,6 +191,15 @@
     return true;
   }
 
+  // trace only the link time for this klass that includes
+  // the verification time
+  PerfClassTraceTime vmtimer(ClassLoader::perf_class_link_time(),
+                             ClassLoader::perf_class_link_selftime(),
+                             ClassLoader::perf_classes_linked(),
+                             jt->get_thread_stat()->perf_recursion_counts_addr(),
+                             jt->get_thread_stat()->perf_timers_addr(),
+                             PerfClassTraceTime::CLASS_LINK);
+
   // verification & rewriting
   {
     ObjectLocker ol(this_oop, THREAD);
@@ -203,12 +209,14 @@
     if (!this_oop->is_linked()) {
       if (!this_oop->is_rewritten()) {
         {
-          assert(THREAD->is_Java_thread(), "non-JavaThread in link_class_impl");
-          JavaThread* jt = (JavaThread*)THREAD;
           // Timer includes any side effects of class verification (resolution,
           // etc), but not recursive entry into verify_code().
-          PerfTraceTime timer(ClassLoader::perf_class_verify_time(),
-                            jt->get_thread_stat()->class_verify_recursion_count_addr());
+          PerfClassTraceTime timer(ClassLoader::perf_class_verify_time(),
+                                   ClassLoader::perf_class_verify_selftime(),
+                                   ClassLoader::perf_classes_verified(),
+                                   jt->get_thread_stat()->perf_recursion_counts_addr(),
+                                   jt->get_thread_stat()->perf_timers_addr(),
+                                   PerfClassTraceTime::CLASS_VERIFY);
           bool verify_ok = verify_code(this_oop, throw_verifyerror, THREAD);
           if (!verify_ok) {
             return false;
@@ -350,9 +358,12 @@
     JavaThread* jt = (JavaThread*)THREAD;
     // Timer includes any side effects of class initialization (resolution,
     // etc), but not recursive entry into call_class_initializer().
-    PerfTraceTimedEvent timer(ClassLoader::perf_class_init_time(),
-                              ClassLoader::perf_classes_inited(),
-                              jt->get_thread_stat()->class_init_recursion_count_addr());
+    PerfClassTraceTime timer(ClassLoader::perf_class_init_time(),
+                             ClassLoader::perf_class_init_selftime(),
+                             ClassLoader::perf_classes_inited(),
+                             jt->get_thread_stat()->perf_recursion_counts_addr(),
+                             jt->get_thread_stat()->perf_timers_addr(),
+                             PerfClassTraceTime::CLASS_CLINIT);
     this_oop->call_class_initializer(THREAD);
   }
 
@@ -497,6 +508,7 @@
 objArrayOop instanceKlass::allocate_objArray(int n, int length, TRAPS) {
   if (length < 0) THROW_0(vmSymbols::java_lang_NegativeArraySizeException());
   if (length > arrayOopDesc::max_array_length(T_OBJECT)) {
+    report_java_out_of_memory("Requested array size exceeds VM limit");
     THROW_OOP_0(Universe::out_of_memory_error_array_size());
   }
   int size = objArrayOopDesc::object_size(length);
--- a/src/share/vm/oops/objArrayKlass.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/oops/objArrayKlass.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -39,6 +39,7 @@
       assert(a->is_parsable(), "Can't publish unless parsable");
       return a;
     } else {
+      report_java_out_of_memory("Requested array size exceeds VM limit");
       THROW_OOP_0(Universe::out_of_memory_error_array_size());
     }
   } else {
--- a/src/share/vm/oops/objArrayOop.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/oops/objArrayOop.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -38,6 +38,11 @@
   }
 
  public:
+  // Returns the offset of the first element.
+  static int base_offset_in_bytes() {
+    return arrayOopDesc::base_offset_in_bytes(T_OBJECT);
+  }
+
   // base is the address following the header.
   HeapWord* base() const      { return (HeapWord*) arrayOopDesc::base(T_OBJECT); }
 
--- a/src/share/vm/oops/typeArrayKlass.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/oops/typeArrayKlass.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -80,6 +80,7 @@
       assert(t->is_parsable(), "Don't publish unless parsable");
       return t;
     } else {
+      report_java_out_of_memory("Requested array size exceeds VM limit");
       THROW_OOP_0(Universe::out_of_memory_error_array_size());
     }
   } else {
--- a/src/share/vm/opto/block.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/opto/block.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -910,7 +910,16 @@
               !(b->head()->is_Loop() && n->is_Phi()) &&
               // See (+++) comment in reg_split.cpp
               !(n->jvms() != NULL && n->jvms()->is_monitor_use(k)) ) {
-            assert( b->find_node(def) < j, "uses must follow definitions" );
+            bool is_loop = false;
+            if (n->is_Phi()) {
+              for( uint l = 1; l < def->req(); l++ ) {
+                if (n == def->in(l)) {
+                  is_loop = true;
+                  break; // Some kind of loop
+                }
+              }
+            }
+            assert( is_loop || b->find_node(def) < j, "uses must follow definitions" );
           }
           if( def->is_SafePointScalarObject() ) {
             assert(_bbs[def->_idx] == b, "SafePointScalarObject Node should be at the same block as its SafePoint node");
--- a/src/share/vm/opto/bytecodeInfo.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/opto/bytecodeInfo.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -37,6 +37,7 @@
     // Keep a private copy of the caller_jvms:
     _caller_jvms = new (C) JVMState(caller_jvms->method(), caller_tree->caller_jvms());
     _caller_jvms->set_bci(caller_jvms->bci());
+    assert(!caller_jvms->should_reexecute(), "there should be no reexecute bytecode with inlining");
   }
   assert(_caller_jvms->same_calls_as(caller_jvms), "consistent JVMS");
   assert((caller_tree == NULL ? 0 : caller_tree->inline_depth() + 1) == inline_depth(), "correct (redundant) depth parameter");
--- a/src/share/vm/opto/callnode.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/opto/callnode.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -223,6 +223,7 @@
 JVMState::JVMState(ciMethod* method, JVMState* caller) {
   assert(method != NULL, "must be valid call site");
   _method = method;
+  _reexecute = Reexecute_Undefined;
   debug_only(_bci = -99);  // random garbage value
   debug_only(_map = (SafePointNode*)-1);
   _caller = caller;
@@ -237,6 +238,7 @@
 JVMState::JVMState(int stack_size) {
   _method = NULL;
   _bci = InvocationEntryBci;
+  _reexecute = Reexecute_Undefined;
   debug_only(_map = (SafePointNode*)-1);
   _caller = NULL;
   _depth  = 1;
@@ -269,6 +271,7 @@
     if (p->_method != q->_method)    return false;
     if (p->_method == NULL)          return true;   // bci is irrelevant
     if (p->_bci    != q->_bci)       return false;
+    if (p->_reexecute != q->_reexecute)  return false;
     p = p->caller();
     q = q->caller();
     if (p == q)                      return true;
@@ -490,6 +493,7 @@
     if (!printed)
       _method->print_short_name(st);
     st->print(" @ bci:%d",_bci);
+    st->print(" reexecute:%s", _reexecute==Reexecute_True?"true":"false");
   } else {
     st->print(" runtime stub");
   }
@@ -509,8 +513,8 @@
     }
     _map->dump(2);
   }
-  st->print("JVMS depth=%d loc=%d stk=%d mon=%d scalar=%d end=%d mondepth=%d sp=%d bci=%d method=",
-             depth(), locoff(), stkoff(), monoff(), scloff(), endoff(), monitor_depth(), sp(), bci());
+  st->print("JVMS depth=%d loc=%d stk=%d mon=%d scalar=%d end=%d mondepth=%d sp=%d bci=%d reexecute=%s method=",
+             depth(), locoff(), stkoff(), monoff(), scloff(), endoff(), monitor_depth(), sp(), bci(), should_reexecute()?"true":"false");
   if (_method == NULL) {
     st->print_cr("(none)");
   } else {
@@ -537,6 +541,7 @@
 JVMState* JVMState::clone_shallow(Compile* C) const {
   JVMState* n = has_method() ? new (C) JVMState(_method, _caller) : new (C) JVMState(0);
   n->set_bci(_bci);
+  n->_reexecute = _reexecute;
   n->set_locoff(_locoff);
   n->set_stkoff(_stkoff);
   n->set_monoff(_monoff);
--- a/src/share/vm/opto/callnode.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/opto/callnode.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -178,6 +178,13 @@
 // This provides a way to map the optimized program back into the interpreter,
 // or to let the GC mark the stack.
 class JVMState : public ResourceObj {
+public:
+  typedef enum {
+    Reexecute_Undefined = -1, // not defined -- will be translated into false later
+    Reexecute_False     =  0, // false       -- do not reexecute
+    Reexecute_True      =  1  // true        -- reexecute the bytecode
+  } ReexecuteState; //Reexecute State
+
 private:
   JVMState*         _caller;    // List pointer for forming scope chains
   uint              _depth;     // One mroe than caller depth, or one.
@@ -188,10 +195,12 @@
   uint              _endoff;    // Offset to end of input edge mapping
   uint              _sp;        // Jave Expression Stack Pointer for this state
   int               _bci;       // Byte Code Index of this JVM point
+  ReexecuteState    _reexecute; // Whether this bytecode need to be re-executed
   ciMethod*         _method;    // Method Pointer
   SafePointNode*    _map;       // Map node associated with this scope
 public:
   friend class Compile;
+  friend class PreserveReexecuteState;
 
   // Because JVMState objects live over the entire lifetime of the
   // Compile object, they are allocated into the comp_arena, which
@@ -222,16 +231,18 @@
   bool        is_mon(uint i) const { return i >= _monoff && i < _scloff; }
   bool        is_scl(uint i) const { return i >= _scloff && i < _endoff; }
 
-  uint              sp()     const { return _sp; }
-  int               bci()    const { return _bci; }
-  bool          has_method() const { return _method != NULL; }
-  ciMethod*         method() const { assert(has_method(), ""); return _method; }
-  JVMState*         caller() const { return _caller; }
-  SafePointNode*    map()    const { return _map; }
-  uint              depth()  const { return _depth; }
-  uint        debug_start()  const; // returns locoff of root caller
-  uint        debug_end()    const; // returns endoff of self
-  uint        debug_size()   const {
+  uint                      sp() const { return _sp; }
+  int                      bci() const { return _bci; }
+  bool        should_reexecute() const { return _reexecute==Reexecute_True; }
+  bool  is_reexecute_undefined() const { return _reexecute==Reexecute_Undefined; }
+  bool              has_method() const { return _method != NULL; }
+  ciMethod*             method() const { assert(has_method(), ""); return _method; }
+  JVMState*             caller() const { return _caller; }
+  SafePointNode*           map() const { return _map; }
+  uint                   depth() const { return _depth; }
+  uint             debug_start() const; // returns locoff of root caller
+  uint               debug_end() const; // returns endoff of self
+  uint              debug_size() const {
     return loc_size() + sp() + mon_size() + scl_size();
   }
   uint        debug_depth()  const; // returns sum of debug_size values at all depths
@@ -267,7 +278,9 @@
   }
   void              set_map(SafePointNode *map) { _map = map; }
   void              set_sp(uint sp) { _sp = sp; }
-  void              set_bci(int bci) { _bci = bci; }
+                    // _reexecute is initialized to "undefined" for a new bci
+  void              set_bci(int bci) {if(_bci != bci)_reexecute=Reexecute_Undefined; _bci = bci; }
+  void              set_should_reexecute(bool reexec) {_reexecute = reexec ? Reexecute_True : Reexecute_False;}
 
   // Miscellaneous utility functions
   JVMState* clone_deep(Compile* C) const;    // recursively clones caller chain
--- a/src/share/vm/opto/cfgnode.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/opto/cfgnode.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -1792,15 +1792,12 @@
   if (UseCompressedOops && can_reshape && progress == NULL) {
     bool may_push = true;
     bool has_decodeN = false;
-    Node* in_decodeN = NULL;
     for (uint i=1; i<req(); ++i) {// For all paths in
       Node *ii = in(i);
       if (ii->is_DecodeN() && ii->bottom_type() == bottom_type()) {
-        // Note: in_decodeN is used only to define the type of new phi.
-        // Find a non dead path otherwise phi type will be wrong.
+        // Do optimization if a non dead path exist.
         if (ii->in(1)->bottom_type() != Type::TOP) {
           has_decodeN = true;
-          in_decodeN = ii->in(1);
         }
       } else if (!ii->is_Phi()) {
         may_push = false;
@@ -1809,7 +1806,9 @@
 
     if (has_decodeN && may_push) {
       PhaseIterGVN *igvn = phase->is_IterGVN();
-      PhiNode *new_phi = PhiNode::make_blank(in(0), in_decodeN);
+      // Make narrow type for new phi.
+      const Type* narrow_t = TypeNarrowOop::make(this->bottom_type()->is_ptr());
+      PhiNode* new_phi = new (phase->C, r->req()) PhiNode(r, narrow_t);
       uint orig_cnt = req();
       for (uint i=1; i<req(); ++i) {// For all paths in
         Node *ii = in(i);
@@ -1822,7 +1821,7 @@
           if (ii->as_Phi() == this) {
             new_ii = new_phi;
           } else {
-            new_ii = new (phase->C, 2) EncodePNode(ii, in_decodeN->bottom_type());
+            new_ii = new (phase->C, 2) EncodePNode(ii, narrow_t);
             igvn->register_new_node_with_optimizer(new_ii);
           }
         }
--- a/src/share/vm/opto/graphKit.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/opto/graphKit.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -620,6 +620,16 @@
   assert(kit->stopped(), "cutout code must stop, throw, return, etc.");
 }
 
+//---------------------------PreserveReexecuteState----------------------------
+PreserveReexecuteState::PreserveReexecuteState(GraphKit* kit) {
+  _kit    =    kit;
+  _sp     =    kit->sp();
+  _reexecute = kit->jvms()->_reexecute;
+}
+PreserveReexecuteState::~PreserveReexecuteState() {
+  _kit->jvms()->_reexecute = _reexecute;
+  _kit->set_sp(_sp);
+}
 
 //------------------------------clone_map--------------------------------------
 // Implementation of PreserveJVMState
@@ -738,6 +748,18 @@
 
 #endif //ASSERT
 
+// Helper function for enforcing certain bytecodes to reexecute if
+// deoptimization happens
+static bool should_reexecute_implied_by_bytecode(JVMState *jvms) {
+  ciMethod* cur_method = jvms->method();
+  int       cur_bci   = jvms->bci();
+  if (cur_method != NULL && cur_bci != InvocationEntryBci) {
+    Bytecodes::Code code = cur_method->java_code_at_bci(cur_bci);
+    return Interpreter::bytecode_should_reexecute(code);
+  } else
+    return false;
+}
+
 // Helper function for adding JVMState and debug information to node
 void GraphKit::add_safepoint_edges(SafePointNode* call, bool must_throw) {
   // Add the safepoint edges to the call (or other safepoint).
@@ -781,6 +803,13 @@
   JVMState* out_jvms = youngest_jvms->clone_deep(C);
   call->set_jvms(out_jvms); // Start jvms list for call node
 
+  // For a known set of bytecodes, the interpreter should reexecute them if
+  // deoptimization happens. We set the reexecute state for them here
+  if (out_jvms->is_reexecute_undefined() && //don't change if already specified
+      should_reexecute_implied_by_bytecode(out_jvms)) {
+    out_jvms->set_should_reexecute(true); //NOTE: youngest_jvms not changed
+  }
+
   // Presize the call:
   debug_only(uint non_debug_edges = call->req());
   call->add_req_batch(top(), youngest_jvms->debug_depth());
--- a/src/share/vm/opto/graphKit.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/opto/graphKit.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -763,3 +763,16 @@
   BuildCutout(GraphKit* kit, Node* p, float prob, float cnt = COUNT_UNKNOWN);
   ~BuildCutout();
 };
+
+// Helper class to preserve the original _reexecute bit and _sp and restore
+// them back
+class PreserveReexecuteState: public StackObj {
+ protected:
+  GraphKit*                 _kit;
+  uint                      _sp;
+  JVMState::ReexecuteState  _reexecute;
+
+ public:
+  PreserveReexecuteState(GraphKit* kit);
+  ~PreserveReexecuteState();
+};
--- a/src/share/vm/opto/library_call.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/opto/library_call.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -2064,7 +2064,7 @@
 
     // See if it is a narrow oop array.
     if (adr_type->isa_aryptr()) {
-      if (adr_type->offset() >= objArrayOopDesc::base_offset_in_bytes(type)) {
+      if (adr_type->offset() >= objArrayOopDesc::base_offset_in_bytes()) {
         const TypeOopPtr *elem_type = adr_type->is_aryptr()->elem()->isa_oopptr();
         if (elem_type != NULL) {
           sharpened_klass = elem_type->klass();
@@ -3169,78 +3169,85 @@
   Node* end               = is_copyOfRange? argument(2): argument(1);
   Node* array_type_mirror = is_copyOfRange? argument(3): argument(2);
 
-  _sp += nargs;  // set original stack for use by uncommon_trap
-  array_type_mirror = do_null_check(array_type_mirror, T_OBJECT);
-  original          = do_null_check(original, T_OBJECT);
-  _sp -= nargs;
-
-  // Check if a null path was taken unconditionally.
-  if (stopped())  return true;
-
-  Node* orig_length = load_array_length(original);
-
-  Node* klass_node = load_klass_from_mirror(array_type_mirror, false, nargs,
-                                            NULL, 0);
-  _sp += nargs;  // set original stack for use by uncommon_trap
-  klass_node = do_null_check(klass_node, T_OBJECT);
-  _sp -= nargs;
-
-  RegionNode* bailout = new (C, 1) RegionNode(1);
-  record_for_igvn(bailout);
-
-  // Despite the generic type of Arrays.copyOf, the mirror might be int, int[], etc.
-  // Bail out if that is so.
-  Node* not_objArray = generate_non_objArray_guard(klass_node, bailout);
-  if (not_objArray != NULL) {
-    // Improve the klass node's type from the new optimistic assumption:
-    ciKlass* ak = ciArrayKlass::make(env()->Object_klass());
-    const Type* akls = TypeKlassPtr::make(TypePtr::NotNull, ak, 0/*offset*/);
-    Node* cast = new (C, 2) CastPPNode(klass_node, akls);
-    cast->init_req(0, control());
-    klass_node = _gvn.transform(cast);
-  }
-
-  // Bail out if either start or end is negative.
-  generate_negative_guard(start, bailout, &start);
-  generate_negative_guard(end,   bailout, &end);
-
-  Node* length = end;
-  if (_gvn.type(start) != TypeInt::ZERO) {
-    length = _gvn.transform( new (C, 3) SubINode(end, start) );
-  }
-
-  // Bail out if length is negative.
-  // ...Not needed, since the new_array will throw the right exception.
-  //generate_negative_guard(length, bailout, &length);
-
-  if (bailout->req() > 1) {
-    PreserveJVMState pjvms(this);
-    set_control( _gvn.transform(bailout) );
-    _sp += nargs;  // push the arguments back on the stack
-    uncommon_trap(Deoptimization::Reason_intrinsic,
-                  Deoptimization::Action_maybe_recompile);
-  }
-
-  if (!stopped()) {
-    // How many elements will we copy from the original?
-    // The answer is MinI(orig_length - start, length).
-    Node* orig_tail = _gvn.transform( new(C, 3) SubINode(orig_length, start) );
-    Node* moved = generate_min_max(vmIntrinsics::_min, orig_tail, length);
-
-    const bool raw_mem_only = true;
-    Node* newcopy = new_array(klass_node, length, nargs, raw_mem_only);
-
-    // Generate a direct call to the right arraycopy function(s).
-    // We know the copy is disjoint but we might not know if the
-    // oop stores need checking.
-    // Extreme case:  Arrays.copyOf((Integer[])x, 10, String[].class).
-    // This will fail a store-check if x contains any non-nulls.
-    bool disjoint_bases = true;
-    bool length_never_negative = true;
-    generate_arraycopy(TypeAryPtr::OOPS, T_OBJECT,
-                       original, start, newcopy, intcon(0), moved,
-                       disjoint_bases, length_never_negative);
-
+  Node* newcopy;
+
+  //set the original stack and the reexecute bit for the interpreter to reexecute
+  //the bytecode that invokes Arrays.copyOf if deoptimization happens
+  { PreserveReexecuteState preexecs(this);
+    _sp += nargs;
+    jvms()->set_should_reexecute(true);
+
+    array_type_mirror = do_null_check(array_type_mirror, T_OBJECT);
+    original          = do_null_check(original, T_OBJECT);
+
+    // Check if a null path was taken unconditionally.
+    if (stopped())  return true;
+
+    Node* orig_length = load_array_length(original);
+
+    Node* klass_node = load_klass_from_mirror(array_type_mirror, false, 0,
+                                              NULL, 0);
+    klass_node = do_null_check(klass_node, T_OBJECT);
+
+    RegionNode* bailout = new (C, 1) RegionNode(1);
+    record_for_igvn(bailout);
+
+    // Despite the generic type of Arrays.copyOf, the mirror might be int, int[], etc.
+    // Bail out if that is so.
+    Node* not_objArray = generate_non_objArray_guard(klass_node, bailout);
+    if (not_objArray != NULL) {
+      // Improve the klass node's type from the new optimistic assumption:
+      ciKlass* ak = ciArrayKlass::make(env()->Object_klass());
+      const Type* akls = TypeKlassPtr::make(TypePtr::NotNull, ak, 0/*offset*/);
+      Node* cast = new (C, 2) CastPPNode(klass_node, akls);
+      cast->init_req(0, control());
+      klass_node = _gvn.transform(cast);
+    }
+
+    // Bail out if either start or end is negative.
+    generate_negative_guard(start, bailout, &start);
+    generate_negative_guard(end,   bailout, &end);
+
+    Node* length = end;
+    if (_gvn.type(start) != TypeInt::ZERO) {
+      length = _gvn.transform( new (C, 3) SubINode(end, start) );
+    }
+
+    // Bail out if length is negative.
+    // ...Not needed, since the new_array will throw the right exception.
+    //generate_negative_guard(length, bailout, &length);
+
+    if (bailout->req() > 1) {
+      PreserveJVMState pjvms(this);
+      set_control( _gvn.transform(bailout) );
+      uncommon_trap(Deoptimization::Reason_intrinsic,
+                    Deoptimization::Action_maybe_recompile);
+    }
+
+    if (!stopped()) {
+
+      // How many elements will we copy from the original?
+      // The answer is MinI(orig_length - start, length).
+      Node* orig_tail = _gvn.transform( new(C, 3) SubINode(orig_length, start) );
+      Node* moved = generate_min_max(vmIntrinsics::_min, orig_tail, length);
+
+      const bool raw_mem_only = true;
+      newcopy = new_array(klass_node, length, 0, raw_mem_only);
+
+      // Generate a direct call to the right arraycopy function(s).
+      // We know the copy is disjoint but we might not know if the
+      // oop stores need checking.
+      // Extreme case:  Arrays.copyOf((Integer[])x, 10, String[].class).
+      // This will fail a store-check if x contains any non-nulls.
+      bool disjoint_bases = true;
+      bool length_never_negative = true;
+      generate_arraycopy(TypeAryPtr::OOPS, T_OBJECT,
+                         original, start, newcopy, intcon(0), moved,
+                         disjoint_bases, length_never_negative);
+    }
+  } //original reexecute and sp are set back here
+
+  if(!stopped()) {
     push(newcopy);
   }
 
@@ -3992,146 +3999,159 @@
 //
 bool LibraryCallKit::inline_native_clone(bool is_virtual) {
   int nargs = 1;
-  Node* obj = null_check_receiver(callee());
-  if (stopped())  return true;
-  Node* obj_klass = load_object_klass(obj);
-  const TypeKlassPtr* tklass = _gvn.type(obj_klass)->isa_klassptr();
-  const TypeOopPtr*   toop   = ((tklass != NULL)
+  PhiNode* result_val;
+
+  //set the original stack and the reexecute bit for the interpreter to reexecute
+  //the bytecode that invokes Object.clone if deoptimization happens
+  { PreserveReexecuteState preexecs(this);
+    jvms()->set_should_reexecute(true);
+
+    //null_check_receiver will adjust _sp (push and pop)
+    Node* obj = null_check_receiver(callee());
+    if (stopped())  return true;
+
+    _sp += nargs;
+
+    Node* obj_klass = load_object_klass(obj);
+    const TypeKlassPtr* tklass = _gvn.type(obj_klass)->isa_klassptr();
+    const TypeOopPtr*   toop   = ((tklass != NULL)
                                 ? tklass->as_instance_type()
                                 : TypeInstPtr::NOTNULL);
 
-  // Conservatively insert a memory barrier on all memory slices.
-  // Do not let writes into the original float below the clone.
-  insert_mem_bar(Op_MemBarCPUOrder);
-
-  // paths into result_reg:
-  enum {
-    _slow_path = 1,     // out-of-line call to clone method (virtual or not)
-    _objArray_path,     // plain array allocation, plus arrayof_oop_arraycopy
-    _array_path,        // plain array allocation, plus arrayof_long_arraycopy
-    _instance_path,     // plain instance allocation, plus arrayof_long_arraycopy
-    PATH_LIMIT
-  };
-  RegionNode* result_reg = new(C, PATH_LIMIT) RegionNode(PATH_LIMIT);
-  PhiNode*    result_val = new(C, PATH_LIMIT) PhiNode(result_reg,
-                                                      TypeInstPtr::NOTNULL);
-  PhiNode*    result_i_o = new(C, PATH_LIMIT) PhiNode(result_reg, Type::ABIO);
-  PhiNode*    result_mem = new(C, PATH_LIMIT) PhiNode(result_reg, Type::MEMORY,
-                                                      TypePtr::BOTTOM);
-  record_for_igvn(result_reg);
-
-  const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
-  int raw_adr_idx = Compile::AliasIdxRaw;
-  const bool raw_mem_only = true;
-
-  Node* array_ctl = generate_array_guard(obj_klass, (RegionNode*)NULL);
-  if (array_ctl != NULL) {
-    // It's an array.
-    PreserveJVMState pjvms(this);
-    set_control(array_ctl);
-    Node* obj_length = load_array_length(obj);
-    Node* obj_size = NULL;
-    Node* alloc_obj = new_array(obj_klass, obj_length, nargs,
-                                raw_mem_only, &obj_size);
-
-    if (!use_ReduceInitialCardMarks()) {
-      // If it is an oop array, it requires very special treatment,
-      // because card marking is required on each card of the array.
-      Node* is_obja = generate_objArray_guard(obj_klass, (RegionNode*)NULL);
-      if (is_obja != NULL) {
-        PreserveJVMState pjvms2(this);
-        set_control(is_obja);
-        // Generate a direct call to the right arraycopy function(s).
-        bool disjoint_bases = true;
-        bool length_never_negative = true;
-        generate_arraycopy(TypeAryPtr::OOPS, T_OBJECT,
-                           obj, intcon(0), alloc_obj, intcon(0),
-                           obj_length,
-                           disjoint_bases, length_never_negative);
-        result_reg->init_req(_objArray_path, control());
-        result_val->init_req(_objArray_path, alloc_obj);
-        result_i_o ->set_req(_objArray_path, i_o());
-        result_mem ->set_req(_objArray_path, reset_memory());
+    // Conservatively insert a memory barrier on all memory slices.
+    // Do not let writes into the original float below the clone.
+    insert_mem_bar(Op_MemBarCPUOrder);
+
+    // paths into result_reg:
+    enum {
+      _slow_path = 1,     // out-of-line call to clone method (virtual or not)
+      _objArray_path,     // plain array allocation, plus arrayof_oop_arraycopy
+      _array_path,        // plain array allocation, plus arrayof_long_arraycopy
+      _instance_path,     // plain instance allocation, plus arrayof_long_arraycopy
+      PATH_LIMIT
+    };
+    RegionNode* result_reg = new(C, PATH_LIMIT) RegionNode(PATH_LIMIT);
+    result_val             = new(C, PATH_LIMIT) PhiNode(result_reg,
+                                                        TypeInstPtr::NOTNULL);
+    PhiNode*    result_i_o = new(C, PATH_LIMIT) PhiNode(result_reg, Type::ABIO);
+    PhiNode*    result_mem = new(C, PATH_LIMIT) PhiNode(result_reg, Type::MEMORY,
+                                                        TypePtr::BOTTOM);
+    record_for_igvn(result_reg);
+
+    const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
+    int raw_adr_idx = Compile::AliasIdxRaw;
+    const bool raw_mem_only = true;
+
+
+    Node* array_ctl = generate_array_guard(obj_klass, (RegionNode*)NULL);
+    if (array_ctl != NULL) {
+      // It's an array.
+      PreserveJVMState pjvms(this);
+      set_control(array_ctl);
+      Node* obj_length = load_array_length(obj);
+      Node* obj_size  = NULL;
+      Node* alloc_obj = new_array(obj_klass, obj_length, 0,
+                                  raw_mem_only, &obj_size);
+
+      if (!use_ReduceInitialCardMarks()) {
+        // If it is an oop array, it requires very special treatment,
+        // because card marking is required on each card of the array.
+        Node* is_obja = generate_objArray_guard(obj_klass, (RegionNode*)NULL);
+        if (is_obja != NULL) {
+          PreserveJVMState pjvms2(this);
+          set_control(is_obja);
+          // Generate a direct call to the right arraycopy function(s).
+          bool disjoint_bases = true;
+          bool length_never_negative = true;
+          generate_arraycopy(TypeAryPtr::OOPS, T_OBJECT,
+                             obj, intcon(0), alloc_obj, intcon(0),
+                             obj_length,
+                             disjoint_bases, length_never_negative);
+          result_reg->init_req(_objArray_path, control());
+          result_val->init_req(_objArray_path, alloc_obj);
+          result_i_o ->set_req(_objArray_path, i_o());
+          result_mem ->set_req(_objArray_path, reset_memory());
+        }
+      }
+      // We can dispense with card marks if we know the allocation
+      // comes out of eden (TLAB)...  In fact, ReduceInitialCardMarks
+      // causes the non-eden paths to simulate a fresh allocation,
+      // insofar that no further card marks are required to initialize
+      // the object.
+
+      // Otherwise, there are no card marks to worry about.
+
+      if (!stopped()) {
+        copy_to_clone(obj, alloc_obj, obj_size, true, false);
+
+        // Present the results of the copy.
+        result_reg->init_req(_array_path, control());
+        result_val->init_req(_array_path, alloc_obj);
+        result_i_o ->set_req(_array_path, i_o());
+        result_mem ->set_req(_array_path, reset_memory());
       }
     }
-    // We can dispense with card marks if we know the allocation
-    // comes out of eden (TLAB)...  In fact, ReduceInitialCardMarks
-    // causes the non-eden paths to simulate a fresh allocation,
-    // insofar that no further card marks are required to initialize
-    // the object.
-
-    // Otherwise, there are no card marks to worry about.
-
+
+    // We only go to the instance fast case code if we pass a number of guards.
+    // The paths which do not pass are accumulated in the slow_region.
+    RegionNode* slow_region = new (C, 1) RegionNode(1);
+    record_for_igvn(slow_region);
     if (!stopped()) {
-      copy_to_clone(obj, alloc_obj, obj_size, true, false);
-
-      // Present the results of the copy.
-      result_reg->init_req(_array_path, control());
-      result_val->init_req(_array_path, alloc_obj);
-      result_i_o ->set_req(_array_path, i_o());
-      result_mem ->set_req(_array_path, reset_memory());
+      // It's an instance (we did array above).  Make the slow-path tests.
+      // If this is a virtual call, we generate a funny guard.  We grab
+      // the vtable entry corresponding to clone() from the target object.
+      // If the target method which we are calling happens to be the
+      // Object clone() method, we pass the guard.  We do not need this
+      // guard for non-virtual calls; the caller is known to be the native
+      // Object clone().
+      if (is_virtual) {
+        generate_virtual_guard(obj_klass, slow_region);
+      }
+
+      // The object must be cloneable and must not have a finalizer.
+      // Both of these conditions may be checked in a single test.
+      // We could optimize the cloneable test further, but we don't care.
+      generate_access_flags_guard(obj_klass,
+                                  // Test both conditions:
+                                  JVM_ACC_IS_CLONEABLE | JVM_ACC_HAS_FINALIZER,
+                                  // Must be cloneable but not finalizer:
+                                  JVM_ACC_IS_CLONEABLE,
+                                  slow_region);
     }
-  }
-
-  // We only go to the instance fast case code if we pass a number of guards.
-  // The paths which do not pass are accumulated in the slow_region.
-  RegionNode* slow_region = new (C, 1) RegionNode(1);
-  record_for_igvn(slow_region);
-  if (!stopped()) {
-    // It's an instance (we did array above).  Make the slow-path tests.
-    // If this is a virtual call, we generate a funny guard.  We grab
-    // the vtable entry corresponding to clone() from the target object.
-    // If the target method which we are calling happens to be the
-    // Object clone() method, we pass the guard.  We do not need this
-    // guard for non-virtual calls; the caller is known to be the native
-    // Object clone().
-    if (is_virtual) {
-      generate_virtual_guard(obj_klass, slow_region);
+
+    if (!stopped()) {
+      // It's an instance, and it passed the slow-path tests.
+      PreserveJVMState pjvms(this);
+      Node* obj_size  = NULL;
+      Node* alloc_obj = new_instance(obj_klass, NULL, raw_mem_only, &obj_size);
+
+      copy_to_clone(obj, alloc_obj, obj_size, false, !use_ReduceInitialCardMarks());
+
+      // Present the results of the slow call.
+      result_reg->init_req(_instance_path, control());
+      result_val->init_req(_instance_path, alloc_obj);
+      result_i_o ->set_req(_instance_path, i_o());
+      result_mem ->set_req(_instance_path, reset_memory());
     }
 
-    // The object must be cloneable and must not have a finalizer.
-    // Both of these conditions may be checked in a single test.
-    // We could optimize the cloneable test further, but we don't care.
-    generate_access_flags_guard(obj_klass,
-                                // Test both conditions:
-                                JVM_ACC_IS_CLONEABLE | JVM_ACC_HAS_FINALIZER,
-                                // Must be cloneable but not finalizer:
-                                JVM_ACC_IS_CLONEABLE,
-                                slow_region);
-  }
-
-  if (!stopped()) {
-    // It's an instance, and it passed the slow-path tests.
-    PreserveJVMState pjvms(this);
-    Node* obj_size = NULL;
-    Node* alloc_obj = new_instance(obj_klass, NULL, raw_mem_only, &obj_size);
-
-    copy_to_clone(obj, alloc_obj, obj_size, false, !use_ReduceInitialCardMarks());
-
-    // Present the results of the slow call.
-    result_reg->init_req(_instance_path, control());
-    result_val->init_req(_instance_path, alloc_obj);
-    result_i_o ->set_req(_instance_path, i_o());
-    result_mem ->set_req(_instance_path, reset_memory());
-  }
-
-  // Generate code for the slow case.  We make a call to clone().
-  set_control(_gvn.transform(slow_region));
-  if (!stopped()) {
-    PreserveJVMState pjvms(this);
-    CallJavaNode* slow_call = generate_method_call(vmIntrinsics::_clone, is_virtual);
-    Node* slow_result = set_results_for_java_call(slow_call);
-    // this->control() comes from set_results_for_java_call
-    result_reg->init_req(_slow_path, control());
-    result_val->init_req(_slow_path, slow_result);
-    result_i_o ->set_req(_slow_path, i_o());
-    result_mem ->set_req(_slow_path, reset_memory());
-  }
-
-  // Return the combined state.
-  set_control(    _gvn.transform(result_reg) );
-  set_i_o(        _gvn.transform(result_i_o) );
-  set_all_memory( _gvn.transform(result_mem) );
+    // Generate code for the slow case.  We make a call to clone().
+    set_control(_gvn.transform(slow_region));
+    if (!stopped()) {
+      PreserveJVMState pjvms(this);
+      CallJavaNode* slow_call = generate_method_call(vmIntrinsics::_clone, is_virtual);
+      Node* slow_result = set_results_for_java_call(slow_call);
+      // this->control() comes from set_results_for_java_call
+      result_reg->init_req(_slow_path, control());
+      result_val->init_req(_slow_path, slow_result);
+      result_i_o ->set_req(_slow_path, i_o());
+      result_mem ->set_req(_slow_path, reset_memory());
+    }
+
+    // Return the combined state.
+    set_control(    _gvn.transform(result_reg) );
+    set_i_o(        _gvn.transform(result_i_o) );
+    set_all_memory( _gvn.transform(result_mem) );
+  } //original reexecute and sp are set back here
 
   push(_gvn.transform(result_val));
 
--- a/src/share/vm/opto/mulnode.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/opto/mulnode.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -608,16 +608,14 @@
   }
 
   // Are we masking a long that was converted from an int with a mask
-  // that fits in 32-bits?  Commute them and use an AndINode.
-  if (op == Op_ConvI2L && (mask & CONST64(0xFFFFFFFF00000000)) == 0) {
-    // If we are doing an UI2L conversion (i.e. the mask is
-    // 0x00000000FFFFFFFF) we cannot convert the AndL to an AndI
-    // because the AndI would be optimized away later in Identity.
-    if (mask != CONST64(0x00000000FFFFFFFF)) {
-      Node* andi = new (phase->C, 3) AndINode(in1->in(1), phase->intcon(mask));
-      andi = phase->transform(andi);
-      return new (phase->C, 2) ConvI2LNode(andi);
-    }
+  // that fits in 32-bits?  Commute them and use an AndINode.  Don't
+  // convert masks which would cause a sign extension of the integer
+  // value.  This check includes UI2L masks (0x00000000FFFFFFFF) which
+  // would be optimized away later in Identity.
+  if (op == Op_ConvI2L && (mask & CONST64(0xFFFFFFFF80000000)) == 0) {
+    Node* andi = new (phase->C, 3) AndINode(in1->in(1), phase->intcon(mask));
+    andi = phase->transform(andi);
+    return new (phase->C, 2) ConvI2LNode(andi);
   }
 
   // Masking off sign bits?  Dont make them!
--- a/src/share/vm/opto/output.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/opto/output.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -911,8 +911,9 @@
     ciMethod* scope_method = method ? method : _method;
     // Describe the scope here
     assert(jvms->bci() >= InvocationEntryBci && jvms->bci() <= 0x10000, "must be a valid or entry BCI");
+    assert(!jvms->should_reexecute() || depth==max_depth, "reexecute allowed only for the youngest");
     // Now we can describe the scope.
-    debug_info()->describe_scope(safepoint_pc_offset,scope_method,jvms->bci(),locvals,expvals,monvals);
+    debug_info()->describe_scope(safepoint_pc_offset,scope_method,jvms->bci(),jvms->should_reexecute(),locvals,expvals,monvals);
   } // End jvms loop
 
   // Mark the end of the scope set.
@@ -994,7 +995,8 @@
   for (int depth = 1; depth <= max_depth; depth++) {
     JVMState* jvms = youngest_jvms->of_depth(depth);
     ciMethod* method = jvms->has_method() ? jvms->method() : NULL;
-    debug_info->describe_scope(pc_offset, method, jvms->bci());
+    assert(!jvms->should_reexecute() || depth==max_depth, "reexecute allowed only for the youngest");
+    debug_info->describe_scope(pc_offset, method, jvms->bci(), jvms->should_reexecute());
   }
 
   // Mark the end of the scope set.
--- a/src/share/vm/prims/jvm.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/prims/jvm.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -638,32 +638,10 @@
   if (PrintJVMWarnings) warning("JVM_ResolveClass not implemented");
 JVM_END
 
-// Common implementation for JVM_FindClassFromBootLoader and
-// JVM_FindClassFromLoader
-static jclass jvm_find_class_from_class_loader(JNIEnv* env, const char* name,
-                                  jboolean init, jobject loader,
-                                  jboolean throwError, TRAPS) {
-  // Java libraries should ensure that name is never null...
-  if (name == NULL || (int)strlen(name) > symbolOopDesc::max_length()) {
-    // It's impossible to create this class;  the name cannot fit
-    // into the constant pool.
-    if (throwError) {
-      THROW_MSG_0(vmSymbols::java_lang_NoClassDefFoundError(), name);
-    } else {
-      THROW_MSG_0(vmSymbols::java_lang_ClassNotFoundException(), name);
-    }
-  }
-  symbolHandle h_name = oopFactory::new_symbol_handle(name, CHECK_NULL);
-  Handle h_loader(THREAD, JNIHandles::resolve(loader));
-  jclass result = find_class_from_class_loader(env, h_name, init, h_loader,
-                                               Handle(), throwError, THREAD);
-
-  if (TraceClassResolution && result != NULL) {
-    trace_class_resolution(java_lang_Class::as_klassOop(JNIHandles::resolve_non_null(result)));
-  }
-  return result;
-}
-
+
+// Returns a class loaded by the bootstrap class loader; or null
+// if not found.  ClassNotFoundException is not thrown.
+//
 // Rationale behind JVM_FindClassFromBootLoader
 // a> JVM_FindClassFromClassLoader was never exported in the export tables.
 // b> because of (a) java.dll has a direct dependecy on the  unexported
@@ -681,12 +659,26 @@
 // to the JDK, and is not intended to be a public API.
 
 JVM_ENTRY(jclass, JVM_FindClassFromBootLoader(JNIEnv* env,
-                                              const char* name,
-                                              jboolean throwError))
-  JVMWrapper3("JVM_FindClassFromBootLoader %s throw %s", name,
-              throwError ? "error" : "exception");
-  return jvm_find_class_from_class_loader(env, name, JNI_FALSE,
-                                          (jobject)NULL, throwError, THREAD);
+                                              const char* name))
+  JVMWrapper2("JVM_FindClassFromBootLoader %s", name);
+
+  // Java libraries should ensure that name is never null...
+  if (name == NULL || (int)strlen(name) > symbolOopDesc::max_length()) {
+    // It's impossible to create this class;  the name cannot fit
+    // into the constant pool.
+    return NULL;
+  }
+
+  symbolHandle h_name = oopFactory::new_symbol_handle(name, CHECK_NULL);
+  klassOop k = SystemDictionary::resolve_or_null(h_name, CHECK_NULL);
+  if (k == NULL) {
+    return NULL;
+  }
+
+  if (TraceClassResolution) {
+    trace_class_resolution(k);
+  }
+  return (jclass) JNIHandles::make_local(env, Klass::cast(k)->java_mirror());
 JVM_END
 
 JVM_ENTRY(jclass, JVM_FindClassFromClassLoader(JNIEnv* env, const char* name,
@@ -694,8 +686,25 @@
                                                jboolean throwError))
   JVMWrapper3("JVM_FindClassFromClassLoader %s throw %s", name,
                throwError ? "error" : "exception");
-  return jvm_find_class_from_class_loader(env, name, init, loader,
-                                          throwError, THREAD);
+  // Java libraries should ensure that name is never null...
+  if (name == NULL || (int)strlen(name) > symbolOopDesc::max_length()) {
+    // It's impossible to create this class;  the name cannot fit
+    // into the constant pool.
+    if (throwError) {
+      THROW_MSG_0(vmSymbols::java_lang_NoClassDefFoundError(), name);
+    } else {
+      THROW_MSG_0(vmSymbols::java_lang_ClassNotFoundException(), name);
+    }
+  }
+  symbolHandle h_name = oopFactory::new_symbol_handle(name, CHECK_NULL);
+  Handle h_loader(THREAD, JNIHandles::resolve(loader));
+  jclass result = find_class_from_class_loader(env, h_name, init, h_loader,
+                                               Handle(), throwError, THREAD);
+
+  if (TraceClassResolution && result != NULL) {
+    trace_class_resolution(java_lang_Class::as_klassOop(JNIHandles::resolve_non_null(result)));
+  }
+  return result;
 JVM_END
 
 
@@ -756,6 +765,20 @@
 static jclass jvm_define_class_common(JNIEnv *env, const char *name, jobject loader, const jbyte *buf, jsize len, jobject pd, const char *source, TRAPS) {
   if (source == NULL)  source = "__JVM_DefineClass__";
 
+  assert(THREAD->is_Java_thread(), "must be a JavaThread");
+  JavaThread* jt = (JavaThread*) THREAD;
+
+  PerfClassTraceTime vmtimer(ClassLoader::perf_define_appclass_time(),
+                             ClassLoader::perf_define_appclass_selftime(),
+                             ClassLoader::perf_define_appclasses(),
+                             jt->get_thread_stat()->perf_recursion_counts_addr(),
+                             jt->get_thread_stat()->perf_timers_addr(),
+                             PerfClassTraceTime::DEFINE_CLASS);
+
+  if (UsePerfData) {
+    ClassLoader::perf_app_classfile_bytes_read()->inc(len);
+  }
+
   // Since exceptions can be thrown, class initialization can take place
   // if name is NULL no check for class name in .class stream has to be made.
   symbolHandle class_name;
@@ -3905,6 +3928,7 @@
   //   The Java level wrapper will perform the necessary security check allowing
   //   us to pass the NULL as the initiating class loader.
   klassOop klass = SystemDictionary::resolve_or_fail(name, loader, protection_domain, throwError != 0, CHECK_NULL);
+
   KlassHandle klass_handle(THREAD, klass);
   // Check if we should initialize the class
   if (init && klass_handle->oop_is_instance()) {
--- a/src/share/vm/prims/jvm.h	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/prims/jvm.h	Thu Aug 13 17:59:05 2009 -0700
@@ -390,15 +390,10 @@
                              jobject loader, jboolean throwError);
 
 /*
- * Find a class from a boot class loader. Throw ClassNotFoundException
- * or NoClassDefFoundError depending on the value of the last
- * argument. This is the same as FindClassFromClassLoader but provided
- * as a convenience method exported correctly on all platforms for
- * JSR 277 launcher class loading.
+ * Find a class from a boot class loader. Returns NULL if class not found.
  */
 JNIEXPORT jclass JNICALL
-JVM_FindClassFromBootLoader(JNIEnv *env, const char *name,
-                            jboolean throwError);
+JVM_FindClassFromBootLoader(JNIEnv *env, const char *name);
 
 /*
  * Find a class from a given class.
--- a/src/share/vm/prims/jvmtiExport.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/prims/jvmtiExport.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 2003-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2003-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2427,6 +2427,11 @@
     return;
   }
 
+  if (ForceFullGCJVMTIEpilogues) {
+    // force 'Full GC' was done semantics for JVMTI GC epilogues
+    _full = true;
+  }
+
   // GarbageCollectionStart event posted from VM thread - okay because
   // JVMTI is clear that the "world is stopped" and callback shouldn't
   // try to call into the VM.
--- a/src/share/vm/runtime/arguments.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/runtime/arguments.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -1054,7 +1054,15 @@
 
   // Unless explicitly requested otherwise, size young gen
   // for "short" pauses ~ 4M*ParallelGCThreads
-  if (FLAG_IS_DEFAULT(MaxNewSize)) {  // MaxNewSize not set at command-line
+
+  // If either MaxNewSize or NewRatio is set on the command line,
+  // assume the user is trying to set the size of the young gen.
+
+  if (FLAG_IS_DEFAULT(MaxNewSize) && FLAG_IS_DEFAULT(NewRatio)) {
+
+    // Set MaxNewSize to our calculated preferred_max_new_size unless
+    // NewSize was set on the command line and it is larger than
+    // preferred_max_new_size.
     if (!FLAG_IS_DEFAULT(NewSize)) {   // NewSize explicitly set at command-line
       FLAG_SET_ERGO(uintx, MaxNewSize, MAX2(NewSize, preferred_max_new_size));
     } else {
@@ -1063,15 +1071,32 @@
     if(PrintGCDetails && Verbose) {
       // Too early to use gclog_or_tty
       tty->print_cr("Ergo set MaxNewSize: " SIZE_FORMAT, MaxNewSize);
-  }
-  }
-  // Unless explicitly requested otherwise, prefer a large
-  // Old to Young gen size so as to shift the collection load
-  // to the old generation concurrent collector
-  if (FLAG_IS_DEFAULT(NewRatio)) {
+    }
+
+    // Unless explicitly requested otherwise, prefer a large
+    // Old to Young gen size so as to shift the collection load
+    // to the old generation concurrent collector
+
+    // If this is only guarded by FLAG_IS_DEFAULT(NewRatio)
+    // then NewSize and OldSize may be calculated.  That would
+    // generally lead to some differences with ParNewGC for which
+    // there was no obvious reason.  Also limit to the case where
+    // MaxNewSize has not been set.
+
     FLAG_SET_ERGO(intx, NewRatio, MAX2(NewRatio, new_ratio));
 
-    size_t min_new  = align_size_up(ScaleForWordSize(min_new_default), os::vm_page_size());
+    // Code along this path potentially sets NewSize and OldSize
+
+    // Calculate the desired minimum size of the young gen but if
+    // NewSize has been set on the command line, use it here since
+    // it should be the final value.
+    size_t min_new;
+    if (FLAG_IS_DEFAULT(NewSize)) {
+      min_new = align_size_up(ScaleForWordSize(min_new_default),
+                              os::vm_page_size());
+    } else {
+      min_new = NewSize;
+    }
     size_t prev_initial_size = initial_heap_size();
     if (prev_initial_size != 0 && prev_initial_size < min_new+OldSize) {
       set_initial_heap_size(min_new+OldSize);
@@ -1083,9 +1108,11 @@
                 initial_heap_size()/M, prev_initial_size/M);
       }
     }
+
     // MaxHeapSize is aligned down in collectorPolicy
-    size_t max_heap = align_size_down(MaxHeapSize,
-                                      CardTableRS::ct_max_alignment_constraint());
+    size_t max_heap =
+      align_size_down(MaxHeapSize,
+                      CardTableRS::ct_max_alignment_constraint());
 
     if(PrintGCDetails && Verbose) {
       // Too early to use gclog_or_tty
@@ -1150,8 +1177,9 @@
       // CMSParPromoteBlocksToClaim is a collector-specific flag, so
       // we'll let it to take precedence.
       jio_fprintf(defaultStream::error_stream(),
-                  "Both OldPLABSize and CMSParPromoteBlocksToClaim options are specified "
-                  "for the CMS collector. CMSParPromoteBlocksToClaim will take precedence.\n");
+                  "Both OldPLABSize and CMSParPromoteBlocksToClaim"
+                  " options are specified for the CMS collector."
+                  " CMSParPromoteBlocksToClaim will take precedence.\n");
     }
   }
 }
--- a/src/share/vm/runtime/atomic.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/runtime/atomic.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -39,6 +39,8 @@
   static void store_ptr(intptr_t store_value, volatile intptr_t* dest);
   static void store_ptr(void*    store_value, volatile void*     dest);
 
+  static jlong load(volatile jlong* src);
+
   // Atomically add to a location, return updated value
   static jint     add    (jint     add_value, volatile jint*     dest);
   static intptr_t add_ptr(intptr_t add_value, volatile intptr_t* dest);
--- a/src/share/vm/runtime/globals.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/runtime/globals.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -1082,6 +1082,9 @@
   product(ccstr, TraceJVMTI, NULL,                                          \
           "Trace flags for JVMTI functions and events")                     \
                                                                             \
+  product(bool, ForceFullGCJVMTIEpilogues, false,                           \
+          "Force 'Full GC' was done semantics for JVMTI GC epilogues")      \
+                                                                            \
   /* This option can change an EMCP method into an obsolete method. */      \
   /* This can affect tests that except specific methods to be EMCP. */      \
   /* This option should be used with caution. */                            \
@@ -2924,12 +2927,6 @@
           "how many entries we'll try to leave on the stack during "        \
           "parallel GC")                                                    \
                                                                             \
-  product(intx, DCQBarrierQueueBufferSize, 256,                             \
-          "Number of elements in a dirty card queue buffer")                \
-                                                                            \
-  product(intx, DCQBarrierProcessCompletedThreshold, 5,                     \
-          "Number of completed dirty card buffers to trigger processing.")  \
-                                                                            \
   /* stack parameters */                                                    \
   product_pd(intx, StackYellowPages,                                        \
           "Number of yellow zone (recoverable overflows) pages")            \
@@ -3037,6 +3034,9 @@
           "Wait for this many CI accesses to occur in all compiles before " \
           "beginning to throw OutOfMemoryErrors in each compile")           \
                                                                             \
+  notproduct(bool, CIObjectFactoryVerify, false,                            \
+          "enable potentially expensive verification in ciObjectFactory")   \
+                                                                            \
   /* Priorities */                                                          \
   product_pd(bool, UseThreadPriorities,  "Use native thread priorities")    \
                                                                             \
@@ -3287,7 +3287,7 @@
   product(uintx, SharedReadWriteSize,  12*M,                                \
           "Size of read-write space in permanent generation (in bytes)")    \
                                                                             \
-  product(uintx, SharedReadOnlySize,    8*M,                                \
+  product(uintx, SharedReadOnlySize,   10*M,                                \
           "Size of read-only space in permanent generation (in bytes)")     \
                                                                             \
   product(uintx, SharedMiscDataSize,    4*M,                                \
@@ -3312,7 +3312,7 @@
   product(bool, AnonymousClasses, false,                                    \
           "support sun.misc.Unsafe.defineAnonymousClass")                   \
                                                                             \
-  product(bool, EnableMethodHandles, false,                                 \
+  experimental(bool, EnableMethodHandles, false,                            \
           "support method handles (true by default under JSR 292)")         \
                                                                             \
   diagnostic(intx, MethodHandlePushLimit, 3,                                \
@@ -3327,7 +3327,7 @@
   diagnostic(bool, OptimizeMethodHandles, true,                             \
           "when constructing method handles, try to improve them")          \
                                                                             \
-  product(bool, EnableInvokeDynamic, false,                                 \
+  experimental(bool, EnableInvokeDynamic, false,                            \
           "recognize the invokedynamic instruction")                        \
                                                                             \
   develop(bool, TraceInvokeDynamic, false,                                  \
--- a/src/share/vm/runtime/perfData.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/runtime/perfData.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -868,6 +868,10 @@
   {counter = PerfDataManager::create_counter(counter_ns, counter_name, \
                                              PerfData::U_Events,CHECK);}
 
+#define NEWPERFBYTECOUNTER(counter, counter_ns, counter_name)  \
+  {counter = PerfDataManager::create_counter(counter_ns, counter_name, \
+                                             PerfData::U_Bytes,CHECK);}
+
 // Utility Classes
 
 /*
--- a/src/share/vm/runtime/vframe.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/runtime/vframe.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -402,7 +402,12 @@
   DebugInfoReadStream buffer(nm(), decode_offset);
   _sender_decode_offset = buffer.read_int();
   _method               = methodOop(buffer.read_oop());
-  _bci                  = buffer.read_bci();
+  // Deoptimization needs reexecute bit to determine whether to reexecute the bytecode
+  // only at the time when it "unpack_frames", and the reexecute bit info could always
+  // be obtained from the scopeDesc in the compiledVFrame. As a result, we don't keep
+  // the reexecute bit here.
+  bool dummy_reexecute;
+  _bci                  = buffer.read_bci_and_reexecute(dummy_reexecute);
 
   assert(_method->is_method(), "checking type of decoded method");
 }
--- a/src/share/vm/runtime/vframeArray.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/runtime/vframeArray.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -44,6 +44,7 @@
 
   _method = vf->method();
   _bci    = vf->raw_bci();
+  _reexecute = vf->should_reexecute();
 
   int index;
 
@@ -148,16 +149,20 @@
   // C++ interpreter doesn't need a pc since it will figure out what to do when it
   // begins execution
   address pc;
-  bool use_next_mdp; // true if we should use the mdp associated with the next bci
-                     // rather than the one associated with bcp
+  bool use_next_mdp = false; // true if we should use the mdp associated with the next bci
+                             // rather than the one associated with bcp
   if (raw_bci() == SynchronizationEntryBCI) {
     // We are deoptimizing while hanging in prologue code for synchronized method
     bcp = method()->bcp_from(0); // first byte code
     pc  = Interpreter::deopt_entry(vtos, 0); // step = 0 since we don't skip current bytecode
-    use_next_mdp = false;
+  } else if (should_reexecute()) { //reexecute this bytecode
+    assert(is_top_frame, "reexecute allowed only for the top frame");
+    bcp = method()->bcp_from(bci());
+    pc  = Interpreter::deopt_reexecute_entry(method(), bcp);
   } else {
     bcp = method()->bcp_from(bci());
-    pc  = Interpreter::continuation_for(method(), bcp, callee_parameters, is_top_frame, use_next_mdp);
+    pc  = Interpreter::deopt_continue_after_entry(method(), bcp, callee_parameters, is_top_frame);
+    use_next_mdp = true;
   }
   assert(Bytecodes::is_defined(*bcp), "must be a valid bytecode");
 
--- a/src/share/vm/runtime/vframeArray.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/runtime/vframeArray.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -41,7 +41,8 @@
   private:
 
     frame _frame;                                                // the interpreter frame we will unpack into
-    int _bci;                                                    // raw bci for this vframe
+    int  _bci;                                                   // raw bci for this vframe
+    bool _reexecute;                                             // whether sould we reexecute this bytecode
     methodOop  _method;                                          // the method for this vframe
     MonitorChunk* _monitors;                                     // active monitors for this vframe
     StackValueCollection* _locals;
@@ -54,6 +55,7 @@
   int bci(void) const;
 
   int raw_bci(void) const            { return _bci; }
+  bool should_reexecute(void) const  { return _reexecute; }
 
   methodOop method(void) const       { return _method; }
 
--- a/src/share/vm/runtime/vframe_hp.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/runtime/vframe_hp.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -276,6 +276,15 @@
   return scope()->bci();
 }
 
+bool compiledVFrame::should_reexecute() const {
+  if (scope() == NULL) {
+    // native nmethods have no scope the method/bci is implied
+    nmethod* nm = code();
+    assert(nm->is_native_method(), "must be native");
+    return false;
+  }
+  return scope()->should_reexecute();
+}
 
 vframe* compiledVFrame::sender() const {
   const frame f = fr();
--- a/src/share/vm/runtime/vframe_hp.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/runtime/vframe_hp.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -25,11 +25,12 @@
 class compiledVFrame: public javaVFrame {
  public:
   // JVM state
-  methodOop                    method()         const;
-  int                          bci()            const;
-  StackValueCollection*        locals()         const;
-  StackValueCollection*        expressions()    const;
-  GrowableArray<MonitorInfo*>* monitors()       const;
+  methodOop                    method()             const;
+  int                          bci()                const;
+  bool                         should_reexecute()   const;
+  StackValueCollection*        locals()             const;
+  StackValueCollection*        expressions()        const;
+  GrowableArray<MonitorInfo*>* monitors()           const;
 
   void set_locals(StackValueCollection* values) const;
 
--- a/src/share/vm/services/threadService.cpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/services/threadService.cpp	Thu Aug 13 17:59:05 2009 -0700
@@ -688,10 +688,9 @@
   _contended_enter_count = 0;
   _monitor_wait_count = 0;
   _sleep_count = 0;
-  _class_init_recursion_count = 0;
-  _class_verify_recursion_count = 0;
   _count_pending_reset = false;
   _timer_pending_reset = false;
+  memset((void*) _perf_recursion_counts, 0, sizeof(_perf_recursion_counts));
 }
 
 ThreadSnapshot::ThreadSnapshot(JavaThread* thread) {
--- a/src/share/vm/services/threadService.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/services/threadService.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -120,9 +120,8 @@
   bool         _timer_pending_reset;
 
   // Keep accurate times for potentially recursive class operations
-  int          _class_init_recursion_count;
-  int          _class_verify_recursion_count;
-  int          _class_link_recursion_count;
+  int           _perf_recursion_counts[6];
+  elapsedTimer  _perf_timers[6];
 
   // utility functions
   void  check_and_reset_count()            {
@@ -165,9 +164,8 @@
   void reset_count_stat()                  { _count_pending_reset = true; }
   void reset_time_stat()                   { _timer_pending_reset = true; }
 
-  int* class_init_recursion_count_addr()   { return &_class_init_recursion_count; }
-  int* class_verify_recursion_count_addr() { return &_class_verify_recursion_count; }
-  int* class_link_recursion_count_addr()   { return &_class_link_recursion_count; }
+  int* perf_recursion_counts_addr()        { return _perf_recursion_counts; }
+  elapsedTimer* perf_timers_addr()         { return _perf_timers; }
 };
 
 // Thread snapshot to represent the thread state and statistics
--- a/src/share/vm/utilities/taskqueue.hpp	Thu Aug 13 17:47:43 2009 -0700
+++ b/src/share/vm/utilities/taskqueue.hpp	Thu Aug 13 17:59:05 2009 -0700
@@ -22,94 +22,90 @@
  *
  */
 
-#ifdef LP64
-typedef juint TAG_TYPE;
-// for a taskqueue size of 4M
-#define LOG_TASKQ_SIZE 22
-#else
-typedef jushort TAG_TYPE;
-// for a taskqueue size of 16K
-#define LOG_TASKQ_SIZE 14
-#endif
-
 class TaskQueueSuper: public CHeapObj {
 protected:
-  // The first free element after the last one pushed (mod _n).
+  // Internal type for indexing the queue; also used for the tag.
+  typedef NOT_LP64(uint16_t) LP64_ONLY(uint32_t) idx_t;
+
+  // The first free element after the last one pushed (mod N).
   volatile uint _bottom;
 
-  // log2 of the size of the queue.
-  enum SomeProtectedConstants {
-    Log_n = LOG_TASKQ_SIZE
+  enum {
+    N = 1 << NOT_LP64(14) LP64_ONLY(17), // Queue size: 16K or 128K
+    MOD_N_MASK = N - 1                   // To compute x mod N efficiently.
   };
-#undef LOG_TASKQ_SIZE
 
-  // Size of the queue.
-  uint n() { return (1 << Log_n); }
-  // For computing "x mod n" efficiently.
-  uint n_mod_mask() { return n() - 1; }
+  class Age {
+  public:
+    Age(size_t data = 0)         { _data = data; }
+    Age(const Age& age)          { _data = age._data; }
+    Age(idx_t top, idx_t tag)    { _fields._top = top; _fields._tag = tag; }
 
-  struct Age {
-    TAG_TYPE _top;
-    TAG_TYPE _tag;
+    Age   get()        const volatile { return _data; }
+    void  set(Age age) volatile       { _data = age._data; }
 
-    TAG_TYPE tag() const { return _tag; }
-    TAG_TYPE top() const { return _top; }
+    idx_t top()        const volatile { return _fields._top; }
+    idx_t tag()        const volatile { return _fields._tag; }
 
-    Age() { _tag = 0; _top = 0; }
+    // Increment top; if it wraps, increment tag also.
+    void increment() {
+      _fields._top = increment_index(_fields._top);
+      if (_fields._top == 0) ++_fields._tag;
+    }
 
-    friend bool operator ==(const Age& a1, const Age& a2) {
-      return a1.tag() == a2.tag() && a1.top() == a2.top();
+    Age cmpxchg(const Age new_age, const Age old_age) volatile {
+      return (size_t) Atomic::cmpxchg_ptr((intptr_t)new_age._data,
+                                          (volatile intptr_t *)&_data,
+                                          (intptr_t)old_age._data);
     }
+
+    bool operator ==(const Age& other) const { return _data == other._data; }
+
+  private:
+    struct fields {
+      idx_t _top;
+      idx_t _tag;
+    };
+    union {
+      size_t _data;
+      fields _fields;
+    };
   };
-  Age _age;
-  // These make sure we do single atomic reads and writes.
-  Age get_age() {
-    uint res = *(volatile uint*)(&_age);
-    return *(Age*)(&res);
+
+  volatile Age _age;
+
+  // These both operate mod N.
+  static uint increment_index(uint ind) {
+    return (ind + 1) & MOD_N_MASK;
   }
-  void set_age(Age a) {
-    *(volatile uint*)(&_age) = *(uint*)(&a);
+  static uint decrement_index(uint ind) {
+    return (ind - 1) & MOD_N_MASK;
   }
 
-  TAG_TYPE get_top() {
-    return get_age().top();
-  }
-
-  // These both operate mod _n.
-  uint increment_index(uint ind) {
-    return (ind + 1) & n_mod_mask();
-  }
-  uint decrement_index(uint ind) {
-    return (ind - 1) & n_mod_mask();
-  }
-
-  // Returns a number in the range [0.._n).  If the result is "n-1", it
-  // should be interpreted as 0.
+  // Returns a number in the range [0..N).  If the result is "N-1", it should be
+  // interpreted as 0.
   uint dirty_size(uint bot, uint top) {
-    return ((int)bot - (int)top) & n_mod_mask();
+    return (bot - top) & MOD_N_MASK;
   }
 
   // Returns the size corresponding to the given "bot" and "top".
   uint size(uint bot, uint top) {
     uint sz = dirty_size(bot, top);
-    // Has the queue "wrapped", so that bottom is less than top?
-    // There's a complicated special case here.  A pair of threads could
-    // perform pop_local and pop_global operations concurrently, starting
-    // from a state in which _bottom == _top+1.  The pop_local could
-    // succeed in decrementing _bottom, and the pop_global in incrementing
-    // _top (in which case the pop_global will be awarded the contested
-    // queue element.)  The resulting state must be interpreted as an empty
-    // queue.  (We only need to worry about one such event: only the queue
-    // owner performs pop_local's, and several concurrent threads
-    // attempting to perform the pop_global will all perform the same CAS,
-    // and only one can succeed.  Any stealing thread that reads after
-    // either the increment or decrement will see an empty queue, and will
-    // not join the competitors.  The "sz == -1 || sz == _n-1" state will
-    // not be modified  by concurrent queues, so the owner thread can reset
-    // the state to _bottom == top so subsequent pushes will be performed
-    // normally.
-    if (sz == (n()-1)) return 0;
-    else return sz;
+    // Has the queue "wrapped", so that bottom is less than top?  There's a
+    // complicated special case here.  A pair of threads could perform pop_local
+    // and pop_global operations concurrently, starting from a state in which
+    // _bottom == _top+1.  The pop_local could succeed in decrementing _bottom,
+    // and the pop_global in incrementing _top (in which case the pop_global
+    // will be awarded the contested queue element.)  The resulting state must
+    // be interpreted as an empty queue.  (We only need to worry about one such
+    // event: only the queue owner performs pop_local's, and several concurrent
+    // threads attempting to perform the pop_global will all perform the same
+    // CAS, and only one can succeed.)  Any stealing thread that reads after
+    // either the increment or decrement will see an empty queue, and will not
+    // join the competitors.  The "sz == -1 || sz == N-1" state will not be
+    // modified by concurrent queues, so the owner thread can reset the state to
+    // _bottom == top so subsequent pushes will be performed normally.
+    return (sz == N - 1) ? 0 : sz;
   }
 
 public:
@@ -122,22 +118,21 @@
   // The "careful" version admits the possibility of pop_local/pop_global
   // races.
   uint size() {
-    return size(_bottom, get_top());
+    return size(_bottom, _age.top());
   }
 
   uint dirty_size() {
-    return dirty_size(_bottom, get_top());
+    return dirty_size(_bottom, _age.top());
   }
 
   void set_empty() {
     _bottom = 0;
-    _age = Age();
+    _age.set(0);
   }
 
   // Maximum number of elements allowed in the queue.  This is two less
   // than the actual queue size, for somewhat complicated reasons.
-  uint max_elems() { return n() - 2; }
-
+  uint max_elems() { return N - 2; }
 };
 
 template<class E> class GenericTaskQueue: public TaskQueueSuper {
@@ -179,12 +174,12 @@
 
 template<class E>
 GenericTaskQueue<E>::GenericTaskQueue():TaskQueueSuper() {
-  assert(sizeof(Age) == sizeof(int), "Depends on this.");
+  assert(sizeof(Age) == sizeof(size_t), "Depends on this.");
 }
 
 template<class E>
 void GenericTaskQueue<E>::initialize() {
-  _elems = NEW_C_HEAP_ARRAY(E, n());
+  _elems = NEW_C_HEAP_ARRAY(E, N);
   guarantee(_elems != NULL, "Allocation failed.");
 }
 
@@ -208,14 +203,14 @@
 
 template<class E>
 bool GenericTaskQueue<E>::push_slow(E t, uint dirty_n_elems) {
-  if (dirty_n_elems == n() - 1) {
+  if (dirty_n_elems == N - 1) {
     // Actually means 0, so do the push.
     uint localBot = _bottom;
     _elems[localBot] = t;
     _bottom = increment_index(localBot);
     return true;
-  } else
-    return false;
+  }
+  return false;
 }
 
 template<class E>
@@ -230,53 +225,45 @@
   // then have the owner thread do a pop followed by another push.  Without
   // the incrementing of "tag", the pop_global's CAS could succeed,
   // allowing it to believe it has claimed the stale element.)
-  Age newAge;
-  newAge._top = localBot;
-  newAge._tag = oldAge.tag() + 1;
+  Age newAge((idx_t)localBot, oldAge.tag() + 1);
   // Perhaps a competing pop_global has already incremented "top", in which
   // case it wins the element.
   if (localBot == oldAge.top()) {
-    Age tempAge;
     // No competing pop_global has yet incremented "top"; we'll try to
     // install new_age, thus claiming the element.
-    assert(sizeof(Age) == sizeof(int), "Assumption about CAS unit.");
-    *(uint*)&tempAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge);
+    Age tempAge = _age.cmpxchg(newAge, oldAge);
     if (tempAge == oldAge) {
       // We win.
-      assert(dirty_size(localBot, get_top()) != n() - 1,
-             "Shouldn't be possible...");
+      assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
       return true;
     }
   }
-  // We fail; a completing pop_global gets the element.  But the queue is
-  // empty (and top is greater than bottom.)  Fix this representation of
-  // the empty queue to become the canonical one.
-  set_age(newAge);
-  assert(dirty_size(localBot, get_top()) != n() - 1,
-         "Shouldn't be possible...");
+  // We lose; a completing pop_global gets the element.  But the queue is empty
+  // and top is greater than bottom.  Fix this representation of the empty queue
+  // to become the canonical one.
+  _age.set(newAge);
+  assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
   return false;
 }
 
 template<class E>
 bool GenericTaskQueue<E>::pop_global(E& t) {
-  Age newAge;
-  Age oldAge = get_age();
+  Age oldAge = _age.get();
   uint localBot = _bottom;
   uint n_elems = size(localBot, oldAge.top());
   if (n_elems == 0) {
     return false;
   }
+
   t = _elems[oldAge.top()];
-  newAge = oldAge;
-  newAge._top = increment_index(newAge.top());
-  if ( newAge._top == 0 ) newAge._tag++;
-  Age resAge;
-  *(uint*)&resAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge);
+  Age newAge(oldAge);
+  newAge.increment();
+  Age resAge = _age.cmpxchg(newAge, oldAge);
+
   // Note that using "_bottom" here might fail, since a pop_local might
   // have decremented it.
-  assert(dirty_size(localBot, newAge._top) != n() - 1,
-         "Shouldn't be possible...");
-  return (resAge == oldAge);
+  assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity");
+  return resAge == oldAge;
 }
 
 template<class E>
@@ -459,7 +446,7 @@
     return offer_termination(NULL);
   }
 
-  // As above, but it also terminates of the should_exit_termination()
+  // As above, but it also terminates if the should_exit_termination()
   // method of the terminator parameter returns true. If terminator is
   // NULL, then it is ignored.
   bool offer_termination(TerminatorTerminator* terminator);
@@ -492,11 +479,10 @@
   }
 #else
   uint localBot = _bottom;
-  assert((localBot >= 0) && (localBot < n()), "_bottom out of range.");
-  TAG_TYPE top = get_top();
+  assert((localBot >= 0) && (localBot < N), "_bottom out of range.");
+  idx_t top = _age.top();
   uint dirty_n_elems = dirty_size(localBot, top);
-  assert((dirty_n_elems >= 0) && (dirty_n_elems < n()),
-         "n_elems out of range.");
+  assert((dirty_n_elems >= 0) && (dirty_n_elems < N), "n_elems out of range.");
   if (dirty_n_elems < max_elems()) {
     _elems[localBot] = t;
     _bottom = increment_index(localBot);
@@ -517,12 +503,12 @@
   return true;
 #else
   uint localBot = _bottom;
-  // This value cannot be n-1.  That can only occur as a result of
+  // This value cannot be N-1.  That can only occur as a result of
   // the assignment to bottom in this method.  If it does, this method
   // resets the size( to 0 before the next call (which is sequential,
   // since this is pop_local.)
-  uint dirty_n_elems = dirty_size(localBot, get_top());
-  assert(dirty_n_elems != n() - 1, "Shouldn't be possible...");
+  uint dirty_n_elems = dirty_size(localBot, _age.top());
+  assert(dirty_n_elems != N - 1, "Shouldn't be possible...");
   if (dirty_n_elems == 0) return false;
   localBot = decrement_index(localBot);
   _bottom = localBot;
@@ -534,15 +520,14 @@
   // If there's still at least one element in the queue, based on the
   // "_bottom" and "age" we've read, then there can be no interference with
   // a "pop_global" operation, and we're done.
-  TAG_TYPE tp = get_top();    // XXX
+  idx_t tp = _age.top();    // XXX
   if (size(localBot, tp) > 0) {
-    assert(dirty_size(localBot, tp) != n() - 1,
-           "Shouldn't be possible...");
+    assert(dirty_size(localBot, tp) != N - 1, "sanity");
     return true;
   } else {
     // Otherwise, the queue contained exactly one element; we take the slow
     // path.
-    return pop_local_slow(localBot, get_age());
+    return pop_local_slow(localBot, _age.get());
   }
 #endif
 }
--- a/test/compiler/6826736/Test.java	Thu Aug 13 17:47:43 2009 -0700
+++ b/test/compiler/6826736/Test.java	Thu Aug 13 17:59:05 2009 -0700
@@ -27,7 +27,7 @@
  * @bug 6826736
  * @summary CMS: core dump with -XX:+UseCompressedOops
  *
- * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -Xbatch -XX:+ScavengeALot -XX:+UseCompressedOops -XX:HeapBaseMinAddress=32g -XX:CompileThreshold=100 -XX:CompileOnly=Test.test -XX:-BlockLayoutRotateLoops -XX:LoopUnrollLimit=0 Test
+ * @run main/othervm/timeout=600 -XX:+IgnoreUnrecognizedVMOptions -Xbatch -XX:+ScavengeALot -XX:+UseCompressedOops -XX:HeapBaseMinAddress=32g -XX:CompileThreshold=100 -XX:CompileOnly=Test.test -XX:-BlockLayoutRotateLoops -XX:LoopUnrollLimit=0 Test
  */
 
 public class Test {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6833129/Test.java	Thu Aug 13 17:59:05 2009 -0700
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/**
+ * @test
+ * @bug 6833129
+ * @summary Object.clone() and Arrays.copyOf ignore coping with -XX:+DeoptimizeALot
+ * @run main/othervm -Xbatch -XX:+DeoptimizeALot Test
+ */
+
+public class Test{
+    public static void init(int src[]) {
+        for (int i =0; i<src.length; i++) {
+            src[i] =  i;
+        }
+    }
+
+    public static void clone_and_verify(int src[]) {
+        for (int i = 0; i < src.length; i++) {
+            int [] src_clone = src.clone();
+            if (src[i] != src_clone[i]) {
+                System.out.println("Error: allocated but not copied: ");
+                for( int j =0; j < src_clone.length; j++)
+                    System.out.print(" " + src_clone[j]);
+                System.out.println();
+                System.exit(97);
+            }
+        }
+    }
+
+    public static void test() {
+        int[] src = new int[34];
+        init(src);
+        clone_and_verify(src);
+    }
+
+    public static void main(String[] args) {
+        for (int i=0; i< 20000; i++) {
+            test();
+        }
+    }
+}
--- a/test/compiler/6851282/Test.java	Thu Aug 13 17:47:43 2009 -0700
+++ b/test/compiler/6851282/Test.java	Thu Aug 13 17:59:05 2009 -0700
@@ -27,7 +27,7 @@
  * @bug 6851282
  * @summary JIT miscompilation results in null entry in array when using CompressedOops
  *
- * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UseCompressedOops Test
+ * @run main/othervm/timeout=600 -Xmx256m -XX:+IgnoreUnrecognizedVMOptions -XX:+UseCompressedOops Test
  */
 
 import java.util.ArrayList;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6863155/Test6863155.java	Thu Aug 13 17:59:05 2009 -0700
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/**
+ * @test
+ * @bug 6863155
+ * @summary Server compiler generates incorrect code (x86, long, bitshift, bitmask)
+ *
+ * @run main/othervm -Xcomp -XX:CompileOnly=Test6863155.test Test6863155
+ */
+
+public class Test6863155 {
+    private static long test(byte b) {
+        return b << 24 & 0xff000000L;
+    }
+
+    public static void main(String... args) {
+        long result = test((byte) 0xc2);
+        long expected = 0x00000000c2000000L;
+        if (result != expected)
+            throw new InternalError(Long.toHexString(result) + " != " + Long.toHexString(expected));
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6863420/Test.java	Thu Aug 13 17:59:05 2009 -0700
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2009 D.E. Shaw.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6863420
+ * @summary os::javaTimeNanos() go backward on Solaris x86
+ *
+ * @run main/othervm Test
+ */
+
+public class Test {
+    static long value = 0;
+    static boolean got_backward_time = false;
+
+    public static void main(String args[]) {
+        final int count = 100000;
+
+        for (int numThreads = 1; numThreads <= 32; numThreads++) {
+            final int numRuns = 1;
+            for (int t=1; t <= numRuns; t++) {
+                final int curRun = t;
+
+                System.out.println("Spawning " + numThreads + " threads");
+                final Thread threads[] = new Thread[numThreads];
+                for (int i = 0; i < threads.length; i++) {
+                    Runnable thread =
+                        new Runnable() {
+                            public void run() {
+                                for (long l = 0; l < 100000; l++) {
+                                    final long start = System.nanoTime();
+                                    if (value == 12345678) {
+                                        System.out.println("Wow!");
+                                    }
+                                    final long end = System.nanoTime();
+                                    final long time = end - start;
+                                    value += time;
+                                    if (time < 0) {
+                                        System.out.println(
+                                            "Backwards: " +
+                                            "start=" + start + " " +
+                                            "end=" + end + " " +
+                                            "time= " + time
+                                        );
+                                        got_backward_time = true;
+                                    }
+                                }
+                            }
+                        };
+                    threads[i] = new Thread(thread, "Thread" + i);
+                }
+                for (int i = 0; i < threads.length; i++) {
+                    threads[i].start();
+                }
+                for (int i = 0; i < threads.length; i++) {
+                    try {
+                        threads[i].join();
+                    }
+                    catch (InterruptedException e) {
+                        continue;
+                    }
+                }
+            }
+        }
+
+        if (got_backward_time) {
+            System.exit(97);
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6865031/Test.java	Thu Aug 13 17:59:05 2009 -0700
@@ -0,0 +1,650 @@
+/*
+ * Copyright 2009 Goldman Sachs International.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/*
+ * @test
+ * @bug 6865031
+ * @summary Application gives bad result (throws bad exception) with compressed oops
+ * @run main/othervm -XX:+UseCompressedOops -XX:HeapBaseMinAddress=32g -XX:-LoopUnswitching -XX:CompileCommand=inline,AbstractMemoryEfficientList.equals Test hello goodbye
+ */
+
+import java.lang.ref.ReferenceQueue;
+import java.lang.ref.WeakReference;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+interface MyList {
+    public int size();
+    public Object set(final int index, final Object element);
+    public Object get(final int index);
+}
+
+abstract class AbstractMemoryEfficientList implements MyList {
+    abstract public int size();
+    abstract public Object get(final int index);
+    abstract public Object set(final int index, final Object element);
+
+    public boolean equals(Object o) {
+        if (o == this) {
+            return true;
+        }
+
+        if (!(o instanceof MyList)) {
+            return false;
+        }
+
+        final MyList that = (MyList) o;
+        if (this.size() != that.size()) {
+            return false;
+        }
+
+        for (int i = 0; i < this.size(); i++) {
+            try {
+                if (!((this.get(i)).equals(that.get(i)))) {
+                    return false;
+                }
+            } catch (IndexOutOfBoundsException e) {
+                System.out.println("THROWING RT EXC");
+                System.out.println("concurrent modification of this:" + this.getClass() + ":" + System.identityHashCode(this) + "; that:" + that.getClass() + ":" + System.identityHashCode(that) + "; i:" + i);
+                e.printStackTrace();
+                System.exit(97);
+                throw new RuntimeException("concurrent modification of this:" + this.getClass() + ":" + System.identityHashCode(this) + "; that:" + that.getClass() + ":" + System.identityHashCode(that) + "; i:" + i, e);
+            }
+        }
+        return true;
+    }
+
+    public int hashCode() {
+        int hashCode = 1;
+        for (int i = 0; i < this.size(); i++) {
+            Object obj = this.get(i);
+            hashCode = 31 * hashCode + (obj == null ? 0 : obj.hashCode());
+        }
+        return hashCode;
+    }
+}
+
+final class SingletonList extends AbstractMemoryEfficientList {
+    private Object element1;
+
+    SingletonList(final Object obj1) {
+        super();
+        this.element1 = obj1;
+    }
+
+    public int size() {
+        return 1;
+    }
+
+    public Object get(final int index) {
+        if (index == 0) {
+            return this.element1;
+        } else {
+            throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + this.size());
+        }
+    }
+
+    public Object set(final int index, final Object element) {
+        if (index == 0) {
+            final Object previousElement = this.element1;
+            this.element1 = element;
+            return previousElement;
+        } else {
+            throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + this.size());
+        }
+    }
+}
+
+final class DoubletonList extends AbstractMemoryEfficientList {
+    private Object element1;
+    private Object element2;
+
+    DoubletonList(final Object obj1, final Object obj2) {
+        this.element1 = obj1;
+        this.element2 = obj2;
+    }
+
+    public int size() {
+        return 2;
+    }
+
+    public Object get(final int index) {
+        switch (index) {
+            case 0 : return this.element1;
+            case 1 : return this.element2;
+            default: throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + this.size());
+        }
+    }
+
+    public Object set(final int index, final Object element) {
+        switch (index) {
+            case 0 :
+            {
+                final Object previousElement = this.element1;
+                this.element1 = element;
+                return previousElement;
+            }
+            case 1 :
+            {
+                final Object previousElement = this.element2;
+                this.element2 = element;
+                return previousElement;
+            }
+            default : throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + this.size());
+        }
+    }
+}
+
+class WeakPool<V> {
+    protected static final int DEFAULT_INITIAL_CAPACITY = 16;
+    private static final int MAXIMUM_CAPACITY = 1 << 30;
+    private static final float DEFAULT_LOAD_FACTOR = 0.75f;
+
+    protected Entry<V>[] table;
+
+    private int size;
+    protected int threshold;
+    private final float loadFactor;
+    private final ReferenceQueue<V> queue = new ReferenceQueue<V>();
+
+    public WeakPool()
+    {
+        this.loadFactor = DEFAULT_LOAD_FACTOR;
+        threshold = DEFAULT_INITIAL_CAPACITY;
+        table = new Entry[DEFAULT_INITIAL_CAPACITY];
+    }
+
+    /**
+     * Check for equality of non-null reference x and possibly-null y.  By
+     * default uses Object.equals.
+     */
+    private boolean eq(Object x, Object y)
+    {
+        return x == y || x.equals(y);
+    }
+
+    /**
+     * Return index for hash code h.
+     */
+    private int indexFor(int h, int length)
+    {
+        return h & length - 1;
+    }
+
+    /**
+     * Expunge stale entries from the table.
+     */
+    private void expungeStaleEntries()
+    {
+        Object r;
+        while ((r = queue.poll()) != null)
+        {
+            Entry e = (Entry) r;
+            int h = e.hash;
+            int i = indexFor(h, table.length);
+
+            // System.out.println("EXPUNGING " + h);
+            Entry<V> prev = table[i];
+            Entry<V> p = prev;
+            while (p != null)
+            {
+                Entry<V> next = p.next;
+                if (p == e)
+                {
+                    if (prev == e)
+                    {
+                        table[i] = next;
+                    }
+                    else
+                    {
+                        prev.next = next;
+                    }
+                    e.next = null;  // Help GC
+                    size--;
+                    break;
+                }
+                prev = p;
+                p = next;
+            }
+        }
+    }
+
+    /**
+     * Return the table after first expunging stale entries
+     */
+    private Entry<V>[] getTable()
+    {
+        expungeStaleEntries();
+        return table;
+    }
+
+    /**
+     * Returns the number of key-value mappings in this map.
+     * This result is a snapshot, and may not reflect unprocessed
+     * entries that will be removed before next attempted access
+     * because they are no longer referenced.
+     */
+    public int size()
+    {
+        if (size == 0)
+        {
+            return 0;
+        }
+        expungeStaleEntries();
+        return size;
+    }
+
+    /**
+     * Returns <tt>true</tt> if this map contains no key-value mappings.
+     * This result is a snapshot, and may not reflect unprocessed
+     * entries that will be removed before next attempted access
+     * because they are no longer referenced.
+     */
+    public boolean isEmpty()
+    {
+        return size() == 0;
+    }
+
+    /**
+     * Returns the value stored in the pool that equals the requested key
+     * or <tt>null</tt> if the map contains no mapping for
+     * this key (or the key is null)
+     *
+     * @param key the key whose equals value is to be returned.
+     * @return the object that is equal the specified key, or
+     *         <tt>null</tt> if key is null or no object in the pool equals the key.
+     */
+    public V get(V key)
+    {
+        if (key == null)
+        {
+            return null;
+        }
+        int h = key.hashCode();
+        Entry<V>[] tab = getTable();
+        int index = indexFor(h, tab.length);
+        Entry<V> e = tab[index];
+        while (e != null)
+        {
+            V candidate = e.get();
+            if (e.hash == h && eq(key, candidate))
+            {
+                return candidate;
+            }
+            e = e.next;
+        }
+        return null;
+    }
+
+    /**
+     * Returns the entry associated with the specified key in the HashMap.
+     * Returns null if the HashMap contains no mapping for this key.
+     */
+    Entry getEntry(Object key)
+    {
+        int h = key.hashCode();
+        Entry[] tab = getTable();
+        int index = indexFor(h, tab.length);
+        Entry e = tab[index];
+        while (e != null && !(e.hash == h && eq(key, e.get())))
+        {
+            e = e.next;
+        }
+        return e;
+    }
+
+    /**
+     * Places the object into the pool. If the object is null, nothing happens.
+     * If an equal object already exists, it is not replaced.
+     *
+     * @param key the object to put into the pool. key may be null.
+     * @return the object in the pool that is equal to the key, or the newly placed key if no such object existed when put was called
+     */
+    public V put(V key)
+    {
+        if (key == null)
+        {
+            return null;
+        }
+        int h = key.hashCode();
+        Entry<V>[] tab = getTable();
+        int i = indexFor(h, tab.length);
+
+        for (Entry<V> e = tab[i]; e != null; e = e.next)
+        {
+            V candidate = e.get();
+            if (h == e.hash && eq(key, candidate))
+            {
+                return candidate;
+            }
+        }
+
+        tab[i] = new Entry<V>(key, queue, h, tab[i]);
+
+        if (++size >= threshold)
+        {
+            resize(tab.length * 2);
+        }
+
+    // System.out.println("Added " + key + " to pool");
+        return key;
+    }
+
+    /**
+     * Rehashes the contents of this map into a new array with a
+     * larger capacity.  This method is called automatically when the
+     * number of keys in this map reaches its threshold.
+     * <p/>
+     * If current capacity is MAXIMUM_CAPACITY, this method does not
+     * resize the map, but but sets threshold to Integer.MAX_VALUE.
+     * This has the effect of preventing future calls.
+     *
+     * @param newCapacity the new capacity, MUST be a power of two;
+     *                    must be greater than current capacity unless current
+     *                    capacity is MAXIMUM_CAPACITY (in which case value
+     *                    is irrelevant).
+     */
+    void resize(int newCapacity)
+    {
+        Entry<V>[] oldTable = getTable();
+        int oldCapacity = oldTable.length;
+        if (oldCapacity == MAXIMUM_CAPACITY)
+        {
+            threshold = Integer.MAX_VALUE;
+            return;
+        }
+
+        Entry<V>[] newTable = new Entry[newCapacity];
+        transfer(oldTable, newTable);
+        table = newTable;
+
+        /*
+         * If ignoring null elements and processing ref queue caused massive
+         * shrinkage, then restore old table.  This should be rare, but avoids
+         * unbounded expansion of garbage-filled tables.
+         */
+        if (size >= threshold / 2)
+        {
+            threshold = (int) (newCapacity * loadFactor);
+        }
+        else
+        {
+            expungeStaleEntries();
+            transfer(newTable, oldTable);
+            table = oldTable;
+        }
+    }
+
+    /**
+     * Transfer all entries from src to dest tables
+     */
+    private void transfer(Entry[] src, Entry[] dest)
+    {
+        for (int j = 0; j < src.length; ++j)
+        {
+            Entry e = src[j];
+            src[j] = null;
+            while (e != null)
+            {
+                Entry next = e.next;
+                Object key = e.get();
+                if (key == null)
+                {
+                    e.next = null;  // Help GC
+                    size--;
+                }
+                else
+                {
+                    int i = indexFor(e.hash, dest.length);
+                    e.next = dest[i];
+                    dest[i] = e;
+                }
+                e = next;
+            }
+        }
+    }
+
+    /**
+     * Removes the object in the pool that equals the key.
+     *
+     * @param key
+     * @return previous value associated with specified key, or <tt>null</tt>
+     *         if there was no mapping for key or the key is null.
+     */
+    public V removeFromPool(V key)
+    {
+        if (key == null)
+        {
+            return null;
+        }
+        int h = key.hashCode();
+        Entry<V>[] tab = getTable();
+        int i = indexFor(h, tab.length);
+        Entry<V> prev = tab[i];
+        Entry<V> e = prev;
+
+        while (e != null)
+        {
+            Entry<V> next = e.next;
+            V candidate = e.get();
+            if (h == e.hash && eq(key, candidate))
+            {
+                size--;
+                if (prev == e)
+                {
+                    tab[i] = next;
+                }
+                else
+                {
+                    prev.next = next;
+                }
+                return candidate;
+            }
+            prev = e;
+            e = next;
+        }
+
+        return null;
+    }
+
+    /**
+     * Removes all mappings from this map.
+     */
+    public void clear()
+    {
+        // clear out ref queue. We don't need to expunge entries
+        // since table is getting cleared.
+        while (queue.poll() != null)
+        {
+            // nop
+        }
+
+        table = new Entry[DEFAULT_INITIAL_CAPACITY];
+        threshold = DEFAULT_INITIAL_CAPACITY;
+        size = 0;
+
+        // Allocation of array may have caused GC, which may have caused
+        // additional entries to go stale.  Removing these entries from the
+        // reference queue will make them eligible for reclamation.
+        while (queue.poll() != null)
+        {
+            // nop
+        }
+    }
+
+    /**
+     * The entries in this hash table extend WeakReference, using its main ref
+     * field as the key.
+     */
+    protected static class Entry<V>
+    extends WeakReference<V>
+    {
+        private final int hash;
+        private Entry<V> next;
+
+        /**
+         * Create new entry.
+         */
+        Entry(final V key, final ReferenceQueue<V> queue, final int hash, final Entry<V> next)
+        {
+            super(key, queue);
+            this.hash = hash;
+            this.next = next;
+        }
+
+        public V getKey()
+        {
+            return super.get();
+        }
+
+        public boolean equals(Object o)
+        {
+            if (!(o instanceof WeakPool.Entry))
+            {
+                return false;
+            }
+            WeakPool.Entry<V> that = (WeakPool.Entry<V>) o;
+            V k1 = this.getKey();
+            V k2 = that.getKey();
+            return (k1==k2 || k1.equals(k2));
+        }
+
+        public int hashCode()
+        {
+            return this.hash;
+        }
+
+        public String toString()
+        {
+            return String.valueOf(this.getKey());
+        }
+    }
+}
+
+final class MultiSynonymKey {
+    private List<MyList> keys;
+
+    public MultiSynonymKey() {
+        keys = new ArrayList<MyList>();
+    }
+
+    public MultiSynonymKey(MyList... arg) {
+        keys = Arrays.asList(arg);
+    }
+
+    public List<MyList> getKeys() {
+        return keys;
+    }
+
+    public int hashCode() {
+        return this.getKeys().hashCode();
+    }
+
+    public boolean equals(Object obj) {
+        if (this == obj) {
+            return true;
+        }
+
+        if (!(obj instanceof MultiSynonymKey)) {
+            return false;
+        }
+
+        MultiSynonymKey that = (MultiSynonymKey) obj;
+        return this.getKeys().equals(that.getKeys());
+    }
+
+    public String toString() {
+        return this.getClass().getName() + this.getKeys().toString();
+    }
+}
+
+public class Test extends Thread {
+    static public Test test;
+    static private byte[] arg1;
+    static private byte[] arg2;
+    static public WeakPool<MultiSynonymKey> wp;
+    public volatile MultiSynonymKey ml1;
+    public volatile MultiSynonymKey ml2;
+    private volatile MultiSynonymKey ml3;
+
+    public void run() {
+        int count=0;
+        while (true) {
+            try {
+                Thread.sleep(10);
+            } catch (Exception e) {}
+            synchronized (wp) {
+                ml2 = new MultiSynonymKey(new DoubletonList(new String(arg1), new String(arg2)));
+                wp.put(ml2);
+                ml3 = new MultiSynonymKey(new DoubletonList(new String(arg1), new String(arg2)));
+            }
+            try {
+                Thread.sleep(10);
+            } catch (Exception e) {}
+            synchronized (wp) {
+                ml1 = new MultiSynonymKey(new SingletonList(new String(arg1)));
+                wp.put(ml1);
+                ml3 = new MultiSynonymKey(new SingletonList(new String(arg1)));
+            }
+            if (count++==100)
+                System.exit(95);
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+        wp = new WeakPool<MultiSynonymKey>();
+        test = new Test();
+
+        test.arg1 = args[0].getBytes();
+        test.arg2 = args[1].getBytes();
+
+        test.ml1 = new MultiSynonymKey(new SingletonList(new String(test.arg1)));
+        test.ml2 = new MultiSynonymKey(new DoubletonList(new String(test.arg1), new String(test.arg2)));
+        test.ml3 = new MultiSynonymKey(new DoubletonList(new String(test.arg1), new String(test.arg2)));
+
+        wp.put(test.ml1);
+        wp.put(test.ml2);
+
+        test.setDaemon(true);
+        test.start();
+
+        int counter = 0;
+        while (true) {
+            synchronized (wp) {
+                MultiSynonymKey foo = test.ml3;
+
+                if (wp.put(foo) == foo) {
+                    // System.out.println("foo " + counter);
+                    // System.out.println(foo);
+                }
+            }
+            counter++;
+        }
+    }
+
+    private boolean eq(Object x, Object y) {
+        return x == y || x.equals(y);
+    }
+}