changeset 6362:7bc844cd6603

Updated Fork/Join pool executor. Now uses F/J default pool. Contributed-by: Aleksey Shipilev <aleksey.shipilev@oracle.com>
author mduigou
date Tue, 30 Oct 2012 17:30:48 -0700
parents ef437fbf4a28
children 0afeaddee9cf
files src/share/classes/java/util/concurrent/CountedCompleter.java src/share/classes/java/util/concurrent/ForkJoinPool.java src/share/classes/java/util/concurrent/ForkJoinTask.java src/share/classes/java/util/concurrent/ForkJoinUtils.java src/share/classes/java/util/streams/AbstractPipeline.java src/share/classes/java/util/streams/ops/AbstractTask.java src/share/classes/java/util/streams/ops/TreeUtils.java
diffstat 7 files changed, 680 insertions(+), 599 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/classes/java/util/concurrent/CountedCompleter.java	Tue Oct 30 16:46:32 2012 -0700
+++ b/src/share/classes/java/util/concurrent/CountedCompleter.java	Tue Oct 30 17:30:48 2012 -0700
@@ -22,12 +22,13 @@
  * decremented; otherwise, the completion action is performed, and if
  * this completer itself has a completer, the process is continued
  * with its completer.  As is the case with related synchronization
- * components such as {@link Phaser} and {@link
- * java.util.concurrent.Semaphore} these methods affect only internal
- * counts; they do not establish any further internal bookkeeping. In
- * particular, the identities of pending tasks are not maintained. As
- * illustrated below, you can create subclasses that do record some or
- * all pended tasks or their results when needed.
+ * components such as {@link java.util.concurrent.Phaser Phaser} and
+ * {@link java.util.concurrent.Semaphore Semaphore}, these methods
+ * affect only internal counts; they do not establish any further
+ * internal bookkeeping. In particular, the identities of pending
+ * tasks are not maintained. As illustrated below, you can create
+ * subclasses that do record some or all pending tasks or their
+ * results when needed.
  *
  * <p>A concrete CountedCompleter class must define method {@link
  * #compute}, that should, in almost all use cases, invoke {@code
@@ -141,7 +142,9 @@
  *
  * As a further improvement, notice that the left task need not even
  * exist.  Instead of creating a new one, we can iterate using the
- * original task, and add a pending count for each fork:
+ * original task, and add a pending count for each fork. Additionally,
+ * this version uses {@code helpComplete} to streamline assistance in
+ * the execution of forked tasks.
  *
  * <pre> {@code
  * class ForEach<E> ...
@@ -155,7 +158,7 @@
  *         }
  *         if (h > l)
  *             op.apply(array[l]);
- *         tryComplete();
+ *         helpComplete();
  *     }
  * }</pre>
  *
@@ -377,6 +380,19 @@
     }
 
     /**
+     * Returns the root of the current computation; i.e., this
+     * task if it has no completer, else its completer's root.
+     *
+     * @return the root of the current computation
+     */
+    public final CountedCompleter<?> getRoot() {
+        CountedCompleter<?> a = this, p;
+        while ((p = a.completer) != null)
+            a = p;
+        return a;
+    }
+
+    /**
      * If the pending count is nonzero, decrements the count;
      * otherwise invokes {@link #onCompletion} and then similarly
      * tries to complete this task's completer, if one exists,
@@ -398,6 +414,39 @@
     }
 
     /**
+     * Identical to {@link #tryComplete}, but may additionally execute
+     * other tasks within the current computation (i.e., those
+     * with the same {@link #getRoot}.
+     */
+    public final void helpComplete() {
+        CountedCompleter<?> a = this, s = a;
+        for (int c;;) {
+            if ((c = a.pending) == 0) {
+                a.onCompletion(s);
+                if ((a = (s = a).completer) == null) {
+                    s.quietlyComplete();
+                    return;
+                }
+            }
+            else if (U.compareAndSwapInt(a, PENDING, c, c - 1)) {
+                CountedCompleter<?> root = a.getRoot();
+                Thread thread = Thread.currentThread();
+                ForkJoinPool.WorkQueue wq =
+                    (thread instanceof ForkJoinWorkerThread)?
+                    ((ForkJoinWorkerThread)thread).workQueue : null;
+                ForkJoinTask<?> t;
+                while ((t = (wq != null) ? wq.popCC(root) :
+                        ForkJoinPool.popCCFromCommonPool(root)) != null) {
+                    t.doExec();
+                    if (root.isDone())
+                        break;
+                }
+                return;
+            }
+        }
+    }
+
+    /**
      * Regardless of pending count, invokes {@link #onCompletion},
      * marks this task as complete and further triggers {@link
      * #tryComplete} on this task's completer, if one exists. This
@@ -456,41 +505,11 @@
     private static final long PENDING;
     static {
         try {
-            U = getUnsafe();
+            U = sun.misc.Unsafe.getUnsafe();
             PENDING = U.objectFieldOffset
                 (CountedCompleter.class.getDeclaredField("pending"));
         } catch (Exception e) {
             throw new Error(e);
         }
     }
-
-
-    /**
-     * Returns a sun.misc.Unsafe.  Suitable for use in a 3rd party package.
-     * Replace with a simple call to Unsafe.getUnsafe when integrating
-     * into a jdk.
-     *
-     * @return a sun.misc.Unsafe
-     */
-    private static sun.misc.Unsafe getUnsafe() {
-        try {
-            return sun.misc.Unsafe.getUnsafe();
-        } catch (SecurityException se) {
-            try {
-                return java.security.AccessController.doPrivileged
-                    (new java.security
-                     .PrivilegedExceptionAction<sun.misc.Unsafe>() {
-                        public sun.misc.Unsafe run() throws Exception {
-                            java.lang.reflect.Field f = sun.misc
-                                .Unsafe.class.getDeclaredField("theUnsafe");
-                            f.setAccessible(true);
-                            return (sun.misc.Unsafe) f.get(null);
-                        }});
-            } catch (java.security.PrivilegedActionException e) {
-                throw new RuntimeException("Could not initialize intrinsics",
-                                           e.getCause());
-            }
-        }
-    }
-
 }
--- a/src/share/classes/java/util/concurrent/ForkJoinPool.java	Tue Oct 30 16:46:32 2012 -0700
+++ b/src/share/classes/java/util/concurrent/ForkJoinPool.java	Tue Oct 30 17:30:48 2012 -0700
@@ -5,6 +5,7 @@
  */
 
 package java.util.concurrent;
+
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -17,6 +18,7 @@
 import java.util.concurrent.Future;
 import java.util.concurrent.RejectedExecutionException;
 import java.util.concurrent.RunnableFuture;
+import java.util.concurrent.ThreadLocalRandom;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
@@ -41,14 +43,26 @@
  * ForkJoinPool}s may also be appropriate for use with event-style
  * tasks that are never joined.
  *
- * <p>A {@code ForkJoinPool} is constructed with a given target
- * parallelism level; by default, equal to the number of available
- * processors. The pool attempts to maintain enough active (or
- * available) threads by dynamically adding, suspending, or resuming
- * internal worker threads, even if some tasks are stalled waiting to
- * join others. However, no such adjustments are guaranteed in the
- * face of blocked IO or other unmanaged synchronization. The nested
- * {@link ManagedBlocker} interface enables extension of the kinds of
+ * <p>A static {@link #commonPool} is available and appropriate for
+ * most applications. The common pool is used by any ForkJoinTask that
+ * is not explicitly submitted to a specified pool. Using the common
+ * pool normally reduces resource usage (its threads are slowly
+ * reclaimed during periods of non-use, and reinstated upon subsequent
+ * use).  The common pool is by default constructed with default
+ * parameters, but these may be controlled by setting any or all of
+ * the three properties {@code
+ * java.util.concurrent.ForkJoinPool.common.{parallelism,
+ * threadFactory, exceptionHandler}}.
+ *
+ * <p>For applications that require separate or custom pools, a {@code
+ * ForkJoinPool} may be constructed with a given target parallelism
+ * level; by default, equal to the number of available processors. The
+ * pool attempts to maintain enough active (or available) threads by
+ * dynamically adding, suspending, or resuming internal worker
+ * threads, even if some tasks are stalled waiting to join
+ * others. However, no such adjustments are guaranteed in the face of
+ * blocked IO or other unmanaged synchronization. The nested {@link
+ * ManagedBlocker} interface enables extension of the kinds of
  * synchronization accommodated.
  *
  * <p>In addition to execution and lifecycle control methods, this
@@ -93,23 +107,6 @@
  *  </tr>
  * </table>
  *
- * <p><b>Sample Usage.</b> Normally a single {@code ForkJoinPool} is
- * used for all parallel task execution in a program or subsystem.
- * Otherwise, use would not usually outweigh the construction and
- * bookkeeping overhead of creating a large set of threads. For
- * example, a common pool could be used for the {@code SortTasks}
- * illustrated in {@link RecursiveAction}. Because {@code
- * ForkJoinPool} uses threads in {@linkplain java.lang.Thread#isDaemon
- * daemon} mode, there is typically no need to explicitly {@link
- * #shutdown} such a pool upon program exit.
- *
- *  <pre> {@code
- * static final ForkJoinPool mainPool = new ForkJoinPool();
- * ...
- * public void sort(long[] array) {
- *   mainPool.invoke(new SortTask(array, 0, array.length));
- * }}</pre>
- *
  * <p><b>Implementation notes</b>: This implementation restricts the
  * maximum number of running threads to 32767. Attempts to create
  * pools with greater than the maximum number result in
@@ -239,16 +236,15 @@
      * when locked remains available to check consistency.
      *
      * Recording WorkQueues.  WorkQueues are recorded in the
-     * "workQueues" array that is created upon pool construction and
-     * expanded if necessary.  Updates to the array while recording
-     * new workers and unrecording terminated ones are protected from
-     * each other by a lock but the array is otherwise concurrently
-     * readable, and accessed directly.  To simplify index-based
-     * operations, the array size is always a power of two, and all
-     * readers must tolerate null slots. Shared (submission) queues
-     * are at even indices, worker queues at odd indices. Grouping
-     * them together in this way simplifies and speeds up task
-     * scanning.
+     * "workQueues" array that is created upon first use and expanded
+     * if necessary.  Updates to the array while recording new workers
+     * and unrecording terminated ones are protected from each other
+     * by a lock but the array is otherwise concurrently readable, and
+     * accessed directly.  To simplify index-based operations, the
+     * array size is always a power of two, and all readers must
+     * tolerate null slots. Shared (submission) queues are at even
+     * indices, worker queues at odd indices. Grouping them together
+     * in this way simplifies and speeds up task scanning.
      *
      * All worker thread creation is on-demand, triggered by task
      * submissions, replacement of terminated workers, and/or
@@ -320,9 +316,11 @@
      *
      * Trimming workers. To release resources after periods of lack of
      * use, a worker starting to wait when the pool is quiescent will
-     * time out and terminate if the pool has remained quiescent for
-     * SHRINK_RATE nanosecs. This will slowly propagate, eventually
-     * terminating all workers after long periods of non-use.
+     * time out and terminate if the pool has remained quiescent for a
+     * given period -- a short period if there are more threads than
+     * parallelism, longer as the number of threads decreases. This
+     * will slowly propagate, eventually terminating all workers after
+     * periods of non-use.
      *
      * Shutdown and Termination. A call to shutdownNow atomically sets
      * a runState bit and then (non-atomically) sets each worker's
@@ -504,29 +502,6 @@
     }
 
     /**
-     * A simple non-reentrant lock used for exclusion when managing
-     * queues and workers. We use a custom lock so that we can readily
-     * probe lock state in constructions that check among alternative
-     * actions. The lock is normally only very briefly held, and
-     * sometimes treated as a spinlock, but other usages block to
-     * reduce overall contention in those cases where locked code
-     * bodies perform allocation/resizing.
-     */
-    static final class Mutex extends AbstractQueuedSynchronizer {
-        public final boolean tryAcquire(int ignore) {
-            return compareAndSetState(0, 1);
-        }
-        public final boolean tryRelease(int ignore) {
-            setState(0);
-            return true;
-        }
-        public final void lock() { acquire(0); }
-        public final void unlock() { release(0); }
-        public final boolean isHeldExclusively() { return getState() == 1; }
-        public final Condition newCondition() { return new ConditionObject(); }
-    }
-
-    /**
      * Class for artificial tasks that are used to replace the target
      * of local joins if they are removed from an interior queue slot
      * in WorkQueue.tryRemoveAndExec. We don't need the proxy to
@@ -717,8 +692,7 @@
 
         /**
          * Takes next task, if one exists, in LIFO order.  Call only
-         * by owner in unshared queues. (We do not have a shared
-         * version of this method because it is never needed.)
+         * by owner in unshared queues.
          */
         final ForkJoinTask<?> pop() {
             ForkJoinTask<?>[] a; ForkJoinTask<?> t; int m;
@@ -736,6 +710,90 @@
             return null;
         }
 
+        final ForkJoinTask<?> sharedPop() {
+            ForkJoinTask<?> task = null;
+            if (runState == 0 && U.compareAndSwapInt(this, RUNSTATE, 0, 1)) {
+                try {
+                    ForkJoinTask<?>[] a; int m;
+                    if ((a = array) != null && (m = a.length - 1) >= 0) {
+                        for (int s; (s = top - 1) - base >= 0;) {
+                            long j = ((m & s) << ASHIFT) + ABASE;
+                            ForkJoinTask<?> t =
+                                (ForkJoinTask<?>)U.getObject(a, j);
+                            if (t == null)
+                                break;
+                            if (U.compareAndSwapObject(a, j, t, null)) {
+                                top = s;
+                                task = t;
+                                break;
+                            }
+                        }
+                    }
+                } finally {
+                    runState = 0;
+                }
+            }
+            return task;
+        }
+
+        /**
+         * Version of pop that takes top element only if it
+         * its root is the given CountedCompleter.
+         */
+        final ForkJoinTask<?> popCC(CountedCompleter<?> root) {
+            ForkJoinTask<?>[] a; int m;
+            if (root != null && (a = array) != null && (m = a.length - 1) >= 0) {
+                for (int s; (s = top - 1) - base >= 0;) {
+                    long j = ((m & s) << ASHIFT) + ABASE;
+                    ForkJoinTask<?> t =
+                        (ForkJoinTask<?>)U.getObject(a, j);
+                    if (t == null || !(t instanceof CountedCompleter) ||
+                        ((CountedCompleter<?>)t).getRoot() != root)
+                        break;
+                    if (U.compareAndSwapObject(a, j, t, null)) {
+                        top = s;
+                        return t;
+                    }
+                    if (root.status < 0)
+                        break;
+                }
+            }
+            return null;
+        }
+
+        /**
+         * Shared version of popCC
+         */
+        final ForkJoinTask<?> sharedPopCC(CountedCompleter<?> root) {
+            ForkJoinTask<?> task = null;
+            if (root != null &&
+                runState == 0 && U.compareAndSwapInt(this, RUNSTATE, 0, 1)) {
+                try {
+                    ForkJoinTask<?>[] a; int m;
+                    if ((a = array) != null && (m = a.length - 1) >= 0) {
+                        for (int s; (s = top - 1) - base >= 0;) {
+                            long j = ((m & s) << ASHIFT) + ABASE;
+                            ForkJoinTask<?> t =
+                                (ForkJoinTask<?>)U.getObject(a, j);
+                            if (t == null || !(t instanceof CountedCompleter) ||
+                                ((CountedCompleter<?>)t).getRoot() != root)
+                                break;
+                            if (U.compareAndSwapObject(a, j, t, null)) {
+                                top = s;
+                                task = t;
+                                break;
+                            }
+                            if (root.status < 0)
+                                break;
+                        }
+                    }
+                } finally {
+                    runState = 0;
+                }
+            }
+            return task;
+        }
+
         /**
          * Takes a task in FIFO order if b is base of queue and a task
          * can be claimed without contention. Specialized versions
@@ -813,6 +871,28 @@
         }
 
         /**
+         * Version of tryUnpush for shared queues; called by non-FJ
+         * submitters after prechecking that task probably exists.
+         */
+        final boolean trySharedUnpush(ForkJoinTask<?> t) {
+            boolean success = false;
+            if (runState == 0 && U.compareAndSwapInt(this, RUNSTATE, 0, 1)) {
+                try {
+                    ForkJoinTask<?>[] a; int s;
+                    if ((a = array) != null && (s = top) != base &&
+                        U.compareAndSwapObject
+                        (a, (((a.length - 1) & --s) << ASHIFT) + ABASE, t, null)) {
+                        top = s;
+                        success = true;
+                    }
+                } finally {
+                    runState = 0;                         // unlock
+                }
+            }
+            return success;
+        }
+
+        /**
          * Polls the given task only if it is at the current base.
          */
         final boolean pollFor(ForkJoinTask<?> task) {
@@ -1032,7 +1112,7 @@
         static {
             int s;
             try {
-                U = getUnsafe();
+                U = sun.misc.Unsafe.getUnsafe();
                 Class<?> k = WorkQueue.class;
                 Class<?> ak = ForkJoinTask[].class;
                 RUNSTATE = U.objectFieldOffset
@@ -1086,6 +1166,23 @@
     public static final ForkJoinWorkerThreadFactory
         defaultForkJoinWorkerThreadFactory;
 
+
+    /** Property prefix for constructing common pool */
+    private static final String propPrefix =
+        "java.util.concurrent.ForkJoinPool.common.";
+
+    /**
+     * Common (static) pool. Non-null for public use unless a static
+     * construction exception, but internal usages must null-check on
+     * use.
+     */
+    static final ForkJoinPool commonPool;
+
+    /**
+     * Common pool parallelism. Must equal commonPool.parallelism.
+     */
+    static final int commonPoolParallelism;
+
     /**
      * Generator for assigning sequence numbers as pool names.
      */
@@ -1113,21 +1210,16 @@
     // static constants
 
     /**
-     * The wakeup interval (in nanoseconds) for a worker waiting for a
-     * task when the pool is quiescent to instead try to shrink the
-     * number of workers.  The exact value does not matter too
-     * much. It must be short enough to release resources during
-     * sustained periods of idleness, but not so short that threads
-     * are continually re-created.
+     * Initial timeout value (in nanoseconds) for the thread triggering
+     * quiescence to park waiting for new work. On timeout, the thread
+     * will instead try to shrink the number of workers.
      */
-    private static final long SHRINK_RATE =
-        4L * 1000L * 1000L * 1000L; // 4 seconds
+    private static final long IDLE_TIMEOUT      = 1000L * 1000L * 1000L; // 1sec
 
     /**
-     * The timeout value for attempted shrinkage, includes
-     * some slop to cope with system timer imprecision.
+     * Timeout value when there are more threads than parallelism level
      */
-    private static final long SHRINK_TIMEOUT = SHRINK_RATE - (SHRINK_RATE / 10);
+    private static final long FAST_IDLE_TIMEOUT =  100L * 1000L * 1000L;
 
     /**
      * The maximum stolen->joining link depth allowed in method
@@ -1247,21 +1339,58 @@
      * empirically works OK on current JVMs.
      */
 
+    volatile long stealCount;                  // collects worker counts
     volatile long ctl;                         // main pool control
     final int parallelism;                     // parallelism level
     final int localMode;                       // per-worker scheduling mode
+    volatile int nextWorkerNumber;             // to create worker name string
     final int submitMask;                      // submit queue index bound
     int nextSeed;                              // for initializing worker seeds
+    volatile int mainLock;                     // spinlock for array updates
     volatile int runState;                     // shutdown status and seq
     WorkQueue[] workQueues;                    // main registry
-    final Mutex lock;                          // for registration
-    final Condition termination;               // for awaitTermination
     final ForkJoinWorkerThreadFactory factory; // factory for new workers
     final Thread.UncaughtExceptionHandler ueh; // per-worker UEH
-    final AtomicLong stealCount;               // collect counts when terminated
-    final AtomicInteger nextWorkerNumber;      // to create worker name string
     final String workerNamePrefix;             // to create worker name string
 
+    /*
+     * Mechanics for main lock protecting worker array updates.  Uses
+     * the same strategy as ConcurrentHashMap bins -- a spinLock for
+     * normal cases, but falling back to builtin lock when (rarely)
+     * needed.  See internal ConcurrentHashMap documentation for
+     * explanation.
+     */
+
+    static final int LOCK_WAITING = 2; // bit to indicate need for signal
+    static final int MAX_LOCK_SPINS = 1 << 8;
+
+    private void tryAwaitMainLock() {
+        int spins = MAX_LOCK_SPINS, r = 0, h;
+        while (((h = mainLock) & 1) != 0) {
+            if (r == 0)
+                r = ThreadLocalRandom.current().nextInt(); // randomize spins
+            else if (spins >= 0) {
+                r ^= r << 1; r ^= r >>> 3; r ^= r << 10; // xorshift
+                if (r >= 0)
+                    --spins;
+            }
+            else if (U.compareAndSwapInt(this, MAINLOCK, h, h | LOCK_WAITING)) {
+                synchronized (this) {
+                    if ((mainLock & LOCK_WAITING) != 0) {
+                        try {
+                            wait();
+                        } catch (InterruptedException ie) {
+                            Thread.currentThread().interrupt();
+                        }
+                    }
+                    else
+                        notifyAll(); // possibly won race vs signaller
+                }
+                break;
+            }
+        }
+    }
+
     //  Creating, registering, and deregistering workers
 
     /**
@@ -1288,8 +1417,10 @@
      * ForkJoinWorkerThread.
      */
     final String nextWorkerName() {
-        return workerNamePrefix.concat
-            (Integer.toString(nextWorkerNumber.addAndGet(1)));
+        int n;
+        do {} while(!U.compareAndSwapInt(this, NEXTWORKERNUMBER,
+                                         n = nextWorkerNumber, ++n));
+        return workerNamePrefix.concat(Integer.toString(n));
     }
 
     /**
@@ -1301,13 +1432,14 @@
      *
      * @param w the worker's queue
      */
-
     final void registerWorker(WorkQueue w) {
-        Mutex lock = this.lock;
-        lock.lock();
+        while (!U.compareAndSwapInt(this, MAINLOCK, 0, 1))
+            tryAwaitMainLock();
         try {
-            WorkQueue[] ws = workQueues;
-            if (w != null && ws != null) {          // skip on shutdown/failure
+            WorkQueue[] ws;
+            if ((ws = workQueues) == null)
+                ws = workQueues = new WorkQueue[submitMask + 1];
+            if (w != null) {
                 int rs, n =  ws.length, m = n - 1;
                 int s = nextSeed += SEED_INCREMENT; // rarely-colliding sequence
                 w.seed = (s == 0) ? 1 : s;          // ensure non-zero seed
@@ -1328,8 +1460,12 @@
                 runState = ((rs = runState) & SHUTDOWN) | ((rs + 2) & ~SHUTDOWN);
             }
         } finally {
-            lock.unlock();
+            if (!U.compareAndSwapInt(this, MAINLOCK, 1, 0)) {
+                mainLock = 0;
+                synchronized (this) { notifyAll(); };
+            }
         }
+
     }
 
     /**
@@ -1342,19 +1478,24 @@
      * @param ex the exception causing failure, or null if none
      */
     final void deregisterWorker(ForkJoinWorkerThread wt, Throwable ex) {
-        Mutex lock = this.lock;
         WorkQueue w = null;
         if (wt != null && (w = wt.workQueue) != null) {
             w.runState = -1;                // ensure runState is set
-            stealCount.getAndAdd(w.totalSteals + w.nsteals);
+            long steals = w.totalSteals + w.nsteals, sc;
+            do {} while(!U.compareAndSwapLong(this, STEALCOUNT,
+                                              sc = stealCount, sc + steals));
             int idx = w.poolIndex;
-            lock.lock();
-            try {                           // remove record from array
+            while (!U.compareAndSwapInt(this, MAINLOCK, 0, 1))
+                tryAwaitMainLock();
+            try {
                 WorkQueue[] ws = workQueues;
                 if (ws != null && idx >= 0 && idx < ws.length && ws[idx] == w)
                     ws[idx] = null;
             } finally {
-                lock.unlock();
+                if (!U.compareAndSwapInt(this, MAINLOCK, 1, 0)) {
+                    mainLock = 0;
+                    synchronized (this) { notifyAll(); };
+                }
             }
         }
 
@@ -1376,7 +1517,6 @@
             U.throwException(ex);
     }
 
-
     // Submissions
 
     /**
@@ -1394,20 +1534,36 @@
         for (int r = s.seed, m = submitMask;;) {
             WorkQueue[] ws; WorkQueue q;
             int k = r & m & SQMASK;          // use only even indices
-            if (runState < 0 || (ws = workQueues) == null || ws.length <= k)
+            if (runState < 0)
                 throw new RejectedExecutionException(); // shutting down
+            else if ((ws = workQueues) == null || ws.length <= k) {
+                while (!U.compareAndSwapInt(this, MAINLOCK, 0, 1))
+                    tryAwaitMainLock();
+                try {
+                    if (workQueues == null)
+                        workQueues = new WorkQueue[submitMask + 1];
+                } finally {
+                    if (!U.compareAndSwapInt(this, MAINLOCK, 1, 0)) {
+                        mainLock = 0;
+                        synchronized (this) { notifyAll(); };
+                    }
+                }
+            }
             else if ((q = ws[k]) == null) {  // create new queue
                 WorkQueue nq = new WorkQueue(this, null, SHARED_QUEUE);
-                Mutex lock = this.lock;      // construct outside lock
-                lock.lock();
-                try {                        // recheck under lock
+                while (!U.compareAndSwapInt(this, MAINLOCK, 0, 1))
+                    tryAwaitMainLock();
+                try {
                     int rs = runState;       // to update seq
                     if (ws == workQueues && ws[k] == null) {
                         ws[k] = nq;
                         runState = ((rs & SHUTDOWN) | ((rs + 2) & ~SHUTDOWN));
                     }
                 } finally {
-                    lock.unlock();
+                    if (!U.compareAndSwapInt(this, MAINLOCK, 1, 0)) {
+                        mainLock = 0;
+                        synchronized (this) { notifyAll(); };
+                    }
                 }
             }
             else if (q.trySharedPush(task)) {
@@ -1424,6 +1580,79 @@
         }
     }
 
+    /**
+     * Submits the given (non-null) task to the common pool, if possible.
+     */
+    static void submitToCommonPool(ForkJoinTask<?> task) {
+        ForkJoinPool p;
+        if ((p = commonPool) == null)
+            throw new RejectedExecutionException("Common Pool Unavailable");
+        p.doSubmit(task);
+    }
+
+    /**
+     * Returns true if caller is (or may be) submitter to the common
+     * pool, and not all workers are active, and there appear to be
+     * tasks in the associated submission queue.
+     */
+    static boolean canHelpCommonPool() {
+        ForkJoinPool p; WorkQueue[] ws; WorkQueue q;
+        int k = submitters.get().seed & SQMASK;
+        return ((p = commonPool) != null &&
+                (int)(p.ctl >> AC_SHIFT) < 0 &&
+                (ws = p.workQueues) != null &&
+                ws.length > (k &= p.submitMask) &&
+                (q = ws[k]) != null &&
+                q.top - q.base > 0);
+    }
+
+    /**
+     * Returns true if the given task was submitted to common pool
+     * and has not yet commenced execution, and is available for
+     * removal according to execution policies; if so removing the
+     * submission from the pool.
+     *
+     * @param task the task
+     * @return true if successful
+     */
+    static boolean tryUnsubmitFromCommonPool(ForkJoinTask<?> task) {
+        // Peek, looking for task and eligibility before
+        // using trySharedUnpush to actually take it under lock
+        ForkJoinPool p; WorkQueue[] ws; WorkQueue q;
+        ForkJoinTask<?>[] a; int s;
+        int k = submitters.get().seed & SQMASK;
+        return ((p = commonPool) != null &&
+                (int)(p.ctl >> AC_SHIFT) < 0 &&
+                (ws = p.workQueues) != null &&
+                ws.length > (k &= p.submitMask) &&
+                (q = ws[k]) != null &&
+                (a = q.array) != null &&
+                (s = q.top - 1) - q.base >= 0 &&
+                s >= 0 && s < a.length &&
+                a[s] == task &&
+                q.trySharedUnpush(task));
+    }
+
+    /**
+     * Tries to pop a task from common pool with given root
+     */
+    static ForkJoinTask<?> popCCFromCommonPool(CountedCompleter<?> root) {
+        ForkJoinPool p; WorkQueue[] ws; WorkQueue q;
+        ForkJoinTask<?> t;
+        int k = submitters.get().seed & SQMASK;
+        if (root != null &&
+            (p = commonPool) != null &&
+            (int)(p.ctl >> AC_SHIFT) < 0 &&
+            (ws = p.workQueues) != null &&
+            ws.length > (k &= p.submitMask) &&
+            (q = ws[k]) != null && q.top - q.base > 0 &&
+            root.status < 0 &&
+            (t = q.sharedPopCC(root)) != null)
+            return t;
+        return null;
+    }
+
+
     // Maintaining ctl counts
 
     /**
@@ -1435,7 +1664,7 @@
     }
 
     /**
-     * Tries to activate or create a worker if too few are active.
+     * Tries to create one or activate one or more workers if too few are active.
      */
     final void signalWork() {
         long c; int u;
@@ -1519,7 +1748,7 @@
      * awaiting signal,
      *
      * @param w the worker (via its WorkQueue)
-     * @return a task or null of none found
+     * @return a task or null if none found
      */
     private final ForkJoinTask<?> scan(WorkQueue w) {
         WorkQueue[] ws;                       // first update random seed
@@ -1536,7 +1765,7 @@
                     t = (ForkJoinTask<?>)U.getObjectVolatile(a, i);
                     if (q.base == b && ec >= 0 && t != null &&
                         U.compareAndSwapObject(a, i, t, null)) {
-                        if (q.top - (q.base = b + 1) > 1)
+                        if (q.top - (q.base = b + 1) > 0)
                             signalWork();    // help pushes signal
                         return t;
                     }
@@ -1582,12 +1811,10 @@
                 }
             }
             else if (w.eventCount < 0) {      // already queued
-                if ((nr = w.rescans) > 0) {   // continue rescanning
-                    int ac = a + parallelism;
-                    if (((w.rescans = (ac < nr) ? ac : nr - 1) & 3) == 0)
-                        Thread.yield();       // yield before block
-                }
-                else {
+                int ac = a + parallelism;
+                if ((nr = w.rescans) > 0)     // continue rescanning
+                    w.rescans = (ac < nr) ? ac : nr - 1;
+                else if (((w.seed >>> 16) & ac) == 0) { // randomize park
                     Thread.interrupted();     // clear status
                     Thread wt = Thread.currentThread();
                     U.putObject(wt, PARKBLOCKER, this);
@@ -1605,8 +1832,8 @@
     /**
      * If inactivating worker w has caused the pool to become
      * quiescent, checks for pool termination, and, so long as this is
-     * not the only worker, waits for event for up to SHRINK_RATE
-     * nanosecs.  On timeout, if ctl has not changed, terminates the
+     * not the only worker, waits for event for up to a given
+     * duration.  On timeout, if ctl has not changed, terminates the
      * worker, which will in turn wake up another worker to possibly
      * repeat this process.
      *
@@ -1617,20 +1844,21 @@
     private void idleAwaitWork(WorkQueue w, long currentCtl, long prevCtl) {
         if (w.eventCount < 0 && !tryTerminate(false, false) &&
             (int)prevCtl != 0 && !hasQueuedSubmissions() && ctl == currentCtl) {
+            int dc = -(short)(currentCtl >>> TC_SHIFT);
+            long parkTime = dc < 0 ? FAST_IDLE_TIMEOUT: (dc + 1) * IDLE_TIMEOUT;
+            long deadline = System.nanoTime() + parkTime - 100000L; // 1ms slop
             Thread wt = Thread.currentThread();
-            Thread.yield();            // yield before block
             while (ctl == currentCtl) {
-                long startTime = System.nanoTime();
                 Thread.interrupted();  // timed variant of version in scan()
                 U.putObject(wt, PARKBLOCKER, this);
                 w.parker = wt;
                 if (ctl == currentCtl)
-                    U.park(false, SHRINK_RATE);
+                    U.park(false, parkTime);
                 w.parker = null;
                 U.putObject(wt, PARKBLOCKER, null);
                 if (ctl != currentCtl)
                     break;
-                if (System.nanoTime() - startTime >= SHRINK_TIMEOUT &&
+                if (deadline - System.nanoTime() <= 0L &&
                     U.compareAndSwapLong(this, CTL, currentCtl, prevCtl)) {
                     w.eventCount = (w.eventCount + E_SEQ) | E_MASK;
                     w.runState = -1;   // shrink
@@ -1915,7 +2143,6 @@
         }
     }
 
-
     /**
      * Runs tasks until {@code isQuiescent()}. We piggyback on
      * active count ctl maintenance, but rather than blocking
@@ -1958,6 +2185,31 @@
     }
 
     /**
+     * Restricted version of helpQuiescePool for non-FJ callers
+     */
+    static void externalHelpQuiescePool() {
+        ForkJoinPool p; WorkQueue[] ws; WorkQueue q, sq;
+        ForkJoinTask<?>[] a; int b;
+        ForkJoinTask<?> t = null;
+        int k = submitters.get().seed & SQMASK;
+        if ((p = commonPool) != null &&
+            (int)(p.ctl >> AC_SHIFT) < 0 &&
+            (ws = p.workQueues) != null &&
+            ws.length > (k &= p.submitMask) &&
+            (q = ws[k]) != null) {
+            while (q.top - q.base > 0) {
+                if ((t = q.sharedPop()) != null)
+                    break;
+            }
+            if (t == null && (sq = p.findNonEmptyStealQueue(q)) != null &&
+                (b = sq.base) - sq.top < 0)
+                t = sq.pollAt(b);
+            if (t != null)
+                t.doExec();
+        }
+    }
+
+    /**
      * Gets and removes a local or stolen task for the given worker.
      *
      * @return a task, if available
@@ -1990,6 +2242,20 @@
                 8);
     }
 
+    /**
+     * Returns approximate submission queue length for the given caller
+     */
+    static int getEstimatedSubmitterQueueLength() {
+        ForkJoinPool p; WorkQueue[] ws; WorkQueue q;
+        int k = submitters.get().seed & SQMASK;
+        return ((p = commonPool) != null &&
+                p.runState >= 0 &&
+                (ws = p.workQueues) != null &&
+                ws.length > (k &= p.submitMask) &&
+                (q = ws[k]) != null) ?
+            q.queueSize() : 0;
+    }
+
     //  Termination
 
     /**
@@ -2007,22 +2273,28 @@
      * @return true if now terminating or terminated
      */
     private boolean tryTerminate(boolean now, boolean enable) {
-        Mutex lock = this.lock;
         for (long c;;) {
             if (((c = ctl) & STOP_BIT) != 0) {      // already terminating
                 if ((short)(c >>> TC_SHIFT) == -parallelism) {
-                    lock.lock();                    // don't need try/finally
-                    termination.signalAll();        // signal when 0 workers
-                    lock.unlock();
+                    synchronized(this) {
+                        notifyAll();                // signal when 0 workers
+                    }
                 }
                 return true;
             }
             if (runState >= 0) {                    // not yet enabled
                 if (!enable)
                     return false;
-                lock.lock();
-                runState |= SHUTDOWN;
-                lock.unlock();
+                while (!U.compareAndSwapInt(this, MAINLOCK, 0, 1))
+                    tryAwaitMainLock();
+                try {
+                    runState |= SHUTDOWN;
+                } finally {
+                    if (!U.compareAndSwapInt(this, MAINLOCK, 1, 0)) {
+                        mainLock = 0;
+                        synchronized (this) { notifyAll(); };
+                    }
+                }
             }
             if (!now) {                             // check if idle & no tasks
                 if ((int)(c >> AC_SHIFT) != -parallelism ||
@@ -2155,20 +2427,43 @@
         // Use nearest power 2 for workQueues size. See Hackers Delight sec 3.2.
         int n = parallelism - 1;
         n |= n >>> 1; n |= n >>> 2; n |= n >>> 4; n |= n >>> 8; n |= n >>> 16;
-        int size = (n + 1) << 1;        // #slots = 2*#workers
-        this.submitMask = size - 1;     // room for max # of submit queues
-        this.workQueues = new WorkQueue[size];
-        this.termination = (this.lock = new Mutex()).newCondition();
-        this.stealCount = new AtomicLong();
-        this.nextWorkerNumber = new AtomicInteger();
+        this.submitMask = ((n + 1) << 1) - 1;
         int pn = poolNumberGenerator.incrementAndGet();
         StringBuilder sb = new StringBuilder("ForkJoinPool-");
         sb.append(Integer.toString(pn));
         sb.append("-worker-");
         this.workerNamePrefix = sb.toString();
-        lock.lock();
         this.runState = 1;              // set init flag
-        lock.unlock();
+    }
+
+    /**
+     * Constructor for common pool, suitable only for static initialization.
+     * Basically the same as above, but uses smallest possible initial footprint.
+     */
+    ForkJoinPool(int parallelism, int submitMask,
+                 ForkJoinWorkerThreadFactory factory,
+                 Thread.UncaughtExceptionHandler handler) {
+        this.factory = factory;
+        this.ueh = handler;
+        this.submitMask = submitMask;
+        this.parallelism = parallelism;
+        long np = (long)(-parallelism);
+        this.ctl = ((np << AC_SHIFT) & AC_MASK) | ((np << TC_SHIFT) & TC_MASK);
+        this.localMode = LIFO_QUEUE;
+        this.workerNamePrefix = "ForkJoinPool.commonPool-worker-";
+        this.runState = 1;
+    }
+
+    /**
+     * Returns the common pool instance.
+     *
+     * @return the common pool instance
+     */
+    public static ForkJoinPool commonPool() {
+        ForkJoinPool p;
+        if ((p = commonPool) == null)
+            throw new Error("Common Pool Unavailable");
+        return p;
     }
 
     // Execution methods
@@ -2344,6 +2639,15 @@
     }
 
     /**
+     * Returns the targeted parallelism level of the common pool.
+     *
+     * @return the targeted parallelism level of the common pool
+     */
+    public static int getCommonPoolParallelism() {
+        return commonPoolParallelism;
+    }
+
+    /**
      * Returns the number of worker threads that have started but not
      * yet terminated.  The result returned by this method may differ
      * from {@link #getParallelism} when threads are created to
@@ -2424,7 +2728,7 @@
      * @return the number of steals
      */
     public long getStealCount() {
-        long count = stealCount.get();
+        long count = stealCount;
         WorkQueue[] ws; WorkQueue w;
         if ((ws = workQueues) != null) {
             for (int i = 1; i < ws.length; i += 2) {
@@ -2554,7 +2858,7 @@
     public String toString() {
         // Use a single pass through workQueues to collect counts
         long qt = 0L, qs = 0L; int rc = 0;
-        long st = stealCount.get();
+        long st = stealCount;
         long c = ctl;
         WorkQueue[] ws; WorkQueue w;
         if ((ws = workQueues) != null) {
@@ -2595,11 +2899,13 @@
     }
 
     /**
-     * Initiates an orderly shutdown in which previously submitted
-     * tasks are executed, but no new tasks will be accepted.
-     * Invocation has no additional effect if already shut down.
-     * Tasks that are in the process of being submitted concurrently
-     * during the course of this method may or may not be rejected.
+     * Possibly initiates an orderly shutdown in which previously
+     * submitted tasks are executed, but no new tasks will be
+     * accepted. Invocation has no effect on execution state if this
+     * is the {@link #commonPool}, and no additional effect if
+     * already shut down.  Tasks that are in the process of being
+     * submitted concurrently during the course of this method may or
+     * may not be rejected.
      *
      * @throws SecurityException if a security manager exists and
      *         the caller is not permitted to modify threads
@@ -2608,18 +2914,21 @@
      */
     public void shutdown() {
         checkPermission();
-        tryTerminate(false, true);
+        if (this != commonPool)
+            tryTerminate(false, true);
     }
 
     /**
-     * Attempts to cancel and/or stop all tasks, and reject all
-     * subsequently submitted tasks.  Tasks that are in the process of
-     * being submitted or executed concurrently during the course of
-     * this method may or may not be rejected. This method cancels
-     * both existing and unexecuted tasks, in order to permit
-     * termination in the presence of task dependencies. So the method
-     * always returns an empty list (unlike the case for some other
-     * Executors).
+     * Possibly attempts to cancel and/or stop all tasks, and reject
+     * all subsequently submitted tasks.  Invocation has no effect on
+     * execution state if this is the {@link #commonPool}, and no
+     * additional effect if already shut down. Otherwise, tasks that
+     * are in the process of being submitted or executed concurrently
+     * during the course of this method may or may not be
+     * rejected. This method cancels both existing and unexecuted
+     * tasks, in order to permit termination in the presence of task
+     * dependencies. So the method always returns an empty list
+     * (unlike the case for some other Executors).
      *
      * @return an empty list
      * @throws SecurityException if a security manager exists and
@@ -2629,7 +2938,8 @@
      */
     public List<Runnable> shutdownNow() {
         checkPermission();
-        tryTerminate(true, true);
+        if (this != commonPool)
+            tryTerminate(true, true);
         return Collections.emptyList();
     }
 
@@ -2686,19 +2996,21 @@
     public boolean awaitTermination(long timeout, TimeUnit unit)
         throws InterruptedException {
         long nanos = unit.toNanos(timeout);
-        final Mutex lock = this.lock;
-        lock.lock();
-        try {
-            for (;;) {
-                if (isTerminated())
-                    return true;
-                if (nanos <= 0)
-                    return false;
-                nanos = termination.awaitNanos(nanos);
+        if (isTerminated())
+            return true;
+        long startTime = System.nanoTime();
+        boolean terminated = false;
+        synchronized(this) {
+            for (long waitTime = nanos, millis = 0L;;) {
+                if (terminated = isTerminated() ||
+                    waitTime <= 0L ||
+                    (millis = unit.toMillis(waitTime)) <= 0L)
+                    break;
+                wait(millis);
+                waitTime = nanos - (System.nanoTime() - startTime);
             }
-        } finally {
-            lock.unlock();
         }
+        return terminated;
     }
 
     /**
@@ -2830,6 +3142,9 @@
     private static final long PARKBLOCKER;
     private static final int ABASE;
     private static final int ASHIFT;
+    private static final long NEXTWORKERNUMBER;
+    private static final long STEALCOUNT;
+    private static final long MAINLOCK;
 
     static {
         poolNumberGenerator = new AtomicInteger();
@@ -2840,49 +3155,52 @@
         submitters = new ThreadSubmitter();
         int s;
         try {
-            U = getUnsafe();
+            U = sun.misc.Unsafe.getUnsafe();
             Class<?> k = ForkJoinPool.class;
             Class<?> ak = ForkJoinTask[].class;
             CTL = U.objectFieldOffset
                 (k.getDeclaredField("ctl"));
+            NEXTWORKERNUMBER = U.objectFieldOffset
+                (k.getDeclaredField("nextWorkerNumber"));
+            STEALCOUNT = U.objectFieldOffset
+                (k.getDeclaredField("stealCount"));
+            MAINLOCK = U.objectFieldOffset
+                (k.getDeclaredField("mainLock"));
             Class<?> tk = Thread.class;
             PARKBLOCKER = U.objectFieldOffset
                 (tk.getDeclaredField("parkBlocker"));
             ABASE = U.arrayBaseOffset(ak);
             s = U.arrayIndexScale(ak);
+            ASHIFT = 31 - Integer.numberOfLeadingZeros(s);
         } catch (Exception e) {
             throw new Error(e);
         }
         if ((s & (s-1)) != 0)
             throw new Error("data type scale not a power of two");
-        ASHIFT = 31 - Integer.numberOfLeadingZeros(s);
-    }
-
-    /**
-     * Returns a sun.misc.Unsafe.  Suitable for use in a 3rd party package.
-     * Replace with a simple call to Unsafe.getUnsafe when integrating
-     * into a jdk.
-     *
-     * @return a sun.misc.Unsafe
-     */
-    private static sun.misc.Unsafe getUnsafe() {
-        try {
-            return sun.misc.Unsafe.getUnsafe();
-        } catch (SecurityException se) {
-            try {
-                return java.security.AccessController.doPrivileged
-                    (new java.security
-                     .PrivilegedExceptionAction<sun.misc.Unsafe>() {
-                        public sun.misc.Unsafe run() throws Exception {
-                            java.lang.reflect.Field f = sun.misc
-                                .Unsafe.class.getDeclaredField("theUnsafe");
-                            f.setAccessible(true);
-                            return (sun.misc.Unsafe) f.get(null);
-                        }});
-            } catch (java.security.PrivilegedActionException e) {
-                throw new RuntimeException("Could not initialize intrinsics",
-                                           e.getCause());
-            }
+        try { // Establish common pool
+            String pp = System.getProperty(propPrefix + "parallelism");
+            String fp = System.getProperty(propPrefix + "threadFactory");
+            String up = System.getProperty(propPrefix + "exceptionHandler");
+            ForkJoinWorkerThreadFactory fac = (fp == null) ?
+                defaultForkJoinWorkerThreadFactory :
+                ((ForkJoinWorkerThreadFactory)ClassLoader.
+                 getSystemClassLoader().loadClass(fp).newInstance());
+            Thread.UncaughtExceptionHandler ueh = (up == null)? null :
+                ((Thread.UncaughtExceptionHandler)ClassLoader.
+                 getSystemClassLoader().loadClass(up).newInstance());
+            int par;
+            if ((pp == null || (par = Integer.parseInt(pp)) <= 0))
+                par = Runtime.getRuntime().availableProcessors();
+            if (par > MAX_CAP)
+                par = MAX_CAP;
+            commonPoolParallelism = par;
+            int n = par - 1; // precompute submit mask
+            n |= n >>> 1; n |= n >>> 2; n |= n >>> 4;
+            n |= n >>> 8; n |= n >>> 16;
+            int mask = ((n + 1) << 1) - 1;
+            commonPool = new ForkJoinPool(par, mask, fac, ueh);
+        } catch (Exception e) {
+            throw new Error(e);
         }
     }
 
--- a/src/share/classes/java/util/concurrent/ForkJoinTask.java	Tue Oct 30 16:46:32 2012 -0700
+++ b/src/share/classes/java/util/concurrent/ForkJoinTask.java	Tue Oct 30 17:30:48 2012 -0700
@@ -30,15 +30,18 @@
  * subtasks may be hosted by a small number of actual threads in a
  * ForkJoinPool, at the price of some usage limitations.
  *
- * <p>A "main" {@code ForkJoinTask} begins execution when submitted
- * to a {@link ForkJoinPool}.  Once started, it will usually in turn
- * start other subtasks.  As indicated by the name of this class,
- * many programs using {@code ForkJoinTask} employ only methods
- * {@link #fork} and {@link #join}, or derivatives such as {@link
+ * <p>A "main" {@code ForkJoinTask} begins execution when it is
+ * explicitly submitted to a {@link ForkJoinPool}, or, if not already
+ * engaged in a ForkJoin computation, commenced in the {@link
+ * ForkJoinPool#commonPool} via {@link #fork}, {@link #invoke}, or
+ * related methods.  Once started, it will usually in turn start other
+ * subtasks.  As indicated by the name of this class, many programs
+ * using {@code ForkJoinTask} employ only methods {@link #fork} and
+ * {@link #join}, or derivatives such as {@link
  * #invokeAll(ForkJoinTask...) invokeAll}.  However, this class also
  * provides a number of other methods that can come into play in
- * advanced usages, as well as extension mechanics that allow
- * support of new forms of fork/join processing.
+ * advanced usages, as well as extension mechanics that allow support
+ * of new forms of fork/join processing.
  *
  * <p>A {@code ForkJoinTask} is a lightweight form of {@link Future}.
  * The efficiency of {@code ForkJoinTask}s stems from a set of
@@ -123,13 +126,7 @@
  * other actions.  Normally, a concrete ForkJoinTask subclass declares
  * fields comprising its parameters, established in a constructor, and
  * then defines a {@code compute} method that somehow uses the control
- * methods supplied by this base class. While these methods have
- * {@code public} access (to allow instances of different task
- * subclasses to call each other's methods), some of them may only be
- * called from within other ForkJoinTasks (as may be determined using
- * method {@link #inForkJoinPool}).  Attempts to invoke them in other
- * contexts result in exceptions or errors, possibly including {@code
- * ClassCastException}.
+ * methods supplied by this base class.
  *
  * <p>Method {@link #join} and its variants are appropriate for use
  * only when completion dependencies are acyclic; that is, the
@@ -287,9 +284,11 @@
      * @return status upon completion
      */
     private int externalAwaitDone() {
+        int s;
         boolean interrupted = false;
-        int s;
-        while ((s = status) >= 0) {
+        if ((s = status) >= 0 && ForkJoinPool.tryUnsubmitFromCommonPool(this))
+            s = doExec();
+        while (s >= 0) {
             if (U.compareAndSwapInt(this, STATUS, s, s | SIGNAL)) {
                 synchronized (this) {
                     if (status >= 0) {
@@ -303,6 +302,7 @@
                         notifyAll();
                 }
             }
+            s = status;
         }
         if (interrupted)
             Thread.currentThread().interrupt();
@@ -313,10 +313,12 @@
      * Blocks a non-worker-thread until completion or interruption.
      */
     private int externalInterruptibleAwaitDone() throws InterruptedException {
-        int s;
         if (Thread.interrupted())
             throw new InterruptedException();
-        while ((s = status) >= 0) {
+        int s;
+        if ((s = status) >= 0 && ForkJoinPool.tryUnsubmitFromCommonPool(this))
+            s = doExec();
+        while (s >= 0) {
             if (U.compareAndSwapInt(this, STATUS, s, s | SIGNAL)) {
                 synchronized (this) {
                     if (status >= 0)
@@ -325,6 +327,7 @@
                         notifyAll();
                 }
             }
+            s = status;
         }
         return s;
     }
@@ -338,16 +341,12 @@
      */
     private int doJoin() {
         int s; Thread t; ForkJoinWorkerThread wt; ForkJoinPool.WorkQueue w;
-        if ((s = status) >= 0) {
-            if (((t = Thread.currentThread()) instanceof ForkJoinWorkerThread)) {
-                if (!(w = (wt = (ForkJoinWorkerThread)t).workQueue).
-                    tryUnpush(this) || (s = doExec()) >= 0)
-                    s = wt.pool.awaitJoin(w, this);
-            }
-            else
-                s = externalAwaitDone();
-        }
-        return s;
+        return (s = status) < 0 ? s :
+            ((t = Thread.currentThread()) instanceof ForkJoinWorkerThread) ?
+            (w = (wt = (ForkJoinWorkerThread)t).workQueue).
+            tryUnpush(this) && (s = doExec()) < 0 ? s :
+            wt.pool.awaitJoin(w, this) :
+            externalAwaitDone();
     }
 
     /**
@@ -357,14 +356,10 @@
      */
     private int doInvoke() {
         int s; Thread t; ForkJoinWorkerThread wt;
-        if ((s = doExec()) >= 0) {
-            if ((t = Thread.currentThread()) instanceof ForkJoinWorkerThread)
-                s = (wt = (ForkJoinWorkerThread)t).pool.awaitJoin(wt.workQueue,
-                                                                  this);
-            else
-                s = externalAwaitDone();
-        }
-        return s;
+        return (s = doExec()) < 0 ? s :
+            ((t = Thread.currentThread()) instanceof ForkJoinWorkerThread) ?
+            (wt = (ForkJoinWorkerThread)t).pool.awaitJoin(wt.workQueue, this) :
+            externalAwaitDone();
     }
 
     // Exception table support
@@ -619,25 +614,26 @@
     // public methods
 
     /**
-     * Arranges to asynchronously execute this task.  While it is not
-     * necessarily enforced, it is a usage error to fork a task more
-     * than once unless it has completed and been reinitialized.
-     * Subsequent modifications to the state of this task or any data
-     * it operates on are not necessarily consistently observable by
-     * any thread other than the one executing it unless preceded by a
-     * call to {@link #join} or related methods, or a call to {@link
-     * #isDone} returning {@code true}.
-     *
-     * <p>This method may be invoked only from within {@code
-     * ForkJoinPool} computations (as may be determined using method
-     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
-     * result in exceptions or errors, possibly including {@code
-     * ClassCastException}.
+     * Arranges to asynchronously execute this task in the pool the
+     * current task is running in, if applicable, or using the {@link
+     * ForkJoinPool#commonPool} if not {@link #inForkJoinPool}.  While
+     * it is not necessarily enforced, it is a usage error to fork a
+     * task more than once unless it has completed and been
+     * reinitialized.  Subsequent modifications to the state of this
+     * task or any data it operates on are not necessarily
+     * consistently observable by any thread other than the one
+     * executing it unless preceded by a call to {@link #join} or
+     * related methods, or a call to {@link #isDone} returning {@code
+     * true}.
      *
      * @return {@code this}, to simplify usage
      */
     public final ForkJoinTask<V> fork() {
-        ((ForkJoinWorkerThread)Thread.currentThread()).workQueue.push(this);
+        Thread t;
+        if ((t = Thread.currentThread()) instanceof ForkJoinWorkerThread)
+            ((ForkJoinWorkerThread)t).workQueue.push(this);
+        else
+            ForkJoinPool.submitToCommonPool(this);
         return this;
     }
 
@@ -687,12 +683,6 @@
      * cancelled, completed normally or exceptionally, or left
      * unprocessed.
      *
-     * <p>This method may be invoked only from within {@code
-     * ForkJoinPool} computations (as may be determined using method
-     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
-     * result in exceptions or errors, possibly including {@code
-     * ClassCastException}.
-     *
      * @param t1 the first task
      * @param t2 the second task
      * @throws NullPointerException if any task is null
@@ -718,12 +708,6 @@
      * related methods to check if they have been cancelled, completed
      * normally or exceptionally, or left unprocessed.
      *
-     * <p>This method may be invoked only from within {@code
-     * ForkJoinPool} computations (as may be determined using method
-     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
-     * result in exceptions or errors, possibly including {@code
-     * ClassCastException}.
-     *
      * @param tasks the tasks
      * @throws NullPointerException if any task is null
      */
@@ -767,12 +751,6 @@
      * cancelled, completed normally or exceptionally, or left
      * unprocessed.
      *
-     * <p>This method may be invoked only from within {@code
-     * ForkJoinPool} computations (as may be determined using method
-     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
-     * result in exceptions or errors, possibly including {@code
-     * ClassCastException}.
-     *
      * @param tasks the collection of tasks
      * @return the tasks argument, to simplify usage
      * @throws NullPointerException if tasks or any element are null
@@ -1068,17 +1046,15 @@
      * be of use in designs in which many tasks are forked, but none
      * are explicitly joined, instead executing them until all are
      * processed.
-     *
-     * <p>This method may be invoked only from within {@code
-     * ForkJoinPool} computations (as may be determined using method
-     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
-     * result in exceptions or errors, possibly including {@code
-     * ClassCastException}.
      */
     public static void helpQuiesce() {
-        ForkJoinWorkerThread wt =
-            (ForkJoinWorkerThread)Thread.currentThread();
-        wt.pool.helpQuiescePool(wt.workQueue);
+        Thread t;
+        if ((t = Thread.currentThread()) instanceof ForkJoinWorkerThread) {
+            ForkJoinWorkerThread wt = (ForkJoinWorkerThread)t;
+            wt.pool.helpQuiescePool(wt.workQueue);
+        }
+        else
+            ForkJoinPool.externalHelpQuiescePool();
     }
 
     /**
@@ -1131,23 +1107,19 @@
 
     /**
      * Tries to unschedule this task for execution. This method will
-     * typically succeed if this task is the most recently forked task
-     * by the current thread, and has not commenced executing in
-     * another thread.  This method may be useful when arranging
-     * alternative local processing of tasks that could have been, but
-     * were not, stolen.
-     *
-     * <p>This method may be invoked only from within {@code
-     * ForkJoinPool} computations (as may be determined using method
-     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
-     * result in exceptions or errors, possibly including {@code
-     * ClassCastException}.
+     * typically (but is not guaranteed to) succeed if this task is
+     * the most recently forked task by the current thread, and has
+     * not commenced executing in another thread.  This method may be
+     * useful when arranging alternative local processing of tasks
+     * that could have been, but were not, stolen.
      *
      * @return {@code true} if unforked
      */
     public boolean tryUnfork() {
-        return ((ForkJoinWorkerThread)Thread.currentThread())
-            .workQueue.tryUnpush(this);
+        Thread t;
+        return ((t = Thread.currentThread()) instanceof ForkJoinWorkerThread) ?
+            ((ForkJoinWorkerThread)t).workQueue.tryUnpush(this) :
+            ForkJoinPool.tryUnsubmitFromCommonPool(this);
     }
 
     /**
@@ -1156,35 +1128,26 @@
      * value may be useful for heuristic decisions about whether to
      * fork other tasks.
      *
-     * <p>This method may be invoked only from within {@code
-     * ForkJoinPool} computations (as may be determined using method
-     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
-     * result in exceptions or errors, possibly including {@code
-     * ClassCastException}.
-     *
      * @return the number of tasks
      */
     public static int getQueuedTaskCount() {
-        return ((ForkJoinWorkerThread) Thread.currentThread())
-            .workQueue.queueSize();
+        Thread t;
+        return ((t = Thread.currentThread()) instanceof ForkJoinWorkerThread) ?
+            ((ForkJoinWorkerThread)t).workQueue.queueSize() :
+            ForkJoinPool.getEstimatedSubmitterQueueLength();
     }
 
     /**
      * Returns an estimate of how many more locally queued tasks are
      * held by the current worker thread than there are other worker
-     * threads that might steal them.  This value may be useful for
+     * threads that might steal them, or zero if this thread is not
+     * operating in a ForkJoinPool. This value may be useful for
      * heuristic decisions about whether to fork other tasks. In many
      * usages of ForkJoinTasks, at steady state, each worker should
      * aim to maintain a small constant surplus (for example, 3) of
      * tasks, and to process computations locally if this threshold is
      * exceeded.
      *
-     * <p>This method may be invoked only from within {@code
-     * ForkJoinPool} computations (as may be determined using method
-     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
-     * result in exceptions or errors, possibly including {@code
-     * ClassCastException}.
-     *
      * @return the surplus number of tasks, which may be negative
      */
     public static int getSurplusQueuedTaskCount() {
@@ -1231,9 +1194,10 @@
          * have zero queued tasks, so compensate by a factor of
          * (#idle/#active) threads.
          */
-        ForkJoinWorkerThread wt =
-            (ForkJoinWorkerThread)Thread.currentThread();
-        return wt.workQueue.queueSize() - wt.pool.idlePerActive();
+        Thread t; ForkJoinWorkerThread wt;
+        return ((t = Thread.currentThread()) instanceof ForkJoinWorkerThread) ?
+            (wt = (ForkJoinWorkerThread)t).workQueue.queueSize() - wt.pool.idlePerActive() :
+            0;
     }
 
     // Extension methods
@@ -1277,66 +1241,57 @@
     /**
      * Returns, but does not unschedule or execute, a task queued by
      * the current thread but not yet executed, if one is immediately
-     * available. There is no guarantee that this task will actually
-     * be polled or executed next. Conversely, this method may return
-     * null even if a task exists but cannot be accessed without
-     * contention with other threads.  This method is designed
+     * available and the current thread is operating in a
+     * ForkJoinPool. There is no guarantee that this task will
+     * actually be polled or executed next. Conversely, this method
+     * may return null even if a task exists but cannot be accessed
+     * without contention with other threads.  This method is designed
      * primarily to support extensions, and is unlikely to be useful
      * otherwise.
      *
-     * <p>This method may be invoked only from within {@code
-     * ForkJoinPool} computations (as may be determined using method
-     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
-     * result in exceptions or errors, possibly including {@code
-     * ClassCastException}.
-     *
      * @return the next task, or {@code null} if none are available
      */
     protected static ForkJoinTask<?> peekNextLocalTask() {
-        return ((ForkJoinWorkerThread) Thread.currentThread()).workQueue.peek();
+        Thread t;
+        return ((t = Thread.currentThread()) instanceof ForkJoinWorkerThread) ?
+            ((ForkJoinWorkerThread)t).workQueue.peek() :
+            null;
     }
 
     /**
      * Unschedules and returns, without executing, the next task
-     * queued by the current thread but not yet executed.  This method
-     * is designed primarily to support extensions, and is unlikely to
-     * be useful otherwise.
-     *
-     * <p>This method may be invoked only from within {@code
-     * ForkJoinPool} computations (as may be determined using method
-     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
-     * result in exceptions or errors, possibly including {@code
-     * ClassCastException}.
+     * queued by the current thread but not yet executed, if the
+     * current thread is operating in a ForkJoinPool.  This method is
+     * designed primarily to support extensions, and is unlikely to be
+     * useful otherwise.
      *
      * @return the next task, or {@code null} if none are available
      */
     protected static ForkJoinTask<?> pollNextLocalTask() {
-        return ((ForkJoinWorkerThread) Thread.currentThread())
-            .workQueue.nextLocalTask();
+        Thread t;
+        return ((t = Thread.currentThread()) instanceof ForkJoinWorkerThread) ?
+            ((ForkJoinWorkerThread)t).workQueue.nextLocalTask() :
+            null;
     }
 
     /**
-     * Unschedules and returns, without executing, the next task
+     * If the current thread is operating in a ForkJoinPool,
+     * unschedules and returns, without executing, the next task
      * queued by the current thread but not yet executed, if one is
      * available, or if not available, a task that was forked by some
      * other thread, if available. Availability may be transient, so a
-     * {@code null} result does not necessarily imply quiescence
-     * of the pool this task is operating in.  This method is designed
+     * {@code null} result does not necessarily imply quiescence of
+     * the pool this task is operating in.  This method is designed
      * primarily to support extensions, and is unlikely to be useful
      * otherwise.
      *
-     * <p>This method may be invoked only from within {@code
-     * ForkJoinPool} computations (as may be determined using method
-     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
-     * result in exceptions or errors, possibly including {@code
-     * ClassCastException}.
-     *
      * @return a task, or {@code null} if none are available
      */
     protected static ForkJoinTask<?> pollTask() {
-        ForkJoinWorkerThread wt =
-            (ForkJoinWorkerThread)Thread.currentThread();
-        return wt.pool.nextTaskFor(wt.workQueue);
+        Thread t; ForkJoinWorkerThread wt;
+        return ((t = Thread.currentThread()) instanceof ForkJoinWorkerThread) ?
+            (wt = (ForkJoinWorkerThread)t).pool.nextTaskFor(wt.workQueue) :
+            null;
     }
 
     // tag operations
@@ -1530,7 +1485,7 @@
         exceptionTableRefQueue = new ReferenceQueue<Object>();
         exceptionTable = new ExceptionNode[EXCEPTION_MAP_CAPACITY];
         try {
-            U = getUnsafe();
+            U = sun.misc.Unsafe.getUnsafe();
             STATUS = U.objectFieldOffset
                 (ForkJoinTask.class.getDeclaredField("status"));
         } catch (Exception e) {
@@ -1538,31 +1493,4 @@
         }
     }
 
-    /**
-     * Returns a sun.misc.Unsafe.  Suitable for use in a 3rd party package.
-     * Replace with a simple call to Unsafe.getUnsafe when integrating
-     * into a jdk.
-     *
-     * @return a sun.misc.Unsafe
-     */
-    private static sun.misc.Unsafe getUnsafe() {
-        try {
-            return sun.misc.Unsafe.getUnsafe();
-        } catch (SecurityException se) {
-            try {
-                return java.security.AccessController.doPrivileged
-                    (new java.security
-                     .PrivilegedExceptionAction<sun.misc.Unsafe>() {
-                        public sun.misc.Unsafe run() throws Exception {
-                            java.lang.reflect.Field f = sun.misc
-                                .Unsafe.class.getDeclaredField("theUnsafe");
-                            f.setAccessible(true);
-                            return (sun.misc.Unsafe) f.get(null);
-                        }});
-            } catch (java.security.PrivilegedActionException e) {
-                throw new RuntimeException("Could not initialize intrinsics",
-                                           e.getCause());
-            }
-        }
-    }
 }
--- a/src/share/classes/java/util/concurrent/ForkJoinUtils.java	Tue Oct 30 16:46:32 2012 -0700
+++ b/src/share/classes/java/util/concurrent/ForkJoinUtils.java	Tue Oct 30 17:30:48 2012 -0700
@@ -43,7 +43,7 @@
  * <ul>
  *  <li>Parallel sorting of arrays</li>
  * </ul>
- * All methods use the {@link #defaultFJPool default Fork-Join pool}.
+ * All methods use the {@link #ForkJoinPool.commonPool} to execute the tasks.
  *
  * @since 8
  */
@@ -62,95 +62,7 @@
     }
 
     public static long suggestTargetSize(long s) {
-        return 1 + ((s + 7) >>> 3) / defaultFJPool().getParallelism();
-    }
-
-    /**
-     * The exception thrown if initialization of the
-     * {@link #defaultFJPool default ForkJoinPool}
-     * fails trying to instantiate the user-defined
-     * {@link ForkJoinWorkerThreadFactory thread factory} or
-     * {@link java.lang.Thread.UncaughtExceptionHandler uncaught exception handler}.
-     * The {@link #getCause() cause} of this exception is the original
-     * exception that reported the failure.
-     *
-     * @see ForkJoinUtils#defaultFJPool()
-     */
-    public static class PoolInitializationException extends RuntimeException {
-        static final long serialVersionUID = 6170719558364436271L;
-
-        /** Constructs a new runtime exception with the specified cause and a
-         * detail message of <tt>(cause==null ? null : cause.toString())</tt>
-         * (which typically contains the class and detail message of
-         * <tt>cause</tt>).  This constructor is useful for runtime exceptions
-         * that are little more than wrappers for other throwables.
-         *
-         * @param  cause the cause (which is saved for later retrieval by the
-         *         {@link #getCause()} method).  (A {@code null} value is
-         *         permitted, and indicates that the cause is nonexistent or
-         *         unknown.)
-         */
-        // private as only construction is done internally
-        private PoolInitializationException(Throwable cause) {
-            super(cause);
-        }
-    }
-
-    /** Lazy-initialization holder class for the default ForkJoinPool */
-    private static class DefaultPool {
-
-        /** The ForkJoinPool we use for submissions by default */
-        private static final ForkJoinPool _instance;
-
-        /** The exception that occurred trying to instantiate any user-defined
-            initialization objects
-        */
-        private static Throwable _error;
-
-        private static final String propPrefix = "java.util.concurrent.ForkJoinUtils.pool.";
-        static {
-            int parallelism = Runtime.getRuntime().availableProcessors(); // default for FJP
-            String sParallelism =
-                System.getProperty(propPrefix + "parallelism");
-            if (sParallelism != null) {
-                try {
-                    int temp = Integer.parseInt(sParallelism);
-                    if (temp > 0)
-                        parallelism = temp;
-                }
-                catch (NumberFormatException _ignore) {}
-            }
-
-            ForkJoinWorkerThreadFactory factory;
-            UncaughtExceptionHandler ueh = null;
-            ForkJoinPool temp = null;
-            try {
-                String cls = System.getProperty(propPrefix + "threadFactory");
-                if (cls != null)
-                    factory = (ForkJoinWorkerThreadFactory)
-                        ClassLoader.getSystemClassLoader().loadClass(cls).newInstance();
-                else
-                    factory = ForkJoinPool.defaultForkJoinWorkerThreadFactory;
-
-                cls = System.getProperty(propPrefix + "exceptionHandler");
-                if (cls != null)
-                    ueh = (UncaughtExceptionHandler)
-                        ClassLoader.getSystemClassLoader().loadClass(cls).newInstance();
-
-                temp = new ForkJoinPool(parallelism, factory, ueh, false /*not async*/);
-            }
-            catch (Throwable ex) {
-                _error = ex;
-            }
-
-            _instance = temp;
-        }
-
-        static ForkJoinPool instance() {
-            if (_instance != null)
-                return _instance;
-            throw new PoolInitializationException(_error);
-        }
+        return 1 + ((s + 7) >>> 3) / ForkJoinPool.getCommonPoolParallelism();
     }
 
     /**
@@ -159,43 +71,6 @@
      */
     public static final int MIN_SORT_GRAN = 256; // reasonable default so that we don't overcreate tasks
 
-
-    /**
-     * Returns a reference to the default {@link ForkJoinPool} used by
-     * the methods of this class.
-     *
-     * The underlying pool is constructed lazily and can be customized by setting the
-     * following {@link java.lang.System#getProperty(String) system properties}:
-     * <dl>
-     *  <dt>{@code java.util.concurrent.ForkJoinUtils.pool.parallelism=N}</dt>
-     *  <dd>Where N is the target parallelism level to pass to the
-     *      {@link ForkJoinPool#ForkJoinPool(int, ForkJoinWorkerThreadFactory, Thread.UncaughtExceptionHandler, boolean) ForkJoinPool constructor}.
-     *      If N is &le; 0 then the default parallelism level is used.
-     *  </dd>
-     *  <dt>{@code java.util.concurrent.ForkJoinUtils.pool.threadFactory=<class-name>}</dt>
-     *  <dd>The class to be instantiated to provide the instance of {@link ForkJoinWorkerThreadFactory}
-     *      to be passed to the {@link ForkJoinPool#ForkJoinPool(int, ForkJoinWorkerThreadFactory, Thread.UncaughtExceptionHandler, boolean) ForkJoinPool constructor}.
-     *   </dd>
-     *  <dt>{@code java.util.concurrent.ForkJoinUtils.pool.exceptionHandler=<class-name>}</dt>
-     *  <dd>The class to be instantiated to provide the instance of {@link java.lang.Thread.UncaughtExceptionHandler}
-     *      to be passed to the {@link ForkJoinPool#ForkJoinPool(int, ForkJoinWorkerThreadFactory, Thread.UncaughtExceptionHandler, boolean) ForkJoinPool constructor}.
-     *   </dd>
-     *  </dl>
-     *  Where class name properties are supplied the class is loaded using the
-     *  {@link java.lang.ClassLoader#getSystemClassLoader system classloader},
-     *  and an instance is created using the no-argument constructor.
-     *
-     *  @throws PoolInitializationException if the pool could not be constructed for any
-     *          reason, including failure to load or instantiate one of the user-supplied
-     *          customization classes
-     *
-     *  @return The pool used by this class
-     */
-    public static ForkJoinPool defaultFJPool() {
-        return DefaultPool.instance();
-    }
-
-
     // RFE: we should only need a working array as large as the subarray
     //      to be sorted, but the logic assumes that indices in the two
     //      arrays always line-up
@@ -203,9 +78,6 @@
     /**
      * Sorts the specified array into ascending numerical order.
      *
-     * This method will cause initialization of the
-     * {@link #defaultFJPool() default ForkJoinPool} if necessary.
-     *
      * <p>Implementation note: The sorting algorithm is a parallel sort-merge
      * that breaks the array into sub-arrays that are themselves sorted and then
      * merged. When the sub-array length reaches a minimum granularity (currently
@@ -227,9 +99,6 @@
      * the index {@code toIndex}, exclusive. If {@code fromIndex == toIndex},
      * the range to be sorted is empty.
      *
-     * This method will cause initialization of the
-     * {@link #defaultFJPool() default ForkJoinPool} if necessary.
-     *
      * <p>Implementation note: The sorting algorithm is a parallel sort-merge
      * that breaks the array into sub-arrays that are themselves sorted and then
      * merged. When the sub-array length reaches a minimum granularity (currently
@@ -255,16 +124,13 @@
         byte[] ws = new byte[a.length];
         int gran = getThreshold(nelements);
         FJByte.Sorter task = new FJByte.Sorter(a, ws, origin, nelements, gran);
-        defaultFJPool().invoke(task);
+        task.invoke();
         return a;
     }
 
     /**
      * Sorts the specified array into ascending numerical order.
      *
-     * This method will cause initialization of the
-     * {@link #defaultFJPool() default ForkJoinPool} if necessary.
-     *
      * <p>Implementation note: The sorting algorithm is a parallel sort-merge
      * that breaks the array into sub-arrays that are themselves sorted and then
      * merged. When the sub-array length reaches a minimum granularity (currently
@@ -286,9 +152,6 @@
      * the index {@code toIndex}, exclusive. If {@code fromIndex == toIndex},
      * the range to be sorted is empty.
      *
-     * This method will cause initialization of the
-     * {@link #defaultFJPool() default ForkJoinPool} if necessary.
-     *
      * <p>Implementation note: The sorting algorithm is a parallel sort-merge
      * that breaks the array into sub-arrays that are themselves sorted and then
      * merged. When the sub-array length reaches a minimum granularity (currently
@@ -314,16 +177,13 @@
         char[] ws = new char[a.length];
         int gran = getThreshold(nelements);
         FJChar.Sorter task = new FJChar.Sorter(a, ws, origin, nelements, gran);
-        defaultFJPool().invoke(task);
+        task.invoke();
         return a;
     }
 
     /**
      * Sorts the specified array into ascending numerical order.
      *
-     * This method will cause initialization of the
-     * {@link #defaultFJPool() default ForkJoinPool} if necessary.
-     *
      * <p>Implementation note: The sorting algorithm is a parallel sort-merge
      * that breaks the array into sub-arrays that are themselves sorted and then
      * merged. When the sub-array length reaches a minimum granularity (currently
@@ -345,9 +205,6 @@
      * the index {@code toIndex}, exclusive. If {@code fromIndex == toIndex},
      * the range to be sorted is empty.
      *
-     * This method will cause initialization of the
-     * {@link #defaultFJPool() default ForkJoinPool} if necessary.
-     *
      * <p>Implementation note: The sorting algorithm is a parallel sort-merge
      * that breaks the array into sub-arrays that are themselves sorted and then
      * merged. When the sub-array length reaches a minimum granularity (currently
@@ -373,16 +230,13 @@
         short[] ws = new short[a.length];
         int gran = getThreshold(nelements);
         FJShort.Sorter task = new FJShort.Sorter(a, ws, origin, nelements, gran);
-        defaultFJPool().invoke(task);
+        task.invoke();
         return a;
     }
 
     /**
      * Sorts the specified array into ascending numerical order.
      *
-     * This method will cause initialization of the
-     * {@link #defaultFJPool() default ForkJoinPool} if necessary.
-     *
      * <p>Implementation note: The sorting algorithm is a parallel sort-merge
      * that breaks the array into sub-arrays that are themselves sorted and then
      * merged. When the sub-array length reaches a minimum granularity (currently
@@ -404,9 +258,6 @@
      * the index {@code toIndex}, exclusive. If {@code fromIndex == toIndex},
      * the range to be sorted is empty.
      *
-     * This method will cause initialization of the
-     * {@link #defaultFJPool() default ForkJoinPool} if necessary.
-     *
      * <p>Implementation note: The sorting algorithm is a parallel sort-merge
      * that breaks the array into sub-arrays that are themselves sorted and then
      * merged. When the sub-array length reaches a minimum granularity (currently
@@ -432,16 +283,13 @@
         int[] ws = new int[a.length];
         int gran = getThreshold(nelements);
         FJInt.Sorter task = new FJInt.Sorter(a, ws, origin, nelements, gran);
-        defaultFJPool().invoke(task);
+        task.invoke();
         return a;
     }
 
     /**
      * Sorts the specified array into ascending numerical order.
      *
-     * This method will cause initialization of the
-     * {@link #defaultFJPool() default ForkJoinPool} if necessary.
-     *
      * <p>Implementation note: The sorting algorithm is a parallel sort-merge
      * that breaks the array into sub-arrays that are themselves sorted and then
      * merged. When the sub-array length reaches a minimum granularity (currently
@@ -463,9 +311,6 @@
      * the index {@code toIndex}, exclusive. If {@code fromIndex == toIndex},
      * the range to be sorted is empty.
      *
-     * This method will cause initialization of the
-     * {@link #defaultFJPool() default ForkJoinPool} if necessary.
-     *
      * <p>Implementation note: The sorting algorithm is a parallel sort-merge
      * that breaks the array into sub-arrays that are themselves sorted and then
      * merged. When the sub-array length reaches a minimum granularity (currently
@@ -491,7 +336,7 @@
         long[] ws = new long[a.length];
         int gran = getThreshold(nelements);
         FJLong.Sorter task = new FJLong.Sorter(a, ws, origin, nelements, gran);
-        defaultFJPool().invoke(task);
+        task.invoke();
         return a;
     }
 
@@ -506,9 +351,6 @@
      * {@code 0.0f} and {@code Float.NaN} is considered greater than any
      * other value and all {@code Float.NaN} values are considered equal.
      *
-     * This method will cause initialization of the
-     * {@link #defaultFJPool() default ForkJoinPool} if necessary.
-     *
      * <p>Implementation note: The sorting algorithm is a parallel sort-merge
      * that breaks the array into sub-arrays that are themselves sorted and then
      * merged. When the sub-array length reaches a minimum granularity (currently
@@ -538,9 +380,6 @@
      * {@code 0.0f} and {@code Float.NaN} is considered greater than any
      * other value and all {@code Float.NaN} values are considered equal.
      *
-     * This method will cause initialization of the
-     * {@link #defaultFJPool() default ForkJoinPool} if necessary.
-     *
      * <p>Implementation note: The sorting algorithm is a parallel sort-merge
      * that breaks the array into sub-arrays that are themselves sorted and then
      * merged. When the sub-array length reaches a minimum granularity (currently
@@ -566,7 +405,7 @@
         float[] ws = new float[a.length];
         int gran = getThreshold(nelements);
         FJFloat.Sorter task = new FJFloat.Sorter(a, ws, origin, nelements, gran);
-        defaultFJPool().invoke(task);
+        task.invoke();
         return a;
     }
 
@@ -581,9 +420,6 @@
      * {@code 0.0d} and {@code Double.NaN} is considered greater than any
      * other value and all {@code Double.NaN} values are considered equal.
      *
-     * This method will cause initialization of the
-     * {@link #defaultFJPool() default ForkJoinPool} if necessary.
-     *
      * <p>Implementation note: The sorting algorithm is a parallel sort-merge
      * that breaks the array into sub-arrays that are themselves sorted and then
      * merged. When the sub-array length reaches a minimum granularity (currently
@@ -614,9 +450,6 @@
      * {@code 0.0d} and {@code Double.NaN} is considered greater than any
      * other value and all {@code Double.NaN} values are considered equal.
      *
-     * This method will cause initialization of the
-     * {@link #defaultFJPool() default ForkJoinPool} if necessary.
-     *
      * <p>Implementation note: The sorting algorithm is a parallel sort-merge
      * that breaks the array into sub-arrays that are themselves sorted and then
      * merged. When the sub-array length reaches a minimum granularity (currently
@@ -642,7 +475,7 @@
         double[] ws = new double[a.length];
         int gran = getThreshold(nelements);
         FJDouble.Sorter task = new FJDouble.Sorter(a, ws, origin, nelements, gran);
-        defaultFJPool().invoke(task);
+        task.invoke();
         return a;
     }
 
@@ -658,9 +491,6 @@
      * <p>This sort is not guaranteed to be <i>stable</i>:  equal elements
      * may be reordered as a result of the sort.
      *
-     * This method will cause initialization of the
-     * {@link #defaultFJPool() default ForkJoinPool} if necessary.
-     *
      * <p>Implementation note: The sorting algorithm is a parallel sort-merge
      * that breaks the array into sub-arrays that are themselves sorted and then
      * merged. When the sub-array length reaches a minimum granularity (currently
@@ -733,7 +563,7 @@
         T[] ws = (T[])Array.newInstance(tc, a.length);
         int gran = getThreshold(nelements);
         FJComparable.Sorter<T> task = new FJComparable.Sorter<>(a, ws, origin, nelements, gran);
-        defaultFJPool().invoke(task);
+        task.invoke();
         return a;
     }
 
@@ -747,9 +577,6 @@
      * <p>This sort is not guaranteed to be <i>stable</i>:  equal elements
      * may be reordered as a result of the sort.
      *
-     * This method will cause initialization of the
-     * {@link #defaultFJPool() default ForkJoinPool} if necessary.
-     *
      * <p>Implementation note: The sorting algorithm is a parallel sort-merge
      * that breaks the array into sub-arrays that are themselves sorted and then
      * merged. When the sub-array length reaches a minimum granularity (currently
@@ -820,7 +647,7 @@
         T[] ws = (T[])Array.newInstance(tc, a.length);
         int gran = getThreshold(nelements);
         FJComparator.Sorter<T> task = new FJComparator.Sorter<>(a, ws, origin, nelements, gran, cmp);
-        defaultFJPool().invoke(task);
+        task.invoke();
         return a;
     }
 
@@ -1922,7 +1749,7 @@
      * @param n number of elements in the array to be processed
      */
     static final int getThreshold(int n) {
-        int p = defaultFJPool().getParallelism();
+        int p = ForkJoinPool.getCommonPoolParallelism();
         int t = (p > 1) ? (1 + n / (p << 3)) : n;
         return t < MIN_SORT_GRAN ? MIN_SORT_GRAN : t;
     }
--- a/src/share/classes/java/util/streams/AbstractPipeline.java	Tue Oct 30 16:46:32 2012 -0700
+++ b/src/share/classes/java/util/streams/AbstractPipeline.java	Tue Oct 30 17:30:48 2012 -0700
@@ -284,7 +284,7 @@
 
         @Override
         public <FJ_R> FJ_R invoke(ForkJoinTask<FJ_R> task) {
-            return ForkJoinUtils.defaultFJPool().invoke(task);
+            return task.invoke();
         }
 
         @Override
--- a/src/share/classes/java/util/streams/ops/AbstractTask.java	Tue Oct 30 16:46:32 2012 -0700
+++ b/src/share/classes/java/util/streams/ops/AbstractTask.java	Tue Oct 30 17:30:48 2012 -0700
@@ -79,19 +79,8 @@
         return (T) getCompleter();
     }
 
-    protected T getRoot() {
-        T result = (T) this;
-        while (true) {
-            T parent = result.getParent();
-            if (parent == null)
-                return result;
-            else
-                result = parent;
-        }
-    }
-
     protected void completeRoot(R result) {
-        getRoot().complete(result);
+        ((CountedCompleter<R>)getRoot()).complete(result);
     }
 
     @Override
@@ -99,7 +88,7 @@
         isLeaf = spliterator.shouldNotSplit(helper.suggestTargetSize());
         if (isLeaf) {
             setRawResult(doLeaf());
-            tryComplete();
+            helpComplete();
         }
         else {
             int naturalSplits = spliterator.getNaturalSplits();
--- a/src/share/classes/java/util/streams/ops/TreeUtils.java	Tue Oct 30 16:46:32 2012 -0700
+++ b/src/share/classes/java/util/streams/ops/TreeUtils.java	Tue Oct 30 17:30:48 2012 -0700
@@ -75,7 +75,7 @@
     public static <T> Node<T> flatten(Node<T> node) {
         if (node.getChildCount() > 0) {
             T[] array = (T[]) new Object[node.size()];
-            ForkJoinUtils.defaultFJPool().invoke(new ToArrayTask<>(node, array, 0));
+            new ToArrayTask<>(node, array, 0).invoke();
             return Nodes.node(array);
         } else {
             return node;
@@ -85,7 +85,7 @@
     public static <T> void copyTo(Node<T> node, T[] array, int offset) {
         // @@@ Currently only used by Nodes.ConcNode
         if (node.getChildCount() > 0) {
-            ForkJoinUtils.defaultFJPool().invoke(new ToArrayTask<>(node, array, offset));
+            new ToArrayTask<>(node, array, offset).invoke();
         } else {
             node.copyInto(array, offset);
         }
@@ -162,7 +162,7 @@
             boolean isLeaf = spliterator.shouldNotSplit(helper.suggestTargetSize());
             if (isLeaf) {
                 OpUtils.intoUnwrapped(helper, spliterator, Arrays.sink(array, offset, length));
-                tryComplete();
+                helpComplete();
             }
             else {
                 int naturalSplits = spliterator.getNaturalSplits();
@@ -220,7 +220,7 @@
                 firstTask.compute();
             } else {
                 node.copyInto(array, offset);
-                tryComplete();
+                helpComplete();
             }
         }
     }