changeset 7771:acb6528d89a5

Update parallel sort to be stable when sorting objects. Contributed-by: Doug Lea <dl@cs.oswego.edu>
author psandoz
date Thu, 28 Mar 2013 17:10:40 +0100
parents c6cad59607b9
children a23ad9bc7ff4
files src/share/classes/java/util/Arrays.java src/share/classes/java/util/ArraysParallelSortHelpers.java src/share/classes/java/util/ComparableTimSort.java src/share/classes/java/util/TimSort.java
diffstat 4 files changed, 1119 insertions(+), 1161 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/classes/java/util/Arrays.java	Thu Mar 28 10:48:43 2013 +0100
+++ b/src/share/classes/java/util/Arrays.java	Thu Mar 28 17:10:40 2013 +0100
@@ -69,10 +69,12 @@
 public class Arrays {
 
     /**
-     * The minimum array length below which the sorting algorithm will not further
-     * partition the sorting task.
+     * The minimum array length below which a parallel sorting
+     * algorithm will not further partition the sorting task. Using
+     * smaller sizes typically results in memory contention across
+     * tasks that makes parallel speedups unlikely.
      */
-    public static final int MIN_ARRAY_SORT_GRAN = 256; // reasonable default so that we don't overcreate tasks
+    public static final int MIN_ARRAY_SORT_GRAN = 1 << 13; 
 
     // Suppresses default constructor, ensuring non-instantiability.
     private Arrays() {}
@@ -432,13 +434,16 @@
      */
     public static void parallelSort(byte[] a, int fromIndex, int toIndex) {
         checkFromToBounds(a.length, fromIndex, toIndex);
-        int origin = fromIndex;
-        int fence = toIndex;
-        int nelements = fence - origin;
-        byte[] ws = new byte[a.length];
-        int gran = getSplitThreshold(nelements);
-        ArraySortHelpers.FJByte.Sorter task = new ArraySortHelpers.FJByte.Sorter(a, ws, origin, nelements, gran);
-        task.invoke();
+        int n = toIndex - fromIndex;
+        int p = ForkJoinPool.getCommonPoolParallelism();
+        if (p == 1 || n <= MIN_ARRAY_SORT_GRAN)
+            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
+        else {
+            int g = n / (p << 3);
+            new ArraysParallelSortHelpers.FJByte.Sorter
+                (null, a, new byte[n], fromIndex, n, 0,
+                 (g <= MIN_ARRAY_SORT_GRAN) ? MIN_ARRAY_SORT_GRAN : g).invoke();
+        }
     }
 
     /**
@@ -480,13 +485,16 @@
      */
     public static void parallelSort(char[] a, int fromIndex, int toIndex) {
         checkFromToBounds(a.length, fromIndex, toIndex);
-        int origin = fromIndex;
-        int fence = toIndex;
-        int nelements = fence - origin;
-        char[] ws = new char[a.length];
-        int gran = getSplitThreshold(nelements);
-        ArraySortHelpers.FJChar.Sorter task = new ArraySortHelpers.FJChar.Sorter(a, ws, origin, nelements, gran);
-        task.invoke();
+        int n = toIndex - fromIndex;
+        int p = ForkJoinPool.getCommonPoolParallelism();
+        if (p == 1 || n <= MIN_ARRAY_SORT_GRAN)
+            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
+        else {
+            int g = n / (p << 3);
+            new ArraysParallelSortHelpers.FJChar.Sorter
+                (null, a, new char[n], fromIndex, n, 0,
+                 (g <= MIN_ARRAY_SORT_GRAN) ? MIN_ARRAY_SORT_GRAN : g).invoke();
+        }
     }
 
     /**
@@ -528,13 +536,16 @@
      */
     public static void parallelSort(short[] a, int fromIndex, int toIndex) {
         checkFromToBounds(a.length, fromIndex, toIndex);
-        int origin = fromIndex;
-        int fence = toIndex;
-        int nelements = fence - origin;
-        short[] ws = new short[a.length];
-        int gran = getSplitThreshold(nelements);
-        ArraySortHelpers.FJShort.Sorter task = new ArraySortHelpers.FJShort.Sorter(a, ws, origin, nelements, gran);
-        task.invoke();
+        int n = toIndex - fromIndex;
+        int p = ForkJoinPool.getCommonPoolParallelism();
+        if (p == 1 || n <= MIN_ARRAY_SORT_GRAN)
+            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
+        else {
+            int g = n / (p << 3);
+            new ArraysParallelSortHelpers.FJShort.Sorter
+                (null, a, new short[n], fromIndex, n, 0,
+                 (g <= MIN_ARRAY_SORT_GRAN) ? MIN_ARRAY_SORT_GRAN : g).invoke();
+        }
     }
 
     /**
@@ -576,13 +587,16 @@
      */
     public static void parallelSort(int[] a, int fromIndex, int toIndex) {
         checkFromToBounds(a.length, fromIndex, toIndex);
-        int origin = fromIndex;
-        int fence = toIndex;
-        int nelements = fence - origin;
-        int[] ws = new int[a.length];
-        int gran = getSplitThreshold(nelements);
-        ArraySortHelpers.FJInt.Sorter task = new ArraySortHelpers.FJInt.Sorter(a, ws, origin, nelements, gran);
-        task.invoke();
+        int n = toIndex - fromIndex;
+        int p = ForkJoinPool.getCommonPoolParallelism();
+        if (p == 1 || n <= MIN_ARRAY_SORT_GRAN)
+            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
+        else {
+            int g = n / (p << 3);
+            new ArraysParallelSortHelpers.FJInt.Sorter
+                (null, a, new int[n], fromIndex, n, 0,
+                 (g <= MIN_ARRAY_SORT_GRAN) ? MIN_ARRAY_SORT_GRAN : g).invoke();
+        }
     }
 
     /**
@@ -624,13 +638,16 @@
      */
     public static void parallelSort(long[] a, int fromIndex, int toIndex) {
         checkFromToBounds(a.length, fromIndex, toIndex);
-        int origin = fromIndex;
-        int fence = toIndex;
-        int nelements = fence - origin;
-        long[] ws = new long[a.length];
-        int gran = getSplitThreshold(nelements);
-        ArraySortHelpers.FJLong.Sorter task = new ArraySortHelpers.FJLong.Sorter(a, ws, origin, nelements, gran);
-        task.invoke();
+        int n = toIndex - fromIndex;
+        int p = ForkJoinPool.getCommonPoolParallelism();
+        if (p == 1 || n <= MIN_ARRAY_SORT_GRAN)
+            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
+        else {
+            int g = n / (p << 3);
+            new ArraysParallelSortHelpers.FJLong.Sorter
+                (null, a, new long[n], fromIndex, n, 0,
+                 (g <= MIN_ARRAY_SORT_GRAN) ? MIN_ARRAY_SORT_GRAN : g).invoke();
+        }
     }
 
     /**
@@ -688,13 +705,16 @@
      */
     public static void parallelSort(float[] a, int fromIndex, int toIndex) {
         checkFromToBounds(a.length, fromIndex, toIndex);
-        int origin = fromIndex;
-        int fence = toIndex;
-        int nelements = fence - origin;
-        float[] ws = new float[a.length];
-        int gran = getSplitThreshold(nelements);
-        ArraySortHelpers.FJFloat.Sorter task = new ArraySortHelpers.FJFloat.Sorter(a, ws, origin, nelements, gran);
-        task.invoke();
+        int n = toIndex - fromIndex;
+        int p = ForkJoinPool.getCommonPoolParallelism();
+        if (p == 1 || n <= MIN_ARRAY_SORT_GRAN)
+            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
+        else {
+            int g = n / (p << 3);
+            new ArraysParallelSortHelpers.FJFloat.Sorter
+                (null, a, new float[n], fromIndex, n, 0,
+                 (g <= MIN_ARRAY_SORT_GRAN) ? MIN_ARRAY_SORT_GRAN : g).invoke();
+        }
     }
 
     /**
@@ -727,7 +747,6 @@
      * the index {@code toIndex}, exclusive. If {@code fromIndex == toIndex},
      * the range to be sorted is empty.
      *
-     *
      * <p>The {@code <} relation does not provide a total order on all double
      * values: {@code -0.0d == 0.0d} is {@code true} and a {@code Double.NaN}
      * value compares neither less than, greater than, nor equal to any value,
@@ -753,13 +772,16 @@
      */
     public static void parallelSort(double[] a, int fromIndex, int toIndex) {
         checkFromToBounds(a.length, fromIndex, toIndex);
-        int origin = fromIndex;
-        int fence = toIndex;
-        int nelements = fence - origin;
-        double[] ws = new double[a.length];
-        int gran = getSplitThreshold(nelements);
-        ArraySortHelpers.FJDouble.Sorter task = new ArraySortHelpers.FJDouble.Sorter(a, ws, origin, nelements, gran);
-        task.invoke();
+        int n = toIndex - fromIndex;
+        int p = ForkJoinPool.getCommonPoolParallelism();
+        if (p == 1 || n <= MIN_ARRAY_SORT_GRAN)
+            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
+        else {
+            int g = n / (p << 3);
+            new ArraysParallelSortHelpers.FJDouble.Sorter
+                (null, a, new double[n], fromIndex, n, 0,
+                 (g <= MIN_ARRAY_SORT_GRAN) ? MIN_ARRAY_SORT_GRAN : g).invoke();
+        }
     }
 
     /**
@@ -771,8 +793,8 @@
      * not throw a {@code ClassCastException} for any elements {@code e1}
      * and {@code e2} in the array).
      *
-     * <p>This sort is not guaranteed to be <i>stable</i>:  equal elements
-     * may be reordered as a result of the sort.
+     * <p>This sort is guaranteed to be <i>stable</i>:  equal elements will
+     * not be reordered as a result of the sort.
      *
      * <p>Implementation note: The sorting algorithm is a parallel sort-merge
      * that breaks the array into sub-arrays that are themselves sorted and then
@@ -806,8 +828,8 @@
      * {@code ClassCastException} for any elements {@code e1} and
      * {@code e2} in the array).
      *
-     * <p>This sort is not guaranteed to be <i>stable</i>:  equal elements
-     * may be reordered as a result of the sort.
+     * <p>This sort is guaranteed to be <i>stable</i>:  equal elements will
+     * not be reordered as a result of the sort.
      *
      * This method will cause initialization of the
      * default {@code ForkJoinPool} if necessary.
@@ -832,18 +854,23 @@
      *         not <i>mutually comparable</i> (for example, strings and
      *         integers).
      */
+
     public static <T extends Comparable<? super T>>
-            void parallelSort(T[] a, int fromIndex, int toIndex) {
+    void parallelSort(T[] a, int fromIndex, int toIndex) {
         checkFromToBounds(a.length, fromIndex, toIndex);
-        int origin = fromIndex;
-        int fence = toIndex;
-        int nelements = fence - origin;
-        Class<?> tc = a.getClass().getComponentType();
-        @SuppressWarnings("unchecked")
-        T[] ws = (T[])Array.newInstance(tc, a.length);
-        int gran = getSplitThreshold(nelements);
-        ArraySortHelpers.FJComparable.Sorter<T> task = new ArraySortHelpers.FJComparable.Sorter<>(a, ws, origin, nelements, gran);
-        task.invoke();
+        int n = toIndex - fromIndex;
+        int p = ForkJoinPool.getCommonPoolParallelism();
+        if (p == 1 || n <= MIN_ARRAY_SORT_GRAN)
+            ComparableTimSort.sort(a, fromIndex, toIndex);
+        else {
+            int g = n / (p << 3);
+            Class<?> tc = a.getClass().getComponentType();
+            @SuppressWarnings("unchecked")
+            T[] ws = (T[])Array.newInstance(tc, n);
+            new ArraysParallelSortHelpers.FJComparable.Sorter<>
+                (null, a, ws, fromIndex, n, 0, 
+                 (g <= MIN_ARRAY_SORT_GRAN) ? MIN_ARRAY_SORT_GRAN : g).invoke();
+        }
     }
 
     /**
@@ -853,8 +880,8 @@
      * {@code c.compare(e1, e2)} must not throw a {@code ClassCastException}
      * for any elements {@code e1} and {@code e2} in the array).
      *
-     * <p>This sort is not guaranteed to be <i>stable</i>:  equal elements
-     * may be reordered as a result of the sort.
+     * <p>This sort is guaranteed to be <i>stable</i>:  equal elements will
+     * not be reordered as a result of the sort.
      *
      * <p>Implementation note: The sorting algorithm is a parallel sort-merge
      * that breaks the array into sub-arrays that are themselves sorted and then
@@ -886,8 +913,8 @@
      * {@code c.compare(e1, e2)} must not throw a {@code ClassCastException}
      * for any elements {@code e1} and {@code e2} in the range).
      *
-     * <p>This sort is not guaranteed to be <i>stable</i>:  equal elements
-     * may be reordered as a result of the sort.
+     * <p>This sort is guaranteed to be <i>stable</i>:  equal elements will
+     * not be reordered as a result of the sort.
      *
      * <p>Implementation note: The sorting algorithm is a parallel sort-merge
      * that breaks the array into sub-arrays that are themselves sorted and then
@@ -914,28 +941,20 @@
      */
     public static <T> void parallelSort(T[] a, int fromIndex, int toIndex,
                                         Comparator<? super T> cmp) {
-        checkFromToBounds(a.length, fromIndex, toIndex);
-        int origin = fromIndex;
-        int fence = toIndex;
-        int nelements = fence - origin;
-        Class<?> tc = a.getClass().getComponentType();
-        @SuppressWarnings("unchecked")
-        T[] ws = (T[])Array.newInstance(tc, a.length);
-        int gran = getSplitThreshold(nelements);
-        ArraySortHelpers.FJComparator.Sorter<T> task = new ArraySortHelpers.FJComparator.Sorter<>(a, ws, origin, nelements, gran, cmp);
-        task.invoke();
-    }
-
-    /**
-     * Returns the size threshold for splitting into subtasks.
-     * By default, uses about 8 times as many tasks as threads
-     *
-     * @param n number of elements in the array to be processed
-     */
-    private static final int getSplitThreshold(int n) {
+        int n = toIndex - fromIndex;
         int p = ForkJoinPool.getCommonPoolParallelism();
-        int t = (p > 1) ? (1 + n / (p << 3)) : n;
-        return t < MIN_ARRAY_SORT_GRAN ? MIN_ARRAY_SORT_GRAN : t;
+        if (p == 1 || n <= MIN_ARRAY_SORT_GRAN)
+            TimSort.sort(a, fromIndex, toIndex, cmp);
+        else {
+            int g = n / (p << 3);
+            Class<?> tc = a.getClass().getComponentType();
+            @SuppressWarnings("unchecked")
+                T[] ws = (T[])Array.newInstance(tc, n);
+            new ArraysParallelSortHelpers.FJComparator.Sorter<>
+                (null, a, ws, fromIndex, n, 0,
+                 (g <= MIN_ARRAY_SORT_GRAN) ? MIN_ARRAY_SORT_GRAN : g,
+                 cmp).invoke();
+        }
     }
 
     /*
--- a/src/share/classes/java/util/ArraysParallelSortHelpers.java	Thu Mar 28 10:48:43 2013 +0100
+++ b/src/share/classes/java/util/ArraysParallelSortHelpers.java	Thu Mar 28 17:10:40 2013 +0100
@@ -25,6 +25,7 @@
 package java.util;
 
 import java.util.concurrent.RecursiveAction;
+import java.util.concurrent.CountedCompleter;
 
 /**
  * Helper utilities for the parallel sort methods in Arrays.parallelSort.
@@ -44,1180 +45,1060 @@
  *             c. merge them together
  *         3. merge together the two halves.
  *
- * One reason for splitting in quarters is that this guarantees
- * that the final sort is in the main array, not the workspace
- * array.  (workspace and main swap roles on each subsort step.)
- * Leaf-level sorts use a Sequential quicksort, that in turn uses
- * insertion sort if under threshold.  Otherwise it uses median of
- * three to pick pivot, and loops rather than recurses along left
- * path.
+ * One reason for splitting in quarters is that this guarantees that
+ * the final sort is in the main array, not the workspace array.
+ * (workspace and main swap roles on each subsort step.)  Leaf-level
+ * sorts use the associated sequential sort.
  *
+ * Merger classes perform merging for Sorter. If big enough, it splits
+ * the Left partition in half; finds the greatest point in Right
+ * partition less than the beginning of the second half of Left via
+ * binary search; and then, in parallel, merges left half of Left with
+ * elements of Right up to split point, and merges right half of Left
+ * with elements of R past split point. At leaf, it just sequentially
+ * merges. The Mergers also include checks for excessive imbalances
+ * (hardwired to stop if less than threshold/8 elements on either
+ * side) and adjust for duplicate key runs on each side by placing
+ * merge bounds at the beginnings of runs. This is necessary to
+ * maintain sort-stability in the Object versions, and further helps
+ * avoid imbalances in others.
  *
- * Merger classes perform merging for Sorter. If big enough, splits Left
- * partition in half; finds the greatest point in Right partition
- * less than the beginning of the second half of Left via binary
- * search; and then, in parallel, merges left half of Left with
- * elements of Right up to split point, and merges right half of
- * Left with elements of R past split point. At leaf, it just
- * sequentially merges. This is all messy to code; sadly we need
- * distinct versions for each type.
- *
+ * The current CountedCompleter design also requires some little tasks
+ * to serve as place holders for triggering the merges and re-merges.
+ * These classes (EmptyCompleter and Relay) don't need to keep track
+ * of the arrays, and are never themselves forked, so don't hold any
+ * task state.
  */
 /*package*/ class ArraysParallelSortHelpers {
 
-    // RFE: we should only need a working array as large as the subarray
-    //      to be sorted, but the logic assumes that indices in the two
-    //      arrays always line-up
+    /*
+     * The primitive class versions (FJByte... FJDouble) classes are
+     * identical to FJComparable version, except removing task class
+     * parameter, replacing T with type, and replacing a.compareTo(b)
+     * with a relop b.
+     *
+     * The task classes have a lot of parameters, that are copied to
+     * local variables and used in compute() methods, We place these
+     * in as few lines as possible to minimize distraction.
+     */
+
+    /**
+     * A placeholder task for Sorters, used for the lowest
+     * quartile task, that does not need to maintain array state.
+     */
+    static final class EmptyCompleter extends CountedCompleter<Void> {
+        static final long serialVersionUID = 2446542900576103244L;
+        EmptyCompleter(CountedCompleter<?> p) { super(p); }
+        public final void compute() { }
+    }
+
+    /**
+     * A trigger for secondary merge of two merges
+     */
+    static final class Relay extends CountedCompleter<Void> {
+        static final long serialVersionUID = 2446542900576103244L;
+        final CountedCompleter<?> task;
+        Relay(CountedCompleter<?> task) {
+            super(null, 1);
+            this.task = task;
+        }
+        public final void compute() { }
+        public final void onCompletion(CountedCompleter<?> t) {
+            task.compute();
+        }
+    }
+
+    /** Comparable support class */
+    static final class FJComparable {
+        static final class Sorter<T extends Comparable<? super T>>
+            extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final T[] a, w;
+            final int base, size, wbase, gran;
+            Sorter(CountedCompleter<?> par, T[] a, T[] w, int base, int size,
+                   int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w; this.base = base; this.size = size;
+                this.wbase = wbase; this.gran = gran;
+            }
+            public final void compute() {
+                CountedCompleter<?> s = this;
+                T[] a = this.a, w = this.w; // localize all params
+                int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
+                while (n > g) {
+                    int h = n >>> 1, q = n >>> 2, u = h + q; // quartiles
+                    int qb = b + q, hb = b + h, ub = b + u;  // bases
+                    int wqb = wb + q, whb = wb + h, wub = wb + u;
+                    int hq = h - q, hn = n - h, un = n - u;  // sizes
+                    n = q;
+                    Relay fc = new Relay(new Merger<>(s,  w, a, wb,  h,
+                                                      whb, hn, b,  g));
+                    Relay rc = new Relay(new Merger<>(fc, a, w, hb, q,
+                                                      ub, un, whb, g));
+                    Relay bc = new Relay(new Merger<>(fc, a, w, b,  q,
+                                                      qb, hq, wb,  g));
+                    s = new EmptyCompleter(bc);
+                    Sorter<T> su = new Sorter<>(rc, a, w, ub, un, wub, g);
+                    Sorter<T> sh = new Sorter<>(rc, a, w, hb, q,  whb, g);
+                    Sorter<T> sq = new Sorter<>(bc, a, w, qb, hq, wqb, g);
+                    su.fork();
+                    sh.fork();
+                    sq.fork();
+                }
+                ComparableTimSort.sort(a, b, b + n, w, wb, n);
+                s.tryComplete();
+            }
+        }
+
+        static final class Merger<T extends Comparable<? super T>>
+            extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final T[] a, w; // main and workspace arrays
+            final int lbase, lsize, rbase, rsize, wbase, gran;
+            Merger(CountedCompleter<?> par, T[] a, T[] w,
+                   int lbase, int lsize, int rbase,
+                   int rsize, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w;
+                this.lbase = lbase; this.lsize = lsize;
+                this.rbase = rbase; this.rsize = rsize;
+                this.wbase = wbase; this.gran = gran;
+            }
+
+            public final void compute() {
+                T[] a = this.a, w = this.w; // localize all params
+                int lb = this.lbase, ln = this.lsize, rb = this.rbase,
+                    rn = this.rsize, k = this.wbase, g = this.gran;
+                int eighth = g >>> 3;
+                for (;;) {
+                    int lh = ln >>> 1;
+                    T split = a[lb + lh];
+                    int rh = rn;
+                    for (int rl = 0; rl < rh;) {
+                        int rm = (rl + rh) >>> 1;
+                        if (split.compareTo(a[rb + rm]) <= 0)
+                            rh = rm;
+                        else
+                            rl = rm + 1;
+                    }
+                    int s, sl, sr;
+                    while (rh > eighth && split.compareTo(a[rb + rh - 1]) == 0)
+                        --rh; // back up right to beginning of run
+                    if (rh <= eighth)
+                        break; // too imbalanced
+                    while (lh > eighth && split.compareTo(a[lb + lh - 1]) == 0)
+                        --lh; // back up left
+                    if (lh <= eighth || (s = lh + rh) <= g || // left too small
+                        (sl = ln - lh) + (sr = rn - rh) <= g) // right too small
+                        break;
+                    addToPendingCount(1);
+                    new Merger<>(this, a, w, lb + lh, sl, rb + rh, sr, k + s,
+                                 g).fork();
+                    rn = rh;
+                    ln = lh;
+                }
+
+                int lf = lb + ln, rf = rb + rn; // index bounds
+                while (lb < lf && rb < rf) {
+                    T t, al, ar;
+                    if ((al = a[lb]).compareTo(ar = a[rb]) <= 0) {
+                        lb++; t = al;
+                    }
+                    else {
+                        rb++; t = ar;
+                    }
+                    w[k++] = t;
+                }
+                if (rb < rf)
+                    System.arraycopy(a, rb, w, k, rf - rb);
+                else if (lb < lf)
+                    System.arraycopy(a, lb, w, k, lf - lb);
+
+                tryComplete();
+            }
+        }
+
+    } // FJComparable
+
+    /** Object + Comparator support class */
+    static final class FJComparator {
+        static final class Sorter<T> extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final T[] a, w;
+            final int base, size, wbase, gran;
+            Comparator<? super T> comparator;
+            Sorter(CountedCompleter<?> par, T[] a, T[] w, int base, int size,
+                   int wbase, int gran,
+                   Comparator<? super T> comparator) {
+                super(par);
+                this.a = a; this.w = w; this.base = base; this.size = size;
+                this.wbase = wbase; this.gran = gran;
+                this.comparator = comparator;
+            }
+            public final void compute() {
+                CountedCompleter<?> s = this;
+                Comparator<? super T> c = this.comparator;
+                T[] a = this.a, w = this.w; // localize all params
+                int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
+                while (n > g) {
+                    int h = n >>> 1, q = n >>> 2, u = h + q; // quartiles
+                    int qb = b + q, hb = b + h, ub = b + u;  // bases
+                    int wqb = wb + q, whb = wb + h, wub = wb + u;
+                    int hq = h - q, hn = n - h, un = n - u;  // sizes
+                    n = q;
+                    Relay fc = new Relay(new Merger<>(s,  w, a, wb,  h,
+                                                      whb, hn, b,  g, c));
+                    Relay rc = new Relay(new Merger<>(fc, a, w, hb, q,
+                                                      ub, un, whb, g, c));
+                    Relay bc = new Relay(new Merger<>(fc, a, w, b,  q,
+                                                      qb, hq, wb,  g, c));
+                    s = new EmptyCompleter(bc);
+                    Sorter<T> su = new Sorter<>(rc, a, w, ub, un, wub, g, c);
+                    Sorter<T> sh = new Sorter<>(rc, a, w, hb, q,  whb, g, c);
+                    Sorter<T> sq = new Sorter<>(bc, a, w, qb, hq, wqb, g, c);
+                    su.fork();
+                    sh.fork();
+                    sq.fork();
+                }
+                TimSort.sort(a, b, b + n, c, w, wb, n);
+                s.tryComplete();
+            }
+        }
+
+        static final class Merger<T> extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final T[] a, w; // main and workspace arrays
+            final int lbase, lsize, rbase, rsize, wbase, gran;
+            Comparator<? super T> comparator;
+            Merger(CountedCompleter<?> par, T[] a, T[] w,
+                   int lbase, int lsize, int rbase,
+                   int rsize, int wbase, int gran,
+                   Comparator<? super T> comparator) {
+                super(par);
+                this.a = a; this.w = w;
+                this.lbase = lbase; this.lsize = lsize;
+                this.rbase = rbase; this.rsize = rsize;
+                this.wbase = wbase; this.gran = gran;
+                this.comparator = comparator;
+            }
+
+            public final void compute() {
+                Comparator<? super T> c = this.comparator;
+                T[] a = this.a, w = this.w; // localize all params
+                int lb = this.lbase, ln = this.lsize, rb = this.rbase,
+                    rn = this.rsize, k = this.wbase, g = this.gran;
+                int eighth = g >>> 3;
+                for (;;) {
+                    int lh = ln >>> 1;
+                    T split = a[lb + lh];
+                    int rh = rn;
+                    for (int rl = 0; rl < rh;) {
+                        int rm = (rl + rh) >>> 1;
+                        if (c.compare(split, a[rb + rm]) <= 0)
+                            rh = rm;
+                        else
+                            rl = rm + 1;
+                    }
+                    int s, sl, sr;
+                    while (rh > eighth && c.compare(split, a[rb + rh - 1]) == 0)
+                        --rh; // back up right to beginning of run
+                    if (rh <= eighth)
+                        break; // too imbalanced
+                    while (lh > eighth && c.compare(split, a[lb + lh - 1]) == 0)
+                        --lh; // back up left
+                    if (lh <= eighth || (s = lh + rh) <= g || // left too small
+                        (sl = ln - lh) + (sr = rn - rh) <= g) // right too small
+                        break;
+                    addToPendingCount(1);
+                    new Merger<>(this, a, w, lb + lh, sl, rb + rh, sr, k + s,
+                                 g, c).fork();
+                    rn = rh;
+                    ln = lh;
+                }
+
+                int lf = lb + ln, rf = rb + rn; // index bounds
+                while (lb < lf && rb < rf) {
+                    T t, al, ar;
+                    if (c.compare((al = a[lb]), (ar = a[rb])) <= 0) {
+                        lb++; t = al;
+                    }
+                    else {
+                        rb++; t = ar;
+                    }
+                    w[k++] = t;
+                }
+                if (rb < rf)
+                    System.arraycopy(a, rb, w, k, rf - rb);
+                else if (lb < lf)
+                    System.arraycopy(a, lb, w, k, lf - lb);
+
+                tryComplete();
+            }
+        }
+    } // FJComparator
 
     /** byte support class */
     static final class FJByte {
-        static final class Sorter extends RecursiveAction {
-            static final long serialVersionUID = 749471161188027634L;
-            final byte[] a;     // array to be sorted.
-            final byte[] w;     // workspace for merge
-            final int origin;   // origin of the part of array we deal with
-            final int n;        // Number of elements in (sub)arrays.
-            final int gran;     // split control
-
-            Sorter(byte[] a, byte[] w, int origin, int n, int gran) {
-                this.a = a;
-                this.w = w;
-                this.origin = origin;
-                this.n = n;
-                this.gran = gran;
+        static final class Sorter extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final byte[] a, w;
+            final int base, size, wbase, gran;
+            Sorter(CountedCompleter<?> par, byte[] a, byte[] w, int base, int size,
+                   int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w; this.base = base; this.size = size;
+                this.wbase = wbase; this.gran = gran;
             }
-
-            public void compute() {
-                final int l = origin;
-                final int g = gran;
-                final int n = this.n;
-                final byte[] a = this.a;
-                final byte[] w = this.w;
-                if (n > g) {
-                    int h = n >>> 1; // half
-                    int q = n >>> 2; // lower quarter index
-                    int u = h + q;   // upper quarter
-                    FJSubSorter ls = new FJSubSorter(new Sorter(a, w, l, q,  g),
-                                                     new Sorter(a, w, l+q, h-q, g),
-                                                     new Merger(a, w, l,   q,
-                                                                l+q, h-q, l, g, null));
-                    FJSubSorter rs = new FJSubSorter(new Sorter(a, w, l+h, q,   g),
-                                                     new Sorter(a, w, l+u, n-u, g),
-                                                     new Merger(a, w, l+h, q,
-                                                                l+u, n-u, l+h, g, null));
-                    rs.fork();
-                    ls.compute();
-                    if (rs.tryUnfork()) rs.compute(); else rs.join();
-                    new Merger(w, a, l, h,
-                               l+h, n-h, l, g, null).compute();
-                } else {
-                    DualPivotQuicksort.sort(a, l, l+n-1);   //skip rangeCheck
+            public final void compute() {
+                CountedCompleter<?> s = this;
+                byte[] a = this.a, w = this.w; // localize all params
+                int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
+                while (n > g) {
+                    int h = n >>> 1, q = n >>> 2, u = h + q; // quartiles
+                    int qb = b + q, hb = b + h, ub = b + u;  // bases
+                    int wqb = wb + q, whb = wb + h, wub = wb + u;
+                    int hq = h - q, hn = n - h, un = n - u;  // sizes
+                    n = q;
+                    Relay fc = new Relay(new Merger(s,  w, a, wb,  h,
+                                                    whb, hn, b,  g));
+                    Relay rc = new Relay(new Merger(fc, a, w, hb, q,
+                                                    ub, un, whb, g));
+                    Relay bc = new Relay(new Merger(fc, a, w, b,  q,
+                                                    qb, hq, wb,  g));
+                    s = new EmptyCompleter(bc);
+                    Sorter su = new Sorter(rc, a, w, ub, un, wub, g);
+                    Sorter sh = new Sorter(rc, a, w, hb, q,  whb, g);
+                    Sorter sq = new Sorter(bc, a, w, qb, hq, wqb, g);
+                    su.fork();
+                    sh.fork();
+                    sq.fork();
                 }
+                DualPivotQuicksort.sort(a, b, b + n - 1);
+                s.tryComplete();
             }
         }
 
-        static final class Merger extends RecursiveAction {
-            static final long serialVersionUID = -9090258248781844470L;
-            final byte[] a;
-            final byte[] w;
-            final int lo;
-            final int ln;
-            final int ro;
-            final int rn;
-            final int wo;
-            final int gran;
-            final Merger next;
-
-            Merger(byte[] a, byte[] w, int lo, int ln, int ro, int rn, int wo,
-                   int gran, Merger next) {
-                this.a = a;
-                this.w = w;
-                this.lo = lo;
-                this.ln = ln;
-                this.ro = ro;
-                this.rn = rn;
-                this.wo = wo;
-                this.gran = gran;
-                this.next = next;
+        static final class Merger extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final byte[] a, w; // main and workspace arrays
+            final int lbase, lsize, rbase, rsize, wbase, gran;
+            Merger(CountedCompleter<?> par, byte[] a, byte[] w,
+                   int lbase, int lsize, int rbase,
+                   int rsize, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w;
+                this.lbase = lbase; this.lsize = lsize;
+                this.rbase = rbase; this.rsize = rsize;
+                this.wbase = wbase; this.gran = gran;
             }
 
-            public void compute() {
-                final byte[] a = this.a;
-                final byte[] w = this.w;
-                Merger rights = null;
-                int nleft = ln;
-                int nright = rn;
-                while (nleft > gran) {
-                    int lh = nleft >>> 1;
-                    int splitIndex = lo + lh;
-                    byte split = a[splitIndex];
-                    int rl = 0;
-                    int rh = nright;
-                    while (rl < rh) {
-                        int mid = (rl + rh) >>> 1;
-                        if (split <= a[ro + mid])
-                            rh = mid;
+            public final void compute() {
+                byte[] a = this.a, w = this.w; // localize all params
+                int lb = this.lbase, ln = this.lsize, rb = this.rbase,
+                    rn = this.rsize, k = this.wbase, g = this.gran;
+                int eighth = g >>> 3;
+                for (;;) {
+                    int lh = ln >>> 1;
+                    byte split = a[lb + lh];
+                    int rh = rn;
+                    for (int rl = 0; rl < rh;) {
+                        int rm = (rl + rh) >>> 1;
+                        if (split <= a[rb + rm])
+                            rh = rm;
                         else
-                            rl = mid + 1;
+                            rl = rm + 1;
                     }
-                    (rights = new Merger(a, w, splitIndex, nleft-lh, ro+rh,
-                                         nright-rh, wo+lh+rh, gran, rights)).fork();
-                    nleft = lh;
-                    nright = rh;
+                    int s, sl, sr;
+                    while (rh > eighth && split == a[rb + rh - 1])
+                        --rh; // back up right to beginning of run
+                    if (rh <= eighth)
+                        break; // too imbalanced
+                    while (lh > eighth && split == a[lb + lh - 1])
+                        --lh; // back up left
+                    if (lh <= eighth || (s = lh + rh) <= g || // left too small
+                        (sl = ln - lh) + (sr = rn - rh) <= g) // right too small
+                        break;
+                    addToPendingCount(1);
+                    new Merger(this, a, w, lb + lh, sl, rb + rh, sr, k + s,
+                               g).fork();
+                    rn = rh;
+                    ln = lh;
                 }
 
-                int l = lo;
-                int lFence = l + nleft;
-                int r = ro;
-                int rFence = r + nright;
-                int k = wo;
-                while (l < lFence && r < rFence) {
-                    byte al = a[l];
-                    byte ar = a[r];
-                    byte t;
-                    if (al <= ar) {++l; t=al;} else {++r; t = ar;}
+                int lf = lb + ln, rf = rb + rn; // index bounds
+                while (lb < lf && rb < rf) {
+                    byte t, al, ar;
+                    if ((al = a[lb]) <= (ar = a[rb])) {
+                        lb++; t = al;
+                    }
+                    else {
+                        rb++; t = ar;
+                    }
                     w[k++] = t;
                 }
-                while (l < lFence)
-                    w[k++] = a[l++];
-                while (r < rFence)
-                    w[k++] = a[r++];
-                while (rights != null) {
-                    if (rights.tryUnfork())
-                        rights.compute();
-                    else
-                        rights.join();
-                    rights = rights.next;
-                }
+                if (rb < rf)
+                    System.arraycopy(a, rb, w, k, rf - rb);
+                else if (lb < lf)
+                    System.arraycopy(a, lb, w, k, lf - lb);
+
+                tryComplete();
             }
         }
     } // FJByte
 
     /** char support class */
     static final class FJChar {
-        static final class Sorter extends RecursiveAction {
-            static final long serialVersionUID = 8723376019074596641L;
-            final char[] a;     // array to be sorted.
-            final char[] w;     // workspace for merge
-            final int origin;   // origin of the part of array we deal with
-            final int n;        // Number of elements in (sub)arrays.
-            final int gran;     // split control
-
-            Sorter(char[] a, char[] w, int origin, int n, int gran) {
-                this.a = a;
-                this.w = w;
-                this.origin = origin;
-                this.n = n;
-                this.gran = gran;
+        static final class Sorter extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final char[] a, w;
+            final int base, size, wbase, gran;
+            Sorter(CountedCompleter<?> par, char[] a, char[] w, int base, int size,
+                   int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w; this.base = base; this.size = size;
+                this.wbase = wbase; this.gran = gran;
             }
-
-            public void compute() {
-                final int l = origin;
-                final int g = gran;
-                final int n = this.n;
-                final char[] a = this.a;
-                final char[] w = this.w;
-                if (n > g) {
-                    int h = n >>> 1; // half
-                    int q = n >>> 2; // lower quarter index
-                    int u = h + q;   // upper quarter
-                    FJSubSorter ls = new FJSubSorter(new Sorter(a, w, l, q,   g),
-                                                     new Sorter(a, w, l+q, h-q, g),
-                                                     new Merger(a, w, l,   q,
-                                                                l+q, h-q, l, g, null));
-                    FJSubSorter rs = new FJSubSorter(new Sorter(a, w, l + h, q,   g),
-                                                     new Sorter(a, w, l+u, n-u, g),
-                                                     new Merger(a, w, l+h, q,
-                                                                l+u, n-u, l+h, g, null));
-                    rs.fork();
-                    ls.compute();
-                    if (rs.tryUnfork()) rs.compute(); else rs.join();
-                    new Merger(w, a, l, h, l + h, n - h, l, g, null).compute();
-                } else {
-                    DualPivotQuicksort.sort(a, l, l+n-1);   // skip rangeCheck
+            public final void compute() {
+                CountedCompleter<?> s = this;
+                char[] a = this.a, w = this.w; // localize all params
+                int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
+                while (n > g) {
+                    int h = n >>> 1, q = n >>> 2, u = h + q; // quartiles
+                    int qb = b + q, hb = b + h, ub = b + u;  // bases
+                    int wqb = wb + q, whb = wb + h, wub = wb + u;
+                    int hq = h - q, hn = n - h, un = n - u;  // sizes
+                    n = q;
+                    Relay fc = new Relay(new Merger(s,  w, a, wb,  h,
+                                                    whb, hn, b,  g));
+                    Relay rc = new Relay(new Merger(fc, a, w, hb, q,
+                                                    ub, un, whb, g));
+                    Relay bc = new Relay(new Merger(fc, a, w, b,  q,
+                                                    qb, hq, wb,  g));
+                    s = new EmptyCompleter(bc);
+                    Sorter su = new Sorter(rc, a, w, ub, un, wub, g);
+                    Sorter sh = new Sorter(rc, a, w, hb, q,  whb, g);
+                    Sorter sq = new Sorter(bc, a, w, qb, hq, wqb, g);
+                    su.fork();
+                    sh.fork();
+                    sq.fork();
                 }
+                DualPivotQuicksort.sort(a, b, b + n - 1);
+                s.tryComplete();
             }
         }
 
-        static final class Merger extends RecursiveAction {
-            static final long serialVersionUID = -1383975444621698926L;
-            final char[] a;
-            final char[] w;
-            final int lo;
-            final int ln;
-            final int ro;
-            final int rn;
-            final int wo;
-            final int gran;
-            final Merger next;
-
-            Merger(char[] a, char[] w, int lo, int ln, int ro, int rn, int wo,
-                   int gran, Merger next) {
-                this.a = a;
-                this.w = w;
-                this.lo = lo;
-                this.ln = ln;
-                this.ro = ro;
-                this.rn = rn;
-                this.wo = wo;
-                this.gran = gran;
-                this.next = next;
+        static final class Merger extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final char[] a, w; // main and workspace arrays
+            final int lbase, lsize, rbase, rsize, wbase, gran;
+            Merger(CountedCompleter<?> par, char[] a, char[] w,
+                   int lbase, int lsize, int rbase,
+                   int rsize, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w;
+                this.lbase = lbase; this.lsize = lsize;
+                this.rbase = rbase; this.rsize = rsize;
+                this.wbase = wbase; this.gran = gran;
             }
 
-            public void compute() {
-                final char[] a = this.a;
-                final char[] w = this.w;
-                Merger rights = null;
-                int nleft = ln;
-                int nright = rn;
-                while (nleft > gran) {
-                    int lh = nleft >>> 1;
-                    int splitIndex = lo + lh;
-                    char split = a[splitIndex];
-                    int rl = 0;
-                    int rh = nright;
-                    while (rl < rh) {
-                        int mid = (rl + rh) >>> 1;
-                        if (split <= a[ro + mid])
-                            rh = mid;
+            public final void compute() {
+                char[] a = this.a, w = this.w; // localize all params
+                int lb = this.lbase, ln = this.lsize, rb = this.rbase,
+                    rn = this.rsize, k = this.wbase, g = this.gran;
+                int eighth = g >>> 3;
+                for (;;) {
+                    int lh = ln >>> 1;
+                    char split = a[lb + lh];
+                    int rh = rn;
+                    for (int rl = 0; rl < rh;) {
+                        int rm = (rl + rh) >>> 1;
+                        if (split <= a[rb + rm])
+                            rh = rm;
                         else
-                            rl = mid + 1;
+                            rl = rm + 1;
                     }
-                    (rights = new Merger(a, w, splitIndex, nleft-lh, ro+rh,
-                                         nright-rh, wo+lh+rh, gran, rights)).fork();
-                    nleft = lh;
-                    nright = rh;
+                    int s, sl, sr;
+                    while (rh > eighth && split == a[rb + rh - 1])
+                        --rh; // back up right to beginning of run
+                    if (rh <= eighth)
+                        break; // too imbalanced
+                    while (lh > eighth && split == a[lb + lh - 1])
+                        --lh; // back up left
+                    if (lh <= eighth || (s = lh + rh) <= g || // left too small
+                        (sl = ln - lh) + (sr = rn - rh) <= g) // right too small
+                        break;
+                    addToPendingCount(1);
+                    new Merger(this, a, w, lb + lh, sl, rb + rh, sr, k + s,
+                               g).fork();
+                    rn = rh;
+                    ln = lh;
                 }
 
-                int l = lo;
-                int lFence = l + nleft;
-                int r = ro;
-                int rFence = r + nright;
-                int k = wo;
-                while (l < lFence && r < rFence) {
-                    char al = a[l];
-                    char ar = a[r];
-                    char t;
-                    if (al <= ar) {++l; t=al;} else {++r; t = ar;}
+                int lf = lb + ln, rf = rb + rn; // index bounds
+                while (lb < lf && rb < rf) {
+                    char t, al, ar;
+                    if ((al = a[lb]) <= (ar = a[rb])) {
+                        lb++; t = al;
+                    }
+                    else {
+                        rb++; t = ar;
+                    }
                     w[k++] = t;
                 }
-                while (l < lFence)
-                    w[k++] = a[l++];
-                while (r < rFence)
-                    w[k++] = a[r++];
-                while (rights != null) {
-                    if (rights.tryUnfork())
-                        rights.compute();
-                    else
-                        rights.join();
-                    rights = rights.next;
-                }
+                if (rb < rf)
+                    System.arraycopy(a, rb, w, k, rf - rb);
+                else if (lb < lf)
+                    System.arraycopy(a, lb, w, k, lf - lb);
+
+                tryComplete();
             }
         }
+
     } // FJChar
 
     /** short support class */
     static final class FJShort {
-        static final class Sorter extends RecursiveAction {
-            static final long serialVersionUID = -7886754793730583084L;
-            final short[] a;    // array to be sorted.
-            final short[] w;    // workspace for merge
-            final int origin;   // origin of the part of array we deal with
-            final int n;        // Number of elements in (sub)arrays.
-            final int gran;     // split control
-
-            Sorter(short[] a, short[] w, int origin, int n, int gran) {
-                this.a = a;
-                this.w = w;
-                this.origin = origin;
-                this.n = n;
-                this.gran = gran;
+        static final class Sorter extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final short[] a, w;
+            final int base, size, wbase, gran;
+            Sorter(CountedCompleter<?> par, short[] a, short[] w, int base, int size,
+                   int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w; this.base = base; this.size = size;
+                this.wbase = wbase; this.gran = gran;
             }
-
-            public void compute() {
-                final int l = origin;
-                final int g = gran;
-                final int n = this.n;
-                final short[] a = this.a;
-                final short[] w = this.w;
-                if (n > g) {
-                    int h = n >>> 1; // half
-                    int q = n >>> 2; // lower quarter index
-                    int u = h + q;   // upper quarter
-                    FJSubSorter ls = new FJSubSorter(new Sorter(a, w, l, q,   g),
-                                                     new Sorter(a, w, l+q, h-q, g),
-                                                     new Merger(a, w, l,   q,
-                                                                l+q, h-q, l, g, null));
-                    FJSubSorter rs = new FJSubSorter(new Sorter(a, w, l + h, q,   g),
-                                                     new Sorter(a, w, l+u, n-u, g),
-                                                     new Merger(a, w, l+h, q,
-                                                                l+u, n-u, l+h, g, null));
-                    rs.fork();
-                    ls.compute();
-                    if (rs.tryUnfork()) rs.compute(); else rs.join();
-                    new Merger(w, a, l, h, l + h, n - h, l, g, null).compute();
-                } else {
-                    DualPivotQuicksort.sort(a, l, l+n-1);   // skip rangeCheck
+            public final void compute() {
+                CountedCompleter<?> s = this;
+                short[] a = this.a, w = this.w; // localize all params
+                int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
+                while (n > g) {
+                    int h = n >>> 1, q = n >>> 2, u = h + q; // quartiles
+                    int qb = b + q, hb = b + h, ub = b + u;  // bases
+                    int wqb = wb + q, whb = wb + h, wub = wb + u;
+                    int hq = h - q, hn = n - h, un = n - u;  // sizes
+                    n = q;
+                    Relay fc = new Relay(new Merger(s,  w, a, wb,  h,
+                                                    whb, hn, b,  g));
+                    Relay rc = new Relay(new Merger(fc, a, w, hb, q,
+                                                    ub, un, whb, g));
+                    Relay bc = new Relay(new Merger(fc, a, w, b,  q,
+                                                    qb, hq, wb,  g));
+                    s = new EmptyCompleter(bc);
+                    Sorter su = new Sorter(rc, a, w, ub, un, wub, g);
+                    Sorter sh = new Sorter(rc, a, w, hb, q,  whb, g);
+                    Sorter sq = new Sorter(bc, a, w, qb, hq, wqb, g);
+                    su.fork();
+                    sh.fork();
+                    sq.fork();
                 }
+                DualPivotQuicksort.sort(a, b, b + n - 1);
+                s.tryComplete();
             }
         }
 
-        static final class Merger extends RecursiveAction {
-            static final long serialVersionUID = 3895749408536700048L;
-            final short[] a;
-            final short[] w;
-            final int lo;
-            final int ln;
-            final int ro;
-            final int rn;
-            final int wo;
-            final int gran;
-            final Merger next;
-
-            Merger(short[] a, short[] w, int lo, int ln, int ro, int rn, int wo,
-                   int gran, Merger next) {
-                this.a = a;
-                this.w = w;
-                this.lo = lo;
-                this.ln = ln;
-                this.ro = ro;
-                this.rn = rn;
-                this.wo = wo;
-                this.gran = gran;
-                this.next = next;
+        static final class Merger extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final short[] a, w; // main and workspace arrays
+            final int lbase, lsize, rbase, rsize, wbase, gran;
+            Merger(CountedCompleter<?> par, short[] a, short[] w,
+                   int lbase, int lsize, int rbase,
+                   int rsize, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w;
+                this.lbase = lbase; this.lsize = lsize;
+                this.rbase = rbase; this.rsize = rsize;
+                this.wbase = wbase; this.gran = gran;
             }
 
-            public void compute() {
-                final short[] a = this.a;
-                final short[] w = this.w;
-                Merger rights = null;
-                int nleft = ln;
-                int nright = rn;
-                while (nleft > gran) {
-                    int lh = nleft >>> 1;
-                    int splitIndex = lo + lh;
-                    short split = a[splitIndex];
-                    int rl = 0;
-                    int rh = nright;
-                    while (rl < rh) {
-                        int mid = (rl + rh) >>> 1;
-                        if (split <= a[ro + mid])
-                            rh = mid;
+            public final void compute() {
+                short[] a = this.a, w = this.w; // localize all params
+                int lb = this.lbase, ln = this.lsize, rb = this.rbase,
+                    rn = this.rsize, k = this.wbase, g = this.gran;
+                int eighth = g >>> 3;
+                for (;;) {
+                    int lh = ln >>> 1;
+                    short split = a[lb + lh];
+                    int rh = rn;
+                    for (int rl = 0; rl < rh;) {
+                        int rm = (rl + rh) >>> 1;
+                        if (split <= a[rb + rm])
+                            rh = rm;
                         else
-                            rl = mid + 1;
+                            rl = rm + 1;
                     }
-                    (rights = new Merger(a, w, splitIndex, nleft-lh, ro+rh,
-                                         nright-rh, wo+lh+rh, gran, rights)).fork();
-                    nleft = lh;
-                    nright = rh;
+                    int s, sl, sr;
+                    while (rh > eighth && split == a[rb + rh - 1])
+                        --rh; // back up right to beginning of run
+                    if (rh <= eighth)
+                        break; // too imbalanced
+                    while (lh > eighth && split == a[lb + lh - 1])
+                        --lh; // back up left
+                    if (lh <= eighth || (s = lh + rh) <= g || // left too small
+                        (sl = ln - lh) + (sr = rn - rh) <= g) // right too small
+                        break;
+                    addToPendingCount(1);
+                    new Merger(this, a, w, lb + lh, sl, rb + rh, sr, k + s,
+                               g).fork();
+                    rn = rh;
+                    ln = lh;
                 }
 
-                int l = lo;
-                int lFence = l + nleft;
-                int r = ro;
-                int rFence = r + nright;
-                int k = wo;
-                while (l < lFence && r < rFence) {
-                    short al = a[l];
-                    short ar = a[r];
-                    short t;
-                    if (al <= ar) {++l; t=al;} else {++r; t = ar;}
+                int lf = lb + ln, rf = rb + rn; // index bounds
+                while (lb < lf && rb < rf) {
+                    short t, al, ar;
+                    if ((al = a[lb]) <= (ar = a[rb])) {
+                        lb++; t = al;
+                    }
+                    else {
+                        rb++; t = ar;
+                    }
                     w[k++] = t;
                 }
-                while (l < lFence)
-                    w[k++] = a[l++];
-                while (r < rFence)
-                    w[k++] = a[r++];
-                while (rights != null) {
-                    if (rights.tryUnfork())
-                        rights.compute();
-                    else
-                        rights.join();
-                    rights = rights.next;
-                }
+                if (rb < rf)
+                    System.arraycopy(a, rb, w, k, rf - rb);
+                else if (lb < lf)
+                    System.arraycopy(a, lb, w, k, lf - lb);
+
+                tryComplete();
             }
         }
     } // FJShort
 
     /** int support class */
     static final class FJInt {
-        static final class Sorter extends RecursiveAction {
-            static final long serialVersionUID = 4263311808957292729L;
-            final int[] a;     // array to be sorted.
-            final int[] w;     // workspace for merge
-            final int origin;  // origin of the part of array we deal with
-            final int n;       // Number of elements in (sub)arrays.
-            final int gran;    // split control
-
-            Sorter(int[] a, int[] w, int origin, int n, int gran) {
-                this.a = a;
-                this.w = w;
-                this.origin = origin;
-                this.n = n;
-                this.gran = gran;
+        static final class Sorter extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final int[] a, w;
+            final int base, size, wbase, gran;
+            Sorter(CountedCompleter<?> par, int[] a, int[] w, int base, int size,
+                   int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w; this.base = base; this.size = size;
+                this.wbase = wbase; this.gran = gran;
             }
-
-            public void compute() {
-                final int l = origin;
-                final int g = gran;
-                final int n = this.n;
-                final int[] a = this.a;
-                final int[] w = this.w;
-                if (n > g) {
-                    int h = n >>> 1; // half
-                    int q = n >>> 2; // lower quarter index
-                    int u = h + q;   // upper quarter
-                    FJSubSorter ls = new FJSubSorter(new Sorter(a, w, l, q,   g),
-                                                     new Sorter(a, w, l+q, h-q, g),
-                                                     new Merger(a, w, l,   q,
-                                                                l+q, h-q, l, g, null));
-                    FJSubSorter rs = new FJSubSorter(new Sorter(a, w, l + h, q,   g),
-                                                     new Sorter(a, w, l+u, n-u, g),
-                                                     new Merger(a, w, l+h, q,
-                                                                l+u, n-u, l+h, g, null));
-                    rs.fork();
-                    ls.compute();
-                    if (rs.tryUnfork()) rs.compute(); else rs.join();
-                    new Merger(w, a, l, h, l + h, n - h, l, g, null).compute();
-                } else {
-                    DualPivotQuicksort.sort(a, l, l+n-1);   // skip rangeCheck
+            public final void compute() {
+                CountedCompleter<?> s = this;
+                int[] a = this.a, w = this.w; // localize all params
+                int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
+                while (n > g) {
+                    int h = n >>> 1, q = n >>> 2, u = h + q; // quartiles
+                    int qb = b + q, hb = b + h, ub = b + u;  // bases
+                    int wqb = wb + q, whb = wb + h, wub = wb + u;
+                    int hq = h - q, hn = n - h, un = n - u;  // sizes
+                    n = q;
+                    Relay fc = new Relay(new Merger(s,  w, a, wb,  h,
+                                                    whb, hn, b,  g));
+                    Relay rc = new Relay(new Merger(fc, a, w, hb, q,
+                                                    ub, un, whb, g));
+                    Relay bc = new Relay(new Merger(fc, a, w, b,  q,
+                                                    qb, hq, wb,  g));
+                    s = new EmptyCompleter(bc);
+                    Sorter su = new Sorter(rc, a, w, ub, un, wub, g);
+                    Sorter sh = new Sorter(rc, a, w, hb, q,  whb, g);
+                    Sorter sq = new Sorter(bc, a, w, qb, hq, wqb, g);
+                    su.fork();
+                    sh.fork();
+                    sq.fork();
                 }
+                DualPivotQuicksort.sort(a, b, b + n - 1);
+                s.tryComplete();
             }
         }
 
-        static final class Merger extends RecursiveAction {
-            static final long serialVersionUID = -8727507284219982792L;
-            final int[] a;
-            final int[] w;
-            final int lo;
-            final int ln;
-            final int ro;
-            final int rn;
-            final int wo;
-            final int gran;
-            final Merger next;
-
-            Merger(int[] a, int[] w, int lo, int ln, int ro, int rn, int wo,
-                   int gran, Merger next) {
-                this.a = a;
-                this.w = w;
-                this.lo = lo;
-                this.ln = ln;
-                this.ro = ro;
-                this.rn = rn;
-                this.wo = wo;
-                this.gran = gran;
-                this.next = next;
+        static final class Merger extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final int[] a, w; // main and workspace arrays
+            final int lbase, lsize, rbase, rsize, wbase, gran;
+            Merger(CountedCompleter<?> par, int[] a, int[] w,
+                   int lbase, int lsize, int rbase,
+                   int rsize, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w;
+                this.lbase = lbase; this.lsize = lsize;
+                this.rbase = rbase; this.rsize = rsize;
+                this.wbase = wbase; this.gran = gran;
             }
 
-            public void compute() {
-                final int[] a = this.a;
-                final int[] w = this.w;
-                Merger rights = null;
-                int nleft = ln;
-                int nright = rn;
-                while (nleft > gran) {
-                    int lh = nleft >>> 1;
-                    int splitIndex = lo + lh;
-                    int split = a[splitIndex];
-                    int rl = 0;
-                    int rh = nright;
-                    while (rl < rh) {
-                        int mid = (rl + rh) >>> 1;
-                        if (split <= a[ro + mid])
-                            rh = mid;
+            public final void compute() {
+                int[] a = this.a, w = this.w; // localize all params
+                int lb = this.lbase, ln = this.lsize, rb = this.rbase,
+                    rn = this.rsize, k = this.wbase, g = this.gran;
+                int eighth = g >>> 3;
+                for (;;) {
+                    int lh = ln >>> 1;
+                    int split = a[lb + lh];
+                    int rh = rn;
+                    for (int rl = 0; rl < rh;) {
+                        int rm = (rl + rh) >>> 1;
+                        if (split <= a[rb + rm])
+                            rh = rm;
                         else
-                            rl = mid + 1;
+                            rl = rm + 1;
                     }
-                    (rights = new Merger(a, w, splitIndex, nleft-lh, ro+rh,
-                                         nright-rh, wo+lh+rh, gran, rights)).fork();
-                    nleft = lh;
-                    nright = rh;
+                    int s, sl, sr;
+                    while (rh > eighth && split == a[rb + rh - 1])
+                        --rh; // back up right to beginning of run
+                    if (rh <= eighth)
+                        break; // too imbalanced
+                    while (lh > eighth && split == a[lb + lh - 1])
+                        --lh; // back up left
+                    if (lh <= eighth || (s = lh + rh) <= g || // left too small
+                        (sl = ln - lh) + (sr = rn - rh) <= g) // right too small
+                        break;
+                    addToPendingCount(1);
+                    new Merger(this, a, w, lb + lh, sl, rb + rh, sr, k + s,
+                               g).fork();
+                    rn = rh;
+                    ln = lh;
                 }
 
-                int l = lo;
-                int lFence = l + nleft;
-                int r = ro;
-                int rFence = r + nright;
-                int k = wo;
-                while (l < lFence && r < rFence) {
-                    int al = a[l];
-                    int ar = a[r];
-                    int t;
-                    if (al <= ar) {++l; t=al;} else {++r; t = ar;}
+                int lf = lb + ln, rf = rb + rn; // index bounds
+                while (lb < lf && rb < rf) {
+                    int t, al, ar;
+                    if ((al = a[lb]) <= (ar = a[rb])) {
+                        lb++; t = al;
+                    }
+                    else {
+                        rb++; t = ar;
+                    }
                     w[k++] = t;
                 }
-                while (l < lFence)
-                    w[k++] = a[l++];
-                while (r < rFence)
-                    w[k++] = a[r++];
-                while (rights != null) {
-                    if (rights.tryUnfork())
-                        rights.compute();
-                    else
-                        rights.join();
-                    rights = rights.next;
-                }
+                if (rb < rf)
+                    System.arraycopy(a, rb, w, k, rf - rb);
+                else if (lb < lf)
+                    System.arraycopy(a, lb, w, k, lf - lb);
+
+                tryComplete();
             }
         }
     } // FJInt
 
     /** long support class */
     static final class FJLong {
-        static final class Sorter extends RecursiveAction {
-            static final long serialVersionUID = 6553695007444392455L;
-            final long[] a;     // array to be sorted.
-            final long[] w;     // workspace for merge
-            final int origin;   // origin of the part of array we deal with
-            final int n;        // Number of elements in (sub)arrays.
-            final int gran;     // split control
-
-            Sorter(long[] a, long[] w, int origin, int n, int gran) {
-                this.a = a;
-                this.w = w;
-                this.origin = origin;
-                this.n = n;
-                this.gran = gran;
+        static final class Sorter extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final long[] a, w;
+            final int base, size, wbase, gran;
+            Sorter(CountedCompleter<?> par, long[] a, long[] w, int base, int size,
+                   int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w; this.base = base; this.size = size;
+                this.wbase = wbase; this.gran = gran;
             }
-
-            public void compute() {
-                final int l = origin;
-                final int g = gran;
-                final int n = this.n;
-                final long[] a = this.a;
-                final long[] w = this.w;
-                if (n > g) {
-                    int h = n >>> 1; // half
-                    int q = n >>> 2; // lower quarter index
-                    int u = h + q;   // upper quarter
-                    FJSubSorter ls = new FJSubSorter(new Sorter(a, w, l, q,   g),
-                                                     new Sorter(a, w, l+q, h-q, g),
-                                                     new Merger(a, w, l,   q,
-                                                                l+q, h-q, l, g, null));
-                    FJSubSorter rs = new FJSubSorter(new Sorter(a, w, l + h, q,   g),
-                                                     new Sorter(a, w, l+u, n-u, g),
-                                                     new Merger(a, w, l+h, q,
-                                                                l+u, n-u, l+h, g, null));
-                    rs.fork();
-                    ls.compute();
-                    if (rs.tryUnfork()) rs.compute(); else rs.join();
-                    new Merger(w, a, l, h, l + h, n - h, l, g, null).compute();
-                } else {
-                    DualPivotQuicksort.sort(a, l, l+n-1);   // skip rangeCheck
+            public final void compute() {
+                CountedCompleter<?> s = this;
+                long[] a = this.a, w = this.w; // localize all params
+                int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
+                while (n > g) {
+                    int h = n >>> 1, q = n >>> 2, u = h + q; // quartiles
+                    int qb = b + q, hb = b + h, ub = b + u;  // bases
+                    int wqb = wb + q, whb = wb + h, wub = wb + u;
+                    int hq = h - q, hn = n - h, un = n - u;  // sizes
+                    n = q;
+                    Relay fc = new Relay(new Merger(s,  w, a, wb,  h,
+                                                    whb, hn, b,  g));
+                    Relay rc = new Relay(new Merger(fc, a, w, hb, q,
+                                                    ub, un, whb, g));
+                    Relay bc = new Relay(new Merger(fc, a, w, b,  q,
+                                                    qb, hq, wb,  g));
+                    s = new EmptyCompleter(bc);
+                    Sorter su = new Sorter(rc, a, w, ub, un, wub, g);
+                    Sorter sh = new Sorter(rc, a, w, hb, q,  whb, g);
+                    Sorter sq = new Sorter(bc, a, w, qb, hq, wqb, g);
+                    su.fork();
+                    sh.fork();
+                    sq.fork();
                 }
+                DualPivotQuicksort.sort(a, b, b + n - 1);
+                s.tryComplete();
             }
         }
 
-        static final class Merger extends RecursiveAction {
-            static final long serialVersionUID = 8843567516333283861L;
-            final long[] a;
-            final long[] w;
-            final int lo;
-            final int ln;
-            final int ro;
-            final int rn;
-            final int wo;
-            final int gran;
-            final Merger next;
-
-            Merger(long[] a, long[] w, int lo, int ln, int ro, int rn, int wo,
-                   int gran, Merger next) {
-                this.a = a;
-                this.w = w;
-                this.lo = lo;
-                this.ln = ln;
-                this.ro = ro;
-                this.rn = rn;
-                this.wo = wo;
-                this.gran = gran;
-                this.next = next;
+        static final class Merger extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final long[] a, w; // main and workspace arrays
+            final int lbase, lsize, rbase, rsize, wbase, gran;
+            Merger(CountedCompleter<?> par, long[] a, long[] w,
+                   int lbase, int lsize, int rbase,
+                   int rsize, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w;
+                this.lbase = lbase; this.lsize = lsize;
+                this.rbase = rbase; this.rsize = rsize;
+                this.wbase = wbase; this.gran = gran;
             }
 
-            public void compute() {
-                final long[] a = this.a;
-                final long[] w = this.w;
-                Merger rights = null;
-                int nleft = ln;
-                int nright = rn;
-                while (nleft > gran) {
-                    int lh = nleft >>> 1;
-                    int splitIndex = lo + lh;
-                    long split = a[splitIndex];
-                    int rl = 0;
-                    int rh = nright;
-                    while (rl < rh) {
-                        int mid = (rl + rh) >>> 1;
-                        if (split <= a[ro + mid])
-                            rh = mid;
+            public final void compute() {
+                long[] a = this.a, w = this.w; // localize all params
+                int lb = this.lbase, ln = this.lsize, rb = this.rbase,
+                    rn = this.rsize, k = this.wbase, g = this.gran;
+                int eighth = g >>> 3;
+                for (;;) {
+                    int lh = ln >>> 1;
+                    long split = a[lb + lh];
+                    int rh = rn;
+                    for (int rl = 0; rl < rh;) {
+                        int rm = (rl + rh) >>> 1;
+                        if (split <= a[rb + rm])
+                            rh = rm;
                         else
-                            rl = mid + 1;
+                            rl = rm + 1;
                     }
-                    (rights = new Merger(a, w, splitIndex, nleft-lh, ro+rh,
-                      nright-rh, wo+lh+rh, gran, rights)).fork();
-                    nleft = lh;
-                    nright = rh;
+                    int s, sl, sr;
+                    while (rh > eighth && split == a[rb + rh - 1])
+                        --rh; // back up right to beginning of run
+                    if (rh <= eighth)
+                        break; // too imbalanced
+                    while (lh > eighth && split == a[lb + lh - 1])
+                        --lh; // back up left
+                    if (lh <= eighth || (s = lh + rh) <= g || // left too small
+                        (sl = ln - lh) + (sr = rn - rh) <= g) // right too small
+                        break;
+                    addToPendingCount(1);
+                    new Merger(this, a, w, lb + lh, sl, rb + rh, sr, k + s,
+                               g).fork();
+                    rn = rh;
+                    ln = lh;
                 }
 
-                int l = lo;
-                int lFence = l + nleft;
-                int r = ro;
-                int rFence = r + nright;
-                int k = wo;
-                while (l < lFence && r < rFence) {
-                    long al = a[l];
-                    long ar = a[r];
-                    long t;
-                    if (al <= ar) {++l; t=al;} else {++r; t = ar;}
+                int lf = lb + ln, rf = rb + rn; // index bounds
+                while (lb < lf && rb < rf) {
+                    long t, al, ar;
+                    if ((al = a[lb]) <= (ar = a[rb])) {
+                        lb++; t = al;
+                    }
+                    else {
+                        rb++; t = ar;
+                    }
                     w[k++] = t;
                 }
-                while (l < lFence)
-                    w[k++] = a[l++];
-                while (r < rFence)
-                    w[k++] = a[r++];
-                while (rights != null) {
-                    if (rights.tryUnfork())
-                        rights.compute();
-                    else
-                        rights.join();
-                    rights = rights.next;
-                }
+                if (rb < rf)
+                    System.arraycopy(a, rb, w, k, rf - rb);
+                else if (lb < lf)
+                    System.arraycopy(a, lb, w, k, lf - lb);
+
+                tryComplete();
             }
         }
     } // FJLong
 
     /** float support class */
     static final class FJFloat {
-        static final class Sorter extends RecursiveAction {
-            static final long serialVersionUID = 1602600178202763377L;
-            final float[] a;    // array to be sorted.
-            final float[] w;    // workspace for merge
-            final int origin;   // origin of the part of array we deal with
-            final int n;        // Number of elements in (sub)arrays.
-            final int gran;     // split control
-
-            Sorter(float[] a, float[] w, int origin, int n, int gran) {
-                this.a = a;
-                this.w = w;
-                this.origin = origin;
-                this.n = n;
-                this.gran = gran;
+        static final class Sorter extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final float[] a, w;
+            final int base, size, wbase, gran;
+            Sorter(CountedCompleter<?> par, float[] a, float[] w, int base, int size,
+                   int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w; this.base = base; this.size = size;
+                this.wbase = wbase; this.gran = gran;
             }
-
-            public void compute() {
-                final int l = origin;
-                final int g = gran;
-                final int n = this.n;
-                final float[] a = this.a;
-                final float[] w = this.w;
-                if (n > g) {
-                    int h = n >>> 1; // half
-                    int q = n >>> 2; // lower quarter index
-                    int u = h + q;   // upper quarter
-                    FJSubSorter ls = new FJSubSorter(new Sorter(a, w, l, q,   g),
-                                                     new Sorter(a, w, l+q, h-q, g),
-                                                     new Merger(a, w, l,   q,
-                                                                l+q, h-q, l, g, null));
-                    FJSubSorter rs = new FJSubSorter(new Sorter(a, w, l + h, q,   g),
-                                                     new Sorter(a, w, l+u, n-u, g),
-                                                     new Merger(a, w, l+h, q,
-                                                                l+u, n-u, l+h, g, null));
-                    rs.fork();
-                    ls.compute();
-                    if (rs.tryUnfork()) rs.compute(); else rs.join();
-                    new Merger(w, a, l, h, l + h, n - h, l, g, null).compute();
-                } else {
-                    DualPivotQuicksort.sort(a, l, l+n-1);   // skip rangeCheck
+            public final void compute() {
+                CountedCompleter<?> s = this;
+                float[] a = this.a, w = this.w; // localize all params
+                int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
+                while (n > g) {
+                    int h = n >>> 1, q = n >>> 2, u = h + q; // quartiles
+                    int qb = b + q, hb = b + h, ub = b + u;  // bases
+                    int wqb = wb + q, whb = wb + h, wub = wb + u;
+                    int hq = h - q, hn = n - h, un = n - u;  // sizes
+                    n = q;
+                    Relay fc = new Relay(new Merger(s,  w, a, wb,  h,
+                                                    whb, hn, b,  g));
+                    Relay rc = new Relay(new Merger(fc, a, w, hb, q,
+                                                    ub, un, whb, g));
+                    Relay bc = new Relay(new Merger(fc, a, w, b,  q,
+                                                    qb, hq, wb,  g));
+                    s = new EmptyCompleter(bc);
+                    Sorter su = new Sorter(rc, a, w, ub, un, wub, g);
+                    Sorter sh = new Sorter(rc, a, w, hb, q,  whb, g);
+                    Sorter sq = new Sorter(bc, a, w, qb, hq, wqb, g);
+                    su.fork();
+                    sh.fork();
+                    sq.fork();
                 }
+                DualPivotQuicksort.sort(a, b, b + n - 1);
+                s.tryComplete();
             }
         }
 
-        static final class Merger extends RecursiveAction {
-            static final long serialVersionUID = 1518176433845397426L;
-            final float[] a;
-            final float[] w;
-            final int lo;
-            final int ln;
-            final int ro;
-            final int rn;
-            final int wo;
-            final int gran;
-            final Merger next;
-
-            Merger(float[] a, float[] w, int lo, int ln, int ro, int rn, int wo,
-                   int gran, Merger next) {
-                this.a = a;
-                this.w = w;
-                this.lo = lo;
-                this.ln = ln;
-                this.ro = ro;
-                this.rn = rn;
-                this.wo = wo;
-                this.gran = gran;
-                this.next = next;
+        static final class Merger extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final float[] a, w; // main and workspace arrays
+            final int lbase, lsize, rbase, rsize, wbase, gran;
+            Merger(CountedCompleter<?> par, float[] a, float[] w,
+                   int lbase, int lsize, int rbase,
+                   int rsize, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w;
+                this.lbase = lbase; this.lsize = lsize;
+                this.rbase = rbase; this.rsize = rsize;
+                this.wbase = wbase; this.gran = gran;
             }
 
-            public void compute() {
-                final float[] a = this.a;
-                final float[] w = this.w;
-                Merger rights = null;
-                int nleft = ln;
-                int nright = rn;
-                while (nleft > gran) {
-                    int lh = nleft >>> 1;
-                    int splitIndex = lo + lh;
-                    float split = a[splitIndex];
-                    int rl = 0;
-                    int rh = nright;
-                    while (rl < rh) {
-                        int mid = (rl + rh) >>> 1;
-                        if (Float.compare(split, a[ro+mid]) <= 0)
-                            rh = mid;
+            public final void compute() {
+                float[] a = this.a, w = this.w; // localize all params
+                int lb = this.lbase, ln = this.lsize, rb = this.rbase,
+                    rn = this.rsize, k = this.wbase, g = this.gran;
+                int eighth = g >>> 3;
+                for (;;) {
+                    int lh = ln >>> 1;
+                    float split = a[lb + lh];
+                    int rh = rn;
+                    for (int rl = 0; rl < rh;) {
+                        int rm = (rl + rh) >>> 1;
+                        if (split <= a[rb + rm])
+                            rh = rm;
                         else
-                            rl = mid + 1;
+                            rl = rm + 1;
                     }
-                    (rights = new Merger(a, w, splitIndex, nleft-lh, ro+rh,
-                                         nright-rh, wo+lh+rh, gran, rights)).fork();
-                    nleft = lh;
-                    nright = rh;
+                    int s, sl, sr;
+                    while (rh > eighth && split == a[rb + rh - 1])
+                        --rh; // back up right to beginning of run
+                    if (rh <= eighth)
+                        break; // too imbalanced
+                    while (lh > eighth && split == a[lb + lh - 1])
+                        --lh; // back up left
+                    if (lh <= eighth || (s = lh + rh) <= g || // left too small
+                        (sl = ln - lh) + (sr = rn - rh) <= g) // right too small
+                        break;
+                    addToPendingCount(1);
+                    new Merger(this, a, w, lb + lh, sl, rb + rh, sr, k + s,
+                               g).fork();
+                    rn = rh;
+                    ln = lh;
                 }
 
-                int l = lo;
-                int lFence = l + nleft;
-                int r = ro;
-                int rFence = r + nright;
-                int k = wo;
-                while (l < lFence && r < rFence) {
-                    float al = a[l];
-                    float ar = a[r];
-                    float t;
-                    if (Float.compare(al, ar) <= 0) {
-                        ++l;
-                        t = al;
-                    } else {
-                        ++r;
-                        t = ar;
+                int lf = lb + ln, rf = rb + rn; // index bounds
+                while (lb < lf && rb < rf) {
+                    float t, al, ar;
+                    if ((al = a[lb]) <= (ar = a[rb])) {
+                        lb++; t = al;
+                    }
+                    else {
+                        rb++; t = ar;
                     }
                     w[k++] = t;
                 }
-                while (l < lFence)
-                    w[k++] = a[l++];
-                while (r < rFence)
-                    w[k++] = a[r++];
-                while (rights != null) {
-                    if (rights.tryUnfork())
-                        rights.compute();
-                    else
-                        rights.join();
-                    rights = rights.next;
-                }
+                if (rb < rf)
+                    System.arraycopy(a, rb, w, k, rf - rb);
+                else if (lb < lf)
+                    System.arraycopy(a, lb, w, k, lf - lb);
+
+                tryComplete();
             }
         }
     } // FJFloat
 
     /** double support class */
     static final class FJDouble {
-        static final class Sorter extends RecursiveAction {
+        static final class Sorter extends CountedCompleter<Void> {
             static final long serialVersionUID = 2446542900576103244L;
-            final double[] a;    // array to be sorted.
-            final double[] w;    // workspace for merge
-            final int origin;    // origin of the part of array we deal with
-            final int n;         // Number of elements in (sub)arrays.
-            final int gran;      // split control
-
-            Sorter(double[] a, double[] w, int origin, int n, int gran) {
-                this.a = a;
-                this.w = w;
-                this.origin = origin;
-                this.n = n;
-                this.gran = gran;
+            final double[] a, w;
+            final int base, size, wbase, gran;
+            Sorter(CountedCompleter<?> par, double[] a, double[] w, int base, int size,
+                   int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w; this.base = base; this.size = size;
+                this.wbase = wbase; this.gran = gran;
             }
-
-            public void compute() {
-                final int l = origin;
-                final int g = gran;
-                final int n = this.n;
-                final double[] a = this.a;
-                final double[] w = this.w;
-                if (n > g) {
-                    int h = n >>> 1; // half
-                    int q = n >>> 2; // lower quarter index
-                    int u = h + q;   // upper quarter
-                    FJSubSorter ls = new FJSubSorter(new Sorter(a, w, l, q,   g),
-                                                     new Sorter(a, w, l+q, h-q, g),
-                                                     new Merger(a, w, l,   q,
-                                                                l+q, h-q, l, g, null));
-                    FJSubSorter rs = new FJSubSorter(new Sorter(a, w, l + h, q,   g),
-                                                     new Sorter(a, w, l+u, n-u, g),
-                                                     new Merger(a, w, l+h, q,
-                                                                l+u, n-u, l+h, g, null));
-                    rs.fork();
-                    ls.compute();
-                    if (rs.tryUnfork()) rs.compute(); else rs.join();
-                    new Merger(w, a, l, h, l + h, n - h, l, g, null).compute();
-                } else {
-                    DualPivotQuicksort.sort(a, l, l+n-1);   // skip rangeCheck
+            public final void compute() {
+                CountedCompleter<?> s = this;
+                double[] a = this.a, w = this.w; // localize all params
+                int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
+                while (n > g) {
+                    int h = n >>> 1, q = n >>> 2, u = h + q; // quartiles
+                    int qb = b + q, hb = b + h, ub = b + u;  // bases
+                    int wqb = wb + q, whb = wb + h, wub = wb + u;
+                    int hq = h - q, hn = n - h, un = n - u;  // sizes
+                    n = q;
+                    Relay fc = new Relay(new Merger(s,  w, a, wb,  h,
+                                                    whb, hn, b,  g));
+                    Relay rc = new Relay(new Merger(fc, a, w, hb, q,
+                                                    ub, un, whb, g));
+                    Relay bc = new Relay(new Merger(fc, a, w, b,  q,
+                                                    qb, hq, wb,  g));
+                    s = new EmptyCompleter(bc);
+                    Sorter su = new Sorter(rc, a, w, ub, un, wub, g);
+                    Sorter sh = new Sorter(rc, a, w, hb, q,  whb, g);
+                    Sorter sq = new Sorter(bc, a, w, qb, hq, wqb, g);
+                    su.fork();
+                    sh.fork();
+                    sq.fork();
                 }
+                DualPivotQuicksort.sort(a, b, b + n - 1);
+                s.tryComplete();
             }
         }
 
-        static final class Merger extends RecursiveAction {
-            static final long serialVersionUID = 8076242187166127592L;
-            final double[] a;
-            final double[] w;
-            final int lo;
-            final int ln;
-            final int ro;
-            final int rn;
-            final int wo;
-            final int gran;
-            final Merger next;
-
-            Merger(double[] a, double[] w, int lo, int ln, int ro, int rn, int wo,
-                   int gran, Merger next) {
-                this.a = a;
-                this.w = w;
-                this.lo = lo;
-                this.ln = ln;
-                this.ro = ro;
-                this.rn = rn;
-                this.wo = wo;
-                this.gran = gran;
-                this.next = next;
+        static final class Merger extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final double[] a, w; // main and workspace arrays
+            final int lbase, lsize, rbase, rsize, wbase, gran;
+            Merger(CountedCompleter<?> par, double[] a, double[] w,
+                   int lbase, int lsize, int rbase,
+                   int rsize, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w;
+                this.lbase = lbase; this.lsize = lsize;
+                this.rbase = rbase; this.rsize = rsize;
+                this.wbase = wbase; this.gran = gran;
             }
 
-            public void compute() {
-                final double[] a = this.a;
-                final double[] w = this.w;
-                Merger rights = null;
-                int nleft = ln;
-                int nright = rn;
-                while (nleft > gran) {
-                    int lh = nleft >>> 1;
-                    int splitIndex = lo + lh;
-                    double split = a[splitIndex];
-                    int rl = 0;
-                    int rh = nright;
-                    while (rl < rh) {
-                        int mid = (rl + rh) >>> 1;
-                        if (Double.compare(split, a[ro+mid]) <= 0)
-                            rh = mid;
+            public final void compute() {
+                double[] a = this.a, w = this.w; // localize all params
+                int lb = this.lbase, ln = this.lsize, rb = this.rbase,
+                    rn = this.rsize, k = this.wbase, g = this.gran;
+                int eighth = g >>> 3;
+                for (;;) {
+                    int lh = ln >>> 1;
+                    double split = a[lb + lh];
+                    int rh = rn;
+                    for (int rl = 0; rl < rh;) {
+                        int rm = (rl + rh) >>> 1;
+                        if (split <= a[rb + rm])
+                            rh = rm;
                         else
-                            rl = mid + 1;
+                            rl = rm + 1;
                     }
-                    (rights = new Merger(a, w, splitIndex, nleft-lh, ro+rh,
-                                         nright-rh, wo+lh+rh, gran, rights)).fork();
-                    nleft = lh;
-                    nright = rh;
+                    int s, sl, sr;
+                    while (rh > eighth && split == a[rb + rh - 1])
+                        --rh; // back up right to beginning of run
+                    if (rh <= eighth)
+                        break; // too imbalanced
+                    while (lh > eighth && split == a[lb + lh - 1])
+                        --lh; // back up left
+                    if (lh <= eighth || (s = lh + rh) <= g || // left too small
+                        (sl = ln - lh) + (sr = rn - rh) <= g) // right too small
+                        break;
+                    addToPendingCount(1);
+                    new Merger(this, a, w, lb + lh, sl, rb + rh, sr, k + s,
+                               g).fork();
+                    rn = rh;
+                    ln = lh;
                 }
 
-                int l = lo;
-                int lFence = l + nleft;
-                int r = ro;
-                int rFence = r + nright;
-                int k = wo;
-                while (l < lFence && r < rFence) {
-                    double al = a[l];
-                    double ar = a[r];
-                    double t;
-                    if (Double.compare(al, ar) <= 0) {
-                        ++l;
-                        t = al;
-                    } else {
-                        ++r;
-                        t = ar;
+                int lf = lb + ln, rf = rb + rn; // index bounds
+                while (lb < lf && rb < rf) {
+                    double t, al, ar;
+                    if ((al = a[lb]) <= (ar = a[rb])) {
+                        lb++; t = al;
+                    }
+                    else {
+                        rb++; t = ar;
                     }
                     w[k++] = t;
                 }
-                while (l < lFence)
-                    w[k++] = a[l++];
-                while (r < rFence)
-                    w[k++] = a[r++];
-                while (rights != null) {
-                    if (rights.tryUnfork())
-                        rights.compute();
-                    else
-                        rights.join();
-                    rights = rights.next;
-                }
+                if (rb < rf)
+                    System.arraycopy(a, rb, w, k, rf - rb);
+                else if (lb < lf)
+                    System.arraycopy(a, lb, w, k, lf - lb);
+
+                tryComplete();
             }
         }
     } // FJDouble
 
-    /** Comparable support class */
-    static final class FJComparable {
-        static final class Sorter<T extends Comparable<? super T>> extends RecursiveAction {
-            static final long serialVersionUID = -1024003289463302522L;
-            final T[] a;
-            final T[] w;
-            final int origin;
-            final int n;
-            final int gran;
-
-            Sorter(T[] a, T[] w, int origin, int n, int gran) {
-                this.a = a;
-                this.w = w;
-                this.origin = origin;
-                this.n = n;
-                this.gran = gran;
-            }
-
-            public void compute() {
-                final int l = origin;
-                final int g = gran;
-                final int n = this.n;
-                final T[] a = this.a;
-                final T[] w = this.w;
-                if (n > g) {
-                    int h = n >>> 1;
-                    int q = n >>> 2;
-                    int u = h + q;
-                    FJSubSorter ls = new FJSubSorter(new Sorter<>(a, w, l, q,   g),
-                                                     new Sorter<>(a, w, l+q, h-q, g),
-                                                     new Merger<>(a, w, l,   q,
-                                                                  l+q, h-q, l, g, null));
-                    FJSubSorter rs = new FJSubSorter(new Sorter<>(a, w, l+h, q,   g),
-                                                     new Sorter<>(a, w, l+u, n-u, g),
-                                                     new Merger<>(a, w, l+h, q,
-                                                                  l+u, n-u, l+h, g, null));
-                    rs.fork();
-                    ls.compute();
-                    if (rs.tryUnfork()) rs.compute(); else rs.join();
-                    new Merger<>(w, a, l, h, l + h, n - h, l, g, null).compute();
-                } else {
-                    Arrays.sort(a, l, l+n);
-                }
-            }
-        }
-
-        static final class Merger<T extends Comparable<? super T>> extends RecursiveAction {
-            static final long serialVersionUID = -3989771675258379302L;
-            final T[] a;
-            final T[] w;
-            final int lo;
-            final int ln;
-            final int ro;
-            final int rn;
-            final int wo;
-            final int gran;
-            final Merger<T> next;
-
-            Merger(T[] a, T[] w, int lo, int ln, int ro, int rn, int wo,
-                   int gran, Merger<T> next) {
-                this.a = a;
-                this.w = w;
-                this.lo = lo;
-                this.ln = ln;
-                this.ro = ro;
-                this.rn = rn;
-                this.wo = wo;
-                this.gran = gran;
-                this.next = next;
-            }
-
-            public void compute() {
-                final T[] a = this.a;
-                final T[] w = this.w;
-                Merger<T> rights = null;
-                int nleft = ln;
-                int nright = rn;
-                while (nleft > gran) {
-                    int lh = nleft >>> 1;
-                    int splitIndex = lo + lh;
-                    T split = a[splitIndex];
-                    int rl = 0;
-                    int rh = nright;
-                    while (rl < rh) {
-                        int mid = (rl + rh) >>> 1;
-                        if (split.compareTo(a[ro + mid]) <= 0)
-                            rh = mid;
-                        else
-                            rl = mid + 1;
-                    }
-                    (rights = new Merger<>(a, w, splitIndex, nleft-lh, ro+rh,
-                                           nright-rh, wo+lh+rh, gran, rights)).fork();
-                    nleft = lh;
-                    nright = rh;
-                }
-
-                int l = lo;
-                int lFence = l + nleft;
-                int r = ro;
-                int rFence = r + nright;
-                int k = wo;
-                while (l < lFence && r < rFence) {
-                    T al = a[l];
-                    T ar = a[r];
-                    T t;
-                    if (al.compareTo(ar) <= 0) {++l; t=al;} else {++r; t=ar; }
-                    w[k++] = t;
-                }
-                while (l < lFence)
-                    w[k++] = a[l++];
-                while (r < rFence)
-                    w[k++] = a[r++];
-                while (rights != null) {
-                    if (rights.tryUnfork())
-                        rights.compute();
-                    else
-                        rights.join();
-                    rights = rights.next;
-                }
-            }
-        }
-    } // FJComparable
-
-    /** Object + Comparator support class */
-    static final class FJComparator {
-        static final class Sorter<T> extends RecursiveAction {
-            static final long serialVersionUID = 9191600840025808581L;
-            final T[] a;       // array to be sorted.
-            final T[] w;       // workspace for merge
-            final int origin;  // origin of the part of array we deal with
-            final int n;       // Number of elements in (sub)arrays.
-            final int gran;    // split control
-            final Comparator<? super T> cmp; // Comparator to use
-
-            Sorter(T[] a, T[] w, int origin, int n, int gran, Comparator<? super T> cmp) {
-                this.a = a;
-                this.w = w;
-                this.origin = origin;
-                this.n = n;
-                this.cmp = cmp;
-                this.gran = gran;
-            }
-
-            public void compute() {
-                final int l = origin;
-                final int g = gran;
-                final int n = this.n;
-                final T[] a = this.a;
-                final T[] w = this.w;
-                if (n > g) {
-                    int h = n >>> 1; // half
-                    int q = n >>> 2; // lower quarter index
-                    int u = h + q;   // upper quarter
-                    FJSubSorter ls = new FJSubSorter(new Sorter<>(a, w, l, q,   g, cmp),
-                                                     new Sorter<>(a, w, l+q, h-q, g, cmp),
-                                                     new Merger<>(a, w, l,   q,
-                                                                  l+q, h-q, l, g, null, cmp));
-                    FJSubSorter rs = new FJSubSorter(new Sorter<>(a, w, l + h, q,   g, cmp),
-                                                     new Sorter<>(a, w, l+u, n-u, g, cmp),
-                                                     new Merger<>(a, w, l+h, q,
-                                                                  l+u, n-u, l+h, g, null, cmp));
-                    rs.fork();
-                    ls.compute();
-                    if (rs.tryUnfork()) rs.compute(); else rs.join();
-                    new Merger<>(w, a, l, h, l + h, n - h, l, g, null, cmp).compute();
-                } else {
-                    Arrays.sort(a, l, l+n, cmp);
-                }
-            }
-        }
-
-        static final class Merger<T> extends RecursiveAction {
-            static final long serialVersionUID = -2679539040379156203L;
-            final T[] a;
-            final T[] w;
-            final int lo;
-            final int ln;
-            final int ro;
-            final int rn;
-            final int wo;
-            final int gran;
-            final Merger<T> next;
-            final Comparator<? super T> cmp;
-
-            Merger(T[] a, T[] w, int lo, int ln, int ro, int rn, int wo,
-                   int gran, Merger<T> next, Comparator<? super T> cmp) {
-                this.a = a;
-                this.w = w;
-                this.lo = lo;
-                this.ln = ln;
-                this.ro = ro;
-                this.rn = rn;
-                this.wo = wo;
-                this.gran = gran;
-                this.next = next;
-                this.cmp = cmp;
-            }
-
-            public void compute() {
-                final T[] a = this.a;
-                final T[] w = this.w;
-                Merger<T> rights = null;
-                int nleft = ln;
-                int nright = rn;
-                while (nleft > gran) {
-                    int lh = nleft >>> 1;
-                    int splitIndex = lo + lh;
-                    T split = a[splitIndex];
-                    int rl = 0;
-                    int rh = nright;
-                    while (rl < rh) {
-                        int mid = (rl + rh) >>> 1;
-                        if (cmp.compare(split, a[ro+mid]) <= 0)
-                            rh = mid;
-                        else
-                            rl = mid + 1;
-                    }
-                    (rights = new Merger<>(a, w, splitIndex, nleft-lh, ro+rh,
-                                           nright-rh, wo+lh+rh, gran, rights, cmp)).fork();
-                    nleft = lh;
-                    nright = rh;
-                }
-
-                int l = lo;
-                int lFence = l + nleft;
-                int r = ro;
-                int rFence = r + nright;
-                int k = wo;
-                while (l < lFence && r < rFence) {
-                    T al = a[l];
-                    T ar = a[r];
-                    T t;
-                    if (cmp.compare(al, ar) <= 0) {
-                        ++l;
-                        t = al;
-                    } else {
-                        ++r;
-                        t = ar;
-                    }
-                    w[k++] = t;
-                }
-                while (l < lFence)
-                    w[k++] = a[l++];
-                while (r < rFence)
-                    w[k++] = a[r++];
-                while (rights != null) {
-                    if (rights.tryUnfork())
-                        rights.compute();
-                    else
-                        rights.join();
-                    rights = rights.next;
-                }
-            }
-        }
-    } // FJComparator
-
-    /** Utility class to sort half a partitioned array */
-    private static final class FJSubSorter extends RecursiveAction {
-        static final long serialVersionUID = 9159249695527935512L;
-        final RecursiveAction left;
-        final RecursiveAction right;
-        final RecursiveAction merger;
-
-        FJSubSorter(RecursiveAction left, RecursiveAction right,
-                    RecursiveAction merger) {
-            this.left = left;
-            this.right = right;
-            this.merger = merger;
-        }
-
-        public void compute() {
-            right.fork();
-            left.invoke();
-            right.join();
-            merger.invoke();
-        }
-    }
 }
--- a/src/share/classes/java/util/ComparableTimSort.java	Thu Mar 28 10:48:43 2013 +0100
+++ b/src/share/classes/java/util/ComparableTimSort.java	Thu Mar 28 17:10:40 2013 +0100
@@ -86,9 +86,13 @@
     private static final int INITIAL_TMP_STORAGE_LENGTH = 256;
 
     /**
-     * Temp storage for merges.
+     * Temp storage for merges. A workspace array may optionally be
+     * provided in constructor, and if so will be used as long as it
+     * is big enough.
      */
     private Object[] tmp;
+    private int tmpBase; // base of tmp array slice
+    private int tmpLen;  // length of tmp array slice
 
     /**
      * A stack of pending runs yet to be merged.  Run i starts at
@@ -108,15 +112,27 @@
      * Creates a TimSort instance to maintain the state of an ongoing sort.
      *
      * @param a the array to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    private ComparableTimSort(Object[] a) {
+    private ComparableTimSort(Object[] a, Object[] work, int workBase, int workLen) {
         this.a = a;
 
         // Allocate temp storage (which may be increased later if necessary)
         int len = a.length;
-        Object[] newArray = new Object[len < 2 * INITIAL_TMP_STORAGE_LENGTH ?
-                                       len >>> 1 : INITIAL_TMP_STORAGE_LENGTH];
-        tmp = newArray;
+        int tlen = (len < 2 * INITIAL_TMP_STORAGE_LENGTH) ?
+            len >>> 1 : INITIAL_TMP_STORAGE_LENGTH;
+        if (work == null || workLen < tlen) {
+            tmp = new Object[tlen];
+            tmpBase = 0;
+            tmpLen = tlen;
+        }
+        else {
+            tmp = work;
+            tmpBase = workBase;
+            tmpLen = workLen;
+        }
 
         /*
          * Allocate runs-to-be-merged stack (which cannot be expanded).  The
@@ -136,16 +152,25 @@
     }
 
     /*
-     * The next two methods (which are package private and static) constitute
+     * The next three methods (which are package private and static) constitute
      * the entire API of this class.  Each of these methods obeys the contract
      * of the public method with the same signature in java.util.Arrays.
      */
 
     static void sort(Object[] a) {
-          sort(a, 0, a.length);
+        sort(a, 0, a.length, null, 0, 0);
     }
 
     static void sort(Object[] a, int lo, int hi) {
+        sort(a, lo, hi, null, 0, 0);
+    }
+
+    /**
+     * sort, using the given workspace array slice for temp storage
+     * when possible.
+     * @since 1.8
+     */
+    static void sort(Object[] a, int lo, int hi, Object[] work, int workBase, int workLen) {
         rangeCheck(a.length, lo, hi);
         int nRemaining  = hi - lo;
         if (nRemaining < 2)
@@ -163,7 +188,7 @@
          * extending short natural runs to minRun elements, and merging runs
          * to maintain stack invariant.
          */
-        ComparableTimSort ts = new ComparableTimSort(a);
+        ComparableTimSort ts = new ComparableTimSort(a, work, workBase, workLen);
         int minRun = minRunLength(nRemaining);
         do {
             // Identify next run
@@ -619,11 +644,11 @@
         // Copy first run into temp array
         Object[] a = this.a; // For performance
         Object[] tmp = ensureCapacity(len1);
-        System.arraycopy(a, base1, tmp, 0, len1);
 
-        int cursor1 = 0;       // Indexes into tmp array
+        int cursor1 = tmpBase; // Indexes into tmp array
         int cursor2 = base2;   // Indexes int a
         int dest = base1;      // Indexes int a
+        System.arraycopy(a, base1, tmp, cursor1, len1);
 
         // Move first element of second run and deal with degenerate cases
         a[dest++] = a[cursor2++];
@@ -736,16 +761,17 @@
         // Copy second run into temp array
         Object[] a = this.a; // For performance
         Object[] tmp = ensureCapacity(len2);
-        System.arraycopy(a, base2, tmp, 0, len2);
+        int tmpBase = this.tmpBase;
+        System.arraycopy(a, base2, tmp, tmpBase, len2);
 
         int cursor1 = base1 + len1 - 1;  // Indexes into a
-        int cursor2 = len2 - 1;          // Indexes into tmp array
+        int cursor2 = tmpBase + len2 - 1; // Indexes into tmp array
         int dest = base2 + len2 - 1;     // Indexes into a
 
         // Move last element of first run and deal with degenerate cases
         a[dest--] = a[cursor1--];
         if (--len1 == 0) {
-            System.arraycopy(tmp, 0, a, dest - (len2 - 1), len2);
+            System.arraycopy(tmp, tmpBase, a, dest - (len2 - 1), len2);
             return;
         }
         if (len2 == 1) {
@@ -803,7 +829,7 @@
                 if (--len2 == 1)
                     break outer;
 
-                count2 = len2 - gallopLeft((Comparable) a[cursor1], tmp, 0, len2, len2 - 1);
+                count2 = len2 - gallopLeft((Comparable) a[cursor1], tmp, tmpBase, len2, len2 - 1);
                 if (count2 != 0) {
                     dest -= count2;
                     cursor2 -= count2;
@@ -835,7 +861,7 @@
         } else {
             assert len1 == 0;
             assert len2 > 0;
-            System.arraycopy(tmp, 0, a, dest - (len2 - 1), len2);
+            System.arraycopy(tmp, tmpBase, a, dest - (len2 - 1), len2);
         }
     }
 
@@ -848,7 +874,7 @@
      * @return tmp, whether or not it grew
      */
     private Object[]  ensureCapacity(int minCapacity) {
-        if (tmp.length < minCapacity) {
+        if (tmpLen < minCapacity) {
             // Compute smallest power of 2 > minCapacity
             int newSize = minCapacity;
             newSize |= newSize >> 1;
@@ -863,8 +889,11 @@
             else
                 newSize = Math.min(newSize, a.length >>> 1);
 
+            @SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"})
             Object[] newArray = new Object[newSize];
             tmp = newArray;
+            tmpLen = newSize;
+            tmpBase = 0;
         }
         return tmp;
     }
--- a/src/share/classes/java/util/TimSort.java	Thu Mar 28 10:48:43 2013 +0100
+++ b/src/share/classes/java/util/TimSort.java	Thu Mar 28 17:10:40 2013 +0100
@@ -111,9 +111,13 @@
     private static final int INITIAL_TMP_STORAGE_LENGTH = 256;
 
     /**
-     * Temp storage for merges.
+     * Temp storage for merges. A workspace array may optionally be
+     * provided in constructor, and if so will be used as long as it
+     * is big enough.
      */
-    private T[] tmp; // Actual runtime type will be Object[], regardless of T
+    private T[] tmp;
+    private int tmpBase; // base of tmp array slice
+    private int tmpLen;  // length of tmp array slice
 
     /**
      * A stack of pending runs yet to be merged.  Run i starts at
@@ -134,17 +138,30 @@
      *
      * @param a the array to be sorted
      * @param c the comparator to determine the order of the sort
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    private TimSort(T[] a, Comparator<? super T> c) {
+    private TimSort(T[] a, Comparator<? super T> c, T[] work, int workBase, int workLen) {
         this.a = a;
         this.c = c;
 
         // Allocate temp storage (which may be increased later if necessary)
         int len = a.length;
-        @SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"})
-        T[] newArray = (T[]) new Object[len < 2 * INITIAL_TMP_STORAGE_LENGTH ?
-                                        len >>> 1 : INITIAL_TMP_STORAGE_LENGTH];
-        tmp = newArray;
+        int tlen = (len < 2 * INITIAL_TMP_STORAGE_LENGTH) ?
+            len >>> 1 : INITIAL_TMP_STORAGE_LENGTH;
+        if (work == null || workLen < tlen) {
+            @SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"})
+            T[] newArray = (T[]) new Object[tlen];
+            tmp = newArray;
+            tmpBase = 0;
+            tmpLen = tlen;
+        }
+        else {
+            tmp = work;
+            tmpBase = workBase;
+            tmpLen = workLen;
+        }
 
         /*
          * Allocate runs-to-be-merged stack (which cannot be expanded).  The
@@ -164,16 +181,26 @@
     }
 
     /*
-     * The next two methods (which are package private and static) constitute
+     * The next three methods (which are package private and static) constitute
      * the entire API of this class.  Each of these methods obeys the contract
      * of the public method with the same signature in java.util.Arrays.
      */
 
     static <T> void sort(T[] a, Comparator<? super T> c) {
-        sort(a, 0, a.length, c);
+        sort(a, 0, a.length, c, null, 0, 0);
     }
 
     static <T> void sort(T[] a, int lo, int hi, Comparator<? super T> c) {
+        sort(a, lo, hi, c, null, 0, 0);
+    }
+
+    /**
+     * sort, using the given workspace array slice for temp storage
+     * when possible.
+     * @since 1.8
+     */
+    static <T> void sort(T[] a, int lo, int hi, Comparator<? super T> c,
+                         T[] work, int workBase, int workLen) {
         if (c == null) {
             Arrays.sort(a, lo, hi);
             return;
@@ -196,7 +223,7 @@
          * extending short natural runs to minRun elements, and merging runs
          * to maintain stack invariant.
          */
-        TimSort<T> ts = new TimSort<>(a, c);
+        TimSort<T> ts = new TimSort<>(a, c, work, workBase, workLen);
         int minRun = minRunLength(nRemaining);
         do {
             // Identify next run
@@ -653,11 +680,10 @@
         // Copy first run into temp array
         T[] a = this.a; // For performance
         T[] tmp = ensureCapacity(len1);
-        System.arraycopy(a, base1, tmp, 0, len1);
-
-        int cursor1 = 0;       // Indexes into tmp array
+        int cursor1 = tmpBase; // Indexes into tmp array
         int cursor2 = base2;   // Indexes int a
         int dest = base1;      // Indexes int a
+        System.arraycopy(a, base1, tmp, cursor1, len1);
 
         // Move first element of second run and deal with degenerate cases
         a[dest++] = a[cursor2++];
@@ -770,16 +796,17 @@
         // Copy second run into temp array
         T[] a = this.a; // For performance
         T[] tmp = ensureCapacity(len2);
-        System.arraycopy(a, base2, tmp, 0, len2);
+        int tmpBase = this.tmpBase;
+        System.arraycopy(a, base2, tmp, tmpBase, len2);
 
         int cursor1 = base1 + len1 - 1;  // Indexes into a
-        int cursor2 = len2 - 1;          // Indexes into tmp array
+        int cursor2 = tmpBase + len2 - 1; // Indexes into tmp array
         int dest = base2 + len2 - 1;     // Indexes into a
 
         // Move last element of first run and deal with degenerate cases
         a[dest--] = a[cursor1--];
         if (--len1 == 0) {
-            System.arraycopy(tmp, 0, a, dest - (len2 - 1), len2);
+            System.arraycopy(tmp, tmpBase, a, dest - (len2 - 1), len2);
             return;
         }
         if (len2 == 1) {
@@ -838,7 +865,7 @@
                 if (--len2 == 1)
                     break outer;
 
-                count2 = len2 - gallopLeft(a[cursor1], tmp, 0, len2, len2 - 1, c);
+                count2 = len2 - gallopLeft(a[cursor1], tmp, tmpBase, len2, len2 - 1, c);
                 if (count2 != 0) {
                     dest -= count2;
                     cursor2 -= count2;
@@ -870,7 +897,7 @@
         } else {
             assert len1 == 0;
             assert len2 > 0;
-            System.arraycopy(tmp, 0, a, dest - (len2 - 1), len2);
+            System.arraycopy(tmp, tmpBase, a, dest - (len2 - 1), len2);
         }
     }
 
@@ -883,7 +910,7 @@
      * @return tmp, whether or not it grew
      */
     private T[] ensureCapacity(int minCapacity) {
-        if (tmp.length < minCapacity) {
+        if (tmpLen < minCapacity) {
             // Compute smallest power of 2 > minCapacity
             int newSize = minCapacity;
             newSize |= newSize >> 1;
@@ -901,6 +928,8 @@
             @SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"})
             T[] newArray = (T[]) new Object[newSize];
             tmp = newArray;
+            tmpLen = newSize;
+            tmpBase = 0;
         }
         return tmp;
     }