changeset 8072:cc55bc49dda8

Merge
author mduigou
date Mon, 08 Apr 2013 16:19:46 -0700
parents a195f50617fa 60998dc19cdf
children fbc1c394e9bf
files src/share/classes/java/nio/file/Files.java test-ng/bootlib/java/util/stream/IntermediateOp.java test-ng/bootlib/java/util/stream/StatefulOp.java test-ng/tests/org/openjdk/tests/java/util/stream/UniqOpTest.java
diffstat 89 files changed, 3629 insertions(+), 2706 deletions(-)
--- a/src/share/classes/java/lang/Iterable.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/lang/Iterable.java	Mon Apr 08 16:19:46 2013 -0700
@@ -43,7 +43,7 @@
 @FunctionalInterface
 public interface Iterable<T> {
     /**
-     * Returns an iterator over elements of type {@code <T>}.
+     * Returns an iterator over elements of type {@code T}.
      *
      * @return an Iterator.
      */
@@ -52,8 +52,8 @@
     /**
      * Performs the given action on the contents of the {@code Iterable}, in the
      * order elements occur when iterating, until all elements have been
-     * processed or the action throws an exception.  Exceptions thrown
-     * by the action are relayed to the caller.
+     * processed or the action throws an exception.  Errors or runtime
+     * exceptions thrown by the action are relayed to the caller.
      *
      * @implSpec
      * <p>The default implementation behaves as if:
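The hunk above is cut off before the @implSpec code it refers to. For orientation only, a default implementation of the shape being documented reads like the sketch below (a sketch consistent with the wording, not a quote of the elided snippet); because the loop catches nothing, any Error or RuntimeException thrown by the action propagates straight to the caller, which is what the reworded sentence makes explicit.

    import java.util.Objects;
    import java.util.function.Consumer;

    // Sketch only: iterate and invoke the action; exceptions from the action
    // are not caught, so they are relayed to the caller as documented.
    interface IterableSketch<T> extends Iterable<T> {
        default void forEach(Consumer<? super T> action) {
            Objects.requireNonNull(action);
            for (T t : this) {
                action.accept(t);
            }
        }
    }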
--- a/src/share/classes/java/nio/file/Files.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/nio/file/Files.java	Mon Apr 08 16:19:46 2013 -0700
@@ -3221,7 +3221,7 @@
             }
         };
 
-        return new DelegatingCloseableStream(ds,
+        return new DelegatingCloseableStream<>(ds,
             Streams.stream(Spliterators.spliteratorUnknownSize(it,
                                                                Spliterator.DISTINCT)));
     }
@@ -3313,7 +3313,7 @@
         }
 
         FileTreeIterator itor = FileTreeIterator.iterator(start, maxDepth, options);
-        return new DelegatingCloseableStream(itor,
+        return new DelegatingCloseableStream<>(itor,
             Streams.stream(Spliterators.spliteratorUnknownSize(itor, Spliterator.DISTINCT))
                    .map(entry -> entry.getPath()));
     }
@@ -3410,7 +3410,7 @@
             throw new IllegalArgumentException("'maxDepth' is negative");
         }
         FileTreeIterator itor = FileTreeIterator.iterator(start, maxDepth, options);
-        return new DelegatingCloseableStream(itor,
+        return new DelegatingCloseableStream<>(itor,
             Streams.stream(Spliterators.spliteratorUnknownSize(itor, Spliterator.DISTINCT))
                    .filter(entry -> matcher.test(entry.getPath(), entry.getFileAttributes()))
                    .map(entry -> entry.getPath()));
@@ -3463,6 +3463,6 @@
         throws IOException
     {
         BufferedReader br = Files.newBufferedReader(path, cs);
-        return new DelegatingCloseableStream(br, br.lines());
+        return new DelegatingCloseableStream<>(br, br.lines());
     }
 }
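Each of the four Files.java hunks only adds the diamond operator: constructing the package-private DelegatingCloseableStream as a raw type discards the element type and produces an unchecked-conversion warning, whereas new DelegatingCloseableStream<>(...) lets the compiler infer it from the target type. A minimal illustration of the same raw-type-versus-diamond difference using a plain generic class (DelegatingCloseableStream itself is JDK-internal and not reproduced here):

    import java.util.ArrayList;
    import java.util.List;

    class DiamondSketch {
        static List<String> rawType() {
            return new ArrayList();     // compiles, but with an unchecked warning
        }
        static List<String> withDiamond() {
            return new ArrayList<>();   // element type inferred from the return type
        }
    }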
--- a/src/share/classes/java/util/ArrayList.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/ArrayList.java	Mon Apr 08 16:19:46 2013 -0700
@@ -1239,7 +1239,6 @@
     @Override
     @SuppressWarnings("unchecked")
     public void sort(Comparator<? super E> c) {
-        Objects.requireNonNull(c);
         final int expectedModCount = modCount;
         Arrays.sort((E[]) elementData, 0, size, c);
         if (modCount != expectedModCount) {
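Removing Objects.requireNonNull(c) means ArrayList.sort no longer rejects a null comparator up front; the argument is handed straight to Arrays.sort, which (with the Arrays.java changes below) treats null as natural ordering. A hedged usage sketch of the resulting behaviour:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    class SortWithNullComparator {
        public static void main(String[] args) {
            List<String> names = new ArrayList<>(Arrays.asList("pear", "apple", "fig"));
            names.sort(null);              // null comparator is taken to mean natural ordering
            System.out.println(names);     // [apple, fig, pear]
        }
    }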
--- a/src/share/classes/java/util/Arrays.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/Arrays.java	Mon Apr 08 16:19:46 2013 -0700
@@ -74,13 +74,57 @@
      * smaller sizes typically results in memory contention across
      * tasks that makes parallel speedups unlikely.
      */
-    public static final int MIN_ARRAY_SORT_GRAN = 1 << 13; 
+    public static final int MIN_ARRAY_SORT_GRAN = 1 << 13;
 
     // Suppresses default constructor, ensuring non-instantiability.
     private Arrays() {}
 
+    /**
+     * A comparator that implements the natural ordering of a group of
+     * mutually comparable elements. May be used when a supplied
+     * comparator is null. To simplify code-sharing within underlying
+     * implementations, the compare method only declares type Object
+     * for its second argument.
+     *
+     * Arrays class implementor's note: It is an empirical matter
+     * whether ComparableTimSort offers any performance benefit over
+     * TimSort used with this comparator.  If not, you are better off
+     * deleting or bypassing ComparableTimSort.  There is currently no
+     * empirical case for separating them for parallel sorting, so all
+     * public Object parallelSort methods use the same comparator
+     * based implementation.
+     */
+    static final class NaturalOrder implements Comparator<Object> {
+        @SuppressWarnings("unchecked")
+        public int compare(Object first, Object second) {
+            return ((Comparable<Object>)first).compareTo(second);
+        }
+        static final NaturalOrder INSTANCE = new NaturalOrder();
+    }
+
+    /**
+     * Checks that {@code fromIndex} and {@code toIndex} are in
+     * the range and throws an exception if they aren't.
+     */
+    private static void rangeCheck(int arrayLength, int fromIndex, int toIndex) {
+        if (fromIndex > toIndex) {
+            throw new IllegalArgumentException(
+                    "fromIndex(" + fromIndex + ") > toIndex(" + toIndex + ")");
+        }
+        if (fromIndex < 0) {
+            throw new ArrayIndexOutOfBoundsException(fromIndex);
+        }
+        if (toIndex > arrayLength) {
+            throw new ArrayIndexOutOfBoundsException(toIndex);
+        }
+    }
+
     /*
-     * Sorting of primitive type arrays.
+     * Sorting methods. Note that all public "sort" methods take the
+     * same form: Performing argument checks if necessary, and then
+     * expanding arguments into those required for the internal
+     * implementation methods residing in other package-private
+     * classes (except for legacyMergeSort, included in this class).
      */
 
     /**
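NaturalOrder exists so the Comparable and Comparator code paths can share a single TimSort entry point: wherever a caller passes a null comparator, the implementation now swaps in NaturalOrder.INSTANCE instead of special-casing null inside the sort. A usage sketch of the observable effect (a null comparator continues to mean natural ordering, as it did before, just routed through the new comparator):

    import java.util.Arrays;
    import java.util.Comparator;

    class NullComparatorDemo {
        public static void main(String[] args) {
            String[] a = { "pear", "apple", "fig" };
            Arrays.sort(a, (Comparator<String>) null);   // substituted with NaturalOrder.INSTANCE
            System.out.println(Arrays.toString(a));      // [apple, fig, pear]
        }
    }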
@@ -95,7 +139,7 @@
      * @param a the array to be sorted
      */
     public static void sort(int[] a) {
-        DualPivotQuicksort.sort(a);
+        DualPivotQuicksort.sort(a, 0, a.length - 1, null, 0, 0);
     }
 
     /**
@@ -120,7 +164,7 @@
      */
     public static void sort(int[] a, int fromIndex, int toIndex) {
         rangeCheck(a.length, fromIndex, toIndex);
-        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
+        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
     }
 
     /**
@@ -135,7 +179,7 @@
      * @param a the array to be sorted
      */
     public static void sort(long[] a) {
-        DualPivotQuicksort.sort(a);
+        DualPivotQuicksort.sort(a, 0, a.length - 1, null, 0, 0);
     }
 
     /**
@@ -160,7 +204,7 @@
      */
     public static void sort(long[] a, int fromIndex, int toIndex) {
         rangeCheck(a.length, fromIndex, toIndex);
-        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
+        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
     }
 
     /**
@@ -175,7 +219,7 @@
      * @param a the array to be sorted
      */
     public static void sort(short[] a) {
-        DualPivotQuicksort.sort(a);
+        DualPivotQuicksort.sort(a, 0, a.length - 1, null, 0, 0);
     }
 
     /**
@@ -200,7 +244,7 @@
      */
     public static void sort(short[] a, int fromIndex, int toIndex) {
         rangeCheck(a.length, fromIndex, toIndex);
-        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
+        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
     }
 
     /**
@@ -215,7 +259,7 @@
      * @param a the array to be sorted
      */
     public static void sort(char[] a) {
-        DualPivotQuicksort.sort(a);
+        DualPivotQuicksort.sort(a, 0, a.length - 1, null, 0, 0);
     }
 
     /**
@@ -240,7 +284,7 @@
      */
     public static void sort(char[] a, int fromIndex, int toIndex) {
         rangeCheck(a.length, fromIndex, toIndex);
-        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
+        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
     }
 
     /**
@@ -255,7 +299,7 @@
      * @param a the array to be sorted
      */
     public static void sort(byte[] a) {
-        DualPivotQuicksort.sort(a);
+        DualPivotQuicksort.sort(a, 0, a.length - 1);
     }
 
     /**
@@ -303,7 +347,7 @@
      * @param a the array to be sorted
      */
     public static void sort(float[] a) {
-        DualPivotQuicksort.sort(a);
+        DualPivotQuicksort.sort(a, 0, a.length - 1, null, 0, 0);
     }
 
     /**
@@ -336,7 +380,7 @@
      */
     public static void sort(float[] a, int fromIndex, int toIndex) {
         rangeCheck(a.length, fromIndex, toIndex);
-        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
+        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
     }
 
     /**
@@ -359,7 +403,7 @@
      * @param a the array to be sorted
      */
     public static void sort(double[] a) {
-        DualPivotQuicksort.sort(a);
+        DualPivotQuicksort.sort(a, 0, a.length - 1, null, 0, 0);
     }
 
     /**
@@ -392,7 +436,7 @@
      */
     public static void sort(double[] a, int fromIndex, int toIndex) {
         rangeCheck(a.length, fromIndex, toIndex);
-        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
+        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
     }
 
     /**
@@ -408,7 +452,15 @@
      * @param a the array to be sorted
      */
     public static void parallelSort(byte[] a) {
-        parallelSort(a, 0, a.length);
+        int n = a.length, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, 0, n - 1);
+        else
+            new ArraysParallelSortHelpers.FJByte.Sorter
+                (null, a, new byte[n], 0, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
     }
 
     /**
@@ -433,17 +485,16 @@
      *     if {@code fromIndex < 0} or {@code toIndex > a.length}
      */
     public static void parallelSort(byte[] a, int fromIndex, int toIndex) {
-        checkFromToBounds(a.length, fromIndex, toIndex);
-        int n = toIndex - fromIndex;
-        int p = ForkJoinPool.getCommonPoolParallelism();
-        if (p == 1 || n <= MIN_ARRAY_SORT_GRAN)
+        rangeCheck(a.length, fromIndex, toIndex);
+        int n = toIndex - fromIndex, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
             DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
-        else {
-            int g = n / (p << 3);
+        else
             new ArraysParallelSortHelpers.FJByte.Sorter
                 (null, a, new byte[n], fromIndex, n, 0,
-                 (g <= MIN_ARRAY_SORT_GRAN) ? MIN_ARRAY_SORT_GRAN : g).invoke();
-        }
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
     }
 
     /**
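Both parallelSort(byte[]) variants above show the dispatch rule that the rest of the file repeats for every element type: sort sequentially when the range has at most MIN_ARRAY_SORT_GRAN elements or the common pool has parallelism 1, otherwise fork a Sorter whose leaf granularity is g = n / (4 * p), floored at MIN_ARRAY_SORT_GRAN (the previous code used n / (8 * p)). A small sketch restating that arithmetic:

    // Restates the granularity computation from the hunks above; not JDK code.
    class GranularitySketch {
        static final int MIN_ARRAY_SORT_GRAN = 1 << 13;    // 8192

        static int granularity(int n, int p) {
            int g = n / (p << 2);                           // n / (4 * parallelism)
            return (g <= MIN_ARRAY_SORT_GRAN) ? MIN_ARRAY_SORT_GRAN : g;
        }

        public static void main(String[] args) {
            System.out.println(granularity(1 << 20, 8));    // 1048576 / 32 = 32768
            System.out.println(granularity(100_000, 8));    // 3125, floored to 8192
        }
    }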
@@ -459,7 +510,15 @@
      * @param a the array to be sorted
      */
     public static void parallelSort(char[] a) {
-        parallelSort(a, 0, a.length);
+        int n = a.length, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, 0, n - 1, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJChar.Sorter
+                (null, a, new char[n], 0, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
     }
 
     /**
@@ -484,17 +543,16 @@
      *     if {@code fromIndex < 0} or {@code toIndex > a.length}
      */
     public static void parallelSort(char[] a, int fromIndex, int toIndex) {
-        checkFromToBounds(a.length, fromIndex, toIndex);
-        int n = toIndex - fromIndex;
-        int p = ForkJoinPool.getCommonPoolParallelism();
-        if (p == 1 || n <= MIN_ARRAY_SORT_GRAN)
-            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
-        else {
-            int g = n / (p << 3);
+        rangeCheck(a.length, fromIndex, toIndex);
+        int n = toIndex - fromIndex, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
+        else
             new ArraysParallelSortHelpers.FJChar.Sorter
                 (null, a, new char[n], fromIndex, n, 0,
-                 (g <= MIN_ARRAY_SORT_GRAN) ? MIN_ARRAY_SORT_GRAN : g).invoke();
-        }
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
     }
 
     /**
@@ -510,7 +568,15 @@
      * @param a the array to be sorted
      */
     public static void parallelSort(short[] a) {
-        parallelSort(a, 0, a.length);
+        int n = a.length, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, 0, n - 1, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJShort.Sorter
+                (null, a, new short[n], 0, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
     }
 
     /**
@@ -535,17 +601,16 @@
      *     if {@code fromIndex < 0} or {@code toIndex > a.length}
      */
     public static void parallelSort(short[] a, int fromIndex, int toIndex) {
-        checkFromToBounds(a.length, fromIndex, toIndex);
-        int n = toIndex - fromIndex;
-        int p = ForkJoinPool.getCommonPoolParallelism();
-        if (p == 1 || n <= MIN_ARRAY_SORT_GRAN)
-            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
-        else {
-            int g = n / (p << 3);
+        rangeCheck(a.length, fromIndex, toIndex);
+        int n = toIndex - fromIndex, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
+        else
             new ArraysParallelSortHelpers.FJShort.Sorter
                 (null, a, new short[n], fromIndex, n, 0,
-                 (g <= MIN_ARRAY_SORT_GRAN) ? MIN_ARRAY_SORT_GRAN : g).invoke();
-        }
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
     }
 
     /**
@@ -561,7 +626,15 @@
      * @param a the array to be sorted
      */
     public static void parallelSort(int[] a) {
-        parallelSort(a, 0, a.length);
+        int n = a.length, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, 0, n - 1, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJInt.Sorter
+                (null, a, new int[n], 0, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
     }
 
     /**
@@ -586,17 +659,16 @@
      *     if {@code fromIndex < 0} or {@code toIndex > a.length}
      */
     public static void parallelSort(int[] a, int fromIndex, int toIndex) {
-        checkFromToBounds(a.length, fromIndex, toIndex);
-        int n = toIndex - fromIndex;
-        int p = ForkJoinPool.getCommonPoolParallelism();
-        if (p == 1 || n <= MIN_ARRAY_SORT_GRAN)
-            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
-        else {
-            int g = n / (p << 3);
+        rangeCheck(a.length, fromIndex, toIndex);
+        int n = toIndex - fromIndex, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
+        else
             new ArraysParallelSortHelpers.FJInt.Sorter
                 (null, a, new int[n], fromIndex, n, 0,
-                 (g <= MIN_ARRAY_SORT_GRAN) ? MIN_ARRAY_SORT_GRAN : g).invoke();
-        }
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
     }
 
     /**
@@ -612,7 +684,15 @@
      * @param a the array to be sorted
      */
     public static void parallelSort(long[] a) {
-        parallelSort(a, 0, a.length);
+        int n = a.length, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, 0, n - 1, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJLong.Sorter
+                (null, a, new long[n], 0, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
     }
 
     /**
@@ -637,17 +717,16 @@
      *     if {@code fromIndex < 0} or {@code toIndex > a.length}
      */
     public static void parallelSort(long[] a, int fromIndex, int toIndex) {
-        checkFromToBounds(a.length, fromIndex, toIndex);
-        int n = toIndex - fromIndex;
-        int p = ForkJoinPool.getCommonPoolParallelism();
-        if (p == 1 || n <= MIN_ARRAY_SORT_GRAN)
-            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
-        else {
-            int g = n / (p << 3);
+        rangeCheck(a.length, fromIndex, toIndex);
+        int n = toIndex - fromIndex, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
+        else
             new ArraysParallelSortHelpers.FJLong.Sorter
                 (null, a, new long[n], fromIndex, n, 0,
-                 (g <= MIN_ARRAY_SORT_GRAN) ? MIN_ARRAY_SORT_GRAN : g).invoke();
-        }
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
     }
 
     /**
@@ -671,7 +750,15 @@
      * @param a the array to be sorted
      */
     public static void parallelSort(float[] a) {
-        parallelSort(a, 0, a.length);
+        int n = a.length, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, 0, n - 1, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJFloat.Sorter
+                (null, a, new float[n], 0, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
     }
 
     /**
@@ -704,17 +791,16 @@
      *     if {@code fromIndex < 0} or {@code toIndex > a.length}
      */
     public static void parallelSort(float[] a, int fromIndex, int toIndex) {
-        checkFromToBounds(a.length, fromIndex, toIndex);
-        int n = toIndex - fromIndex;
-        int p = ForkJoinPool.getCommonPoolParallelism();
-        if (p == 1 || n <= MIN_ARRAY_SORT_GRAN)
-            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
-        else {
-            int g = n / (p << 3);
+        rangeCheck(a.length, fromIndex, toIndex);
+        int n = toIndex - fromIndex, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
+        else
             new ArraysParallelSortHelpers.FJFloat.Sorter
                 (null, a, new float[n], fromIndex, n, 0,
-                 (g <= MIN_ARRAY_SORT_GRAN) ? MIN_ARRAY_SORT_GRAN : g).invoke();
-        }
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
     }
 
     /**
@@ -738,7 +824,15 @@
      * @param a the array to be sorted
      */
     public static void parallelSort(double[] a) {
-        parallelSort(a, 0, a.length);
+        int n = a.length, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, 0, n - 1, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJDouble.Sorter
+                (null, a, new double[n], 0, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
     }
 
     /**
@@ -771,17 +865,16 @@
      *     if {@code fromIndex < 0} or {@code toIndex > a.length}
      */
     public static void parallelSort(double[] a, int fromIndex, int toIndex) {
-        checkFromToBounds(a.length, fromIndex, toIndex);
-        int n = toIndex - fromIndex;
-        int p = ForkJoinPool.getCommonPoolParallelism();
-        if (p == 1 || n <= MIN_ARRAY_SORT_GRAN)
-            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
-        else {
-            int g = n / (p << 3);
+        rangeCheck(a.length, fromIndex, toIndex);
+        int n = toIndex - fromIndex, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
+        else
             new ArraysParallelSortHelpers.FJDouble.Sorter
                 (null, a, new double[n], fromIndex, n, 0,
-                 (g <= MIN_ARRAY_SORT_GRAN) ? MIN_ARRAY_SORT_GRAN : g).invoke();
-        }
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
     }
 
     /**
@@ -811,8 +904,18 @@
      *         ordering of the array elements is found to violate the
      *         {@link Comparable} contract
      */
+    @SuppressWarnings("unchecked")
     public static <T extends Comparable<? super T>> void parallelSort(T[] a) {
-        parallelSort(a, 0, a.length);
+        int n = a.length, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            TimSort.sort(a, 0, n, NaturalOrder.INSTANCE, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJObject.Sorter<T>
+                (null, a,
+                 (T[])Array.newInstance(a.getClass().getComponentType(), n),
+                 0, n, 0, ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g, NaturalOrder.INSTANCE).invoke();
     }
 
     /**
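The reworked parallelSort(T[]) keeps the same external contract: small arrays, or a common pool with parallelism 1, fall back to a sequential TimSort driven by NaturalOrder, and larger arrays are handed to FJObject.Sorter with a freshly allocated workspace array. A plain usage sketch from the caller's side:

    import java.util.Arrays;

    class ParallelSortDemo {
        public static void main(String[] args) {
            String[] words = { "delta", "alpha", "charlie", "bravo" };
            Arrays.parallelSort(words);                   // natural ordering, possibly in parallel
            System.out.println(Arrays.toString(words));   // [alpha, bravo, charlie, delta]
        }
    }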
@@ -854,23 +957,20 @@
      *         not <i>mutually comparable</i> (for example, strings and
      *         integers).
      */
-
+    @SuppressWarnings("unchecked")
     public static <T extends Comparable<? super T>>
     void parallelSort(T[] a, int fromIndex, int toIndex) {
-        checkFromToBounds(a.length, fromIndex, toIndex);
-        int n = toIndex - fromIndex;
-        int p = ForkJoinPool.getCommonPoolParallelism();
-        if (p == 1 || n <= MIN_ARRAY_SORT_GRAN)
-            ComparableTimSort.sort(a, fromIndex, toIndex);
-        else {
-            int g = n / (p << 3);
-            Class<?> tc = a.getClass().getComponentType();
-            @SuppressWarnings("unchecked")
-            T[] ws = (T[])Array.newInstance(tc, n);
-            new ArraysParallelSortHelpers.FJComparable.Sorter<>
-                (null, a, ws, fromIndex, n, 0, 
-                 (g <= MIN_ARRAY_SORT_GRAN) ? MIN_ARRAY_SORT_GRAN : g).invoke();
-        }
+        rangeCheck(a.length, fromIndex, toIndex);
+        int n = toIndex - fromIndex, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            TimSort.sort(a, fromIndex, toIndex, NaturalOrder.INSTANCE, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJObject.Sorter<T>
+                (null, a,
+                 (T[])Array.newInstance(a.getClass().getComponentType(), n),
+                 fromIndex, n, 0, ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g, NaturalOrder.INSTANCE).invoke();
     }
 
     /**
@@ -899,8 +999,20 @@
      * @throws IllegalArgumentException (optional) if the comparator is
      *         found to violate the {@link java.util.Comparator} contract
      */
+    @SuppressWarnings("unchecked")
     public static <T> void parallelSort(T[] a, Comparator<? super T> cmp) {
-        parallelSort(a, 0, a.length, cmp);
+        if (cmp == null)
+            cmp = NaturalOrder.INSTANCE;
+        int n = a.length, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            TimSort.sort(a, 0, n, cmp, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJObject.Sorter<T>
+                (null, a,
+                 (T[])Array.newInstance(a.getClass().getComponentType(), n),
+                 0, n, 0, ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g, cmp).invoke();
     }
 
     /**
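The comparator overloads follow the same pattern, with one addition: a null comparator is replaced by NaturalOrder.INSTANCE before the dispatch, so parallelSort(a, null) and parallelSort(a) behave alike. A usage sketch with an explicit comparator:

    import java.util.Arrays;
    import java.util.Collections;

    class ParallelSortComparatorDemo {
        public static void main(String[] args) {
            Integer[] nums = { 3, 1, 4, 1, 5, 9, 2, 6 };
            Arrays.parallelSort(nums, Collections.reverseOrder());
            System.out.println(Arrays.toString(nums));    // [9, 6, 5, 4, 3, 2, 1, 1]
        }
    }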
@@ -939,22 +1051,22 @@
      *         not <i>mutually comparable</i> (for example, strings and
      *         integers).
      */
+    @SuppressWarnings("unchecked")
     public static <T> void parallelSort(T[] a, int fromIndex, int toIndex,
                                         Comparator<? super T> cmp) {
-        int n = toIndex - fromIndex;
-        int p = ForkJoinPool.getCommonPoolParallelism();
-        if (p == 1 || n <= MIN_ARRAY_SORT_GRAN)
-            TimSort.sort(a, fromIndex, toIndex, cmp);
-        else {
-            int g = n / (p << 3);
-            Class<?> tc = a.getClass().getComponentType();
-            @SuppressWarnings("unchecked")
-                T[] ws = (T[])Array.newInstance(tc, n);
-            new ArraysParallelSortHelpers.FJComparator.Sorter<>
-                (null, a, ws, fromIndex, n, 0,
-                 (g <= MIN_ARRAY_SORT_GRAN) ? MIN_ARRAY_SORT_GRAN : g,
-                 cmp).invoke();
-        }
+        rangeCheck(a.length, fromIndex, toIndex);
+        if (cmp == null)
+            cmp = NaturalOrder.INSTANCE;
+        int n = toIndex - fromIndex, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            TimSort.sort(a, fromIndex, toIndex, cmp, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJObject.Sorter<T>
+                (null, a,
+                 (T[])Array.newInstance(a.getClass().getComponentType(), n),
+                 fromIndex, n, 0, ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g, cmp).invoke();
     }
 
     /*
@@ -974,39 +1086,6 @@
                     "java.util.Arrays.useLegacyMergeSort")).booleanValue();
     }
 
-    /*
-     * If this platform has an optimizing VM, check whether ComparableTimSort
-     * offers any performance benefit over TimSort in conjunction with a
-     * comparator that returns:
-     *    {@code ((Comparable)first).compareTo(Second)}.
-     * If not, you are better off deleting ComparableTimSort to
-     * eliminate the code duplication.  In other words, the commented
-     * out code below is the preferable implementation for sorting
-     * arrays of Comparables if it offers sufficient performance.
-     */
-
-//    /**
-//     * A comparator that implements the natural ordering of a group of
-//     * mutually comparable elements.  Using this comparator saves us
-//     * from duplicating most of the code in this file (one version for
-//     * Comparables, one for explicit Comparators).
-//     */
-//    private static final Comparator<Object> NATURAL_ORDER =
-//            new Comparator<Object>() {
-//        @SuppressWarnings("unchecked")
-//        public int compare(Object first, Object second) {
-//            return ((Comparable<Object>)first).compareTo(second);
-//        }
-//    };
-//
-//    public static void sort(Object[] a) {
-//        sort(a, 0, a.length, NATURAL_ORDER);
-//    }
-//
-//    public static void sort(Object[] a, int fromIndex, int toIndex) {
-//        sort(a, fromIndex, toIndex, NATURAL_ORDER);
-//    }
-
     /**
      * Sorts the specified array of objects into ascending order, according
      * to the {@linkplain Comparable natural ordering} of its elements.
@@ -1053,7 +1132,7 @@
         if (LegacyMergeSort.userRequested)
             legacyMergeSort(a);
         else
-            ComparableTimSort.sort(a);
+            ComparableTimSort.sort(a, 0, a.length, null, 0, 0);
     }
 
     /** To be removed in a future release. */
@@ -1115,16 +1194,16 @@
      *         integers).
      */
     public static void sort(Object[] a, int fromIndex, int toIndex) {
+        rangeCheck(a.length, fromIndex, toIndex);
         if (LegacyMergeSort.userRequested)
             legacyMergeSort(a, fromIndex, toIndex);
         else
-            ComparableTimSort.sort(a, fromIndex, toIndex);
+            ComparableTimSort.sort(a, fromIndex, toIndex, null, 0, 0);
     }
 
     /** To be removed in a future release. */
     private static void legacyMergeSort(Object[] a,
                                         int fromIndex, int toIndex) {
-        rangeCheck(a.length, fromIndex, toIndex);
         Object[] aux = copyOfRange(a, fromIndex, toIndex);
         mergeSort(aux, a, fromIndex, toIndex, -fromIndex);
     }
@@ -1238,10 +1317,12 @@
      *         found to violate the {@link Comparator} contract
      */
     public static <T> void sort(T[] a, Comparator<? super T> c) {
+        if (c == null)
+            c = NaturalOrder.INSTANCE;
         if (LegacyMergeSort.userRequested)
             legacyMergeSort(a, c);
         else
-            TimSort.sort(a, c);
+            TimSort.sort(a, 0, a.length, c, null, 0, 0);
     }
 
     /** To be removed in a future release. */
@@ -1306,16 +1387,18 @@
      */
     public static <T> void sort(T[] a, int fromIndex, int toIndex,
                                 Comparator<? super T> c) {
+        if (c == null)
+            c = NaturalOrder.INSTANCE;
+        rangeCheck(a.length, fromIndex, toIndex);
         if (LegacyMergeSort.userRequested)
             legacyMergeSort(a, fromIndex, toIndex, c);
         else
-            TimSort.sort(a, fromIndex, toIndex, c);
+            TimSort.sort(a, fromIndex, toIndex, c, null, 0, 0);
     }
 
     /** To be removed in a future release. */
     private static <T> void legacyMergeSort(T[] a, int fromIndex, int toIndex,
                                             Comparator<? super T> c) {
-        rangeCheck(a.length, fromIndex, toIndex);
         T[] aux = copyOfRange(a, fromIndex, toIndex);
         if (c==null)
             mergeSort(aux, a, fromIndex, toIndex, -fromIndex);
@@ -1371,23 +1454,6 @@
         }
     }
 
-    /**
-     * Checks that {@code fromIndex} and {@code toIndex} are in
-     * the range and throws an appropriate exception, if they aren't.
-     */
-    private static void rangeCheck(int length, int fromIndex, int toIndex) {
-        if (fromIndex > toIndex) {
-            throw new IllegalArgumentException(
-                "fromIndex(" + fromIndex + ") > toIndex(" + toIndex + ")");
-        }
-        if (fromIndex < 0) {
-            throw new ArrayIndexOutOfBoundsException(fromIndex);
-        }
-        if (toIndex > length) {
-            throw new ArrayIndexOutOfBoundsException(toIndex);
-        }
-    }
-
     // Parallel prefix
 
     /**
@@ -1426,7 +1492,7 @@
      */
     public static <T> void parallelPrefix(T[] array, int fromIndex,
                                           int toIndex, BinaryOperator<T> op) {
-        checkFromToBounds(array.length, fromIndex, toIndex);
+        rangeCheck(array.length, fromIndex, toIndex);
         if (fromIndex < toIndex)
             new ArrayPrefixHelpers.CumulateTask<>
                     (null, op, array, fromIndex, toIndex).invoke();
@@ -1468,7 +1534,7 @@
      */
     public static void parallelPrefix(long[] array, int fromIndex,
                                       int toIndex, LongBinaryOperator op) {
-        checkFromToBounds(array.length, fromIndex, toIndex);
+        rangeCheck(array.length, fromIndex, toIndex);
         if (fromIndex < toIndex)
             new ArrayPrefixHelpers.LongCumulateTask
                     (null, op, array, fromIndex, toIndex).invoke();
@@ -1510,7 +1576,7 @@
      */
     public static void parallelPrefix(double[] array, int fromIndex,
                                       int toIndex, DoubleBinaryOperator op) {
-        checkFromToBounds(array.length, fromIndex, toIndex);
+        rangeCheck(array.length, fromIndex, toIndex);
         if (fromIndex < toIndex)
             new ArrayPrefixHelpers.DoubleCumulateTask
                     (null, op, array, fromIndex, toIndex).invoke();
@@ -1552,7 +1618,7 @@
      */
     public static void parallelPrefix(int[] array, int fromIndex,
                                       int toIndex, IntBinaryOperator op) {
-        checkFromToBounds(array.length, fromIndex, toIndex);
+        rangeCheck(array.length, fromIndex, toIndex);
         if (fromIndex < toIndex)
             new ArrayPrefixHelpers.IntCumulateTask
                     (null, op, array, fromIndex, toIndex).invoke();
@@ -4561,23 +4627,6 @@
     }
 
     /**
-     * Checks that {@code fromIndex} and {@code toIndex} are in
-     * the range and throws an exception if they aren't.
-     */
-    private static void checkFromToBounds(int arrayLength, int fromIndex, int toIndex) {
-        if (fromIndex > toIndex) {
-            throw new IllegalArgumentException(
-                    "fromIndex(" + fromIndex + ") > toIndex(" + toIndex + ")");
-        }
-        if (fromIndex < 0) {
-            throw new ArrayIndexOutOfBoundsException(fromIndex);
-        }
-        if (toIndex > arrayLength) {
-            throw new ArrayIndexOutOfBoundsException(toIndex);
-        }
-    }
-
-    /**
      * Creates a {@link Spliterator} covering all of the specified array.
      *
      * <p>The spliterator reports {@link Spliterator#SIZED},
@@ -4586,7 +4635,7 @@
      *
      * @param <T> Type of elements
      * @param array The array, assumed to be unmodified during use
-     * @return A spliterator from an array
+     * @return A spliterator from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @since 1.8
      */
@@ -4607,7 +4656,7 @@
      * @param array The array, assumed to be unmodified during use
      * @param fromIndex The least index (inclusive) to cover
      * @param toIndex One past the greatest index to cover
-     * @return A spliterator from an array
+     * @return A spliterator from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @throws ArrayIndexOutOfBoundsException if {@code fromIndex} is negative,
      *         {@code toIndex} is less than {@code fromIndex}, or
@@ -4627,7 +4676,7 @@
      * {@link Spliterator#IMMUTABLE}.
      *
      * @param array The array, assumed to be unmodified during use
-     * @return A spliterator from an array
+     * @return A spliterator from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @since 1.8
      */
@@ -4647,7 +4696,7 @@
      * @param array The array, assumed to be unmodified during use
      * @param fromIndex The least index (inclusive) to cover
      * @param toIndex One past the greatest index to cover
-     * @return A spliterator from an array
+     * @return A spliterator from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @throws ArrayIndexOutOfBoundsException if {@code fromIndex} is negative,
      *         {@code toIndex} is less than {@code fromIndex}, or
@@ -4667,7 +4716,7 @@
      * {@link Spliterator#IMMUTABLE}.
      *
      * @param array The array, assumed to be unmodified during use
-     * @return A spliterator from an array
+     * @return A spliterator from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @since 1.8
      */
@@ -4687,7 +4736,7 @@
      * @param array The array, assumed to be unmodified during use
      * @param fromIndex The least index (inclusive) to cover
      * @param toIndex One past the greatest index to cover
-     * @return A spliterator from an array
+     * @return A spliterator from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @throws ArrayIndexOutOfBoundsException if {@code fromIndex} is negative,
      *         {@code toIndex} is less than {@code fromIndex}, or
@@ -4708,7 +4757,7 @@
      * {@link Spliterator#IMMUTABLE}.
      *
      * @param array The array, assumed to be unmodified during use
-     * @return A spliterator from an array
+     * @return A spliterator from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @since 1.8
      */
@@ -4728,7 +4777,7 @@
      * @param array The array, assumed to be unmodified during use
      * @param fromIndex The least index (inclusive) to cover
      * @param toIndex One past the greatest index to cover
-     * @return A spliterator from an array
+     * @return A spliterator from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @throws ArrayIndexOutOfBoundsException if {@code fromIndex} is negative,
      *         {@code toIndex} is less than {@code fromIndex}, or
@@ -4746,7 +4795,7 @@
      *
      * @param <T> The type of the array elements
      * @param array The array, assumed to be unmodified during use
-     * @return A {@code Stream} from an array
+     * @return A {@code Stream} from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @since 1.8
      */
@@ -4763,7 +4812,7 @@
      * @param fromIndex The index of the first element (inclusive) to be
      *        encountered
      * @param toIndex One past the index of the last element to be encountered
-     * @return A {@code Stream} from an array
+     * @return A {@code Stream} from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @throws ArrayIndexOutOfBoundsException if {@code fromIndex} is negative,
      *         {@code toIndex} is less than {@code fromIndex}, or
@@ -4779,7 +4828,7 @@
      * source.
      *
      * @param array The array, assumed to be unmodified during use
-     * @return An {@code IntStream} from an array
+     * @return An {@code IntStream} from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @since 1.8
      */
@@ -4795,7 +4844,7 @@
      * @param fromIndex The index of the first element (inclusive) to be
      *        encountered
      * @param toIndex One past the index of the last element to be encountered
-     * @return An {@code IntStream} from an array
+     * @return An {@code IntStream} from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @throws ArrayIndexOutOfBoundsException if {@code fromIndex} is negative,
      *         {@code toIndex} is less than {@code fromIndex}, or
@@ -4811,7 +4860,7 @@
      * source.
      *
      * @param array The array, assumed to be unmodified during use
-     * @return A {@code LongStream} from an array
+     * @return A {@code LongStream} from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @since 1.8
      */
@@ -4827,7 +4876,7 @@
      * @param fromIndex The index of the first element (inclusive) to be
      *        encountered
      * @param toIndex One past the index of the last element to be encountered
-     * @return A {@code LongStream} from an array
+     * @return A {@code LongStream} from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @throws ArrayIndexOutOfBoundsException if {@code fromIndex} is negative,
      *         {@code toIndex} is less than {@code fromIndex}, or
@@ -4843,7 +4892,7 @@
      * source.
      *
      * @param array The array, assumed to be unmodified during use
-     * @return A {@code DoubleStream} from an array
+     * @return A {@code DoubleStream} from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @since 1.8
      */
@@ -4859,7 +4908,7 @@
      * @param fromIndex The index of the first element (inclusive) to be
      *        encountered
      * @param toIndex One past the index of the last element to be encountered
-     * @return A {@code DoubleStream} from an array
+     * @return A {@code DoubleStream} from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @throws ArrayIndexOutOfBoundsException if {@code fromIndex} is negative,
      *         {@code toIndex} is less than {@code fromIndex}, or
@@ -4876,7 +4925,7 @@
      *
      * @param <T> The type of the array elements
      * @param array The array, assumed to be unmodified during use
-     * @return A {@code Stream} from an array
+     * @return A {@code Stream} from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @since 1.8
      */
@@ -4893,7 +4942,7 @@
      * @param fromIndex The index of the first element (inclusive) to be
      *        encountered
      * @param toIndex One past the index of the last element to be encountered
-     * @return A {@code Stream} from an array
+     * @return A {@code Stream} from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @throws ArrayIndexOutOfBoundsException if {@code fromIndex} is negative,
      *         {@code toIndex} is less than {@code fromIndex}, or
@@ -4909,7 +4958,7 @@
      * source.
      *
      * @param array The array, assumed to be unmodified during use
-     * @return An {@code IntStream} from an array
+     * @return An {@code IntStream} from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @since 1.8
      */
@@ -4925,7 +4974,7 @@
      * @param fromIndex The index of the first element (inclusive) to be
      *        encountered
      * @param toIndex One past the index of the last element to be encountered
-     * @return An {@code IntStream} from an array
+     * @return An {@code IntStream} from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @throws ArrayIndexOutOfBoundsException if {@code fromIndex} is negative,
      *         {@code toIndex} is less than {@code fromIndex}, or
@@ -4941,7 +4990,7 @@
      * source.
      *
      * @param array The array, assumed to be unmodified during use
-     * @return A {@code LongStream} from an array
+     * @return A {@code LongStream} from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @since 1.8
      */
@@ -4957,7 +5006,7 @@
      * @param fromIndex The index of the first element (inclusive) to be
      *        encountered
      * @param toIndex One past the index of the last element to be encountered
-     * @return A {@code LongStream} from an array
+     * @return A {@code LongStream} from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @throws ArrayIndexOutOfBoundsException if {@code fromIndex} is negative,
      *         {@code toIndex} is less than {@code fromIndex}, or
@@ -4973,7 +5022,7 @@
      * source.
      *
      * @param array The array, assumed to be unmodified during use
-     * @return A {@code DoubleStream} from an array
+     * @return A {@code DoubleStream} from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @since 1.8
      */
@@ -4989,7 +5038,7 @@
      * @param fromIndex The index of the first element (inclusive) to be
      *        encountered
      * @param toIndex One past the index of the last element to be encountered
-     * @return A {@code DoubleStream} from an array
+     * @return A {@code DoubleStream} from the array
      * @throws NullPointerException if the specified array is {@code null}
      * @throws ArrayIndexOutOfBoundsException if {@code fromIndex} is negative,
      *         {@code toIndex} is less than {@code fromIndex}, or
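The remaining Arrays.java hunks only tighten the @return wording for the new spliterator and stream factories ("from the array" rather than "from an array"). For context, a brief usage sketch of two of those factories as they appear in this line of development (pre-release API, so details may still shift):

    import java.util.Arrays;
    import java.util.Spliterator;

    class ArrayStreamSketch {
        public static void main(String[] args) {
            int[] data = { 4, 2, 7 };
            int sum = Arrays.stream(data).sum();             // stream over the whole array
            Spliterator.OfInt sp = Arrays.spliterator(data); // reports at least SIZED and IMMUTABLE
            System.out.println(sum);                         // 13
        }
    }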
--- a/src/share/classes/java/util/ArraysParallelSortHelpers.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/ArraysParallelSortHelpers.java	Mon Apr 08 16:19:46 2013 -0700
@@ -50,36 +50,36 @@
  * (workspace and main swap roles on each subsort step.)  Leaf-level
  * sorts use the associated sequential sort.
  *
- * Merger classes perform merging for Sorter. If big enough, it splits
- * the Left partition in half; finds the greatest point in Right
- * partition less than the beginning of the second half of Left via
- * binary search; and then, in parallel, merges left half of Left with
- * elements of Right up to split point, and merges right half of Left
- * with elements of R past split point. At leaf, it just sequentially
- * merges. The Mergers also include checks for excessive imbalances
- * (hardwired to stop if less than threshold/8 elements on either
- * side) and adjust for duplicate key runs on each side by placing
- * merge bounds at the beginnings of runs. This is necessary to
- * maintain sort-stability in the Object versions, and further helps
- * avoid imbalances in others.
+ * Merger classes perform merging for Sorter.  They are structured
+ * such that if the underlying sort is stable (as is true for
+ * TimSort), then so is the full sort.  If big enough, they split the
+ * largest of the two partitions in half, find the greatest point in
+ * smaller partition less than the beginning of the second half of
+ * larger via binary search; and then merge in parallel the two
+ * partitions.  In part to ensure tasks are triggered in
+ * stability-preserving order, the current CountedCompleter design
+ * requires some little tasks to serve as place holders for triggering
+ * completion tasks.  These classes (EmptyCompleter and Relay) don't
+ * need to keep track of the arrays, and are never themselves forked,
+ * so don't hold any task state.
  *
- * The current CountedCompleter design also requires some little tasks
- * to serve as place holders for triggering the merges and re-merges.
- * These classes (EmptyCompleter and Relay) don't need to keep track
- * of the arrays, and are never themselves forked, so don't hold any
- * task state.
+ * The primitive class versions (FJByte... FJDouble) are
+ * identical to each other except for type declarations.
+ *
+ * The base sequential sorts rely on non-public versions of TimSort,
+ * ComparableTimSort, and DualPivotQuicksort sort methods that accept
+ * temp workspace array slices that we will have already allocated, so
+ * avoids redundant allocation. (Except for DualPivotQuicksort byte[]
+ * sort, that does not ever use a workspace array.)
  */
 /*package*/ class ArraysParallelSortHelpers {
 
     /*
-     * The primitive class versions (FJByte... FJDouble) classes are
-     * identical to FJComparable version, except removing task class
-     * parameter, replacing T with type, and replacing a.compareTo(b)
-     * with a relop b.
-     *
-     * The task classes have a lot of parameters, that are copied to
-     * local variables and used in compute() methods, We place these
-     * in as few lines as possible to minimize distraction.
+     * Style note: The task classes have a lot of parameters, that are
+     * stored as task fields and copied to local variables and used in
+     * compute() methods, We pack these into as few lines as possible,
+     * and hoist consistency checks among them before main loops, to
+     * reduce distraction.
      */
 
     /**
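The rewritten class comment describes the split step the new Merger performs: halve the larger of the two sorted runs, binary-search the smaller run for the matching split point, and then merge the two resulting pairs independently, in parallel. A standalone sketch of that split search, assuming plain int keys; the real code is written per element type and, for objects, uses the supplied comparator:

    // Returns the first index in [lo, hi) of the smaller sorted run whose element
    // is >= pivot, where pivot is the first element of the larger run's second
    // half. Elements before the returned index merge with the larger run's first
    // half; the rest merge with its second half.
    final class MergeSplitSketch {
        static int splitPoint(int[] run, int lo, int hi, int pivot) {
            while (lo < hi) {
                int mid = (lo + hi) >>> 1;
                if (run[mid] < pivot)
                    lo = mid + 1;
                else
                    hi = mid;
            }
            return lo;
        }
    }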
@@ -108,120 +108,8 @@
         }
     }
 
-    /** Comparable support class */
-    static final class FJComparable {
-        static final class Sorter<T extends Comparable<? super T>>
-            extends CountedCompleter<Void> {
-            static final long serialVersionUID = 2446542900576103244L;
-            final T[] a, w;
-            final int base, size, wbase, gran;
-            Sorter(CountedCompleter<?> par, T[] a, T[] w, int base, int size,
-                   int wbase, int gran) {
-                super(par);
-                this.a = a; this.w = w; this.base = base; this.size = size;
-                this.wbase = wbase; this.gran = gran;
-            }
-            public final void compute() {
-                CountedCompleter<?> s = this;
-                T[] a = this.a, w = this.w; // localize all params
-                int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
-                while (n > g) {
-                    int h = n >>> 1, q = n >>> 2, u = h + q; // quartiles
-                    int qb = b + q, hb = b + h, ub = b + u;  // bases
-                    int wqb = wb + q, whb = wb + h, wub = wb + u;
-                    int hq = h - q, hn = n - h, un = n - u;  // sizes
-                    n = q;
-                    Relay fc = new Relay(new Merger<>(s,  w, a, wb,  h,
-                                                      whb, hn, b,  g));
-                    Relay rc = new Relay(new Merger<>(fc, a, w, hb, q,
-                                                      ub, un, whb, g));
-                    Relay bc = new Relay(new Merger<>(fc, a, w, b,  q,
-                                                      qb, hq, wb,  g));
-                    s = new EmptyCompleter(bc);
-                    Sorter<T> su = new Sorter<>(rc, a, w, ub, un, wub, g);
-                    Sorter<T> sh = new Sorter<>(rc, a, w, hb, q,  whb, g);
-                    Sorter<T> sq = new Sorter<>(bc, a, w, qb, hq, wqb, g);
-                    su.fork();
-                    sh.fork();
-                    sq.fork();
-                }
-                ComparableTimSort.sort(a, b, b + n, w, wb, n);
-                s.tryComplete();
-            }
-        }
-
-        static final class Merger<T extends Comparable<? super T>>
-            extends CountedCompleter<Void> {
-            static final long serialVersionUID = 2446542900576103244L;
-            final T[] a, w; // main and workspace arrays
-            final int lbase, lsize, rbase, rsize, wbase, gran;
-            Merger(CountedCompleter<?> par, T[] a, T[] w,
-                   int lbase, int lsize, int rbase,
-                   int rsize, int wbase, int gran) {
-                super(par);
-                this.a = a; this.w = w;
-                this.lbase = lbase; this.lsize = lsize;
-                this.rbase = rbase; this.rsize = rsize;
-                this.wbase = wbase; this.gran = gran;
-            }
-
-            public final void compute() {
-                T[] a = this.a, w = this.w; // localize all params
-                int lb = this.lbase, ln = this.lsize, rb = this.rbase,
-                    rn = this.rsize, k = this.wbase, g = this.gran;
-                int eighth = g >>> 3;
-                for (;;) {
-                    int lh = ln >>> 1;
-                    T split = a[lb + lh];
-                    int rh = rn;
-                    for (int rl = 0; rl < rh;) {
-                        int rm = (rl + rh) >>> 1;
-                        if (split.compareTo(a[rb + rm]) <= 0)
-                            rh = rm;
-                        else
-                            rl = rm + 1;
-                    }
-                    int s, sl, sr;
-                    while (rh > eighth && split.compareTo(a[rb + rh - 1]) == 0)
-                        --rh; // back up right to beginning of run
-                    if (rh <= eighth)
-                        break; // too imbalanced
-                    while (lh > eighth && split.compareTo(a[lb + lh - 1]) == 0)
-                        --lh; // back up left
-                    if (lh <= eighth || (s = lh + rh) <= g || // left too small
-                        (sl = ln - lh) + (sr = rn - rh) <= g) // right too small
-                        break;
-                    addToPendingCount(1);
-                    new Merger<>(this, a, w, lb + lh, sl, rb + rh, sr, k + s,
-                                 g).fork();
-                    rn = rh;
-                    ln = lh;
-                }
-
-                int lf = lb + ln, rf = rb + rn; // index bounds
-                while (lb < lf && rb < rf) {
-                    T t, al, ar;
-                    if ((al = a[lb]).compareTo(ar = a[rb]) <= 0) {
-                        lb++; t = al;
-                    }
-                    else {
-                        rb++; t = ar;
-                    }
-                    w[k++] = t;
-                }
-                if (rb < rf)
-                    System.arraycopy(a, rb, w, k, rf - rb);
-                else if (lb < lf)
-                    System.arraycopy(a, lb, w, k, lf - lb);
-
-                tryComplete();
-            }
-        }
-
-    } // FJComparable
-
     /** Object + Comparator support class */
-    static final class FJComparator {
+    static final class FJObject {
         static final class Sorter<T> extends CountedCompleter<Void> {
             static final long serialVersionUID = 2446542900576103244L;
             final T[] a, w;
@@ -241,24 +129,18 @@
                 T[] a = this.a, w = this.w; // localize all params
                 int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
                 while (n > g) {
-                    int h = n >>> 1, q = n >>> 2, u = h + q; // quartiles
-                    int qb = b + q, hb = b + h, ub = b + u;  // bases
-                    int wqb = wb + q, whb = wb + h, wub = wb + u;
-                    int hq = h - q, hn = n - h, un = n - u;  // sizes
+                    int h = n >>> 1, q = h >>> 1, u = h + q; // quartiles
+                    Relay fc = new Relay(new Merger<T>(s, w, a, wb, h,
+                                                       wb+h, n-h, b, g, c));
+                    Relay rc = new Relay(new Merger<T>(fc, a, w, b+h, q,
+                                                       b+u, n-u, wb+h, g, c));
+                    new Sorter<T>(rc, a, w, b+u, n-u, wb+u, g, c).fork();
+                    new Sorter<T>(rc, a, w, b+h, q, wb+h, g, c).fork();;
+                    Relay bc = new Relay(new Merger<T>(fc, a, w, b, q,
+                                                       b+q, h-q, wb, g, c));
+                    new Sorter<T>(bc, a, w, b+q, h-q, wb+q, g, c).fork();
+                    s = new EmptyCompleter(bc);
                     n = q;
-                    Relay fc = new Relay(new Merger<>(s,  w, a, wb,  h,
-                                                      whb, hn, b,  g, c));
-                    Relay rc = new Relay(new Merger<>(fc, a, w, hb, q,
-                                                      ub, un, whb, g, c));
-                    Relay bc = new Relay(new Merger<>(fc, a, w, b,  q,
-                                                      qb, hq, wb,  g, c));
-                    s = new EmptyCompleter(bc);
-                    Sorter<T> su = new Sorter<>(rc, a, w, ub, un, wub, g, c);
-                    Sorter<T> sh = new Sorter<>(rc, a, w, hb, q,  whb, g, c);
-                    Sorter<T> sq = new Sorter<>(bc, a, w, qb, hq, wqb, g, c);
-                    su.fork();
-                    sh.fork();
-                    sq.fork();
                 }
                 TimSort.sort(a, b, b + n, c, w, wb, n);
                 s.tryComplete();
@@ -287,33 +169,43 @@
                 T[] a = this.a, w = this.w; // localize all params
                 int lb = this.lbase, ln = this.lsize, rb = this.rbase,
                     rn = this.rsize, k = this.wbase, g = this.gran;
-                int eighth = g >>> 3;
-                for (;;) {
-                    int lh = ln >>> 1;
-                    T split = a[lb + lh];
-                    int rh = rn;
-                    for (int rl = 0; rl < rh;) {
-                        int rm = (rl + rh) >>> 1;
-                        if (c.compare(split, a[rb + rm]) <= 0)
-                            rh = rm;
-                        else
-                            rl = rm + 1;
+                if (a == null || w == null || lb < 0 || rb < 0 || k < 0 ||
+                    c == null)
+                    throw new IllegalStateException(); // hoist checks
+                for (int lh, rh;;) {  // split larger, find point in smaller
+                    if (ln >= rn) {
+                        if (ln <= g)
+                            break;
+                        rh = rn;
+                        T split = a[(lh = ln >>> 1) + lb];
+                        for (int lo = 0; lo < rh; ) {
+                            int rm = (lo + rh) >>> 1;
+                            if (c.compare(split, a[rm + rb]) <= 0)
+                                rh = rm;
+                            else
+                                lo = rm + 1;
+                        }
                     }
-                    int s, sl, sr;
-                    while (rh > eighth && c.compare(split, a[rb + rh - 1]) == 0)
-                        --rh; // back up right to beginning of run
-                    if (rh <= eighth)
-                        break; // too imbalanced
-                    while (lh > eighth && c.compare(split, a[lb + lh - 1]) == 0)
-                        --lh; // back up left
-                    if (lh <= eighth || (s = lh + rh) <= g || // left too small
-                        (sl = ln - lh) + (sr = rn - rh) <= g) // right too small
-                        break;
-                    addToPendingCount(1);
-                    new Merger<>(this, a, w, lb + lh, sl, rb + rh, sr, k + s,
-                                 g, c).fork();
+                    else {
+                        if (rn <= g)
+                            break;
+                        lh = ln;
+                        T split = a[(rh = rn >>> 1) + rb];
+                        for (int lo = 0; lo < lh; ) {
+                            int lm = (lo + lh) >>> 1;
+                            if (c.compare(split, a[lm + lb]) <= 0)
+                                lh = lm;
+                            else
+                                lo = lm + 1;
+                        }
+                    }
+                    Merger<T> m = new Merger<T>(this, a, w, lb + lh, ln - lh,
+                                                rb + rh, rn - rh,
+                                                k + lh + rh, g, c);
                     rn = rh;
                     ln = lh;
+                    addToPendingCount(1);
+                    m.fork();
                 }
 
                 int lf = lb + ln, rf = rb + rn; // index bounds
@@ -334,8 +226,9 @@
 
                 tryComplete();
             }
+
         }
-    } // FJComparator
+    } // FJObject
 
     /** byte support class */
     static final class FJByte {
@@ -343,8 +236,8 @@
             static final long serialVersionUID = 2446542900576103244L;
             final byte[] a, w;
             final int base, size, wbase, gran;
-            Sorter(CountedCompleter<?> par, byte[] a, byte[] w, int base, int size,
-                   int wbase, int gran) {
+            Sorter(CountedCompleter<?> par, byte[] a, byte[] w, int base,
+                   int size, int wbase, int gran) {
                 super(par);
                 this.a = a; this.w = w; this.base = base; this.size = size;
                 this.wbase = wbase; this.gran = gran;
@@ -354,24 +247,18 @@
                 byte[] a = this.a, w = this.w; // localize all params
                 int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
                 while (n > g) {
-                    int h = n >>> 1, q = n >>> 2, u = h + q; // quartiles
-                    int qb = b + q, hb = b + h, ub = b + u;  // bases
-                    int wqb = wb + q, whb = wb + h, wub = wb + u;
-                    int hq = h - q, hn = n - h, un = n - u;  // sizes
+                    int h = n >>> 1, q = h >>> 1, u = h + q; // quartiles
+                    Relay fc = new Relay(new Merger(s, w, a, wb, h,
+                                                    wb+h, n-h, b, g));
+                    Relay rc = new Relay(new Merger(fc, a, w, b+h, q,
+                                                    b+u, n-u, wb+h, g));
+                    new Sorter(rc, a, w, b+u, n-u, wb+u, g).fork();
+                    new Sorter(rc, a, w, b+h, q, wb+h, g).fork();
+                    Relay bc = new Relay(new Merger(fc, a, w, b, q,
+                                                    b+q, h-q, wb, g));
+                    new Sorter(bc, a, w, b+q, h-q, wb+q, g).fork();
+                    s = new EmptyCompleter(bc);
                     n = q;
-                    Relay fc = new Relay(new Merger(s,  w, a, wb,  h,
-                                                    whb, hn, b,  g));
-                    Relay rc = new Relay(new Merger(fc, a, w, hb, q,
-                                                    ub, un, whb, g));
-                    Relay bc = new Relay(new Merger(fc, a, w, b,  q,
-                                                    qb, hq, wb,  g));
-                    s = new EmptyCompleter(bc);
-                    Sorter su = new Sorter(rc, a, w, ub, un, wub, g);
-                    Sorter sh = new Sorter(rc, a, w, hb, q,  whb, g);
-                    Sorter sq = new Sorter(bc, a, w, qb, hq, wqb, g);
-                    su.fork();
-                    sh.fork();
-                    sq.fork();
                 }
                 DualPivotQuicksort.sort(a, b, b + n - 1);
                 s.tryComplete();
@@ -396,33 +283,42 @@
                 byte[] a = this.a, w = this.w; // localize all params
                 int lb = this.lbase, ln = this.lsize, rb = this.rbase,
                     rn = this.rsize, k = this.wbase, g = this.gran;
-                int eighth = g >>> 3;
-                for (;;) {
-                    int lh = ln >>> 1;
-                    byte split = a[lb + lh];
-                    int rh = rn;
-                    for (int rl = 0; rl < rh;) {
-                        int rm = (rl + rh) >>> 1;
-                        if (split <= a[rb + rm])
-                            rh = rm;
-                        else
-                            rl = rm + 1;
+                if (a == null || w == null || lb < 0 || rb < 0 || k < 0)
+                    throw new IllegalStateException(); // hoist checks
+                for (int lh, rh;;) {  // split larger, find point in smaller
+                    if (ln >= rn) {
+                        if (ln <= g)
+                            break;
+                        rh = rn;
+                        byte split = a[(lh = ln >>> 1) + lb];
+                        for (int lo = 0; lo < rh; ) {
+                            int rm = (lo + rh) >>> 1;
+                            if (split <= a[rm + rb])
+                                rh = rm;
+                            else
+                                lo = rm + 1;
+                        }
                     }
-                    int s, sl, sr;
-                    while (rh > eighth && split == a[rb + rh - 1])
-                        --rh; // back up right to beginning of run
-                    if (rh <= eighth)
-                        break; // too imbalanced
-                    while (lh > eighth && split == a[lb + lh - 1])
-                        --lh; // back up left
-                    if (lh <= eighth || (s = lh + rh) <= g || // left too small
-                        (sl = ln - lh) + (sr = rn - rh) <= g) // right too small
-                        break;
-                    addToPendingCount(1);
-                    new Merger(this, a, w, lb + lh, sl, rb + rh, sr, k + s,
-                               g).fork();
+                    else {
+                        if (rn <= g)
+                            break;
+                        lh = ln;
+                        byte split = a[(rh = rn >>> 1) + rb];
+                        for (int lo = 0; lo < lh; ) {
+                            int lm = (lo + lh) >>> 1;
+                            if (split <= a[lm + lb])
+                                lh = lm;
+                            else
+                                lo = lm + 1;
+                        }
+                    }
+                    Merger m = new Merger(this, a, w, lb + lh, ln - lh,
+                                          rb + rh, rn - rh,
+                                          k + lh + rh, g);
                     rn = rh;
                     ln = lh;
+                    addToPendingCount(1);
+                    m.fork();
                 }
 
                 int lf = lb + ln, rf = rb + rn; // index bounds
@@ -440,7 +336,6 @@
                     System.arraycopy(a, rb, w, k, rf - rb);
                 else if (lb < lf)
                     System.arraycopy(a, lb, w, k, lf - lb);
-
                 tryComplete();
             }
         }
@@ -452,8 +347,8 @@
             static final long serialVersionUID = 2446542900576103244L;
             final char[] a, w;
             final int base, size, wbase, gran;
-            Sorter(CountedCompleter<?> par, char[] a, char[] w, int base, int size,
-                   int wbase, int gran) {
+            Sorter(CountedCompleter<?> par, char[] a, char[] w, int base,
+                   int size, int wbase, int gran) {
                 super(par);
                 this.a = a; this.w = w; this.base = base; this.size = size;
                 this.wbase = wbase; this.gran = gran;
@@ -463,26 +358,20 @@
                 char[] a = this.a, w = this.w; // localize all params
                 int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
                 while (n > g) {
-                    int h = n >>> 1, q = n >>> 2, u = h + q; // quartiles
-                    int qb = b + q, hb = b + h, ub = b + u;  // bases
-                    int wqb = wb + q, whb = wb + h, wub = wb + u;
-                    int hq = h - q, hn = n - h, un = n - u;  // sizes
+                    int h = n >>> 1, q = h >>> 1, u = h + q; // quartiles
+                    Relay fc = new Relay(new Merger(s, w, a, wb, h,
+                                                    wb+h, n-h, b, g));
+                    Relay rc = new Relay(new Merger(fc, a, w, b+h, q,
+                                                    b+u, n-u, wb+h, g));
+                    new Sorter(rc, a, w, b+u, n-u, wb+u, g).fork();
+                    new Sorter(rc, a, w, b+h, q, wb+h, g).fork();
+                    Relay bc = new Relay(new Merger(fc, a, w, b, q,
+                                                    b+q, h-q, wb, g));
+                    new Sorter(bc, a, w, b+q, h-q, wb+q, g).fork();
+                    s = new EmptyCompleter(bc);
                     n = q;
-                    Relay fc = new Relay(new Merger(s,  w, a, wb,  h,
-                                                    whb, hn, b,  g));
-                    Relay rc = new Relay(new Merger(fc, a, w, hb, q,
-                                                    ub, un, whb, g));
-                    Relay bc = new Relay(new Merger(fc, a, w, b,  q,
-                                                    qb, hq, wb,  g));
-                    s = new EmptyCompleter(bc);
-                    Sorter su = new Sorter(rc, a, w, ub, un, wub, g);
-                    Sorter sh = new Sorter(rc, a, w, hb, q,  whb, g);
-                    Sorter sq = new Sorter(bc, a, w, qb, hq, wqb, g);
-                    su.fork();
-                    sh.fork();
-                    sq.fork();
                 }
-                DualPivotQuicksort.sort(a, b, b + n - 1);
+                DualPivotQuicksort.sort(a, b, b + n - 1, w, wb, n);
                 s.tryComplete();
             }
         }
@@ -505,33 +394,42 @@
                 char[] a = this.a, w = this.w; // localize all params
                 int lb = this.lbase, ln = this.lsize, rb = this.rbase,
                     rn = this.rsize, k = this.wbase, g = this.gran;
-                int eighth = g >>> 3;
-                for (;;) {
-                    int lh = ln >>> 1;
-                    char split = a[lb + lh];
-                    int rh = rn;
-                    for (int rl = 0; rl < rh;) {
-                        int rm = (rl + rh) >>> 1;
-                        if (split <= a[rb + rm])
-                            rh = rm;
-                        else
-                            rl = rm + 1;
+                if (a == null || w == null || lb < 0 || rb < 0 || k < 0)
+                    throw new IllegalStateException(); // hoist checks
+                for (int lh, rh;;) {  // split larger, find point in smaller
+                    if (ln >= rn) {
+                        if (ln <= g)
+                            break;
+                        rh = rn;
+                        char split = a[(lh = ln >>> 1) + lb];
+                        for (int lo = 0; lo < rh; ) {
+                            int rm = (lo + rh) >>> 1;
+                            if (split <= a[rm + rb])
+                                rh = rm;
+                            else
+                                lo = rm + 1;
+                        }
                     }
-                    int s, sl, sr;
-                    while (rh > eighth && split == a[rb + rh - 1])
-                        --rh; // back up right to beginning of run
-                    if (rh <= eighth)
-                        break; // too imbalanced
-                    while (lh > eighth && split == a[lb + lh - 1])
-                        --lh; // back up left
-                    if (lh <= eighth || (s = lh + rh) <= g || // left too small
-                        (sl = ln - lh) + (sr = rn - rh) <= g) // right too small
-                        break;
-                    addToPendingCount(1);
-                    new Merger(this, a, w, lb + lh, sl, rb + rh, sr, k + s,
-                               g).fork();
+                    else {
+                        if (rn <= g)
+                            break;
+                        lh = ln;
+                        char split = a[(rh = rn >>> 1) + rb];
+                        for (int lo = 0; lo < lh; ) {
+                            int lm = (lo + lh) >>> 1;
+                            if (split <= a[lm + lb])
+                                lh = lm;
+                            else
+                                lo = lm + 1;
+                        }
+                    }
+                    Merger m = new Merger(this, a, w, lb + lh, ln - lh,
+                                          rb + rh, rn - rh,
+                                          k + lh + rh, g);
                     rn = rh;
                     ln = lh;
+                    addToPendingCount(1);
+                    m.fork();
                 }
 
                 int lf = lb + ln, rf = rb + rn; // index bounds
@@ -549,11 +447,9 @@
                     System.arraycopy(a, rb, w, k, rf - rb);
                 else if (lb < lf)
                     System.arraycopy(a, lb, w, k, lf - lb);
-
                 tryComplete();
             }
         }
-
     } // FJChar
 
     /** short support class */
@@ -562,8 +458,8 @@
             static final long serialVersionUID = 2446542900576103244L;
             final short[] a, w;
             final int base, size, wbase, gran;
-            Sorter(CountedCompleter<?> par, short[] a, short[] w, int base, int size,
-                   int wbase, int gran) {
+            Sorter(CountedCompleter<?> par, short[] a, short[] w, int base,
+                   int size, int wbase, int gran) {
                 super(par);
                 this.a = a; this.w = w; this.base = base; this.size = size;
                 this.wbase = wbase; this.gran = gran;
@@ -573,26 +469,20 @@
                 short[] a = this.a, w = this.w; // localize all params
                 int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
                 while (n > g) {
-                    int h = n >>> 1, q = n >>> 2, u = h + q; // quartiles
-                    int qb = b + q, hb = b + h, ub = b + u;  // bases
-                    int wqb = wb + q, whb = wb + h, wub = wb + u;
-                    int hq = h - q, hn = n - h, un = n - u;  // sizes
+                    int h = n >>> 1, q = h >>> 1, u = h + q; // quartiles
+                    Relay fc = new Relay(new Merger(s, w, a, wb, h,
+                                                    wb+h, n-h, b, g));
+                    Relay rc = new Relay(new Merger(fc, a, w, b+h, q,
+                                                    b+u, n-u, wb+h, g));
+                    new Sorter(rc, a, w, b+u, n-u, wb+u, g).fork();
+                    new Sorter(rc, a, w, b+h, q, wb+h, g).fork();
+                    Relay bc = new Relay(new Merger(fc, a, w, b, q,
+                                                    b+q, h-q, wb, g));
+                    new Sorter(bc, a, w, b+q, h-q, wb+q, g).fork();
+                    s = new EmptyCompleter(bc);
                     n = q;
-                    Relay fc = new Relay(new Merger(s,  w, a, wb,  h,
-                                                    whb, hn, b,  g));
-                    Relay rc = new Relay(new Merger(fc, a, w, hb, q,
-                                                    ub, un, whb, g));
-                    Relay bc = new Relay(new Merger(fc, a, w, b,  q,
-                                                    qb, hq, wb,  g));
-                    s = new EmptyCompleter(bc);
-                    Sorter su = new Sorter(rc, a, w, ub, un, wub, g);
-                    Sorter sh = new Sorter(rc, a, w, hb, q,  whb, g);
-                    Sorter sq = new Sorter(bc, a, w, qb, hq, wqb, g);
-                    su.fork();
-                    sh.fork();
-                    sq.fork();
                 }
-                DualPivotQuicksort.sort(a, b, b + n - 1);
+                DualPivotQuicksort.sort(a, b, b + n - 1, w, wb, n);
                 s.tryComplete();
             }
         }
@@ -615,33 +505,42 @@
                 short[] a = this.a, w = this.w; // localize all params
                 int lb = this.lbase, ln = this.lsize, rb = this.rbase,
                     rn = this.rsize, k = this.wbase, g = this.gran;
-                int eighth = g >>> 3;
-                for (;;) {
-                    int lh = ln >>> 1;
-                    short split = a[lb + lh];
-                    int rh = rn;
-                    for (int rl = 0; rl < rh;) {
-                        int rm = (rl + rh) >>> 1;
-                        if (split <= a[rb + rm])
-                            rh = rm;
-                        else
-                            rl = rm + 1;
+                if (a == null || w == null || lb < 0 || rb < 0 || k < 0)
+                    throw new IllegalStateException(); // hoist checks
+                for (int lh, rh;;) {  // split larger, find point in smaller
+                    if (ln >= rn) {
+                        if (ln <= g)
+                            break;
+                        rh = rn;
+                        short split = a[(lh = ln >>> 1) + lb];
+                        for (int lo = 0; lo < rh; ) {
+                            int rm = (lo + rh) >>> 1;
+                            if (split <= a[rm + rb])
+                                rh = rm;
+                            else
+                                lo = rm + 1;
+                        }
                     }
-                    int s, sl, sr;
-                    while (rh > eighth && split == a[rb + rh - 1])
-                        --rh; // back up right to beginning of run
-                    if (rh <= eighth)
-                        break; // too imbalanced
-                    while (lh > eighth && split == a[lb + lh - 1])
-                        --lh; // back up left
-                    if (lh <= eighth || (s = lh + rh) <= g || // left too small
-                        (sl = ln - lh) + (sr = rn - rh) <= g) // right too small
-                        break;
-                    addToPendingCount(1);
-                    new Merger(this, a, w, lb + lh, sl, rb + rh, sr, k + s,
-                               g).fork();
+                    else {
+                        if (rn <= g)
+                            break;
+                        lh = ln;
+                        short split = a[(rh = rn >>> 1) + rb];
+                        for (int lo = 0; lo < lh; ) {
+                            int lm = (lo + lh) >>> 1;
+                            if (split <= a[lm + lb])
+                                lh = lm;
+                            else
+                                lo = lm + 1;
+                        }
+                    }
+                    Merger m = new Merger(this, a, w, lb + lh, ln - lh,
+                                          rb + rh, rn - rh,
+                                          k + lh + rh, g);
                     rn = rh;
                     ln = lh;
+                    addToPendingCount(1);
+                    m.fork();
                 }
 
                 int lf = lb + ln, rf = rb + rn; // index bounds
@@ -659,7 +558,6 @@
                     System.arraycopy(a, rb, w, k, rf - rb);
                 else if (lb < lf)
                     System.arraycopy(a, lb, w, k, lf - lb);
-
                 tryComplete();
             }
         }
@@ -671,8 +569,8 @@
             static final long serialVersionUID = 2446542900576103244L;
             final int[] a, w;
             final int base, size, wbase, gran;
-            Sorter(CountedCompleter<?> par, int[] a, int[] w, int base, int size,
-                   int wbase, int gran) {
+            Sorter(CountedCompleter<?> par, int[] a, int[] w, int base,
+                   int size, int wbase, int gran) {
                 super(par);
                 this.a = a; this.w = w; this.base = base; this.size = size;
                 this.wbase = wbase; this.gran = gran;
@@ -682,26 +580,20 @@
                 int[] a = this.a, w = this.w; // localize all params
                 int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
                 while (n > g) {
-                    int h = n >>> 1, q = n >>> 2, u = h + q; // quartiles
-                    int qb = b + q, hb = b + h, ub = b + u;  // bases
-                    int wqb = wb + q, whb = wb + h, wub = wb + u;
-                    int hq = h - q, hn = n - h, un = n - u;  // sizes
+                    int h = n >>> 1, q = h >>> 1, u = h + q; // quartiles
+                    Relay fc = new Relay(new Merger(s, w, a, wb, h,
+                                                    wb+h, n-h, b, g));
+                    Relay rc = new Relay(new Merger(fc, a, w, b+h, q,
+                                                    b+u, n-u, wb+h, g));
+                    new Sorter(rc, a, w, b+u, n-u, wb+u, g).fork();
+                    new Sorter(rc, a, w, b+h, q, wb+h, g).fork();
+                    Relay bc = new Relay(new Merger(fc, a, w, b, q,
+                                                    b+q, h-q, wb, g));
+                    new Sorter(bc, a, w, b+q, h-q, wb+q, g).fork();
+                    s = new EmptyCompleter(bc);
                     n = q;
-                    Relay fc = new Relay(new Merger(s,  w, a, wb,  h,
-                                                    whb, hn, b,  g));
-                    Relay rc = new Relay(new Merger(fc, a, w, hb, q,
-                                                    ub, un, whb, g));
-                    Relay bc = new Relay(new Merger(fc, a, w, b,  q,
-                                                    qb, hq, wb,  g));
-                    s = new EmptyCompleter(bc);
-                    Sorter su = new Sorter(rc, a, w, ub, un, wub, g);
-                    Sorter sh = new Sorter(rc, a, w, hb, q,  whb, g);
-                    Sorter sq = new Sorter(bc, a, w, qb, hq, wqb, g);
-                    su.fork();
-                    sh.fork();
-                    sq.fork();
                 }
-                DualPivotQuicksort.sort(a, b, b + n - 1);
+                DualPivotQuicksort.sort(a, b, b + n - 1, w, wb, n);
                 s.tryComplete();
             }
         }
@@ -724,33 +616,42 @@
                 int[] a = this.a, w = this.w; // localize all params
                 int lb = this.lbase, ln = this.lsize, rb = this.rbase,
                     rn = this.rsize, k = this.wbase, g = this.gran;
-                int eighth = g >>> 3;
-                for (;;) {
-                    int lh = ln >>> 1;
-                    int split = a[lb + lh];
-                    int rh = rn;
-                    for (int rl = 0; rl < rh;) {
-                        int rm = (rl + rh) >>> 1;
-                        if (split <= a[rb + rm])
-                            rh = rm;
-                        else
-                            rl = rm + 1;
+                if (a == null || w == null || lb < 0 || rb < 0 || k < 0)
+                    throw new IllegalStateException(); // hoist checks
+                for (int lh, rh;;) {  // split larger, find point in smaller
+                    if (ln >= rn) {
+                        if (ln <= g)
+                            break;
+                        rh = rn;
+                        int split = a[(lh = ln >>> 1) + lb];
+                        for (int lo = 0; lo < rh; ) {
+                            int rm = (lo + rh) >>> 1;
+                            if (split <= a[rm + rb])
+                                rh = rm;
+                            else
+                                lo = rm + 1;
+                        }
                     }
-                    int s, sl, sr;
-                    while (rh > eighth && split == a[rb + rh - 1])
-                        --rh; // back up right to beginning of run
-                    if (rh <= eighth)
-                        break; // too imbalanced
-                    while (lh > eighth && split == a[lb + lh - 1])
-                        --lh; // back up left
-                    if (lh <= eighth || (s = lh + rh) <= g || // left too small
-                        (sl = ln - lh) + (sr = rn - rh) <= g) // right too small
-                        break;
-                    addToPendingCount(1);
-                    new Merger(this, a, w, lb + lh, sl, rb + rh, sr, k + s,
-                               g).fork();
+                    else {
+                        if (rn <= g)
+                            break;
+                        lh = ln;
+                        int split = a[(rh = rn >>> 1) + rb];
+                        for (int lo = 0; lo < lh; ) {
+                            int lm = (lo + lh) >>> 1;
+                            if (split <= a[lm + lb])
+                                lh = lm;
+                            else
+                                lo = lm + 1;
+                        }
+                    }
+                    Merger m = new Merger(this, a, w, lb + lh, ln - lh,
+                                          rb + rh, rn - rh,
+                                          k + lh + rh, g);
                     rn = rh;
                     ln = lh;
+                    addToPendingCount(1);
+                    m.fork();
                 }
 
                 int lf = lb + ln, rf = rb + rn; // index bounds
@@ -768,7 +669,6 @@
                     System.arraycopy(a, rb, w, k, rf - rb);
                 else if (lb < lf)
                     System.arraycopy(a, lb, w, k, lf - lb);
-
                 tryComplete();
             }
         }
@@ -780,8 +680,8 @@
             static final long serialVersionUID = 2446542900576103244L;
             final long[] a, w;
             final int base, size, wbase, gran;
-            Sorter(CountedCompleter<?> par, long[] a, long[] w, int base, int size,
-                   int wbase, int gran) {
+            Sorter(CountedCompleter<?> par, long[] a, long[] w, int base,
+                   int size, int wbase, int gran) {
                 super(par);
                 this.a = a; this.w = w; this.base = base; this.size = size;
                 this.wbase = wbase; this.gran = gran;
@@ -791,26 +691,20 @@
                 long[] a = this.a, w = this.w; // localize all params
                 int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
                 while (n > g) {
-                    int h = n >>> 1, q = n >>> 2, u = h + q; // quartiles
-                    int qb = b + q, hb = b + h, ub = b + u;  // bases
-                    int wqb = wb + q, whb = wb + h, wub = wb + u;
-                    int hq = h - q, hn = n - h, un = n - u;  // sizes
+                    int h = n >>> 1, q = h >>> 1, u = h + q; // quartiles
+                    Relay fc = new Relay(new Merger(s, w, a, wb, h,
+                                                    wb+h, n-h, b, g));
+                    Relay rc = new Relay(new Merger(fc, a, w, b+h, q,
+                                                    b+u, n-u, wb+h, g));
+                    new Sorter(rc, a, w, b+u, n-u, wb+u, g).fork();
+                    new Sorter(rc, a, w, b+h, q, wb+h, g).fork();
+                    Relay bc = new Relay(new Merger(fc, a, w, b, q,
+                                                    b+q, h-q, wb, g));
+                    new Sorter(bc, a, w, b+q, h-q, wb+q, g).fork();
+                    s = new EmptyCompleter(bc);
                     n = q;
-                    Relay fc = new Relay(new Merger(s,  w, a, wb,  h,
-                                                    whb, hn, b,  g));
-                    Relay rc = new Relay(new Merger(fc, a, w, hb, q,
-                                                    ub, un, whb, g));
-                    Relay bc = new Relay(new Merger(fc, a, w, b,  q,
-                                                    qb, hq, wb,  g));
-                    s = new EmptyCompleter(bc);
-                    Sorter su = new Sorter(rc, a, w, ub, un, wub, g);
-                    Sorter sh = new Sorter(rc, a, w, hb, q,  whb, g);
-                    Sorter sq = new Sorter(bc, a, w, qb, hq, wqb, g);
-                    su.fork();
-                    sh.fork();
-                    sq.fork();
                 }
-                DualPivotQuicksort.sort(a, b, b + n - 1);
+                DualPivotQuicksort.sort(a, b, b + n - 1, w, wb, n);
                 s.tryComplete();
             }
         }
@@ -833,33 +727,42 @@
                 long[] a = this.a, w = this.w; // localize all params
                 int lb = this.lbase, ln = this.lsize, rb = this.rbase,
                     rn = this.rsize, k = this.wbase, g = this.gran;
-                int eighth = g >>> 3;
-                for (;;) {
-                    int lh = ln >>> 1;
-                    long split = a[lb + lh];
-                    int rh = rn;
-                    for (int rl = 0; rl < rh;) {
-                        int rm = (rl + rh) >>> 1;
-                        if (split <= a[rb + rm])
-                            rh = rm;
-                        else
-                            rl = rm + 1;
+                if (a == null || w == null || lb < 0 || rb < 0 || k < 0)
+                    throw new IllegalStateException(); // hoist checks
+                for (int lh, rh;;) {  // split larger, find point in smaller
+                    if (ln >= rn) {
+                        if (ln <= g)
+                            break;
+                        rh = rn;
+                        long split = a[(lh = ln >>> 1) + lb];
+                        for (int lo = 0; lo < rh; ) {
+                            int rm = (lo + rh) >>> 1;
+                            if (split <= a[rm + rb])
+                                rh = rm;
+                            else
+                                lo = rm + 1;
+                        }
                     }
-                    int s, sl, sr;
-                    while (rh > eighth && split == a[rb + rh - 1])
-                        --rh; // back up right to beginning of run
-                    if (rh <= eighth)
-                        break; // too imbalanced
-                    while (lh > eighth && split == a[lb + lh - 1])
-                        --lh; // back up left
-                    if (lh <= eighth || (s = lh + rh) <= g || // left too small
-                        (sl = ln - lh) + (sr = rn - rh) <= g) // right too small
-                        break;
-                    addToPendingCount(1);
-                    new Merger(this, a, w, lb + lh, sl, rb + rh, sr, k + s,
-                               g).fork();
+                    else {
+                        if (rn <= g)
+                            break;
+                        lh = ln;
+                        long split = a[(rh = rn >>> 1) + rb];
+                        for (int lo = 0; lo < lh; ) {
+                            int lm = (lo + lh) >>> 1;
+                            if (split <= a[lm + lb])
+                                lh = lm;
+                            else
+                                lo = lm + 1;
+                        }
+                    }
+                    Merger m = new Merger(this, a, w, lb + lh, ln - lh,
+                                          rb + rh, rn - rh,
+                                          k + lh + rh, g);
                     rn = rh;
                     ln = lh;
+                    addToPendingCount(1);
+                    m.fork();
                 }
 
                 int lf = lb + ln, rf = rb + rn; // index bounds
@@ -877,7 +780,6 @@
                     System.arraycopy(a, rb, w, k, rf - rb);
                 else if (lb < lf)
                     System.arraycopy(a, lb, w, k, lf - lb);
-
                 tryComplete();
             }
         }
@@ -889,8 +791,8 @@
             static final long serialVersionUID = 2446542900576103244L;
             final float[] a, w;
             final int base, size, wbase, gran;
-            Sorter(CountedCompleter<?> par, float[] a, float[] w, int base, int size,
-                   int wbase, int gran) {
+            Sorter(CountedCompleter<?> par, float[] a, float[] w, int base,
+                   int size, int wbase, int gran) {
                 super(par);
                 this.a = a; this.w = w; this.base = base; this.size = size;
                 this.wbase = wbase; this.gran = gran;
@@ -900,26 +802,20 @@
                 float[] a = this.a, w = this.w; // localize all params
                 int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
                 while (n > g) {
-                    int h = n >>> 1, q = n >>> 2, u = h + q; // quartiles
-                    int qb = b + q, hb = b + h, ub = b + u;  // bases
-                    int wqb = wb + q, whb = wb + h, wub = wb + u;
-                    int hq = h - q, hn = n - h, un = n - u;  // sizes
+                    int h = n >>> 1, q = h >>> 1, u = h + q; // quartiles
+                    Relay fc = new Relay(new Merger(s, w, a, wb, h,
+                                                    wb+h, n-h, b, g));
+                    Relay rc = new Relay(new Merger(fc, a, w, b+h, q,
+                                                    b+u, n-u, wb+h, g));
+                    new Sorter(rc, a, w, b+u, n-u, wb+u, g).fork();
+                    new Sorter(rc, a, w, b+h, q, wb+h, g).fork();
+                    Relay bc = new Relay(new Merger(fc, a, w, b, q,
+                                                    b+q, h-q, wb, g));
+                    new Sorter(bc, a, w, b+q, h-q, wb+q, g).fork();
+                    s = new EmptyCompleter(bc);
                     n = q;
-                    Relay fc = new Relay(new Merger(s,  w, a, wb,  h,
-                                                    whb, hn, b,  g));
-                    Relay rc = new Relay(new Merger(fc, a, w, hb, q,
-                                                    ub, un, whb, g));
-                    Relay bc = new Relay(new Merger(fc, a, w, b,  q,
-                                                    qb, hq, wb,  g));
-                    s = new EmptyCompleter(bc);
-                    Sorter su = new Sorter(rc, a, w, ub, un, wub, g);
-                    Sorter sh = new Sorter(rc, a, w, hb, q,  whb, g);
-                    Sorter sq = new Sorter(bc, a, w, qb, hq, wqb, g);
-                    su.fork();
-                    sh.fork();
-                    sq.fork();
                 }
-                DualPivotQuicksort.sort(a, b, b + n - 1);
+                DualPivotQuicksort.sort(a, b, b + n - 1, w, wb, n);
                 s.tryComplete();
             }
         }
@@ -942,33 +838,42 @@
                 float[] a = this.a, w = this.w; // localize all params
                 int lb = this.lbase, ln = this.lsize, rb = this.rbase,
                     rn = this.rsize, k = this.wbase, g = this.gran;
-                int eighth = g >>> 3;
-                for (;;) {
-                    int lh = ln >>> 1;
-                    float split = a[lb + lh];
-                    int rh = rn;
-                    for (int rl = 0; rl < rh;) {
-                        int rm = (rl + rh) >>> 1;
-                        if (split <= a[rb + rm])
-                            rh = rm;
-                        else
-                            rl = rm + 1;
+                if (a == null || w == null || lb < 0 || rb < 0 || k < 0)
+                    throw new IllegalStateException(); // hoist checks
+                for (int lh, rh;;) {  // split larger, find point in smaller
+                    if (ln >= rn) {
+                        if (ln <= g)
+                            break;
+                        rh = rn;
+                        float split = a[(lh = ln >>> 1) + lb];
+                        for (int lo = 0; lo < rh; ) {
+                            int rm = (lo + rh) >>> 1;
+                            if (split <= a[rm + rb])
+                                rh = rm;
+                            else
+                                lo = rm + 1;
+                        }
                     }
-                    int s, sl, sr;
-                    while (rh > eighth && split == a[rb + rh - 1])
-                        --rh; // back up right to beginning of run
-                    if (rh <= eighth)
-                        break; // too imbalanced
-                    while (lh > eighth && split == a[lb + lh - 1])
-                        --lh; // back up left
-                    if (lh <= eighth || (s = lh + rh) <= g || // left too small
-                        (sl = ln - lh) + (sr = rn - rh) <= g) // right too small
-                        break;
-                    addToPendingCount(1);
-                    new Merger(this, a, w, lb + lh, sl, rb + rh, sr, k + s,
-                               g).fork();
+                    else {
+                        if (rn <= g)
+                            break;
+                        lh = ln;
+                        float split = a[(rh = rn >>> 1) + rb];
+                        for (int lo = 0; lo < lh; ) {
+                            int lm = (lo + lh) >>> 1;
+                            if (split <= a[lm + lb])
+                                lh = lm;
+                            else
+                                lo = lm + 1;
+                        }
+                    }
+                    Merger m = new Merger(this, a, w, lb + lh, ln - lh,
+                                          rb + rh, rn - rh,
+                                          k + lh + rh, g);
                     rn = rh;
                     ln = lh;
+                    addToPendingCount(1);
+                    m.fork();
                 }
 
                 int lf = lb + ln, rf = rb + rn; // index bounds
@@ -986,7 +891,6 @@
                     System.arraycopy(a, rb, w, k, rf - rb);
                 else if (lb < lf)
                     System.arraycopy(a, lb, w, k, lf - lb);
-
                 tryComplete();
             }
         }
@@ -998,8 +902,8 @@
             static final long serialVersionUID = 2446542900576103244L;
             final double[] a, w;
             final int base, size, wbase, gran;
-            Sorter(CountedCompleter<?> par, double[] a, double[] w, int base, int size,
-                   int wbase, int gran) {
+            Sorter(CountedCompleter<?> par, double[] a, double[] w, int base,
+                   int size, int wbase, int gran) {
                 super(par);
                 this.a = a; this.w = w; this.base = base; this.size = size;
                 this.wbase = wbase; this.gran = gran;
@@ -1009,26 +913,20 @@
                 double[] a = this.a, w = this.w; // localize all params
                 int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
                 while (n > g) {
-                    int h = n >>> 1, q = n >>> 2, u = h + q; // quartiles
-                    int qb = b + q, hb = b + h, ub = b + u;  // bases
-                    int wqb = wb + q, whb = wb + h, wub = wb + u;
-                    int hq = h - q, hn = n - h, un = n - u;  // sizes
+                    int h = n >>> 1, q = h >>> 1, u = h + q; // quartiles
+                    Relay fc = new Relay(new Merger(s, w, a, wb, h,
+                                                    wb+h, n-h, b, g));
+                    Relay rc = new Relay(new Merger(fc, a, w, b+h, q,
+                                                    b+u, n-u, wb+h, g));
+                    new Sorter(rc, a, w, b+u, n-u, wb+u, g).fork();
+                    new Sorter(rc, a, w, b+h, q, wb+h, g).fork();
+                    Relay bc = new Relay(new Merger(fc, a, w, b, q,
+                                                    b+q, h-q, wb, g));
+                    new Sorter(bc, a, w, b+q, h-q, wb+q, g).fork();
+                    s = new EmptyCompleter(bc);
                     n = q;
-                    Relay fc = new Relay(new Merger(s,  w, a, wb,  h,
-                                                    whb, hn, b,  g));
-                    Relay rc = new Relay(new Merger(fc, a, w, hb, q,
-                                                    ub, un, whb, g));
-                    Relay bc = new Relay(new Merger(fc, a, w, b,  q,
-                                                    qb, hq, wb,  g));
-                    s = new EmptyCompleter(bc);
-                    Sorter su = new Sorter(rc, a, w, ub, un, wub, g);
-                    Sorter sh = new Sorter(rc, a, w, hb, q,  whb, g);
-                    Sorter sq = new Sorter(bc, a, w, qb, hq, wqb, g);
-                    su.fork();
-                    sh.fork();
-                    sq.fork();
                 }
-                DualPivotQuicksort.sort(a, b, b + n - 1);
+                DualPivotQuicksort.sort(a, b, b + n - 1, w, wb, n);
                 s.tryComplete();
             }
         }
@@ -1051,33 +949,42 @@
                 double[] a = this.a, w = this.w; // localize all params
                 int lb = this.lbase, ln = this.lsize, rb = this.rbase,
                     rn = this.rsize, k = this.wbase, g = this.gran;
-                int eighth = g >>> 3;
-                for (;;) {
-                    int lh = ln >>> 1;
-                    double split = a[lb + lh];
-                    int rh = rn;
-                    for (int rl = 0; rl < rh;) {
-                        int rm = (rl + rh) >>> 1;
-                        if (split <= a[rb + rm])
-                            rh = rm;
-                        else
-                            rl = rm + 1;
+                if (a == null || w == null || lb < 0 || rb < 0 || k < 0)
+                    throw new IllegalStateException(); // hoist checks
+                for (int lh, rh;;) {  // split larger, find point in smaller
+                    if (ln >= rn) {
+                        if (ln <= g)
+                            break;
+                        rh = rn;
+                        double split = a[(lh = ln >>> 1) + lb];
+                        for (int lo = 0; lo < rh; ) {
+                            int rm = (lo + rh) >>> 1;
+                            if (split <= a[rm + rb])
+                                rh = rm;
+                            else
+                                lo = rm + 1;
+                        }
                     }
-                    int s, sl, sr;
-                    while (rh > eighth && split == a[rb + rh - 1])
-                        --rh; // back up right to beginning of run
-                    if (rh <= eighth)
-                        break; // too imbalanced
-                    while (lh > eighth && split == a[lb + lh - 1])
-                        --lh; // back up left
-                    if (lh <= eighth || (s = lh + rh) <= g || // left too small
-                        (sl = ln - lh) + (sr = rn - rh) <= g) // right too small
-                        break;
-                    addToPendingCount(1);
-                    new Merger(this, a, w, lb + lh, sl, rb + rh, sr, k + s,
-                               g).fork();
+                    else {
+                        if (rn <= g)
+                            break;
+                        lh = ln;
+                        double split = a[(rh = rn >>> 1) + rb];
+                        for (int lo = 0; lo < lh; ) {
+                            int lm = (lo + lh) >>> 1;
+                            if (split <= a[lm + lb])
+                                lh = lm;
+                            else
+                                lo = lm + 1;
+                        }
+                    }
+                    Merger m = new Merger(this, a, w, lb + lh, ln - lh,
+                                          rb + rh, rn - rh,
+                                          k + lh + rh, g);
                     rn = rh;
                     ln = lh;
+                    addToPendingCount(1);
+                    m.fork();
                 }
 
                 int lf = lb + ln, rf = rb + rn; // index bounds
@@ -1095,7 +1002,6 @@
                     System.arraycopy(a, rb, w, k, rf - rb);
                 else if (lb < lf)
                     System.arraycopy(a, lb, w, k, lf - lb);
-
                 tryComplete();
             }
         }
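
The rewritten Merger bodies above, repeated for each element type, all follow the "split larger, find point in smaller" scheme noted in the loop comment: the larger of the two sorted runs is halved at its midpoint, a binary search finds the matching split point in the smaller run, a subtask is forked to merge the upper halves, and the loop continues on the lower halves until one run is no larger than the granularity g. A minimal sequential sketch of that partition step, ints only and with illustrative names (no CountedCompleter plumbing), might look like:

    // Sketch of the "split larger, find point in smaller" step used by the
    // Merger tasks above. Illustrative only: the real code runs inside a
    // CountedCompleter, forks a Merger for the upper halves, and keeps
    // looping on the lower halves.
    class MergePartitionSketch {
        // Returns {lh, rh}: every element of a[lb..lb+lh) and a[rb..rb+rh)
        // is <= every element of a[lb+lh..lb+ln) and a[rb+rh..rb+rn).
        static int[] partition(int[] a, int lb, int ln, int rb, int rn) {
            int lh, rh;
            if (ln >= rn) {            // left run is larger: halve it
                lh = ln >>> 1;
                int split = a[lb + lh];
                rh = rn;               // binary-search split point in right run
                for (int lo = 0; lo < rh; ) {
                    int rm = (lo + rh) >>> 1;
                    if (split <= a[rb + rm])
                        rh = rm;
                    else
                        lo = rm + 1;
                }
            } else {                   // right run is larger: halve it
                rh = rn >>> 1;
                int split = a[rb + rh];
                lh = ln;               // binary-search split point in left run
                for (int lo = 0; lo < lh; ) {
                    int lm = (lo + lh) >>> 1;
                    if (split <= a[lb + lm])
                        lh = lm;
                    else
                        lo = lm + 1;
                }
            }
            return new int[] { lh, rh };
        }
    }
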
--- a/src/share/classes/java/util/ComparableTimSort.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/ComparableTimSort.java	Mon Apr 08 16:19:46 2013 -0700
@@ -123,7 +123,7 @@
         int len = a.length;
         int tlen = (len < 2 * INITIAL_TMP_STORAGE_LENGTH) ?
             len >>> 1 : INITIAL_TMP_STORAGE_LENGTH;
-        if (work == null || workLen < tlen) {
+        if (work == null || workLen < tlen || workBase + tlen > work.length) {
             tmp = new Object[tlen];
             tmpBase = 0;
             tmpLen = tlen;
@@ -152,26 +152,28 @@
     }
 
     /*
-     * The next three methods (which are package private and static) constitute
-     * the entire API of this class.  Each of these methods obeys the contract
-     * of the public method with the same signature in java.util.Arrays.
+     * The next method (package private and static) constitutes the
+     * entire API of this class. 
      */
 
-    static void sort(Object[] a) {
-        sort(a, 0, a.length, null, 0, 0);
-    }
-
-    static void sort(Object[] a, int lo, int hi) {
-        sort(a, lo, hi, null, 0, 0);
-    }
-
     /**
-     * sort, using the given workspace array slice for temp storage
-     * when possible.
+     * Sorts the given range, using the given workspace array slice
+     * for temp storage when possible. This method is designed to be
+     * invoked from public methods (in class Arrays) after performing
+     * any necessary array bounds checks and expanding parameters into
+     * the required forms.
+     *
+     * @param a the array to be sorted
+     * @param lo the index of the first element, inclusive, to be sorted
+     * @param hi the index of the last element, exclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      * @since 1.8
      */
     static void sort(Object[] a, int lo, int hi, Object[] work, int workBase, int workLen) {
-        rangeCheck(a.length, lo, hi);
+        assert a != null && lo >= 0 && lo <= hi && hi <= a.length;
+
         int nRemaining  = hi - lo;
         if (nRemaining < 2)
             return;  // Arrays of size 0 and 1 are always sorted
@@ -898,24 +900,4 @@
         return tmp;
     }
 
-    /**
-     * Checks that fromIndex and toIndex are in range, and throws an
-     * appropriate exception if they aren't.
-     *
-     * @param arrayLen the length of the array
-     * @param fromIndex the index of the first element of the range
-     * @param toIndex the index after the last element of the range
-     * @throws IllegalArgumentException if fromIndex > toIndex
-     * @throws ArrayIndexOutOfBoundsException if fromIndex < 0
-     *         or toIndex > arrayLen
-     */
-    private static void rangeCheck(int arrayLen, int fromIndex, int toIndex) {
-        if (fromIndex > toIndex)
-            throw new IllegalArgumentException("fromIndex(" + fromIndex +
-                       ") > toIndex(" + toIndex+")");
-        if (fromIndex < 0)
-            throw new ArrayIndexOutOfBoundsException(fromIndex);
-        if (toIndex > arrayLen)
-            throw new ArrayIndexOutOfBoundsException(toIndex);
-    }
 }
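
With the two convenience overloads and the private rangeCheck removed, range validation is now the caller's job, as the rewritten javadoc above says. A hedged sketch of that caller side follows; the wrapper name is an assumption, the checks mirror the removed rangeCheck, and the call itself compiles only from within java.util because ComparableTimSort is package-private.

    // Illustrative caller-side wrapper; the checks mirror the rangeCheck
    // deleted above, and the delegation mirrors what java.util.Arrays is
    // expected to do after validating the range.
    class RangeCheckedSortSketch {
        static void sortRange(Object[] a, int fromIndex, int toIndex) {
            if (fromIndex > toIndex)
                throw new IllegalArgumentException(
                    "fromIndex(" + fromIndex + ") > toIndex(" + toIndex + ")");
            if (fromIndex < 0)
                throw new ArrayIndexOutOfBoundsException(fromIndex);
            if (toIndex > a.length)
                throw new ArrayIndexOutOfBoundsException(toIndex);
            // null workspace: ComparableTimSort allocates its own temp array
            ComparableTimSort.sort(a, fromIndex, toIndex, null, 0, 0);
        }
    }
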
--- a/src/share/classes/java/util/DoubleSummaryStatistics.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/DoubleSummaryStatistics.java	Mon Apr 08 16:19:46 2013 -0700
@@ -66,7 +66,7 @@
         sum += other.sum;
         sumOfSquares += other.sumOfSquares;
         min = Math.min(min, other.min);
-        max = Math.min(max, other.max);
+        max = Math.max(max, other.max);
     }
 
     /**
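
The one-character fix above corrects the merge of two partial summaries: the running maximum was combined with Math.min, so a partial result whose max exceeded the current max was silently dropped. A minimal sketch of the corrected min/max bookkeeping, with illustrative names (the real method also folds in the other running fields shown in the surrounding context):

    // Minimal sketch of the corrected combine logic; names are illustrative.
    class RunningMinMaxSketch {
        double min = Double.POSITIVE_INFINITY;
        double max = Double.NEGATIVE_INFINITY;

        void accept(double value) {
            min = Math.min(min, value);
            max = Math.max(max, value);
        }

        void combine(RunningMinMaxSketch other) {
            min = Math.min(min, other.min);
            max = Math.max(max, other.max);   // the fixed line: max, not min
        }
    }
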
--- a/src/share/classes/java/util/DualPivotQuicksort.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/DualPivotQuicksort.java	Mon Apr 08 16:19:46 2013 -0700
@@ -32,6 +32,11 @@
  * quicksorts to degrade to quadratic performance, and is typically
  * faster than traditional (one-pivot) Quicksort implementations.
  *
+ * All exposed methods are package-private, designed to be invoked
+ * from public methods (in class Arrays) after performing any
+ * necessary array bounds checks and expanding parameters into the
+ * required forms.
+ *
  * @author Vladimir Yaroslavskiy
  * @author Jon Bentley
  * @author Josh Bloch
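
Per the new class comment above, the sort entry points are package-private and expect callers in class Arrays to have already validated indices; the hunk that follows drops the old whole-array and range-only overloads of sort(int[]), leaving only the workspace-aware form. A hedged sketch of a sequential caller, assuming a null workspace (in which case the method allocates its own merge buffer when one is needed):

    // Illustrative delegation; the real call sits in java.util.Arrays and
    // compiles only within the java.util package.
    class SequentialSortSketch {
        static void sortWholeArray(int[] a) {
            // 'right' is inclusive, so pass a.length - 1; a null workspace
            // makes the method allocate its own merge buffer if required.
            DualPivotQuicksort.sort(a, 0, a.length - 1, null, 0, 0);
        }
    }
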
@@ -89,22 +94,18 @@
      */
 
     /**
-     * Sorts the specified array.
-     *
-     * @param a the array to be sorted
-     */
-    public static void sort(int[] a) {
-        sort(a, 0, a.length - 1);
-    }
-
-    /**
-     * Sorts the specified range of the array.
+     * Sorts the specified range of the array, using the given
+     * workspace array slice for merging when possible.
      *
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    public static void sort(int[] a, int left, int right) {
+    static void sort(int[] a, int left, int right, 
+                     int[] work, int workBase, int workLen) {
         // Use Quicksort on small arrays
         if (right - left < QUICKSORT_THRESHOLD) {
             sort(a, left, right, true);
@@ -147,24 +148,35 @@
         }
 
         // Check special cases
+        // Implementation note: variable "right" is increased by 1.
         if (run[count] == right++) { // The last run contains one element
             run[++count] = right;
         } else if (count == 1) { // The array is already sorted
             return;
         }
 
-        /*
-         * Create temporary array, which is used for merging.
-         * Implementation note: variable "right" is increased by 1.
-         */
-        int[] b; byte odd = 0;
+        // Determine alternation base for merge
+        byte odd = 0;
         for (int n = 1; (n <<= 1) < count; odd ^= 1);
 
+        // Use or create temporary array b for merging
+        int[] b;                 // temp array; alternates with a
+        int ao, bo;              // array offsets from 'left'
+        int blen = right - left; // space needed for b
+        if (work == null || workLen < blen || workBase + blen > work.length) {
+            work = new int[blen];
+            workBase = 0;
+        }
         if (odd == 0) {
-            b = a; a = new int[b.length];
-            for (int i = left - 1; ++i < right; a[i] = b[i]);
+            System.arraycopy(a, left, work, workBase, blen);
+            b = a; 
+            bo = 0;
+            a = work;
+            ao = workBase - left;
         } else {
-            b = new int[a.length];
+            b = work;
+            ao = 0;
+            bo = workBase - left;
         }
 
         // Merging
@@ -172,17 +184,17 @@
             for (int k = (last = 0) + 2; k <= count; k += 2) {
                 int hi = run[k], mi = run[k - 1];
                 for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) {
-                    if (q >= hi || p < mi && a[p] <= a[q]) {
-                        b[i] = a[p++];
+                    if (q >= hi || p < mi && a[p + ao] <= a[q + ao]) {
+                        b[i + bo] = a[p++ + ao];
                     } else {
-                        b[i] = a[q++];
+                        b[i + bo] = a[q++ + ao];
                     }
                 }
                 run[++last] = hi;
             }
             if ((count & 1) != 0) {
                 for (int i = right, lo = run[count - 1]; --i >= lo;
-                    b[i] = a[i]
+                    b[i + bo] = a[i + ao]
                 );
                 run[++last] = right;
             }
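
The ao and bo offsets introduced in this hunk let the merge loop keep using indices from the original range [left, right) even when one logical buffer is really the workspace slice starting at workBase: logical index i maps to work[workBase + (i - left)], that is work[i + (workBase - left)]. A tiny worked check of that mapping, with made-up values:

    // Tiny check of the offset mapping used by the merge loop above
    // (illustrative values only).
    class OffsetMappingSketch {
        public static void main(String[] args) {
            int left = 10, workBase = 3;
            int ao = workBase - left;          // offset added to every index
            for (int i = left; i < left + 4; i++)
                System.out.println("a[" + i + "] -> work[" + (i + ao) + "]");
            // prints: a[10] -> work[3] ... a[13] -> work[6]
        }
    }
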
@@ -529,22 +541,18 @@
     }
 
     /**
-     * Sorts the specified array.
-     *
-     * @param a the array to be sorted
-     */
-    public static void sort(long[] a) {
-        sort(a, 0, a.length - 1);
-    }
-
-    /**
-     * Sorts the specified range of the array.
+     * Sorts the specified range of the array, using the given
+     * workspace array slice for merging when possible.
      *
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    public static void sort(long[] a, int left, int right) {
+    static void sort(long[] a, int left, int right,
+                     long[] work, int workBase, int workLen) {
         // Use Quicksort on small arrays
         if (right - left < QUICKSORT_THRESHOLD) {
             sort(a, left, right, true);
@@ -587,24 +595,35 @@
         }
 
         // Check special cases
+        // Implementation note: variable "right" is increased by 1.
         if (run[count] == right++) { // The last run contains one element
             run[++count] = right;
         } else if (count == 1) { // The array is already sorted
             return;
         }
 
-        /*
-         * Create temporary array, which is used for merging.
-         * Implementation note: variable "right" is increased by 1.
-         */
-        long[] b; byte odd = 0;
+        // Determine alternation base for merge
+        byte odd = 0;
         for (int n = 1; (n <<= 1) < count; odd ^= 1);
 
+        // Use or create temporary array b for merging
+        long[] b;                 // temp array; alternates with a
+        int ao, bo;              // array offsets from 'left'
+        int blen = right - left; // space needed for b
+        if (work == null || workLen < blen || workBase + blen > work.length) {
+            work = new long[blen];
+            workBase = 0;
+        }
         if (odd == 0) {
-            b = a; a = new long[b.length];
-            for (int i = left - 1; ++i < right; a[i] = b[i]);
+            System.arraycopy(a, left, work, workBase, blen);
+            b = a; 
+            bo = 0;
+            a = work;
+            ao = workBase - left;
         } else {
-            b = new long[a.length];
+            b = work;
+            ao = 0;
+            bo = workBase - left;
         }
 
         // Merging
@@ -612,17 +631,17 @@
             for (int k = (last = 0) + 2; k <= count; k += 2) {
                 int hi = run[k], mi = run[k - 1];
                 for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) {
-                    if (q >= hi || p < mi && a[p] <= a[q]) {
-                        b[i] = a[p++];
+                    if (q >= hi || p < mi && a[p + ao] <= a[q + ao]) {
+                        b[i + bo] = a[p++ + ao];
                     } else {
-                        b[i] = a[q++];
+                        b[i + bo] = a[q++ + ao];
                     }
                 }
                 run[++last] = hi;
             }
             if ((count & 1) != 0) {
                 for (int i = right, lo = run[count - 1]; --i >= lo;
-                    b[i] = a[i]
+                    b[i + bo] = a[i + ao]
                 );
                 run[++last] = right;
             }
@@ -969,22 +988,18 @@
     }
 
     /**
-     * Sorts the specified array.
-     *
-     * @param a the array to be sorted
-     */
-    public static void sort(short[] a) {
-        sort(a, 0, a.length - 1);
-    }
-
-    /**
-     * Sorts the specified range of the array.
+     * Sorts the specified range of the array using the given
+     * workspace array slice if possible for merging
      *
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    public static void sort(short[] a, int left, int right) {
+    static void sort(short[] a, int left, int right,
+                     short[] work, int workBase, int workLen) {
         // Use counting sort on large arrays
         if (right - left > COUNTING_SORT_THRESHOLD_FOR_SHORT_OR_CHAR) {
             int[] count = new int[NUM_SHORT_VALUES];
@@ -1002,7 +1017,7 @@
                 } while (--s > 0);
             }
         } else { // Use Dual-Pivot Quicksort on small arrays
-            doSort(a, left, right);
+            doSort(a, left, right, work, workBase, workLen);
         }
     }
 
@@ -1015,8 +1030,12 @@
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    private static void doSort(short[] a, int left, int right) {
+    private static void doSort(short[] a, int left, int right,
+                               short[] work, int workBase, int workLen) {
         // Use Quicksort on small arrays
         if (right - left < QUICKSORT_THRESHOLD) {
             sort(a, left, right, true);
@@ -1059,24 +1078,35 @@
         }
 
         // Check special cases
+        // Implementation note: variable "right" is increased by 1.
         if (run[count] == right++) { // The last run contains one element
             run[++count] = right;
         } else if (count == 1) { // The array is already sorted
             return;
         }
 
-        /*
-         * Create temporary array, which is used for merging.
-         * Implementation note: variable "right" is increased by 1.
-         */
-        short[] b; byte odd = 0;
+        // Determine alternation base for merge
+        byte odd = 0;
         for (int n = 1; (n <<= 1) < count; odd ^= 1);
 
+        // Use or create temporary array b for merging
+        short[] b;                 // temp array; alternates with a
+        int ao, bo;              // array offsets from 'left'
+        int blen = right - left; // space needed for b
+        if (work == null || workLen < blen || workBase + blen > work.length) {
+            work = new short[blen];
+            workBase = 0;
+        }
         if (odd == 0) {
-            b = a; a = new short[b.length];
-            for (int i = left - 1; ++i < right; a[i] = b[i]);
+            System.arraycopy(a, left, work, workBase, blen);
+            b = a; 
+            bo = 0;
+            a = work;
+            ao = workBase - left;
         } else {
-            b = new short[a.length];
+            b = work;
+            ao = 0;
+            bo = workBase - left;
         }
 
         // Merging
@@ -1084,17 +1114,17 @@
             for (int k = (last = 0) + 2; k <= count; k += 2) {
                 int hi = run[k], mi = run[k - 1];
                 for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) {
-                    if (q >= hi || p < mi && a[p] <= a[q]) {
-                        b[i] = a[p++];
+                    if (q >= hi || p < mi && a[p + ao] <= a[q + ao]) {
+                        b[i + bo] = a[p++ + ao];
                     } else {
-                        b[i] = a[q++];
+                        b[i + bo] = a[q++ + ao];
                     }
                 }
                 run[++last] = hi;
             }
             if ((count & 1) != 0) {
                 for (int i = right, lo = run[count - 1]; --i >= lo;
-                    b[i] = a[i]
+                    b[i + bo] = a[i + ao]
                 );
                 run[++last] = right;
             }
@@ -1441,22 +1471,18 @@
     }
 
     /**
-     * Sorts the specified array.
-     *
-     * @param a the array to be sorted
-     */
-    public static void sort(char[] a) {
-        sort(a, 0, a.length - 1);
-    }
-
-    /**
-     * Sorts the specified range of the array.
+     * Sorts the specified range of the array using the given
+     * workspace array slice if possible for merging
      *
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    public static void sort(char[] a, int left, int right) {
+    static void sort(char[] a, int left, int right,
+                     char[] work, int workBase, int workLen) {
         // Use counting sort on large arrays
         if (right - left > COUNTING_SORT_THRESHOLD_FOR_SHORT_OR_CHAR) {
             int[] count = new int[NUM_CHAR_VALUES];
@@ -1474,7 +1500,7 @@
                 } while (--s > 0);
             }
         } else { // Use Dual-Pivot Quicksort on small arrays
-            doSort(a, left, right);
+            doSort(a, left, right, work, workBase, workLen);
         }
     }
 
@@ -1487,8 +1513,12 @@
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    private static void doSort(char[] a, int left, int right) {
+    private static void doSort(char[] a, int left, int right,
+                               char[] work, int workBase, int workLen) {
         // Use Quicksort on small arrays
         if (right - left < QUICKSORT_THRESHOLD) {
             sort(a, left, right, true);
@@ -1531,24 +1561,35 @@
         }
 
         // Check special cases
+        // Implementation note: variable "right" is increased by 1.
         if (run[count] == right++) { // The last run contains one element
             run[++count] = right;
         } else if (count == 1) { // The array is already sorted
             return;
         }
 
-        /*
-         * Create temporary array, which is used for merging.
-         * Implementation note: variable "right" is increased by 1.
-         */
-        char[] b; byte odd = 0;
+        // Determine alternation base for merge
+        byte odd = 0;
         for (int n = 1; (n <<= 1) < count; odd ^= 1);
 
+        // Use or create temporary array b for merging
+        char[] b;                 // temp array; alternates with a
+        int ao, bo;              // array offsets from 'left'
+        int blen = right - left; // space needed for b
+        if (work == null || workLen < blen || workBase + blen > work.length) {
+            work = new char[blen];
+            workBase = 0;
+        }
         if (odd == 0) {
-            b = a; a = new char[b.length];
-            for (int i = left - 1; ++i < right; a[i] = b[i]);
+            System.arraycopy(a, left, work, workBase, blen);
+            b = a; 
+            bo = 0;
+            a = work;
+            ao = workBase - left;
         } else {
-            b = new char[a.length];
+            b = work;
+            ao = 0;
+            bo = workBase - left;
         }
 
         // Merging
@@ -1556,17 +1597,17 @@
             for (int k = (last = 0) + 2; k <= count; k += 2) {
                 int hi = run[k], mi = run[k - 1];
                 for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) {
-                    if (q >= hi || p < mi && a[p] <= a[q]) {
-                        b[i] = a[p++];
+                    if (q >= hi || p < mi && a[p + ao] <= a[q + ao]) {
+                        b[i + bo] = a[p++ + ao];
                     } else {
-                        b[i] = a[q++];
+                        b[i + bo] = a[q++ + ao];
                     }
                 }
                 run[++last] = hi;
             }
             if ((count & 1) != 0) {
                 for (int i = right, lo = run[count - 1]; --i >= lo;
-                    b[i] = a[i]
+                    b[i + bo] = a[i + ao]
                 );
                 run[++last] = right;
             }
@@ -1916,22 +1957,13 @@
     private static final int NUM_BYTE_VALUES = 1 << 8;
 
     /**
-     * Sorts the specified array.
-     *
-     * @param a the array to be sorted
-     */
-    public static void sort(byte[] a) {
-        sort(a, 0, a.length - 1);
-    }
-
-    /**
      * Sorts the specified range of the array.
      *
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
      */
-    public static void sort(byte[] a, int left, int right) {
+    static void sort(byte[] a, int left, int right) {
         // Use counting sort on large arrays
         if (right - left > COUNTING_SORT_THRESHOLD_FOR_BYTE) {
             int[] count = new int[NUM_BYTE_VALUES];
@@ -1963,22 +1995,18 @@
     }
 
     /**
-     * Sorts the specified array.
-     *
-     * @param a the array to be sorted
-     */
-    public static void sort(float[] a) {
-        sort(a, 0, a.length - 1);
-    }
-
-    /**
-     * Sorts the specified range of the array.
+     * Sorts the specified range of the array using the given
+     * workspace array slice if possible for merging
      *
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    public static void sort(float[] a, int left, int right) {
+    static void sort(float[] a, int left, int right,
+                     float[] work, int workBase, int workLen) {
         /*
          * Phase 1: Move NaNs to the end of the array.
          */
@@ -1997,7 +2025,7 @@
         /*
          * Phase 2: Sort everything except NaNs (which are already in place).
          */
-        doSort(a, left, right);
+        doSort(a, left, right, work, workBase, workLen);
 
         /*
          * Phase 3: Place negative zeros before positive zeros.
@@ -2064,8 +2092,12 @@
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    private static void doSort(float[] a, int left, int right) {
+    private static void doSort(float[] a, int left, int right,
+                               float[] work, int workBase, int workLen) {
         // Use Quicksort on small arrays
         if (right - left < QUICKSORT_THRESHOLD) {
             sort(a, left, right, true);
@@ -2108,24 +2140,35 @@
         }
 
         // Check special cases
+        // Implementation note: variable "right" is increased by 1.
         if (run[count] == right++) { // The last run contains one element
             run[++count] = right;
         } else if (count == 1) { // The array is already sorted
             return;
         }
 
-        /*
-         * Create temporary array, which is used for merging.
-         * Implementation note: variable "right" is increased by 1.
-         */
-        float[] b; byte odd = 0;
+        // Determine alternation base for merge
+        byte odd = 0;
         for (int n = 1; (n <<= 1) < count; odd ^= 1);
 
+        // Use or create temporary array b for merging
+        float[] b;                 // temp array; alternates with a
+        int ao, bo;              // array offsets from 'left'
+        int blen = right - left; // space needed for b
+        if (work == null || workLen < blen || workBase + blen > work.length) {
+            work = new float[blen];
+            workBase = 0;
+        }
         if (odd == 0) {
-            b = a; a = new float[b.length];
-            for (int i = left - 1; ++i < right; a[i] = b[i]);
+            System.arraycopy(a, left, work, workBase, blen);
+            b = a; 
+            bo = 0;
+            a = work;
+            ao = workBase - left;
         } else {
-            b = new float[a.length];
+            b = work;
+            ao = 0;
+            bo = workBase - left;
         }
 
         // Merging
@@ -2133,17 +2176,17 @@
             for (int k = (last = 0) + 2; k <= count; k += 2) {
                 int hi = run[k], mi = run[k - 1];
                 for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) {
-                    if (q >= hi || p < mi && a[p] <= a[q]) {
-                        b[i] = a[p++];
+                    if (q >= hi || p < mi && a[p + ao] <= a[q + ao]) {
+                        b[i + bo] = a[p++ + ao];
                     } else {
-                        b[i] = a[q++];
+                        b[i + bo] = a[q++ + ao];
                     }
                 }
                 run[++last] = hi;
             }
             if ((count & 1) != 0) {
                 for (int i = right, lo = run[count - 1]; --i >= lo;
-                    b[i] = a[i]
+                    b[i + bo] = a[i + ao]
                 );
                 run[++last] = right;
             }
@@ -2490,22 +2533,18 @@
     }
 
     /**
-     * Sorts the specified array.
-     *
-     * @param a the array to be sorted
-     */
-    public static void sort(double[] a) {
-        sort(a, 0, a.length - 1);
-    }
-
-    /**
-     * Sorts the specified range of the array.
+     * Sorts the specified range of the array using the given
+     * workspace array slice if possible for merging
      *
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    public static void sort(double[] a, int left, int right) {
+    static void sort(double[] a, int left, int right,
+                     double[] work, int workBase, int workLen) {
         /*
          * Phase 1: Move NaNs to the end of the array.
          */
@@ -2524,7 +2563,7 @@
         /*
          * Phase 2: Sort everything except NaNs (which are already in place).
          */
-        doSort(a, left, right);
+        doSort(a, left, right, work, workBase, workLen);
 
         /*
          * Phase 3: Place negative zeros before positive zeros.
@@ -2591,8 +2630,12 @@
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    private static void doSort(double[] a, int left, int right) {
+    private static void doSort(double[] a, int left, int right,
+                               double[] work, int workBase, int workLen) {
         // Use Quicksort on small arrays
         if (right - left < QUICKSORT_THRESHOLD) {
             sort(a, left, right, true);
@@ -2635,24 +2678,35 @@
         }
 
         // Check special cases
+        // Implementation note: variable "right" is increased by 1.
         if (run[count] == right++) { // The last run contains one element
             run[++count] = right;
         } else if (count == 1) { // The array is already sorted
             return;
         }
 
-        /*
-         * Create temporary array, which is used for merging.
-         * Implementation note: variable "right" is increased by 1.
-         */
-        double[] b; byte odd = 0;
+        // Determine alternation base for merge
+        byte odd = 0;
         for (int n = 1; (n <<= 1) < count; odd ^= 1);
 
+        // Use or create temporary array b for merging
+        double[] b;                 // temp array; alternates with a
+        int ao, bo;              // array offsets from 'left'
+        int blen = right - left; // space needed for b
+        if (work == null || workLen < blen || workBase + blen > work.length) {
+            work = new double[blen];
+            workBase = 0;
+        }
         if (odd == 0) {
-            b = a; a = new double[b.length];
-            for (int i = left - 1; ++i < right; a[i] = b[i]);
+            System.arraycopy(a, left, work, workBase, blen);
+            b = a; 
+            bo = 0;
+            a = work;
+            ao = workBase - left;
         } else {
-            b = new double[a.length];
+            b = work;
+            ao = 0;
+            bo = workBase - left;
         }
 
         // Merging
@@ -2660,17 +2714,17 @@
             for (int k = (last = 0) + 2; k <= count; k += 2) {
                 int hi = run[k], mi = run[k - 1];
                 for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) {
-                    if (q >= hi || p < mi && a[p] <= a[q]) {
-                        b[i] = a[p++];
+                    if (q >= hi || p < mi && a[p + ao] <= a[q + ao]) {
+                        b[i + bo] = a[p++ + ao];
                     } else {
-                        b[i] = a[q++];
+                        b[i + bo] = a[q++ + ao];
                     }
                 }
                 run[++last] = hi;
             }
             if ((count & 1) != 0) {
                 for (int i = right, lo = run[count - 1]; --i >= lo;
-                    b[i] = a[i]
+                    b[i + bo] = a[i + ao]
                 );
                 run[++last] = right;
             }
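
The Arrays.java hunks above change the dual-pivot merge step to write through a caller-supplied workspace slice, mapping indices relative to 'left' into that slice via the ao/bo offsets. Below is a minimal standalone sketch of that offset arithmetic, assuming two adjacent pre-sorted runs; the class and method names are invented for illustration and this is not the JDK code.

    import java.util.Arrays;

    public class WorkspaceMergeSketch {
        // Merge the adjacent sorted runs a[left..mid) and a[mid..right) into a
        // slice of 'work' starting at workBase, mirroring the ao/bo offset idea:
        // destination index = source index + (workBase - left).
        static void mergeInto(long[] a, int left, int mid, int right,
                              long[] work, int workBase) {
            int bo = workBase - left;          // offset from source index to work index
            int p = left, q = mid;
            for (int i = left; i < right; i++) {
                if (q >= right || (p < mid && a[p] <= a[q])) {
                    work[i + bo] = a[p++];
                } else {
                    work[i + bo] = a[q++];
                }
            }
        }

        public static void main(String[] args) {
            long[] a    = {1, 4, 9, 2, 3, 8};
            long[] work = new long[16];        // oversized workspace; only a slice is used
            mergeInto(a, 0, 3, 6, work, 5);    // merge into work[5..11)
            System.out.println(Arrays.toString(Arrays.copyOfRange(work, 5, 11)));
            // prints [1, 2, 3, 4, 8, 9]
        }
    }
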
--- a/src/share/classes/java/util/IntSummaryStatistics.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/IntSummaryStatistics.java	Mon Apr 08 16:19:46 2013 -0700
@@ -69,7 +69,7 @@
         sum += other.sum;
         sumOfSquares += other.sumOfSquares;
         min = Math.min(min, other.min);
-        max = Math.min(max, other.max);
+        max = Math.max(max, other.max);
     }
 
     /**
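
The one-character IntSummaryStatistics fix above replaces Math.min with Math.max when merging partial maxima. A small usage sketch against the public java.util API showing what the corrected combine() is expected to produce:

    import java.util.IntSummaryStatistics;

    public class CombineSketch {
        public static void main(String[] args) {
            // Two partial summaries, as produced by parallel accumulation.
            IntSummaryStatistics left = new IntSummaryStatistics();
            IntSummaryStatistics right = new IntSummaryStatistics();
            left.accept(7);
            right.accept(42);

            // With the fix above, the combined max is the larger partial max (42);
            // the old Math.min-based merge would have reported 7.
            left.combine(right);
            System.out.println("max = " + left.getMax() + ", min = " + left.getMin());
        }
    }
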
--- a/src/share/classes/java/util/Iterator.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/Iterator.java	Mon Apr 08 16:19:46 2013 -0700
@@ -88,7 +88,6 @@
      *         yet been called, or the {@code remove} method has already
      *         been called after the last call to the {@code next}
      *         method
-     * @since 1.8
      */
     default void remove() {
         throw new UnsupportedOperationException("remove");
@@ -97,8 +96,8 @@
     /**
      * Performs the given action for each remaining element, in the order
      * elements occur when iterating, until all elements have been processed or
-     * the action throws an exception.  Exceptions thrown by the action
-     * are relayed to the caller.
+     * the action throws an exception.  Errors or runtime exceptions thrown by
+     * the action are relayed to the caller.
      *
      * @implSpec
      * <p>The default implementation behaves as if:
--- a/src/share/classes/java/util/List.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/List.java	Mon Apr 08 16:19:46 2013 -0700
@@ -417,9 +417,10 @@
      * The default implementation is equivalent to, for this {@code list}:
      * <pre>Collections.sort(list, c)</pre>
      *
-     * @param c the {@code Comparator} used to compare list elements
+     * @param c the {@code Comparator} used to compare list elements.
+     *          A {@code null} value indicates that the elements'
+     *          {@linkplain Comparable natural ordering} should be used.
      * @since 1.8
-     * @throws NullPointerException if the specified comparator is null
      * @throws ClassCastException if the list contains elements that are not
      *         <i>mutually comparable</i> using the specified comparator.
      * @throws UnsupportedOperationException if the list's list-iterator does
@@ -428,7 +429,6 @@
      *         found to violate the {@link Comparator} contract
      */
     default void sort(Comparator<? super E> c) {
-        Objects.requireNonNull(c);
         Collections.sort(this, c);
     }
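
With the requireNonNull check removed and the javadoc revised as above, passing a null comparator now requests the elements' natural ordering. A short usage sketch of that behavior, using only public API:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Comparator;
    import java.util.List;

    public class SortNullComparatorSketch {
        public static void main(String[] args) {
            List<String> names = new ArrayList<>(Arrays.asList("pear", "apple", "fig"));

            // Per the revised javadoc above, a null comparator means natural ordering,
            // so this no longer throws NullPointerException.
            names.sort(null);
            System.out.println(names);                       // [apple, fig, pear]

            // An explicit comparator still works as before.
            names.sort(Comparator.reverseOrder());
            System.out.println(names);                       // [pear, fig, apple]
        }
    }
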
 
--- a/src/share/classes/java/util/LongSummaryStatistics.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/LongSummaryStatistics.java	Mon Apr 08 16:19:46 2013 -0700
@@ -78,7 +78,7 @@
         sum += other.sum;
         sumOfSquares += other.sumOfSquares;
         min = Math.min(min, other.min);
-        max = Math.min(max, other.max);
+        max = Math.max(max, other.max);
     }
 
     /**
--- a/src/share/classes/java/util/PrimitiveIterator.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/PrimitiveIterator.java	Mon Apr 08 16:19:46 2013 -0700
@@ -44,6 +44,12 @@
  * should be used in preference to {@link PrimitiveIterator.OfInt#next()} and
  * {@link PrimitiveIterator.OfInt#forEachRemaining(java.util.function.Consumer)}.
  *
+ * <p>Iteration of primitive values using boxing-based methods
+ * {@link Iterator#next next()} and
+ * {@link Iterator#forEachRemaining(java.util.function.Consumer) forEachRemaining()}
+ * does not affect the order in which the values, transformed to boxed values,
+ * are encountered.
+ *
  * @implNote
  * If boxing occurs when operating on instances of primitive subtype
  * specializations a warning may be output if the boolean system property
@@ -55,7 +61,7 @@
 public interface PrimitiveIterator<T> extends Iterator<T> {
 
     /**
-     * Specialization for {@code int} values.
+     * An Iterator specialized for {@code int} values.
      * @since 1.8
      */
     public static interface OfInt extends PrimitiveIterator<Integer> {
@@ -71,8 +77,8 @@
         /**
          * Performs the given action for each remaining element, in the order
          * elements occur when iterating, until all elements have been processed
-         * or the action throws an exception.  Exceptions thrown by the
-         * action are relayed to the caller.
+         * or the action throws an exception.  Errors or runtime exceptions
+         * thrown by the action are relayed to the caller.
          *
          * @implSpec
          * <p>The default implementation behaves as if:
@@ -126,7 +132,7 @@
     }
 
     /**
-     * Specialization for {@code long} values.
+     * An Iterator specialized for {@code long} values.
      * @since 1.8
      */
     public static interface OfLong extends PrimitiveIterator<Long> {
@@ -142,8 +148,8 @@
         /**
          * Performs the given action for each remaining element, in the order
          * elements occur when iterating, until all elements have been processed
-         * or the action throws an exception.  Exceptions thrown by the
-         * action are relayed to the caller.
+         * or the action throws an exception.  Errors or runtime exceptions
+         * thrown by the action are relayed to the caller.
          *
          * @implSpec
          * <p>The default implementation behaves as if:
@@ -196,7 +202,7 @@
     }
 
     /**
-     * Specialization for {@code double} values.
+     * An Iterator specialized for {@code double} values.
      * @since 1.8
      */
     public static interface OfDouble extends PrimitiveIterator<Double> {
@@ -212,8 +218,8 @@
         /**
          * Performs the given action for each remaining element, in the order
          * elements occur when iterating, until all elements have been processed
-         * or the action throws an exception.  Exceptions thrown by the
-         * action are relayed to the caller.
+         * or the action throws an exception.  Errors or runtime exceptions
+         * thrown by the action are relayed to the caller.
          *
          * @implSpec
          * <p>The default implementation behaves as if:
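
The paragraph added above states that boxing-based traversal visits the same values in the same encounter order as the primitive-specialized methods. A small sketch contrasting the two call paths on PrimitiveIterator.OfInt; the casts are only there to pick the intended forEachRemaining overload:

    import java.util.PrimitiveIterator;
    import java.util.function.Consumer;
    import java.util.function.IntConsumer;
    import java.util.stream.IntStream;

    public class PrimitiveIteratorSketch {
        public static void main(String[] args) {
            // Unboxed traversal via the int specialization.
            PrimitiveIterator.OfInt it = IntStream.range(0, 5).iterator();
            it.forEachRemaining((IntConsumer) v -> System.out.print(v + " "));
            System.out.println();

            // Boxing-based traversal through the inherited Iterator<Integer> method:
            // same values in the same encounter order, just boxed on the way out.
            PrimitiveIterator.OfInt boxed = IntStream.range(0, 5).iterator();
            boxed.forEachRemaining((Consumer<Integer>) v -> System.out.print(v + " "));
            System.out.println();
        }
    }
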
--- a/src/share/classes/java/util/Spliterator.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/Spliterator.java	Mon Apr 08 16:19:46 2013 -0700
@@ -35,7 +35,7 @@
  * {@link Collection}, an IO channel, or a generator function.
  *
  * <p>A Spliterator may traverse elements individually ({@link
- * #tryAdvance} tryAdvance()) or sequentially in bulk
+ * #tryAdvance tryAdvance()}) or sequentially in bulk
  * ({@link #forEachRemaining forEachRemaining()}).
  *
  * <p>A Spliterator may partition off some of its elements (using
@@ -105,6 +105,11 @@
  * should be used in preference to
  * {@link Spliterator.OfInt#tryAdvance(java.util.function.Consumer)} and
  * {@link Spliterator.OfInt#forEachRemaining(java.util.function.Consumer)}.
+ * Traversal of primitive values using boxing-based methods
+ * {@link #tryAdvance tryAdvance()} and
+ * {@link #forEachRemaining(java.util.function.Consumer) forEachRemaining()}
+ * does not affect the order in which the values, transformed to boxed values,
+ * are encountered.
  *
  * @apiNote
  * <p>Spliterators are for traversing the elements of a source.  For
@@ -449,6 +454,11 @@
      * elements are {@link Comparable} and use {@code compareTo} for
      * natural ordering.
      *
+     * <p>A Spliterator that reports {@code SORTED} should also report
+     * {@code ORDERED}.  Otherwise, such a Spliterator is inconsistent
+     * and no guarantees can be made about any computation using that
+     * Spliterator.
+     *
      * @apiNote
      * A Spliterator based on a {@link NavigableSet} or {@link SortedSet}
      * reports {@code SORTED}.
@@ -495,9 +505,13 @@
      * documented policy concerning the impact of modifications during
      * traversal.
      *
-     * <p>The Spliterator must not report {@code SIZED}, since the finite size,
-     * if known, may change if the source is concurrently modified during
-     * traversal.
+     * <p>A top-level Spliterator should not report {@code CONCURRENT} and
+     * {@code SIZED}, since the finite size, if known, may change if the
+     * source is concurrently modified during traversal.  Such a Spliterator
+     * is inconsistent and no guarantees can be made about any computation
+     * using that Spliterator.  Sub-spliterators may report {@code SIZED} if
+     * the sub-split size is known and additions or removals to the source
+     * are not reflected when traversing.
      *
      * @apiNote
      * Most concurrent collections maintain a consistency policy guaranteeing
@@ -512,6 +526,10 @@
      * resulting from {@code trySplit()}, as well as any that they in
      * turn split, are {@link #SIZED}.
      *
+     * <p>A Spliterator that does not report {@code SIZED} as required by
+     * {@code SUBSIZED} is inconsistent and no guarantees can be made about any
+     * computation using that Spliterator.
+     *
      * @apiNote
      * This characteristic is not reported when an initial Spliterator's
      * {@code estimateSize()} reports an exact size, but sub-spliterators
@@ -520,7 +538,7 @@
     public static final int SUBSIZED = 0x00004000;
 
     /**
-     * Specialization for {@code int} elements.
+     * A Spliterator specialized for {@code int} values.
      * @since 1.8
      */
     public interface OfInt extends Spliterator<Integer> {
@@ -607,7 +625,7 @@
     }
 
     /**
-     * Specialization for {@code long} elements.
+     * A Spliterator specialized for {@code long} values.
      * @since 1.8
      */
     public interface OfLong extends Spliterator<Long> {
@@ -694,7 +712,7 @@
     }
 
     /**
-     * Specialization for {@code double} elements.
+     * A Spliterator specialized for {@code double} values.
      * @since 1.8
      */
     public interface OfDouble extends Spliterator<Double> {
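
The clarified Spliterator spec above says that SORTED should be accompanied by ORDERED. A tiny consistency check one could write against the public API; the helper method name is invented for illustration:

    import java.util.Spliterator;
    import java.util.TreeSet;

    public class SpliteratorConsistencySketch {
        // A small check in the spirit of the clarified spec above:
        // SORTED should be accompanied by ORDERED.
        static void checkSortedImpliesOrdered(Spliterator<?> s) {
            if (s.hasCharacteristics(Spliterator.SORTED)
                    && !s.hasCharacteristics(Spliterator.ORDERED)) {
                System.out.println("inconsistent: SORTED without ORDERED");
            } else {
                System.out.println("consistent");
            }
        }

        public static void main(String[] args) {
            TreeSet<Integer> set = new TreeSet<>();
            set.add(3);
            set.add(1);
            checkSortedImpliesOrdered(set.spliterator());   // TreeSet reports both flags
        }
    }
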
--- a/src/share/classes/java/util/TimSort.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/TimSort.java	Mon Apr 08 16:19:46 2013 -0700
@@ -150,9 +150,10 @@
         int len = a.length;
         int tlen = (len < 2 * INITIAL_TMP_STORAGE_LENGTH) ?
             len >>> 1 : INITIAL_TMP_STORAGE_LENGTH;
-        if (work == null || workLen < tlen) {
+        if (work == null || workLen < tlen || workBase + tlen > work.length) {
             @SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"})
-            T[] newArray = (T[]) new Object[tlen];
+            T[] newArray = (T[])java.lang.reflect.Array.newInstance
+                (a.getClass().getComponentType(), tlen);
             tmp = newArray;
             tmpBase = 0;
             tmpLen = tlen;
@@ -181,32 +182,30 @@
     }
 
     /*
-     * The next three methods (which are package private and static) constitute
-     * the entire API of this class.  Each of these methods obeys the contract
-     * of the public method with the same signature in java.util.Arrays.
+     * The next method (package private and static) constitutes the
+     * entire API of this class. 
      */
 
-    static <T> void sort(T[] a, Comparator<? super T> c) {
-        sort(a, 0, a.length, c, null, 0, 0);
-    }
-
-    static <T> void sort(T[] a, int lo, int hi, Comparator<? super T> c) {
-        sort(a, lo, hi, c, null, 0, 0);
-    }
-
     /**
-     * sort, using the given workspace array slice for temp storage
-     * when possible.
+     * Sorts the given range, using the given workspace array slice
+     * for temp storage when possible. This method is designed to be
+     * invoked from public methods (in class Arrays) after performing
+     * any necessary array bounds checks and expanding parameters into
+     * the required forms.
+     *
+     * @param a the array to be sorted
+     * @param lo the index of the first element, inclusive, to be sorted
+     * @param hi the index of the last element, exclusive, to be sorted
+     * @param c the comparator to use
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      * @since 1.8
      */
     static <T> void sort(T[] a, int lo, int hi, Comparator<? super T> c,
                          T[] work, int workBase, int workLen) {
-        if (c == null) {
-            Arrays.sort(a, lo, hi);
-            return;
-        }
+        assert c != null && a != null && lo >= 0 && lo <= hi && hi <= a.length;
 
-        rangeCheck(a.length, lo, hi);
         int nRemaining  = hi - lo;
         if (nRemaining < 2)
             return;  // Arrays of size 0 and 1 are always sorted
@@ -926,32 +925,12 @@
                 newSize = Math.min(newSize, a.length >>> 1);
 
             @SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"})
-            T[] newArray = (T[]) new Object[newSize];
+            T[] newArray = (T[])java.lang.reflect.Array.newInstance
+                (a.getClass().getComponentType(), newSize);
             tmp = newArray;
             tmpLen = newSize;
             tmpBase = 0;
         }
         return tmp;
     }
-
-    /**
-     * Checks that fromIndex and toIndex are in range, and throws an
-     * appropriate exception if they aren't.
-     *
-     * @param arrayLen the length of the array
-     * @param fromIndex the index of the first element of the range
-     * @param toIndex the index after the last element of the range
-     * @throws IllegalArgumentException if fromIndex > toIndex
-     * @throws ArrayIndexOutOfBoundsException if fromIndex < 0
-     *         or toIndex > arrayLen
-     */
-    private static void rangeCheck(int arrayLen, int fromIndex, int toIndex) {
-        if (fromIndex > toIndex)
-            throw new IllegalArgumentException("fromIndex(" + fromIndex +
-                       ") > toIndex(" + toIndex+")");
-        if (fromIndex < 0)
-            throw new ArrayIndexOutOfBoundsException(fromIndex);
-        if (toIndex > arrayLen)
-            throw new ArrayIndexOutOfBoundsException(toIndex);
-    }
 }
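
The TimSort hunks above switch the temporary array from Object[] to an array with the same component type as the input, created reflectively. A minimal sketch of that allocation pattern; the class and helper names are invented and this is not the JDK code:

    import java.lang.reflect.Array;
    import java.util.Arrays;

    public class TypedTempArraySketch {
        // Allocate a temp array with the same component type as 'a', as the
        // TimSort change above does, so elements can be stored without an
        // ArrayStoreException when the array is used in place of the original.
        @SuppressWarnings("unchecked")
        static <T> T[] newTempArray(T[] a, int len) {
            return (T[]) Array.newInstance(a.getClass().getComponentType(), len);
        }

        public static void main(String[] args) {
            String[] src = {"b", "a", "c"};
            String[] tmp = newTempArray(src, src.length);
            System.out.println(tmp.getClass());              // class [Ljava.lang.String;
            System.out.println(Arrays.toString(tmp));        // [null, null, null]
        }
    }
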
--- a/src/share/classes/java/util/Tripwire.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/Tripwire.java	Mon Apr 08 16:19:46 2013 -0700
@@ -24,8 +24,10 @@
  */
 package java.util;
 
-import java.util.logging.Level;
-import java.util.logging.Logger;
+import sun.util.logging.PlatformLogger;
+
+import java.security.AccessController;
+import java.security.PrivilegedAction;
 
 /**
  * Utility class for detecting inadvertent uses of boxing in
@@ -47,20 +49,21 @@
     private static final String TRIPWIRE_PROPERTY = "org.openjdk.java.util.stream.tripwire";
 
     /** Should debugging checks be enabled? */
-    static final boolean ENABLED = true;
-//            = Boolean.getBoolean(TRIPWIRE_PROPERTY);
+    static final boolean ENABLED = AccessController.doPrivileged(
+            (PrivilegedAction<Boolean>) () -> Boolean.getBoolean(TRIPWIRE_PROPERTY));
 
     private Tripwire() { }
 
     /**
-     * Produces a log warning, using {@code Logger.getLogger(className)}, using
-     * the supplied message.  The class name of {@code trippingClass} will be
-     * used as the first parameter to the message.
+     * Produces a log warning, using {@code PlatformLogger.getLogger(className)},
+     * using the supplied message.  The class name of {@code trippingClass} will
+     * be used as the first parameter to the message.
      *
      * @param trippingClass Name of the class generating the message
-     * @param msg A message format string of the type expected by {@link Logger}
+     * @param msg A message format string of the type expected by
+     * {@link PlatformLogger}
      */
     static void trip(Class<?> trippingClass, String msg) {
-        Logger.getLogger(trippingClass.getName()).log(Level.WARNING, msg, trippingClass.getName());
+        PlatformLogger.getLogger(trippingClass.getName()).warning(msg, trippingClass.getName());
     }
 }
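
The Tripwire change above reads the enabling system property inside a privileged action instead of hard-coding true. A sketch of the same pattern in a standalone class; only the property name is taken from the hunk above, everything else is invented for illustration:

    import java.security.AccessController;
    import java.security.PrivilegedAction;

    public class TripwireFlagSketch {
        // Read a boolean system property inside a privileged action so the
        // check also works when a security manager is installed.
        private static final String PROP = "org.openjdk.java.util.stream.tripwire";

        static final boolean ENABLED = AccessController.doPrivileged(
                (PrivilegedAction<Boolean>) () -> Boolean.getBoolean(PROP));

        public static void main(String[] args) {
            // Run with -Dorg.openjdk.java.util.stream.tripwire=true to flip this.
            System.out.println("tripwire enabled: " + ENABLED);
        }
    }
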
--- a/src/share/classes/java/util/Vector.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/Vector.java	Mon Apr 08 16:19:46 2013 -0700
@@ -1315,7 +1315,6 @@
     @Override
     @SuppressWarnings("unchecked")
     public synchronized void sort(Comparator<? super E> c) {
-        Objects.requireNonNull(c);
         final int expectedModCount = modCount;
         Arrays.sort((E[]) elementData, 0, elementCount, c);
         if (modCount != expectedModCount) {
--- a/src/share/classes/java/util/concurrent/CopyOnWriteArrayList.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/concurrent/CopyOnWriteArrayList.java	Mon Apr 08 16:19:46 2013 -0700
@@ -1445,7 +1445,6 @@
 
     @Override
     public void sort(Comparator<? super E> c) {
-        Objects.requireNonNull(c);
         final ReentrantLock lock = this.lock;
         lock.lock();
         try {
--- a/src/share/classes/java/util/regex/Pattern.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/regex/Pattern.java	Mon Apr 08 16:19:46 2013 -0700
@@ -30,10 +30,18 @@
 import java.text.CharacterIterator;
 import java.text.Normalizer;
 import java.util.Locale;
+import java.util.Iterator;
 import java.util.Map;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Arrays;
+import java.util.NoSuchElementException;
+import java.util.Spliterator;
+import java.util.Spliterators;
+import java.util.function.Consumer;
+import java.util.function.Predicate;
+import java.util.stream.Stream;
+import java.util.stream.Streams;
 
 
 /**
@@ -5741,4 +5749,93 @@
                     return Character.isMirrored(ch);}});
         }
     }
+
+    /**
+     * Creates a predicate which can be used to match a string.
+     *
+     * @return  The predicate which can be used for matching on a string
+     * @since       1.8
+     *
+     */
+    public Predicate<String> asPredicate() {
+        return s -> this.matcher(s).find();
+    }
+
+    private static class MatcherIterator implements Iterator<String> {
+        private final Matcher curMatcher;
+        private final CharSequence input;
+        private int current = 0;
+        private String nextElement = null;
+        private boolean valueReady = false;
+
+        MatcherIterator(CharSequence in, Matcher m) {
+            input = in;
+            curMatcher = m;
+        }
+
+        public void accept(String t) {
+            valueReady = true;
+            nextElement = t;
+        }
+
+        public String next() {
+            if (!valueReady && !hasNext())
+                throw new NoSuchElementException();
+            else {
+                valueReady = false;
+                return nextElement;
+            }
+        }
+
+        public boolean hasNext() {
+            if (!valueReady) {
+                if (current == input.length()) return false;
+
+                if (curMatcher.find()) {
+                    nextElement = input.subSequence(current, curMatcher.start()).toString();
+                    current = curMatcher.end();
+                    valueReady = true;
+                } else {
+                    nextElement = input.subSequence(current, input.length()).toString();
+                    current = input.length();
+                    valueReady = true;
+                }
+            }
+            return true;
+        }
+    }
+
+
+    /**
+     * Creates a stream from the given input sequence around matches of this 
+     * pattern.
+     *
+     * <p>The stream returned by this method contains each substring of the
+     * input sequence that is terminated by another subsequence that matches
+     * this pattern or is terminated by the end of the input sequence.  The
+     * substrings in the stream are in the order in which they occur in the
+     * input.  
+     *
+     * <p>If this pattern does not match any subsequence of the input then
+     * the resulting stream has just one element, namely the input sequence in
+     * string form.
+     *
+     * <p>If the input sequence is mutable, it must remain constant during the
+     * execution of the terminal stream operation.  Otherwise, the result of the
+     * terminal stream operation is undefined.
+     *
+     * @see     #split(CharSequence)
+     *
+     * @param   input
+     *          The character sequence to be split
+     *
+     * @return  The stream of strings computed by splitting the input
+     *          around matches of this pattern
+     * @since   1.8
+     * 
+     */
+    public Stream<String> splitAsStream(final CharSequence input) {
+        return Streams.stream(Spliterators.spliteratorUnknownSize(
+                new MatcherIterator(input, matcher(input)), Spliterator.ORDERED | Spliterator.NONNULL));
+    }
 }
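
The two Pattern methods added above, asPredicate() and splitAsStream(), are ordinary public API. A short usage sketch of both, assuming the behavior documented in the javadoc above:

    import java.util.List;
    import java.util.regex.Pattern;
    import java.util.stream.Collectors;

    public class PatternStreamSketch {
        public static void main(String[] args) {
            Pattern comma = Pattern.compile(",");

            // splitAsStream: the substrings of the input between matches, in order.
            List<String> parts = comma.splitAsStream("a,b,c")
                                      .collect(Collectors.toList());
            System.out.println(parts);                       // [a, b, c]

            // asPredicate: true if the pattern is found anywhere in the string.
            Pattern digits = Pattern.compile("\\d+");
            System.out.println(digits.asPredicate().test("abc123"));   // true
            System.out.println(digits.asPredicate().test("abc"));      // false
        }
    }
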
--- a/src/share/classes/java/util/stream/AbstractPipeline.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/AbstractPipeline.java	Mon Apr 08 16:19:46 2013 -0700
@@ -112,29 +112,31 @@
      */
     private Supplier<? extends Spliterator<?>> sourceSupplier;
 
-    /** True if this pipeline has been consumed */
+    /** True if this pipeline has been linked or consumed */
     private boolean linkedOrConsumed;
 
     /** True if there are any stateful ops in the pipeline; only valid for the source stage */
     private boolean sourceAnyStateful;
 
-    /** True if there have been any calls to .sequential() or .parallel(); only valid for the source stage */
-    private boolean sourceAnyParChange;
+    /** True if pipeline is parallel, otherwise the pipeline is sequential; only valid for the source stage */
+    private boolean parallel;
 
     /**
      * Constructor for the head of a stream pipeline.
      *
      * @param source {@code Supplier<Spliterator>} describing the stream source
      * @param sourceFlags The source flags for the stream source, described in {@link StreamOpFlag}
+     * @param parallel True if the pipeline is parallel
      */
     AbstractPipeline(Supplier<? extends Spliterator<?>> source,
-                     int sourceFlags) {
+                     int sourceFlags, boolean parallel) {
         this.previousStage = null;
         this.sourceSupplier = source;
         this.sourceStage = this;
-        this.sourceOrOpFlags = StreamOpFlag.combineOpFlags(sourceFlags, StreamOpFlag.INITIAL_OPS_VALUE);
-        this.combinedFlags = sourceOrOpFlags;
+        this.sourceOrOpFlags = sourceFlags & StreamOpFlag.STREAM_MASK;
+        this.combinedFlags = StreamOpFlag.combineOpFlags(sourceOrOpFlags, StreamOpFlag.INITIAL_OPS_VALUE);
         this.depth = 0;
+        this.parallel = parallel;
     }
 
     /**
@@ -142,15 +144,17 @@
      *
      * @param source {@code Spliterator} describing the stream source
      * @param sourceFlags The source flags for the stream source, described in {@link StreamOpFlag}
+     * @param parallel True if the pipeline is parallel
      */
     AbstractPipeline(Spliterator<?> source,
-                     int sourceFlags) {
+                     int sourceFlags, boolean parallel) {
         this.previousStage = null;
-        this.sourceOrOpFlags = StreamOpFlag.combineOpFlags(sourceFlags, StreamOpFlag.INITIAL_OPS_VALUE);
         this.sourceSpliterator = source;
         this.sourceStage = this;
-        this.combinedFlags = sourceOrOpFlags;
+        this.sourceOrOpFlags = sourceFlags & StreamOpFlag.STREAM_MASK;
+        this.combinedFlags = StreamOpFlag.combineOpFlags(sourceOrOpFlags, StreamOpFlag.INITIAL_OPS_VALUE);
         this.depth = 0;
+        this.parallel = parallel;
     }
 
     /**
@@ -175,59 +179,6 @@
         this.depth = previousStage.depth + 1;
     }
 
-    /**
-     * Prepares the pipeline for evaluation.
-     * @param terminalFlags The terminal operation flags, described in {@link StreamOpFlag}
-     */
-    private void prepare(int terminalFlags) {
-        if (isParallel()) {
-            AbstractPipeline backPropagationHead = sourceStage;
-            if (sourceStage.sourceAnyStateful) {
-                int depth = 1;
-                for (AbstractPipeline u = sourceStage, p = sourceStage.nextStage;
-                     p != null;
-                     u = p, p = p.nextStage) {
-                    int thisOpFlags = p.sourceOrOpFlags;
-                    if (p.opIsStateful()) {
-                        // If the stateful operation is a short-circuit operation
-                        // then move the back propagation head forwards
-                        // NOTE: there are no size-injecting ops
-                        if (StreamOpFlag.SHORT_CIRCUIT.isKnown(thisOpFlags)) {
-                            backPropagationHead = p;
-                        }
-
-                        depth = 0;
-                        // The following injects size, it is equivalent to:
-                        // StreamOpFlag.combineOpFlags(StreamOpFlag.IS_SIZED, p.combinedFlags);
-                        thisOpFlags = (thisOpFlags & ~StreamOpFlag.NOT_SIZED) | StreamOpFlag.IS_SIZED;
-                    }
-                    p.depth = depth++;
-                    p.combinedFlags = StreamOpFlag.combineOpFlags(thisOpFlags, u.combinedFlags);
-                }
-            }
-
-            // Apply the upstream terminal flags
-            if (terminalFlags != 0) {
-                int upstreamTerminalFlags = terminalFlags & StreamOpFlag.UPSTREAM_TERMINAL_OP_MASK;
-                for (AbstractPipeline p = backPropagationHead; p != null; p = p.nextStage) {
-                    p.combinedFlags = StreamOpFlag.combineOpFlags(upstreamTerminalFlags, p.combinedFlags);
-                }
-            }
-        }
-        else {
-            if (sourceStage.sourceAnyParChange) {
-                for (AbstractPipeline u = sourceStage, p = sourceStage.nextStage;
-                     p != null;
-                     u = p, p = p.nextStage) {
-                    p.combinedFlags = StreamOpFlag.combineOpFlags(p.sourceOrOpFlags, u.combinedFlags);
-                }
-            }
-        }
-        // Update last stage to incorporate terminal flags
-        if (terminalFlags != 0)
-            combinedFlags = StreamOpFlag.combineOpFlags(terminalFlags, combinedFlags);
-    }
-
 
     // Terminal evaluation methods
 
@@ -244,10 +195,9 @@
             throw new IllegalStateException("stream has already been operated upon");
         linkedOrConsumed = true;
 
-        prepare(terminalOp.getOpFlags());
         return isParallel()
-               ? (R) terminalOp.evaluateParallel(this, sourceSpliterator())
-               : (R) terminalOp.evaluateSequential(this, sourceSpliterator());
+               ? (R) terminalOp.evaluateParallel(this, sourceSpliterator(terminalOp.getOpFlags()))
+               : (R) terminalOp.evaluateSequential(this, sourceSpliterator(terminalOp.getOpFlags()));
     }
 
     /**
@@ -261,35 +211,28 @@
             throw new IllegalStateException("stream has already been operated upon");
         linkedOrConsumed = true;
 
-        prepare(0);
         // If the last intermediate operation is stateful then
         // evaluate directly to avoid an extra collection step
         if (isParallel() && previousStage != null && opIsStateful()) {
-            return opEvaluateParallel(previousStage, previousStage.sourceSpliterator(), generator)
-                    .flatten(generator);
+            return opEvaluateParallel(previousStage, previousStage.sourceSpliterator(0), generator);
         }
         else {
-            return evaluate(sourceSpliterator(), true, generator);
+            return evaluate(sourceSpliterator(0), true, generator);
         }
     }
 
+
+    // BaseStream
+
     /** Implements {@link BaseStream#sequential()} */
     public final S sequential() {
-        if (StreamOpFlag.PARALLEL.isKnown(sourceStage.combinedFlags)) {
-            sourceStage.sourceAnyParChange = true;
-            sourceStage.combinedFlags = StreamOpFlag.combineOpFlags(StreamOpFlag.NOT_PARALLEL,
-                                                                    sourceStage.combinedFlags);
-        }
+        sourceStage.parallel = false;
         return (S) this;
     }
 
     /** Implements {@link BaseStream#parallel()} */
     public final S parallel() {
-        if (!StreamOpFlag.PARALLEL.isKnown(sourceStage.combinedFlags)) {
-            sourceStage.sourceAnyParChange = true;
-            sourceStage.combinedFlags = StreamOpFlag.combineOpFlags(StreamOpFlag.IS_PARALLEL,
-                                                                    sourceStage.combinedFlags);
-        }
+        sourceStage.parallel = true;
         return (S) this;
     }
 
@@ -300,7 +243,6 @@
             throw new IllegalStateException("stream has already been operated upon");
         linkedOrConsumed = true;
 
-        prepare(0);
         if (this == sourceStage) {
             if (sourceStage.sourceSpliterator != null) {
                 Spliterator<E_OUT> s = sourceStage.sourceSpliterator;
@@ -317,15 +259,18 @@
             }
         }
         else {
-            return wrap(this, () -> sourceSpliterator(), isParallel());
+            return wrap(this, () -> sourceSpliterator(0), isParallel());
         }
     }
 
     /** Implements {@link BaseStream#isParallel()} */
     public final boolean isParallel() {
-        return StreamOpFlag.PARALLEL.isKnown(sourceStage.combinedFlags);
+        return sourceStage.parallel;
     }
 
+
+    //
+
     /**
      * Returns the composition of stream flags of the stream source and all
      * intermediate operations.
@@ -337,17 +282,53 @@
     final int getStreamFlags() {
         // @@@ Currently only used by tests, review and see if functionality
         //     can be replaced by spliterator().characteristics()
-        prepare(0);
         return StreamOpFlag.toStreamFlags(combinedFlags);
     }
 
+    private void parallelPrepare(int terminalFlags) {
+        AbstractPipeline backPropagationHead = sourceStage;
+        if (sourceStage.sourceAnyStateful) {
+            int depth = 1;
+            for (AbstractPipeline u = sourceStage, p = sourceStage.nextStage;
+                 p != null;
+                 u = p, p = p.nextStage) {
+                int thisOpFlags = p.sourceOrOpFlags;
+                if (p.opIsStateful()) {
+                    // If the stateful operation is a short-circuit operation
+                    // then move the back propagation head forwards
+                    // NOTE: there are no size-injecting ops
+                    if (StreamOpFlag.SHORT_CIRCUIT.isKnown(thisOpFlags)) {
+                        backPropagationHead = p;
+                    }
+
+                    depth = 0;
+                    // The following injects size, it is equivalent to:
+                    // StreamOpFlag.combineOpFlags(StreamOpFlag.IS_SIZED, p.combinedFlags);
+                    thisOpFlags = (thisOpFlags & ~StreamOpFlag.NOT_SIZED) | StreamOpFlag.IS_SIZED;
+                }
+                p.depth = depth++;
+                p.combinedFlags = StreamOpFlag.combineOpFlags(thisOpFlags, u.combinedFlags);
+            }
+        }
+
+        // Apply the upstream terminal flags
+        if (terminalFlags != 0) {
+            int upstreamTerminalFlags = terminalFlags & StreamOpFlag.UPSTREAM_TERMINAL_OP_MASK;
+            for (AbstractPipeline p = backPropagationHead; p.nextStage != null; p = p.nextStage) {
+                p.combinedFlags = StreamOpFlag.combineOpFlags(upstreamTerminalFlags, p.combinedFlags);
+            }
+
+            combinedFlags = StreamOpFlag.combineOpFlags(terminalFlags, combinedFlags);
+        }
+    }
+
     /**
      * Get the source spliterator for this pipeline stage.  For a sequential or stateless
      * parallel pipeline, this is the source spliterator.  For a stateful parallel pipeline,
      * this is a spliterator describing the results of all computations up to and including
      * the most recent stateful operation.
      */
-    private Spliterator<?> sourceSpliterator() {
+    private Spliterator<?> sourceSpliterator(int terminalFlags) {
         // Get the source spliterator of the pipeline
         Spliterator<?> spliterator = null;
         if (sourceStage.sourceSpliterator != null) {
@@ -363,6 +344,11 @@
         }
 
         if (isParallel()) {
+            // @@@ Merge parallelPrepare with the loop below and use the
+            //     spliterator characteristics to determine if SIZED
+            //     should be injected
+            parallelPrepare(terminalFlags);
+
             // Adapt the source spliterator, evaluating each stateful op
             // in the pipeline up to and including this pipeline stage
             for (AbstractPipeline u = sourceStage, p = sourceStage.nextStage, e = this;
@@ -374,10 +360,14 @@
                 }
             }
         }
+        else if (terminalFlags != 0)  {
+            combinedFlags = StreamOpFlag.combineOpFlags(terminalFlags, combinedFlags);
+        }
 
         return spliterator;
     }
 
+
     // PipelineHelper
 
     @Override
@@ -421,6 +411,10 @@
         return combinedFlags;
     }
 
+    final boolean isOrdered() {
+        return StreamOpFlag.ORDERED.isKnown(combinedFlags);
+    }
+
     @Override
     final <P_IN> Sink<P_IN> wrapSink(Sink<E_OUT> sink) {
         Objects.requireNonNull(sink);
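
The AbstractPipeline rework above replaces flag-based parallel tracking with a single boolean on the source stage, consulted at terminal evaluation. A usage-level sketch against the public stream API showing the observable consequence: the last sequential()/parallel() call before the terminal operation wins:

    import java.util.stream.IntStream;

    public class ParallelFlagSketch {
        public static void main(String[] args) {
            // parallel()/sequential() just toggle a flag on the source stage, so the
            // last call before the terminal operation decides the execution mode for
            // the whole pipeline.
            boolean parallel = IntStream.range(0, 100)
                                        .parallel()
                                        .map(i -> i * 2)
                                        .sequential()       // overrides the earlier parallel()
                                        .isParallel();
            System.out.println("parallel? " + parallel);    // false
        }
    }
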
--- a/src/share/classes/java/util/stream/AbstractShortCircuitTask.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/AbstractShortCircuitTask.java	Mon Apr 08 16:19:46 2013 -0700
@@ -32,14 +32,15 @@
  * stream ops, which can produce a result without processing all elements of the
  * stream.
  *
- * @param <P_IN> Type of elements input to the pipeline
- * @param <P_OUT> Type of elements output from the pipeline
+ * @param <P_IN> Type of input elements to the pipeline
+ * @param <P_OUT> Type of output elements from the pipeline
  * @param <R> Type of intermediate result, may be different from operation
  *        result type
  * @param <T> Type of child and sibling tasks
  * @since 1.8
  */
-abstract class AbstractShortCircuitTask<P_IN, P_OUT, R, T extends AbstractShortCircuitTask<P_IN, P_OUT, R, T>>
+abstract class AbstractShortCircuitTask<P_IN, P_OUT, R,
+                                        T extends AbstractShortCircuitTask<P_IN, P_OUT, R, T>>
         extends AbstractTask<P_IN, P_OUT, R, T> {
     /**
      * The result for this computation; this is shared among all tasks and set
@@ -55,14 +56,25 @@
      */
     protected volatile boolean canceled;
 
-    /** Constructor for root nodes */
+    /**
+     * Constructor for root nodes.
+     * @param helper The {@code PipelineHelper} describing the stream pipeline
+     *               up to this operation
+     * @param spliterator The {@code Spliterator} describing the source for this
+     *                    pipeline
+     */
     protected AbstractShortCircuitTask(PipelineHelper<P_OUT> helper,
                                        Spliterator<P_IN> spliterator) {
         super(helper, spliterator);
         sharedResult = new AtomicReference<>(null);
     }
 
-    /** Constructor for non-root nodes */
+    /**
+     * Constructor for non-root nodes.
+     * @param parent Parent task in the computation tree
+     * @param spliterator The {@code Spliterator} for the portion of the
+     *                    computation tree described by this task
+     */
     protected AbstractShortCircuitTask(T parent,
                                        Spliterator<P_IN> spliterator) {
         super(parent, spliterator);
@@ -73,6 +85,8 @@
      * Returns the value indicating the computation completed with no task
      * finding a short-circuitable result.  For example, for a "find" operation,
      * this might be null or an empty {@code Optional}.
+     *
+     * @return the result to return when no task finds a result
      */
     protected abstract R getEmptyResult();
 
@@ -98,6 +112,7 @@
      * {@code sharedResult}.  The {@code compute()} method will check
      * {@code sharedResult} before proceeding with computation, so this causes
      * the computation to terminate early.
+     * @param result The result found
      */
     protected void shortCircuit(R result) {
         if (result != null)
@@ -107,6 +122,7 @@
     /**
      * Sets a local result for this task.  If this task is the root, set the
      * shared result instead (if not already set).
+     * @param localResult The result to set for this task
      */
     @Override
     protected void setLocalResult(R localResult) {
@@ -138,7 +154,7 @@
             return super.getLocalResult();
     }
 
-    /** Set this node as canceled */
+    /** Mark this node as canceled */
     protected void cancel() {
         canceled = true;
     }
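
The AbstractShortCircuitTask javadoc above describes a shared result that the first successful task publishes, after which other tasks stop. A loose sketch of that "first result wins" idea using AtomicReference.compareAndSet; this is not the JDK class, just the publication pattern:

    import java.util.Optional;
    import java.util.concurrent.atomic.AtomicReference;

    public class ShortCircuitSketch {
        public static void main(String[] args) throws InterruptedException {
            // Shared result slot, in the spirit of sharedResult above: the first
            // task to find an answer publishes it; later finds are ignored.
            AtomicReference<Integer> shared = new AtomicReference<>(null);

            Runnable finder = () -> {
                int candidate = (int) (Thread.currentThread().getId() % 100);
                shared.compareAndSet(null, candidate);       // only the first wins
            };

            Thread t1 = new Thread(finder);
            Thread t2 = new Thread(finder);
            t1.start(); t2.start();
            t1.join();  t2.join();

            // getEmptyResult() analogue: fall back to an "empty" value if nothing was found.
            Integer result = Optional.ofNullable(shared.get()).orElse(-1);
            System.out.println("result = " + result);
        }
    }
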
--- a/src/share/classes/java/util/stream/AbstractTask.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/AbstractTask.java	Mon Apr 08 16:19:46 2013 -0700
@@ -37,6 +37,7 @@
  * the {@code Spliterator}) or internal nodes (which split the
  * {@code Spliterator} into multiple child tasks).
  *
+ * @implNote
  * <p>This class is based on {@link CountedCompleter}, a form of fork-join task
  * where each task has a semaphore-like count of uncompleted children, and the
  * task is implicitly completed and notified when its last child completes.
@@ -54,7 +55,7 @@
  * {@code Spliterator}, and override {@code onCompletion()} to merge the results
  * of the child tasks for internal nodes:
  *
- * <pre>
+ * <pre>{@code
  *     protected S doLeaf() {
  *         spliterator.forEach(...);
  *         return localReductionResult;
@@ -70,7 +71,7 @@
  *             setLocalResult(result);
  *         }
  *     }
- * </pre>
+ * }</pre>
  *
  * @param <P_IN> Type of elements input to the pipeline
  * @param <P_OUT> Type of elements output from the pipeline
@@ -79,11 +80,12 @@
  * @param <T> Type of parent, child and sibling tasks
  * @since 1.8
  */
-abstract class AbstractTask<P_IN, P_OUT, R, T extends AbstractTask<P_IN, P_OUT, R, T>>
+abstract class AbstractTask<P_IN, P_OUT, R,
+                            T extends AbstractTask<P_IN, P_OUT, R, T>>
         extends CountedCompleter<R> {
 
     /**
-     * Default target number of leaf tasks for parallel decomposition.
+     * Default target factor of leaf tasks for parallel decomposition.
      * To allow load balancing, we over-partition, currently to approximately
      * four tasks per processor, which enables others to help out
      * if leaf tasks are uneven or some processors are otherwise busy.
@@ -99,7 +101,7 @@
      */
     protected Spliterator<P_IN> spliterator;
 
-    /** Target leaf size */
+    /** Target leaf size, common to all tasks in a computation */
     protected final long targetSize;
 
     /**
@@ -116,6 +118,10 @@
 
     /**
      * Constructor for root nodes.
+     * @param helper The {@code PipelineHelper} describing the stream pipeline
+     *               up to this operation
+     * @param spliterator The {@code Spliterator} describing the source for this
+     *                    pipeline
      */
     protected AbstractTask(PipelineHelper<P_OUT> helper,
                            Spliterator<P_IN> spliterator) {
@@ -126,19 +132,6 @@
     }
 
     /**
-     * Alternate constructor for root nodes that have already gotten the
-     * Spliterator from the helper.
-     */
-    protected AbstractTask(PipelineHelper<P_OUT> helper,
-                           Spliterator<P_IN> spliterator,
-                           long targetSize) {
-        super(null);
-        this.helper = helper;
-        this.spliterator = spliterator;
-        this.targetSize = targetSize;
-    }
-
-    /**
      * Constructor for non-root nodes
      *
      * @param parent This node's parent task
@@ -176,12 +169,11 @@
      * Suggests whether it is advisable to split the provided spliterator based
      * on target size and other considerations, such as pool state
      */
-    public static<P_IN, P_OUT> boolean suggestSplit(PipelineHelper<P_OUT> helper,
-                                                    Spliterator spliterator,
-                                                    long targetSize) {
+    public static boolean suggestSplit(Spliterator spliterator,
+                                       long targetSize) {
         long remaining = spliterator.estimateSize();
         return (remaining > targetSize);
-        // @@@ May want to fold in pool characteristics such as surplus task count
+        // @@@ May additionally want to fold in pool characteristics such as surplus task count
     }
 
     /**
@@ -189,11 +181,11 @@
      * and other considerations
      */
     public boolean suggestSplit() {
-        return suggestSplit(helper, spliterator, targetSize);
+        return suggestSplit(spliterator, targetSize);
     }
 
     /**
-     * Returns the local result, if any.  Subclasses should use
+     * Returns the local result, if any. Subclasses should use
      * {@link #setLocalResult(Object)} and {@link #getLocalResult()} to manage
      * results.  This returns the local result so that calls from within the
      * fork-join framework will return the correct result.
@@ -235,7 +227,7 @@
     }
 
     /**
-     * Determines if this this task a leaf node.  (Only valid after
+     * Indicates whether this task is a leaf node.  (Only valid after
      * {@link #compute} has been called on this node).  If the node is not a
      * leaf node, then children will be non-null and numChildren will be
      * positive.
@@ -245,7 +237,7 @@
     }
 
     /**
-     * Determines if this task is a root node
+     * Indicates whether this task is the root node
      */
     protected boolean isRoot() {
         return getParent() == null;
@@ -279,6 +271,7 @@
      */
     @Override
     public final void compute() {
+        ForkJoinPool cp = ForkJoinPool.commonPool();
         @SuppressWarnings("unchecked")
         T task = (T) this;
         while (task.canCompute()) {
@@ -294,7 +287,7 @@
                 task.children = leftChild;
                 leftChild.nextSibling = rightChild;
                 task.setPendingCount(1);
-                leftChild.fork();
+                cp.execute(leftChild);
                 task = rightChild;
             }
         }
@@ -302,6 +295,7 @@
 
     /**
      * {@inheritDoc}
+     * @implNote
      * Clears spliterator and children fields.  Overriders MUST call
      * {@code super.onCompletion} as the last thing they do if they want these
      * cleared
@@ -314,6 +308,7 @@
 
     /**
      * Determines if the task can be computed.
+     * @implSpec The default implementation always returns true
      *
      * @return true if this task can be computed to either calculate the leaf
      *         via {@link #doLeaf()} or split, otherwise false if this task
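
The split heuristic above (split while the spliterator still covers more elements than the per-leaf target) can be sketched against public APIs as follows; the "four leaves per core" target mirrors the over-partitioning note, though the framework's exact computation may differ.

    import java.util.Spliterator;
    import java.util.stream.IntStream;

    public class SplitHeuristicSketch {
        public static void main(String[] args) {
            Spliterator<Integer> s = IntStream.range(0, 1_000_000).boxed().spliterator();
            long leafTarget = 4L * Runtime.getRuntime().availableProcessors();
            long targetSize = Math.max(s.estimateSize() / leafTarget, 1);
            boolean shouldSplit = s.estimateSize() > targetSize;   // the suggestSplit() test
            System.out.println("estimate=" + s.estimateSize()
                    + ", targetSize=" + targetSize + ", split=" + shouldSplit);
        }
    }
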
--- a/src/share/classes/java/util/stream/BaseStream.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/BaseStream.java	Mon Apr 08 16:19:46 2013 -0700
@@ -30,8 +30,8 @@
 /**
  * Base interface for stream types such as {@link Stream}, {@link IntStream},
  * etc.  Contains methods common to all stream types.  Many of these methods
- * are implemented by {@link AbstractPipeline}, even though {@code AbstractPipeline}
- * does not directly implement {@code BaseStream}.
+ * are implemented by {@link AbstractPipeline}, even though
+ * {@code AbstractPipeline} does not directly implement {@code BaseStream}.
  *
  * @param <T> Type of stream elements.
  * @param <S> Type of stream implementing {@code BaseStream}.
@@ -41,7 +41,8 @@
     /**
      * Returns an iterator for the elements of this stream.
      *
-     * <p>This is a <a href="package-summary.html#StreamOps">terminal operation</a>.
+     * <p>This is a <a href="package-summary.html#StreamOps">terminal
+     * operation</a>.
      *
      * @return the element iterator for this stream
      */
@@ -50,40 +51,57 @@
     /**
      * Returns a spliterator for the elements of this stream.
      *
-     * <p>This is a <a href="package-summary.html#StreamOps">terminal operation</a>.
+     * <p>This is a <a href="package-summary.html#StreamOps">terminal
+     * operation</a>.
      *
      * @return the element spliterator for this stream
      */
     Spliterator<T> spliterator();
 
     /**
-     * Returns whether this stream, when executed, would execute in parallel (assuming
-     * no further modification of the stream, such as appending further intermediate
-     * operations or changing its parallelism).  Calling this method after invoking
-     * an intermediate or terminal stream operation method may yield unpredictable results.
+     * Returns whether this stream, when executed, would execute in parallel
+     * (assuming no further modification of the stream, such as appending
+     * further intermediate operations or changing its parallelism).  Calling
+     * this method after invoking an intermediate or terminal stream operation
+     * method may yield unpredictable results.
      *
-     * @return whether this stream would execute in parallel if executed without further
-     * modification
+     * @return whether this stream would execute in parallel if executed without
+     * further modification
      */
     boolean isParallel();
 
     /**
-     * Produces an equivalent stream that is sequential.
-     * If this stream is already sequential, may return itself.
+     * Produces an equivalent stream that is sequential.  May return
+     * itself, either because the stream was already sequential, or because
+     * the underlying stream state was modified to be sequential.
      *
-     * <p>This is a <a href="package-summary.html#StreamOps">stateful intermediate operation</a>.
+     * <p>This is an <a href="package-summary.html#StreamOps">intermediate
+     * operation</a>.
      *
      * @return a sequential stream
      */
     S sequential();
 
     /**
-     * Produces an equivalent stream that is parallel.
-     * If this stream is already parallel, may return itself.
+     * Produces an equivalent stream that is parallel.  May return
+     * itself, either because the stream was already parallel, or because
+     * the underlying stream state was modified to be parallel.
      *
-     * <p>This is a <a href="package-summary.html#StreamOps">stateful intermediate operation</a>.
+     * <p>This is an <a href="package-summary.html#StreamOps">intermediate
+     * operation</a>.
      *
      * @return a parallel stream
      */
     S parallel();
+
+    /**
+     * Produces an equivalent stream that is
+     * <a href="package-summary.html#Ordering">unordered</a>.  May return
+     * itself if the stream was already unordered.
+     *
+     * <p>This is an <a href="package-summary.html#StreamOps">intermediate
+     * operation</a>.
+     *
+     * @return an unordered stream
+     */
+    S unordered();
 }
--- a/src/share/classes/java/util/stream/Collector.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/Collector.java	Mon Apr 08 16:19:46 2013 -0700
@@ -31,83 +31,132 @@
 import java.util.function.Supplier;
 
 /**
- * A <a href="package-summary.html#Reduction">reduction operation</a> that supports folding
- * input elements into a cumulative result.  The result may be a value or may be a mutable
- * result container.  Examples of operations accumulating results into a mutable result
- * container include: accumulating input elements into a {@code Collection}; concatenating
- * strings into a {@code StringBuilder}; computing summary information about elements such as
- * sum, min, max, or average; computing "pivot table" summaries such as "maximum valued
- * transaction by seller", etc.  Reduction operations can be performed either sequentially or
- * in parallel.
+ * A <a href="package-summary.html#Reduction">reduction operation</a> that
+ * supports folding input elements into a cumulative result.  The result may be
+ * a value or may be a mutable result container.  Examples of operations
+ * accumulating results into a mutable result container include: accumulating
+ * input elements into a {@code Collection}; concatenating strings into a
+ * {@code StringBuilder}; computing summary information about elements such as
+ * sum, min, max, or average; computing "pivot table" summaries such as "maximum
+ * valued transaction by seller", etc.  Reduction operations can be performed
+ * either sequentially or in parallel.
  *
- * <p>A {@code Collector} is specified by three functions that work together to manage
- * a result or result container.  They are: creation of an initial result, incorporating
- * a new data element into a result, and combining two result into one. The last function
- * -- combining two results into one -- is used during parallel operations, where subsets
- * of the input are collected in parallel, and then the subresults merged into a combined
- * result. The result may be a mutable container or a value.  If the result is mutable,
- * the accumulation and combination functions may either mutate their left argument and return
- * that (such as adding elements to a collection), or return a new result, in which case it
- * should not perform any mutation.
+ * <p>The following are examples of using the predefined {@code Collector}
+ * implementations in {@link Collectors} with the {@code Stream} API to perform
+ * mutable reduction tasks:
+ * <pre>{@code
+ *     // Accumulate elements into a List
+ *     List list = stream.collect(Collectors.toList());
  *
- * <p>Collectors also have a set of characteristics, including {@link Characteristics#CONCURRENT}
- * and {@link Characteristics#STRICTLY_MUTATIVE}.  These characteristics provide hints that
- * can be used by a reduction implementation to provide better performance.
+ *     // Accumulate elements into a TreeSet
+ *     Set set = stream.collect(Collectors.toCollection(TreeSet::new));
  *
- * <p>Libraries that implement reduction based on {@code Collector}, such as the
- * {@link Stream#collect(Collector)} must adhere to the following constraints:
+ *     // Convert elements to strings and concatenate them, separated by commas
+ *     String joined = stream.map(Object::toString)
+ *                           .collect(Collectors.toStringJoiner(", "))
+ *                           .toString();
+ *
+ *     // Find highest-paid employee
+ *     Employee highestPaid = employees.stream()
+ *                                     .collect(Collectors.maxBy(Comparators.comparing(Employee::getSalary)));
+ *
+ *     // Group employees by department
+ *     Map<Department, List<Employee>> byDept
+ *         = employees.stream()
+ *                    .collect(Collectors.groupingBy(Employee::getDepartment));
+ *
+ *     // Find highest-paid employee by department
+ *     Map<Department, Employee> highestPaidByDept
+ *         = employees.stream()
+ *                    .collect(Collectors.groupingBy(Employee::getDepartment,
+ *                                                   Collectors.maxBy(Comparators.comparing(Employee::getSalary))));
+ *
+ *     // Partition students into passing and failing
+ *     Map<Boolean, List<Student>> passingFailing =
+ *         students.stream()
+ *                 .collect(Collectors.partitioningBy(s -> s.getGrade() >= PASS_THRESHOLD));
+ *
+ * }</pre>
+ *
+ * <p>A {@code Collector} is specified by three functions that work together to
+ * manage a result or result container.  They are: creation of an initial
+ * result, incorporating a new data element into a result, and combining two
+ * results into one. The last function -- combining two results into one -- is
+ * used during parallel operations, where subsets of the input are accumulated
+ * in parallel, and then the subresults merged into a combined result. The
+ * result may be a mutable container or a value.  If the result is mutable, the
+ * accumulation and combination functions may either mutate their left argument
+ * and return that (such as adding elements to a collection), or return a new
+ * result, in which case they should not perform any mutation.
+ *
+ * <p>Collectors also have a set of characteristics, including
+ * {@link Characteristics#CONCURRENT} and
+ * {@link Characteristics#STRICTLY_MUTATIVE}.  These characteristics provide
+ * hints that can be used by a reduction implementation to provide better
+ * performance.
+ *
+ * <p>Libraries that implement reduction based on {@code Collector}, such as
+ * {@link Stream#collect(Collector)}, must adhere to the following constraints:
  * <ul>
- *     <li>The first argument passed to the accumulator function, and both arguments passed
- *     to the combiner function, must be the result of of a previous invocation of
- *     {@link #resultSupplier()}, {@link #accumulator()}, or {@link #combiner()}.</li>
- *     <li>The implementation should not do anything with the result of any of the result
- *     supplier, accumulator, or combiner functions other than to pass them again to the
- *     accumulator or combiner functions, or return them to the caller of the reduction
- *     operation.</li>
- *     <li>If a result is passed to the accumulator or combiner function, and the same object
- *     is not returned from that function, it is never used again.</li>
- *     <li>Once a result is passed to the combiner function, it is never passed to the
- *     accumulator function again.</li>
- *     <li>For non-concurrent collectors, any result returned from the result supplier,
- *     accumulator, or combiner functions must be serially thread-confined.  This enables
- *     collection to occur in parallel without the {@code Collector} needing to implement
- *     any additional synchronization.  The reduction implementation must manage that
- *     the input is properly partitioned, that partitions are processed in isolation,
- *     and combining happens only after accumulation is complete.</li>
- *     <li>For concurrent collectors, an implementation is free to (but not required to)
- *     implement reduction concurrently.  A concurrent collection is one where the
- *     accumulator function is called concurrently from multiple threads, rather than
- *     keeping the result isolated during accumulation.</li>
+ *     <li>The first argument passed to the accumulator function, and both
+ *     arguments passed to the combiner function, must be the result of a
+ *     previous invocation of {@link #resultSupplier()}, {@link #accumulator()},
+ *     or {@link #combiner()}.</li>
+ *     <li>The implementation should not do anything with the result of any of
+ *     the result supplier, accumulator, or combiner functions other than to
+ *     pass them again to the accumulator or combiner functions, or return them
+ *     to the caller of the reduction operation.</li>
+ *     <li>If a result is passed to the accumulator or combiner function, and
+ *     the same object is not returned from that function, it is never used
+ *     again.</li>
+ *     <li>Once a result is passed to the combiner function, it is never passed
+ *     to the accumulator function again.</li>
+ *     <li>For non-concurrent collectors, any result returned from the result
+ *     supplier, accumulator, or combiner functions must be serially
+ *     thread-confined.  This enables collection to occur in parallel without
+ *     the {@code Collector} needing to implement any additional synchronization.
+ *     The reduction implementation must manage that the input is properly
+ *     partitioned, that partitions are processed in isolation, and combining
+ *     happens only after accumulation is complete.</li>
+ *     <li>For concurrent collectors, an implementation is free to (but not
+ *     required to) implement reduction concurrently.  A concurrent reduction
+ *     is one where the accumulator function is called concurrently from
+ *     multiple threads, using the same concurrently-modifiable result container,
+ *     rather than keeping the result isolated during accumulation.
+ *     A concurrent reduction should only be applied if the collector has the
+ *     {@link Characteristics#UNORDERED} characteristic or if the
+ *     originating data is unordered.</li>
  * </ul>
  *
  * @apiNote
- * <p>Performing a reduction operation with a {@code Collector} should produce a result
- * equivalent to:
- * <pre>
+ * <p>Performing a reduction operation with a {@code Collector} should produce a
+ * result equivalent to:
+ * <pre>{@code
  *     BiFunction<R,T,R> accumulator = collector.accumulator();
  *     R result = collector.resultSupplier().get();
  *     for (T t : data)
  *         result = accumulator.apply(result, t);
  *     return result;
- * </pre>
+ * }</pre>
  *
- * However, the library is free to partition the input, perform the reduction on the partitions,
- * and then use the combiner function to combine the partial results to achieve a parallel
- * reduction.  Depending on the specific reduction operation, this may perform better or worse,
- * depending on the relative cost of the accumulator and combiner functions.
+ * However, the library is free to partition the input, perform the reduction on
+ * the partitions, and then use the combiner function to combine the partial
+ * results to achieve a parallel reduction.  Depending on the specific reduction
+ * operation, this may perform better or worse, depending on the relative cost
+ * of the accumulator and combiner functions.
  *
- * <p>An example of an operation that can be easily modeled by {@code Collector} is accumulating
- * elements into a {@code TreeSet}. In this case, the @{code resultSupplier()} function is
- * {@code () -> new Treeset<T>()}, the {@code accumulator} function is
- * {@code (set, element) -> { set.add(element); return set; }}, and the combiner function is
- * {@code (left, right) -> { left.addAll(right); return left; }}.  (This behavior is
- * implemented by the method {@code Collectors.toCollection(TreeSet::new)}).
+ * <p>An example of an operation that can be easily modeled by {@code Collector}
+ * is accumulating elements into a {@code TreeSet}. In this case, the {@code
+ * resultSupplier()} function is {@code () -> new TreeSet<T>()}, the
+ * {@code accumulator} function is
+ * {@code (set, element) -> { set.add(element); return set; }}, and the combiner
+ * function is {@code (left, right) -> { left.addAll(right); return left; }}.
+ * (This behavior is implemented by
+ * {@code Collectors.toCollection(TreeSet::new)}).
  *
- * TODO Document concurrent behavior and interaction with ordering
  * TODO Associativity and commutativity
  *
  * @see Stream#collect(Collector)
- * @see Stream#collectUnordered(Collector)
  * @see Collectors
  *
  * @param <T> The type of input element to the collect operation
@@ -116,7 +165,7 @@
  */
 public interface Collector<T, R> {
     /**
-     * A function that creates and return a new result that represents "no values".
+     * A function that creates and returns a new result that represents "no values".
      * If the accumulator or combiner functions may mutate their arguments, this must
      * be a new, empty result container.
      *
@@ -127,23 +176,23 @@
     /**
      * A function that folds a new value into a cumulative result.  The result may be a
      * mutable result container or a value.  The accumulator function may modify a mutable
-     * container and return it, or create a new result and return that, but it should not
-     * modify a provided mutable container and return a different object.
+     * container and return it, or create a new result and return that, but if it returns
+     * a new result object, it must not modify any of its arguments.
      *
      * <p>If the collector has the {@link Characteristics#STRICTLY_MUTATIVE} characteristic,
      * then the accumulator function <em>must</em> always return its first argument, after
-     * possibly  mutating its state.
+     * possibly mutating its state.
      *
-     * @return A function which folds a new value into a cumulative results
-     *
-     * MUST either mutate and return same, or NOT mutate
+     * @return A function which folds a new value into a cumulative result
      */
     BiFunction<R, T, R> accumulator();
 
     /**
-     * A function that accepts two partial results and merges them.  It may fold state from one
-     * argument into the other and return that, or may return a new result, but if it returns
-     * a new result, should not modify the state of either of its arguments.
+     * A function that accepts two partial results and merges them.  The
+     * combiner function may fold state from one argument into the other and
+     * return that, or may return a new result object, but if it returns
+     * a new result object, it must not modify the state of either of its
+     * arguments.
      *
      * <p>If the collector has the {@link Characteristics#STRICTLY_MUTATIVE} characteristic,
      * then the combiner function <em>must</em> always return its first argument, after
@@ -171,12 +220,10 @@
          * Indicates that this collector is <em>concurrent</em>, meaning that the result
          * container can support the accumulator function being called concurrently with
          * the same result container from multiple threads. Concurrent collectors must also
-         * always have the STRICTLY_MUTATIVE characteristic.
+         * always have the {@code STRICTLY_MUTATIVE} characteristic.
          *
-         * <p>Because a concurrent collection cannot guarantee that the elements will be
-         * presented to the accumulator function in encounter order, a concurrent collector
-         * must represent a combining operation that is not only
-         * <a href="package-summary.html#Associativity">associative</a>, but also commutative.
+         * <p>If this collector is not also {@code UNORDERED}, then it should
+         * only be evaluated concurrently if applied to an unordered data source.
          */
         CONCURRENT,
         /**
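
The sequential semantics in the @apiNote above can be written out as a runnable sketch against plain functional interfaces; the helper name and its parameters below are illustrative, not part of the Collector API.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    import java.util.function.BiFunction;
    import java.util.function.Supplier;

    public class SequentialCollectSketch {
        // Create an initial result, then fold each element into it.
        static <T, R> R collect(Iterable<T> data,
                                Supplier<R> resultSupplier,
                                BiFunction<R, T, R> accumulator) {
            R result = resultSupplier.get();
            for (T t : data)
                result = accumulator.apply(result, t);
            return result;
        }

        public static void main(String[] args) {
            List<String> out = collect(Arrays.asList("a", "b", "c"),
                                       ArrayList::new,
                                       (list, s) -> { list.add(s); return list; });
            System.out.println(out);   // [a, b, c]
        }
    }
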
--- a/src/share/classes/java/util/stream/Collectors.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/Collectors.java	Mon Apr 08 16:19:46 2013 -0700
@@ -29,6 +29,8 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.Comparator;
+import java.util.Comparators;
 import java.util.DoubleSummaryStatistics;
 import java.util.EnumSet;
 import java.util.HashMap;
@@ -71,7 +73,7 @@
             = Collections.unmodifiableSet(EnumSet.of(Collector.Characteristics.STRICTLY_MUTATIVE,
                                                      Collector.Characteristics.UNORDERED));
 
-    private Collectors() {}
+    private Collectors() { }
 
     /**
      * Return a merge function, suitable for use in {@link Map#merge(Object, Object, BiFunction)} or
@@ -81,11 +83,36 @@
      * @param <T> The type of input arguments to the merge function
      * @return A merge function which always throws {@code IllegalStateException}
      */
-    public static<T> BinaryOperator<T> throwingMergeFunction() {
+    public static<T> BinaryOperator<T> throwingMerger() {
         return (u,v) -> { throw new IllegalStateException(String.format("Duplicate key %s", u)); };
     }
 
-    static final class CollectorImpl<T, R> implements Collector<T,R> {
+    /**
+     * Return a merge function, suitable for use in {@link Map#merge(Object, Object, BiFunction)} or
+     * {@link #toMap(Function, Supplier, BinaryOperator)}, which implements a "first wins"
+     * policy.
+     *
+     * @param <T> The type of input arguments to the merge function
+     * @return A merge function which always returns its first argument
+     */
+    public static<T> BinaryOperator<T> firstWinsMerger() {
+        return (u,v) -> u;
+    }
+
+    /**
+     * Return a merge function, suitable for use in {@link Map#merge(Object, Object, BiFunction)} or
+     * {@link #toMap(Function, Supplier, BinaryOperator)}, which implements a "last wins"
+     * policy.
+     *
+     * @param <T> The type of input arguments to the merge function
+     * @return A merge function which always returns its second argument
+     */
+    public static<T> BinaryOperator<T> lastWinsMerger() {
+        return (u,v) -> v;
+    }
+
+    /** Simple implementation class for {@code Collector} */
+    private static final class CollectorImpl<T, R> implements Collector<T,R> {
         private final Supplier<R> resultSupplier;
         private final BiFunction<R, T, R> accumulator;
         private final BinaryOperator<R> combiner;
@@ -130,14 +157,14 @@
 
     /**
      * Return a {@code Collector} that accumulates the input elements into a new {@code Collection},
-     * which is created by the provided factory.
+     * in encounter order.  The {@code Collection} is created by the provided factory.
      *
      * @param collectionFactory A {@code Supplier} which returns a new, empty {@code Collection}
-     *                          of the appropriate type each time it is called
+     *                          of the appropriate type
      * @param <T> The type of the input elements
      * @param <C> The type of the resulting {@code Collection}
-     * @return A {@code Collector} which collects elements into a {@code Collection} containing all the input elements
-     * , in encounter order
+     * @return A {@code Collector} which collects all the input elements into a
+     * {@code Collection}, in encounter order
      */
     public static<T, C extends Collection<T>>
     Collector<T, C> toCollection(Supplier<C> collectionFactory) {
@@ -153,8 +180,8 @@
      * returned, and the returned list is not guaranteed to be mutable.
      *
      * @param <T> The type of the input elements
-     * @return A {@code Collector} which collects elements into a {@code List} containing all the input elements,
-     * in encounter order
+     * @return A {@code Collector} which collects all the input elements into a
+     * {@code List}, in encounter order
      */
     public static<T>
     Collector<T, List<T>> toList() {
@@ -195,7 +222,8 @@
      * returned, and the returned list is not guaranteed to be mutable.
      *
      * @param <T> The type of the input elements
-     * @return A {@code Collector} which collects elements into a {@code Set} containing all the input elements
+     * @return A {@code Collector} which collects all the input elements into a
+     * {@code Set}
      */
     public static<T>
     Collector<T, Set<T>> toSet() {
@@ -206,10 +234,11 @@
     }
 
     /**
-     * Return a {@code Collector} that concatenates the input elements into a new {@code StringBuilder}.
+     * Return a {@code Collector} that concatenates the input elements into a new
+     * {@code StringBuilder}.
      *
-     * @return A {@code Collector} which collects {@code String} elements into a {@code StringBuilder} containing
-     * all of the input elements concatenated in encounter order
+     * @return A {@code Collector} which collects String elements into a
+     * {@code StringBuilder}, in encounter order
      */
     public static Collector<String, StringBuilder> toStringBuilder() {
         return new CollectorImpl<>(StringBuilder::new,
@@ -219,11 +248,11 @@
     }
 
     /**
-     * Return a {@code Collector} that concatenates the input elements into a new {@code StringJoiner},
-     * using the specified separator.
+     * Return a {@code Collector} that concatenates the input elements into a new
+     * {@code StringJoiner}, using the specified separator.
      *
-     * @return A {@code Collector} which collects String elements into a {@code StringJoiner} containing all of
-     * the input elements concatenated in encounter order
+     * @return A {@code Collector} which collects String elements into a
+     * {@code StringJoiner}, in encounter order
      */
     public static Collector<CharSequence, StringJoiner> toStringJoiner(String separator) {
         BinaryOperator<StringJoiner> merger = (sj, other) -> {
@@ -235,6 +264,14 @@
                                    merger, CH_STRICT);
     }
 
+    /**
+     * {@code BinaryOperator<Map>} that merges the contents of its right argument into
+     * its left argument, using the provided merge function to handle duplicate keys.
+     * @param mergeFunction A merge function suitable for {@link Map#merge(Object, Object, BiFunction)}
+     * @param <K> Type of the map keys
+     * @param <V> Type of the map values
+     * @param <M> Type of the map
+     * @return A merge function for two maps
+     */
     private static<K, V, M extends Map<K,V>> BinaryOperator<M> mapMerger(BinaryOperator<V> mergeFunction) {
         return (m1, m2) -> {
             for (Map.Entry<K,V> e : m2.entrySet())
@@ -253,11 +290,11 @@
      * following a {@code groupingBy} or {@code partitioningBy} collection.
      * For example, given a stream of {@code Person}, to accumulate the set of last names in
      * each city:
-     * <pre>
+     * <pre>{@code
      *     Map<City, Set<String>> lastNamesByCity
      *         = people.stream().collect(groupingBy(Person::getCity,
      *                                              mapping(Person::getLastName, toSet())));
-     * </pre>
+     * }</pre>
      *
      * @param <T> The type of the input elements
      * @param <U> Type of elements accepted by downstream collector
@@ -276,34 +313,129 @@
     }
 
     /**
-     * Given a {@code BinaryOperator<T>}, return a {@code Collector<T,T>} which calculates the reduction of
-     * its input elements under the specified {@code BinaryOperator}.
+     * Produces a {@code Collector<T, Long>} that counts the number of input elements.
+     *
+     * @implSpec
+     * This produces a result equivalent to:
+     * <pre>{@code
+     *     reducing(0L, e -> 1L, Long::sum)
+     * }</pre>
+     * @param <T> The type of the input elements
+     * @return A {@code Collector} that counts its input elements
+     */
+    public static<T> Collector<T, Long>
+    count() {
+        return reducing(0L, e -> 1L, Long::sum);
+    }
+
+    /**
+     * Produces a {@code Collector<T, T>} that produces the minimal element
+     * according to a given {@code Comparator}.
+     *
+     * @implSpec
+     * This produces a result equivalent to:
+     * <pre>{@code
+     *     reducing(Comparators.lesserOf(comparator))
+     * }</pre>
+     * @param <T> The type of the input elements
+     * @param comparator A {@code Comparator} for comparing elements
+     * @return A {@code Collector} that produces the minimal value
+     */
+    public static<T> Collector<T, T>
+    minBy(Comparator<? super T> comparator) {
+        return reducing(Comparators.lesserOf(comparator));
+    }
+
+    /**
+     * Produces a {@code Collector<T, T>} that produces the maximal element
+     * according to a given {@code Comparator}.
+     *
+     * @implSpec
+     * This produces a result equivalent to:
+     * <pre>{@code
+     *     reducing(Comparators.greaterOf(comparator))
+     * }</pre>
+     * @param <T> The type of the input elements
+     * @param comparator A {@code Comparator} for comparing elements
+     * @return A {@code Collector} that produces the maximal value
+     */
+    public static<T> Collector<T, T>
+    maxBy(Comparator<? super T> comparator) {
+        return reducing(Comparators.greaterOf(comparator));
+    }
+
+    /**
+     * Produces a {@code Collector<T, Long>} that produces the sum of a
+     * long-valued function applied to the input elements.
+     *
+     * @implSpec
+     * This produces a result equivalent to:
+     * <pre>{@code
+     *     reducing(0L, mapper, Long::sum)
+     * }</pre>
+     * @param <T> The type of the input elements
+     * @param mapper A function extracting the property to be summed
+     * @return A {@code Collector} that produces the sum of a derived property
+     */
+    public static<T> Collector<T, Long>
+    sumBy(Function<? super T, Long> mapper) {
+        return reducing(0L, mapper, Long::sum);
+    }
+
+    /**
+     * Given an identity value and a {@code BinaryOperator<T>}, return a {@code Collector<T,T>}
+     * which calculates the reduction of its input elements under the specified
+     * {@code BinaryOperator}, using the identity as the initial value of the reduction.
      *
      * @apiNote
      * The {@code reducing()} collectors are most useful when used in a multi-level collection
      * following a {@code groupingBy} or {@code partitioningBy} collection; if you want to perform
      * a simple reduction on a stream, use {@link Stream#reduce(BinaryOperator)}.
      * For example, given a stream of {@code Person}, to calculate tallest person in each city:
-     * <pre>
+     * <pre>{@code
      *     Comparator<Person> byHeight = Comparators.comparing(Person::getHeight);
      *     BinaryOperator<Person> tallerOf = Comparators.greaterOf(byHeight);
      *     Map<City, Person> tallestByCity
      *         = people.stream().collect(groupingBy(Person::getCity, reducing(tallerOf)));
-     * </pre>
+     * }</pre>
+     * @param identity The identity value for the reduction (also, the value
+     *                 that is returned when there are no elements to reduce)
      * @param op A {@code BinaryOperator<T>} used to reduce the input elements
-     * @param <T> The type of the input elements
      * @return A {@code Collector} which implements the reduction operation
-     * @see #reducing(Function, BinaryOperator)
+     * @see #reducing(Object, Function, BinaryOperator)
+     */
+    public static <T> Collector<T, T>
+    reducing(T identity, BinaryOperator<T> op) {
+        return new CollectorImpl<>(() -> identity, (r, t) -> (r == null ? t : op.apply(r, t)), op);
+    }
+
+    /**
+     * Given a {@code BinaryOperator<T>}, return a {@code Collector<T,T>} which calculates the
+     * reduction of its input elements under the specified {@code BinaryOperator}.
+     *
+     * @apiNote
+     * The {@code reducing()} collectors are most useful when used in a multi-level collection
+     * following a {@code groupingBy} or {@code partitioningBy} collection; if you want to perform
+     * a simple reduction on a stream, use {@link Stream#reduce(BinaryOperator)}.
+     * For example, given a stream of {@code Person}, to calculate tallest person in each city:
+     * <pre>{@code
+     *     Comparator<Person> byHeight = Comparators.comparing(Person::getHeight);
+     *     BinaryOperator<Person> tallerOf = Comparators.greaterOf(byHeight);
+     *     Map<City, Person> tallestByCity
+     *         = people.stream().collect(groupingBy(Person::getCity, reducing(tallerOf)));
+     * }</pre>
+     * @param op A {@code BinaryOperator<T>} used to reduce the input elements
+     * @return A {@code Collector} which implements the reduction operation
+     * @see #reducing(Object, Function, BinaryOperator)
      */
     public static <T> Collector<T, T>
     reducing(BinaryOperator<T> op) {
-        return new CollectorImpl<>(() -> null, (r, t) -> (r == null ? t : op.apply(r, t)), op);
+        return reducing(null, op);
     }
 
     /**
      * Given a {@code BinaryOperator<U>} and a {@code Function<T,U>}, return a {@code Collector<T,U>}
      * which calculates the reduction of the input elements after applying the mapping function.
-     * This is a generalization of {@link #reducing(BinaryOperator)}, which allows a transformation of
+     * This is a generalization of {@link #reducing(Object, BinaryOperator)} which allows a transformation of
      * the elements before reduction.
      *
      * @apiNote
@@ -312,25 +444,26 @@
      * a simple reduction on a stream, use {@link Stream#reduce(BinaryOperator)}.
      * For example, given a stream of {@code Person}, to calculate the longest last name of residents
      * in each city:
-     * <pre>
+     * <pre>{@code
      *     Comparator<String> byLength = Comparators.comparing(String::length);
      *     BinaryOperator<String> longerOf = Comparators.greaterOf(byLength);
      *     Map<City, String> longestLastNameByCity
      *         = people.stream().collect(groupingBy(Person::getCity,
      *                                              reducing(Person::getLastName, longerOf)));
-     * </pre>
+     * }</pre>
      *
+     * @param identity The identity value for the reduction (also, the value
+     *                 that is returned when there are no elements to reduce)
      * @param mapper A mapping function to apply to each input value
      * @param op A {@code BinaryOperator<T>} used to reduce the mapped values
      * @param <T> The type of the input elements
      * @param <U> The type of the mapped values
      * @return A {@code Collector} implementing the map-reduce operation
-     * @see #reducing(BinaryOperator)
+     * @see #reducing(Object, BinaryOperator)
      */
     public static <T, U>
-    Collector<T, U> reducing(Function<? super T, ? extends U> mapper,
+    Collector<T, U> reducing(U identity,
+                             Function<? super T, ? extends U> mapper,
                              BinaryOperator<U> op) {
-        return new CollectorImpl<>(() -> null,
+        return new CollectorImpl<>(() -> identity,
                                    (r, t) -> (r == null ? mapper.apply(t) : op.apply(r, mapper.apply(t))),
                                    op);
     }
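
For illustration, a multi-level reduction in the style of the reducing-family collectors above, written against the finalized Java 8 names (groupingBy/counting); the count()/sumBy()/minBy() collectors in this changeset play the same downstream role, though their names may differ from the eventual public API.

    import java.util.Arrays;
    import java.util.List;
    import java.util.Map;
    import java.util.stream.Collectors;

    public class DownstreamReductionSketch {
        public static void main(String[] args) {
            List<String> words = Arrays.asList("apple", "avocado", "banana", "cherry");

            // The classifier picks the key; the downstream collector reduces
            // each group to a single Long.
            Map<Character, Long> countByInitial =
                words.stream()
                     .collect(Collectors.groupingBy(w -> w.charAt(0),
                                                    Collectors.counting()));

            System.out.println(countByInitial);   // e.g. {a=2, b=1, c=1}
        }
    }
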
@@ -444,23 +577,108 @@
         return new CollectorImpl<>(mapFactory, accumulator, mapMerger(downstream.combiner()), CH_STRICT);
     }
 
+    /**
+     * Returns a {@code Collector} that implements a concurrent "group by" operation on input
+     * elements of type {@code T}.
+     * <p>This is a {@link Collector.Characteristics#CONCURRENT concurrent} Collector.
+     *
+     * <p>Accepts a classification function from {@code T} to {@code K}.  The collector produces
+     * a {@code ConcurrentMap} whose keys are the set of values resulting from applying the
+     * classification function to the input elements, and whose corresponding values are
+     * {@code List}s containing the input elements which map to the associated key under the
+     * classification function.
+     *
+     * <p>No guarantees are made as to the type of the {@code ConcurrentMap} or the type of the
+     * {@code List} used for the map values.
+     *
+     * @param classifier The classifier function mapping input elements to keys
+     * @param <T> The type of the input elements
+     * @param <K> The type of the keys
+     * @return A {@code Collector} implementing the group-by operation
+     */
     public static<T, K>
     Collector<T, ConcurrentMap<K, List<T>>> groupingByConcurrent(Function<? super T, ? extends K> classifier) {
         return groupingByConcurrent(classifier, ConcurrentHashMap::new);
     }
 
+    /**
+     * Returns a {@code Collector} that implements a concurrent "group by" operation on input
+     * elements of type {@code T}, resulting in a {@code Map} of a specific type.
+     * <p>This is a {@link Collector.Characteristics#CONCURRENT concurrent} Collector.
+     *
+     * <p>Accepts a classification function from {@code T} to {@code K}, and a factory function
+     * which produces a {@code ConcurrentMap} of the desired type.  The collector populates
+     * the {@code ConcurrentMap} produced by the factory function, whose keys are the set of
+     * values resulting from applying the classification function to the input elements, and
+     * whose corresponding values are {@code List}s containing the input elements which map to
+     * the associated key under the classification function.
+     *
+     * <p>No guarantees are made as to the type of the {@code List} used for the map values.
+     *
+     * @param classifier The classifier function mapping input elements to keys
+     * @param mapFactory A function which, when invoked, returns a new, empty instance
+     *                   of a {@code Map} of the desired type
+     * @param <M> The type of the resulting {@code Map}
+     * @param <T> The type of the input elements
+     * @param <K> The type of the keys
+     * @return A {@code Collector} implementing the group-by operation
+     */
     public static<T, K, M extends ConcurrentMap<K, List<T>>>
     Collector<T, M> groupingByConcurrent(Function<? super T, ? extends K> classifier,
                                          Supplier<M> mapFactory) {
         return groupingByConcurrent(classifier, mapFactory, toList());
     }
 
+    /**
+     * Returns a {@code Collector} that implements a concurrent cascaded "group by" operation on
+     * input elements of type {@code T}, resulting in a {@code ConcurrentMap} whose values are
+     * the result of another reduction.
+     * <p>This is a {@link Collector.Characteristics#CONCURRENT concurrent} Collector.
+     *
+     * <p>Accepts a classification function from {@code T} to {@code K} and a {@code Collector}
+     * which implements another reduction on elements of type {@code T}.  The collector populates
+     * a {@code ConcurrentMap} whose keys are the set of values resulting from applying the
+     * classification function to the input elements, and whose corresponding values are the
+     * result of reducing the input elements which map to the associated key under the
+     * classification function with the downstream reducer.
+     *
+     * <p>No guarantees are made as to the type of the resulting {@code Map}.
+     *
+     * @param classifier The classifier function mapping input elements to keys
+     * @param downstream A {@code Collector} implementing the downstream reduction
+     * @param <T> The type of the input elements
+     * @param <K> The type of the keys
+     * @param <D> The result type of the downstream reduction
+     * @return A {@code Collector} implementing the cascaded group-by operation
+     */
     public static<T, K, D>
     Collector<T, ConcurrentMap<K, D>> groupingByConcurrent(Function<? super T, ? extends K> classifier,
                                                            Collector<T, D> downstream) {
         return groupingByConcurrent(classifier, ConcurrentHashMap::new, downstream);
     }
 
+    /**
+     * Returns a {@code Collector} that implements a cascaded concurrent "group by" operation on
+     * input elements of type {@code T}, resulting in a {@code ConcurrentMap} whose values are
+     * the result of another reduction.
+     * <p>This is a {@link Collector.Characteristics#CONCURRENT concurrent} Collector.
+     *
+     * <p>Accepts a classification function from {@code T} to {@code K}, a factory function
+     * which produces a {@code ConcurrentMap} of the desired type, and a {@code Collector} which
+     * implements another reduction on elements of type {@code T}.  The collector populates a
+     * {@code ConcurrentMap} produced by the factory function whose keys are the set of values
+     * resulting from applying the classification function to the input elements, and whose
+     * corresponding values are the result of reducing the input elements which map to the
+     * associated key under the classification function with the downstream reducer.
+     *
+     * @param classifier The classifier function mapping input elements to keys
+     * @param mapFactory A function which, when invoked, returns a new, empty
+     *                   {@code ConcurrentMap} of the desired type
+     * @param downstream A {@code Collector} implementing the downstream reduction
+     * @param <T> The type of the input elements
+     * @param <K> The type of the keys
+     * @param <D> The result type of the downstream reduction
+     * @param <M> The type of the resulting {@code ConcurrentMap}
+     * @return A {@code Collector} implementing the cascaded group-by operation
+     */
     public static<T, K, D, M extends ConcurrentMap<K, D>>
     Collector<T, M> groupingByConcurrent(Function<? super T, ? extends K> classifier,
                                          Supplier<M> mapFactory,
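
A minimal usage sketch of the concurrent group-by described above, using the single-argument form that also exists in the finalized API; because a CONCURRENT collector accumulates into one shared ConcurrentMap from many threads, it pairs naturally with a parallel source.

    import java.util.Arrays;
    import java.util.List;
    import java.util.concurrent.ConcurrentMap;
    import java.util.stream.Collectors;

    public class ConcurrentGroupBySketch {
        public static void main(String[] args) {
            List<String> words = Arrays.asList("apple", "avocado", "banana", "cherry");

            ConcurrentMap<Character, List<String>> byInitial =
                words.parallelStream()
                     .collect(Collectors.groupingByConcurrent(w -> w.charAt(0)));

            System.out.println(byInitial.get('a'));   // [apple, avocado] in some order
        }
    }
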
@@ -563,6 +781,7 @@
                                    accumulator, partitionMerger(downstream.combiner()), CH_STRICT);
     }
 
+    /** Merge function for two partitions, given a merge function for the elements */
     private static<D> BinaryOperator<Map<Boolean, D>> partitionMerger(BinaryOperator<D> op) {
         return (m1, m2) -> {
             Partition<D> left = (Partition<D>) m1;
@@ -592,7 +811,7 @@
      * and whose values are the result of applying the mapping function to the input element
      */
     public static <T, U> Collector<T, Map<T,U>> toMap(Function<? super T, ? extends U> mapper) {
-        return toMap(mapper, HashMap::new, throwingMergeFunction());
+        return toMap(mapper, HashMap::new, throwingMerger());
     }
 
     /**
@@ -627,7 +846,7 @@
      * (according to {@link Object#equals(Object)}), an {@code IllegalStateException} is thrown when the
      * collection operation is performed.
      *
-     * <p>This is a <em>concurrent</em> Collector.  (TODO need reference).
+     * <p>This is a {@link Collector.Characteristics#CONCURRENT concurrent} Collector.
      *
      * @param <T> The type of the input elements, and the input type of the mapping function
      * @param <U> The output type of the mapping function
@@ -637,7 +856,7 @@
      */
     public static <T, U>
     Collector<T, ConcurrentMap<T,U>> toConcurrentMap(Function<? super T, ? extends U> mapper) {
-        return toConcurrentMap(mapper, ConcurrentHashMap::new, throwingMergeFunction());
+        return toConcurrentMap(mapper, ConcurrentHashMap::new, throwingMerger());
     }
 
     /**
@@ -646,7 +865,7 @@
      * (according to {@link Object#equals(Object)}), the mapping function is applied to each equal element, and the
      * results are merged with the provided merging function.
      *
-     * <p>This is a <em>concurrent</em> Collector.  (TODO need reference).
+     * <p>This is a {@link Collector.Characteristics#CONCURRENT concurrent} Collector.
      *
      * @param mapper The mapping function
      * @param mapSupplier A function which provides a new, empty {@code Map} into which the results will be inserted
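
The merge-function policies above can be exercised directly through Map.merge; the lambdas below mirror the behavior described for firstWinsMerger() and lastWinsMerger(), while throwingMerger() would throw on the first duplicate key. This is a sketch, not code from the changeset.

    import java.util.HashMap;
    import java.util.Map;
    import java.util.function.BinaryOperator;

    public class MergePolicySketch {
        public static void main(String[] args) {
            BinaryOperator<Integer> firstWins = (u, v) -> u;
            BinaryOperator<Integer> lastWins  = (u, v) -> v;

            Map<String, Integer> m = new HashMap<>();
            m.merge("a", 1, lastWins);
            m.merge("a", 2, lastWins);    // duplicate key: value becomes 2
            m.merge("b", 7, firstWins);
            m.merge("b", 9, firstWins);   // duplicate key: value stays 7

            System.out.println(m);        // {a=2, b=7}
        }
    }
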
--- a/src/share/classes/java/util/stream/DelegatingStream.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/DelegatingStream.java	Mon Apr 08 16:19:46 2013 -0700
@@ -33,7 +33,6 @@
 import java.util.function.BiConsumer;
 import java.util.function.BiFunction;
 import java.util.function.BinaryOperator;
-import java.util.function.BooleanSupplier;
 import java.util.function.Consumer;
 import java.util.function.Function;
 import java.util.function.IntFunction;
@@ -109,7 +108,7 @@
     }
 
     @Override
-    public <R> Stream<R> flatMap(Function<T, Stream<? extends R>> mapper) {
+    public <R> Stream<R> flatMap(Function<? super T, ? extends Stream<? extends R>> mapper) {
         return delegate.flatMap(mapper);
     }
 
@@ -217,11 +216,6 @@
     }
 
     @Override
-    public <R> R collectUnordered(Collector<? super T, R> collector) {
-        return delegate.collectUnordered(collector);
-    }
-
-    @Override
     public Optional<T> max(Comparator<? super T> comparator) {
         return delegate.max(comparator);
     }
@@ -262,6 +256,11 @@
     }
 
     @Override
+    public Stream<T> unordered() {
+        return delegate.unordered();
+    }
+
+    @Override
     public Stream<T> sequential() {
         return delegate.sequential();
     }
--- a/src/share/classes/java/util/stream/DoublePipeline.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/DoublePipeline.java	Mon Apr 08 16:19:46 2013 -0700
@@ -57,8 +57,9 @@
      * @param sourceFlags The source flags for the stream source, described in
      * {@link StreamOpFlag}
      */
-    DoublePipeline(Supplier<? extends Spliterator<Double>> source, int sourceFlags) {
-        super(source, sourceFlags);
+    DoublePipeline(Supplier<? extends Spliterator<Double>> source,
+                   int sourceFlags, boolean parallel) {
+        super(source, sourceFlags, parallel);
     }
 
     /**
@@ -68,8 +69,9 @@
      * @param sourceFlags The source flags for the stream source, described in
      * {@link StreamOpFlag}
      */
-    DoublePipeline(Spliterator<Double> source, int sourceFlags) {
-        super(source, sourceFlags);
+    DoublePipeline(Spliterator<Double> source,
+                   int sourceFlags, boolean parallel) {
+        super(source, sourceFlags, parallel);
     }
 
     /**
@@ -189,7 +191,7 @@
     }
 
     @Override
-    public final <U> Stream<U> mapToObj(DoubleFunction<U> mapper) {
+    public final <U> Stream<U> mapToObj(DoubleFunction<? extends U> mapper) {
         Objects.requireNonNull(mapper);
         return new ReferencePipeline.StatelessOp<Double, U>(this, StreamShape.DOUBLE_VALUE,
                                                             StreamOpFlag.NOT_SORTED | StreamOpFlag.NOT_DISTINCT) {
@@ -261,6 +263,18 @@
     }
 
     @Override
+    public DoubleStream unordered() {
+        if (!isOrdered())
+            return this;
+        return new StatelessOp<Double>(this, StreamShape.DOUBLE_VALUE, StreamOpFlag.NOT_ORDERED) {
+            @Override
+            Sink<Double> opWrapSink(int flags, Sink<Double> sink) {
+                return sink;
+            }
+        };
+    }
+
+    @Override
     public final DoubleStream filter(DoublePredicate predicate) {
         Objects.requireNonNull(predicate);
         return new StatelessOp<Double>(this, StreamShape.DOUBLE_VALUE,
@@ -434,7 +448,8 @@
 
     @Override
     public final double[] toArray() {
-        return ((Node.OfDouble) evaluateToArrayNode(Double[]::new)).asDoubleArray();
+        return NodeUtils.doubleFlatten((Node.OfDouble) evaluateToArrayNode(Double[]::new))
+                        .asDoubleArray();
     }
 
     //
@@ -446,9 +461,11 @@
          *
          * @param source {@code Supplier<Spliterator>} describing the stream source
          * @param sourceFlags The source flags for the stream source, described in {@link StreamOpFlag}
+         * @param parallel True if the pipeline is parallel
          */
-        Head(Supplier<? extends Spliterator<Double>> source, int sourceFlags) {
-            super(source, sourceFlags);
+        Head(Supplier<? extends Spliterator<Double>> source,
+             int sourceFlags, boolean parallel) {
+            super(source, sourceFlags, parallel);
         }
 
         /**
@@ -456,9 +473,11 @@
          *
          * @param source {@code Spliterator} describing the stream source
          * @param sourceFlags The source flags for the stream source, described in {@link StreamOpFlag}
+         * @param parallel True if the pipeline is parallel
          */
-        Head(Spliterator<Double> source, int sourceFlags) {
-            super(source, sourceFlags);
+        Head(Spliterator<Double> source,
+             int sourceFlags, boolean parallel) {
+            super(source, sourceFlags, parallel);
         }
 
         @Override
--- a/src/share/classes/java/util/stream/DoubleStream.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/DoubleStream.java	Mon Apr 08 16:19:46 2013 -0700
@@ -112,7 +112,7 @@
      *               each element
      * @return the new stream
      */
-    <U> Stream<U> mapToObj(DoubleFunction<U> mapper);
+    <U> Stream<U> mapToObj(DoubleFunction<? extends U> mapper);
 
     /**
      * Produces an {@code IntStream} consisting of the results of applying
@@ -217,14 +217,14 @@
      *
      * @apiNote This method exists mainly to support debugging, where you want
      * to see the elements as they flow past a certain point in a pipeline:
-     * <pre>
+     * <pre>{@code
      *     list.stream()
      *         .filter(filteringFunction)
      *         .peek(e -> { System.out.println("Filtered value: " + e); })
      *         .map(mappingFunction)
      *         .peek(e -> { System.out.println("Mapped value: " + e); })
      *         .collect(Collectors.toDoubleSummaryStatistics());
-     * </pre>
+     * }</pre>
      *
      * @param consumer A <a href="package-summary.html#NonInterference">
      *                 non-interfering</a> action to perform on the elements as
@@ -319,12 +319,12 @@
      * an <a href="package-summary.html#Associativity">associative</a>
      * accumulation function, and return the reduced value.  This is equivalent
      * to:
-     * <pre>
+     * <pre>{@code
      *     double result = identity;
      *     for (double element : this stream)
      *         result = accumulator.apply(result, element)
      *     return result;
-     * </pre>
+     * }</pre>
      *
      * but is not constrained to execute sequentially.
      *
@@ -340,15 +340,15 @@
      * @apiNote Sum, min, max, and average are all special cases of reduction.
      * Summing a stream of numbers can be expressed as:
      *
-     * <pre>
+     * <pre>{@code
      *     double sum = numbers.reduce(0, (a, b) -> a+b);
-     * </pre>
+     * }</pre>
      *
      * or more compactly:
      *
-     * <pre>
+     * <pre>{@code
      *     double sum = numbers.reduce(0, Double::sum);
-     * </pre>
+     * }</pre>
      *
      * <p>While this may seem a more roundabout way to perform an aggregation
      * compared to simply mutating a running total in a loop, reduction
@@ -373,7 +373,7 @@
      * <a href="package-summary.html#Associativity">associative</a> accumulation
      * function, and return an {@code OptionalDouble} describing the reduced value,
      * if any. This is equivalent to:
-     * <pre>
+     * <pre>{@code
      *     boolean foundAny = false;
      *     double result = null;
      *     for (double element : this stream) {
@@ -384,7 +384,7 @@
      *         else
      *             result = accumulator.apply(result, element)
      *     return foundAny ? OptionalDouble.of(result) : OptionalDouble.empty();
-     * </pre>
+     * }</pre>
      *
      * but is not constrained to execute sequentially.
      *
@@ -409,12 +409,12 @@
      * such as an {@code ArrayList}, and elements are incorporated by updating
      * the state of the result, rather than by replacing the result.  This
      * produces a result equivalent to:
-     * <pre>
+     * <pre>{@code
      *     R result = resultFactory.get();
      *     for (double element : this stream)
      *         accumulator.accept(result, element);
      *     return result;
-     * </pre>
+     * }</pre>
      *
      * Like {@link #reduce(double, DoubleBinaryOperator)}, {@code collect} operations
      * can be parallelized without requiring additional synchronization.
@@ -446,9 +446,9 @@
      * Returns the sum of elements in this stream.  This is a special case
      * of a <a href="package-summary.html#MutableReduction">reduction</a>
      * and is equivalent to:
-     * <pre>
+     * <pre>{@code
      *     return reduce(0, Double::sum);
-     * </pre>
+     * }</pre>
      * @return The sum of elements in this stream
      */
     double sum();
@@ -458,9 +458,9 @@
      * stream, or an empty optional if this stream is empty.  This is a special
      * case of a <a href="package-summary.html#MutableReduction">reduction</a>
      * and is equivalent to:
-     * <pre>
+     * <pre>{@code
      *     return reduce(Double::min);
-     * </pre>
+     * }</pre>
      * @return The minimal element of this stream, or an empty
      * {@code OptionalDouble}
      */
@@ -471,9 +471,9 @@
      * stream, or an empty optional if this stream is empty.  This is a special
      * case of a <a href="package-summary.html#MutableReduction">reduction</a>
      * and is equivalent to:
-     * <pre>
+     * <pre>{@code
      *     return reduce(Double::max);
-     * </pre>
+     * }</pre>
      * @return The maximal element of this stream, or an empty
      * {@code OptionalDouble}
      */
@@ -483,9 +483,9 @@
      * Returns the count of elements in this stream.  This is a special case of
      * a <a href="package-summary.html#MutableReduction">reduction</a> and is
      * equivalent to:
-     * <pre>
+     * <pre>{@code
      *     return mapToLong(e -> 1L).sum();
-     * </pre>
+     * }</pre>
      * @return The count of elements in this stream
      */
     long count();
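
The reduction docs above describe sum, min, max and count as special cases of reduce. A minimal sketch of that equivalence, assuming the DoubleStream.of factory from the released java.util.stream API (not part of this hunk):

    import java.util.stream.DoubleStream;

    public class DoubleReduceExample {
        public static void main(String[] args) {
            double[] values = { 1.5, 2.5, 4.0 };

            // General form: identity value plus an associative accumulator
            double viaReduce = DoubleStream.of(values).reduce(0, (a, b) -> a + b);

            // Special-case terminal operation documented as equivalent
            double viaSum = DoubleStream.of(values).sum();

            System.out.println(viaReduce + " == " + viaSum);   // 8.0 == 8.0
        }
    }
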
--- a/src/share/classes/java/util/stream/FindOps.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/FindOps.java	Mon Apr 08 16:19:46 2013 -0700
@@ -30,9 +30,9 @@
 import java.util.function.Supplier;
 
 /**
- * Factory for creating instances of a short-circuiting {@code TerminalOp}
- * that searches for an element in a stream pipeline, and terminates when it
- * finds one.  The search supports find-first (find the first element in the
+ * Factory for instances of a short-circuiting {@code TerminalOp} that searches
+ * for an element in a stream pipeline, and terminates when it finds one.
+ * Supported variants include find-first (find the first element in the
  * encounter order) and find-any (find any element, may not be the first in
  * encounter order.)
  *
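
FindOps itself is internal; its two variants surface through the public Stream.findFirst() and Stream.findAny() terminal operations. A minimal sketch of the observable difference, assuming those released methods:

    import java.util.Arrays;
    import java.util.List;
    import java.util.Optional;

    public class FindExample {
        public static void main(String[] args) {
            List<String> words = Arrays.asList("alpha", "beta", "gamma", "delta");

            // find-first: short-circuits but still respects encounter order
            Optional<String> first = words.parallelStream().findFirst();

            // find-any: short-circuits and may return any element, which is
            // cheaper in parallel when the caller does not care about order
            Optional<String> any = words.parallelStream().findAny();

            System.out.println(first.get());          // always "alpha"
            System.out.println(any.orElse("none"));   // any of the four words
        }
    }
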
--- a/src/share/classes/java/util/stream/FlatMapper.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/FlatMapper.java	Mon Apr 08 16:19:46 2013 -0700
@@ -30,34 +30,44 @@
 import java.util.function.LongConsumer;
 
 /**
- * An operation that maps an element of type {@code T} to zero or more elements of type {@code U}.
+ * An operation that maps an element of type {@code T} to zero or more elements
+ * of type {@code U}.
+ *
+ * <p>These functional interfaces are used to describe the arguments of
+ * {@link Stream#flatMap(FlatMapper)} and related methods.
  *
  * @apiNote
- * Rather than representing the result as a {@code Collection} or array, the results are
- * emitted directly into a {@link Consumer}, to avoid the overhead of creating and iterating
- * the (possibly empty or small) intermediate data structure.
+ * Example: @@@
+ * <pre>{@code
+ * }</pre>
+ *
+ * <p>Rather than representing the result as a {@code Collection} or array, the
+ * results are emitted directly into a {@link Consumer}, to avoid the overhead
+ * of creating and iterating the (often empty or small) intermediate data
+ * structure.
+ * @see Stream#flatMap(FlatMapper)
  * @since 1.8
  */
 @FunctionalInterface
 public interface FlatMapper<T, U> {
     /**
-     * Accept an input element and emit zero or more output elements into the provided
-     * {@code Consumer}
+     * Accept an input element and emit zero or more output elements into the
+     * provided {@code Consumer}
      * @param element The input element
      * @param sink A {@code Consumer} to receive the output elements
      */
     void flattenInto(T element, Consumer<U> sink);
 
     /**
-     * An operation that maps an element of type {@code T} to zero or more elements of type
-     * {@code int}.
+     * An operation that maps an element of type {@code T} to zero or more
+     * elements of type {@code int}.
      * This is the int-bearing specialization of {@code FlatMapper}.
      */
     @FunctionalInterface
     interface ToInt<T> {
         /**
-         * Accept an input element and emit zero or more output elements into the provided
-         * {@code IntConsumer}
+         * Accept an input element and emit zero or more output elements into
+         * the provided {@code IntConsumer}
          * @param element The input element
          * @param sink A {@code IntConsumer} to receive the output elements
          */
@@ -65,15 +75,15 @@
     }
 
     /**
-     * An operation that maps an element of type {@code T} to zero or more elements of type
-     * {@code long}.
+     * An operation that maps an element of type {@code T} to zero or more
+     * elements of type {@code long}.
      * This is the long-bearing specialization of {@code FlatMapper}.
      */
     @FunctionalInterface
     interface ToLong<T> {
         /**
-         * Accept an input element and emit zero or more output elements into the provided
-         * {@code LongConsumer}
+         * Accept an input element and emit zero or more output elements into
+         * the provided {@code LongConsumer}
          * @param element The input element
          * @param sink A {@code LongConsumer} to receive the output elements
          */
@@ -81,15 +91,15 @@
     }
 
     /**
-     * An operation that maps an element of type {@code T} to zero or more elements of type
-     * {@code double}.
+     * An operation that maps an element of type {@code T} to zero or more
+     * elements of type {@code double}.
      * This is the double-bearing specialization of {@code FlatMapper}.
      */
     @FunctionalInterface
     interface ToDouble<T> {
         /**
-         * Accept an input element and emit zero or more output elements into the provided
-         * {@code DoubleConsumer}
+         * Accept an input element and emit zero or more output elements into
+         * the provided {@code DoubleConsumer}
          * @param element The input element
          * @param sink A {@code DoubleConsumer} to receive the output elements
          */
@@ -97,15 +107,15 @@
     }
 
     /**
-     * An operation that maps an element of type {@code int} to zero or more elements of type
-     * {@code int}.
+     * An operation that maps an element of type {@code int} to zero or more
+     * elements of type {@code int}.
      * This is the int-to-int specialization of {@code FlatMapper}.
      */
     @FunctionalInterface
     interface OfIntToInt {
         /**
-         * Accept an input element and emit zero or more output elements into the provided
-         * {@code IntConsumer}
+         * Accept an input element and emit zero or more output elements into
+         * the provided {@code IntConsumer}
          * @param element The input element
          * @param sink A {@code IntConsumer} to receive the output elements
          */
@@ -113,15 +123,15 @@
     }
 
     /**
-     * An operation that maps an element of type {@code long} to zero or more elements of type
-     * {@code long}.
+     * An operation that maps an element of type {@code long} to zero or more
+     * elements of type {@code long}.
      * This is the long-to-long specialization of {@code FlatMapper}.
      */
     @FunctionalInterface
     interface OfLongToLong {
         /**
-         * Accept an input element and emit zero or more output elements into the provided
-         * {@code LongConsumer}
+         * Accept an input element and emit zero or more output elements into
+         * the provided {@code LongConsumer}
          * @param element The input element
          * @param sink A {@code LongConsumer} to receive the output elements
          */
@@ -129,15 +139,15 @@
     }
 
     /**
-     * An operation that maps an element of type {@code double} to zero or more elements of type
-     * {@code double}.
+     * An operation that maps an element of type {@code double} to zero or more
+     * elements of type {@code double}.
      * This is the double-to-double specialization of {@code FlatMapper}.
      */
     @FunctionalInterface
     interface OfDoubleToDouble {
         /**
-         * Accept an input element and emit zero or more output elements into the provided
-         * {@code DoubleConsumer}
+         * Accept an input element and emit zero or more output elements into
+         * the provided {@code DoubleConsumer}
          * @param element The input element
          * @param sink A {@code DoubleConsumer} to receive the output elements
          */
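
The @apiNote above leaves its example as a placeholder. A minimal sketch of the Consumer-based style this interface describes, written against the flattenInto signature defined in this file (whether a Stream.flatMap(FlatMapper) overload ultimately ships is outside this changeset):

    import java.util.function.Consumer;
    import java.util.stream.FlatMapper;   // the interface documented above

    public class FlatMapperExample {
        // Splits each line into words, pushing results straight into the sink
        // rather than building a Collection or array per input element.
        static final FlatMapper<String, String> SPLITTER =
            (String line, Consumer<String> sink) -> {
                for (String word : line.split(" "))
                    sink.accept(word);
            };
    }
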
--- a/src/share/classes/java/util/stream/ForEachOps.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/ForEachOps.java	Mon Apr 08 16:19:46 2013 -0700
@@ -34,15 +34,16 @@
 import java.util.Objects;
 
 /**
- * Factory for creating instances of {@code TerminalOp} that implement
- * {@code forEach} traversal over elements of a stream.
+ * Factory for creating instances of {@code TerminalOp} that perform an
+ * action for every element of a stream.  Supported variants include unordered
+ * traversal (elements are provided to the {@code Consumer} as soon as they are
+ * available), and ordered traversal (elements are provided to the
+ * {@code Consumer} in encounter order).
  *
- * <p>{@code forEach} traverses all elements of a stream and sends those
- * elements to a {@code Consumer}.
- *
- * <p>For either type of traversal elements will be sent to the {@code Consumer}
- * on whatever thread and whatever order they become available, independent of
- * the stream's encounter order.
+ * <p>Elements are provided to the {@code Consumer} on whatever thread and in
+ * whatever order they become available.  For ordered traversals, it is
+ * guaranteed that processing an element <em>happens-before</em> processing
+ * subsequent elements in the encounter order.
  *
  * <p>Exceptions occurring as a result of sending an element to the
  * {@code Consumer} will be relayed to the caller and traversal will be
@@ -55,80 +56,76 @@
     private ForEachOps() { }
 
     /**
-     * Constructs a {@code TerminalOp} that implements {@code forEach}
-     * traversal, which traverses all elements of a {@code Stream} and sends
-     * those elements the provided {@code Consumer}.
+     * Constructs a {@code TerminalOp} that performs an action for every element
+     * of a stream.
      *
-     * @param consumer The {@code Consumer} that receives all elements of a
+     * @param action The {@code Consumer} that receives all elements of a
      *        stream
+     * @param ordered Whether an ordered traversal is requested
      * @param <T> The type of the stream elements
      * @return the {@code TerminalOp} instance
      */
-    public static <T> TerminalOp<T, Void> makeRef(Consumer<? super T> consumer, boolean ordered) {
-        Objects.requireNonNull(consumer);
-        return new ForEachOp.OfRef<>(consumer, ordered);
+    public static <T> TerminalOp<T, Void> makeRef(Consumer<? super T> action, boolean ordered) {
+        Objects.requireNonNull(action);
+        return new ForEachOp.OfRef<>(action, ordered);
     }
 
     /**
-     * Constructs a {@code TerminalOp} that implements {@code forEach}
-     * traversal, which traverses all {@code int} elements of a
-     * {@code IntStream} and sends those elements the provided
-     * {@code IntConsumer}.
+     * Constructs a {@code TerminalOp} that performs an action for every element
+     * of an {@code IntStream}.
      *
-     * @param consumer The {@code IntConsumer} that receives all elements of a
+     * @param action The {@code IntConsumer} that receives all elements of a
      *        stream
+     * @param ordered Whether an ordered traversal is requested
      * @return the {@code TerminalOp} instance
      */
-    public static TerminalOp<Integer, Void> makeInt(IntConsumer consumer, boolean ordered) {
-        Objects.requireNonNull(consumer);
-        return new ForEachOp.OfInt(consumer, ordered);
+    public static TerminalOp<Integer, Void> makeInt(IntConsumer action, boolean ordered) {
+        Objects.requireNonNull(action);
+        return new ForEachOp.OfInt(action, ordered);
     }
 
     /**
-     * Constructs a {@code TerminalOp} that implements {@code forEach}
-     * traversal, which traverses all {@code long} elements of a
-     * {@code LongStream} and sends those elements the provided
-     * {@code LongConsumer}.
+     * Constructs a {@code TerminalOp} that performs an action for every element
+     * of a {@code LongStream}.
      *
-     * @param consumer The {@code LongConsumer} that receives all elements of a
+     * @param action The {@code LongConsumer} that receives all elements of a
      *        stream
+     * @param ordered Whether an ordered traversal is requested
      * @return the {@code TerminalOp} instance
      */
-    public static TerminalOp<Long, Void> makeLong(LongConsumer consumer, boolean ordered) {
-        Objects.requireNonNull(consumer);
-        return new ForEachOp.OfLong(consumer, ordered);
+    public static TerminalOp<Long, Void> makeLong(LongConsumer action, boolean ordered) {
+        Objects.requireNonNull(action);
+        return new ForEachOp.OfLong(action, ordered);
     }
 
     /**
-     * Constructs a {@code TerminalOp} that implements {@code forEach}
-     * traversal, which traverses all {@code double} elements of a
-     * {@code DoubleStream} and sends those elements the provided
-     * {@code DoubleConsumer}.
+     * Constructs a {@code TerminalOp} that performs an action for every element
+     * of a {@code DoubleStream}.
      *
-     * @param consumer The {@code DoubleConsumer} that receives all elements of
+     * @param action The {@code DoubleConsumer} that receives all elements of
      *        a stream
+     * @param ordered Whether an ordered traversal is requested
      * @return the {@code TerminalOp} instance
      */
-    public static TerminalOp<Double, Void> makeDouble(DoubleConsumer consumer, boolean ordered) {
-        Objects.requireNonNull(consumer);
-        return new ForEachOp.OfDouble(consumer, ordered);
+    public static TerminalOp<Double, Void> makeDouble(DoubleConsumer action, boolean ordered) {
+        Objects.requireNonNull(action);
+        return new ForEachOp.OfDouble(action, ordered);
     }
 
     /**
      * A {@code TerminalOp} that evaluates a stream pipeline and sends the
      * output to itself as a {@code TerminalSink}.  Elements will be sent in
-     * whatever thread and whatever order they become available, independent of
-     * the stream's encounter order.
+     * whatever thread they become available.  If the traversal is unordered,
+     * they will be sent independent of the stream's encounter order.
      *
-     * <p>This terminal operation is stateless.  For parallel evaluation each
+     * <p>This terminal operation is stateless.  For parallel evaluation, each
      * leaf instance of a {@code ForEachTask} will send elements to the same
-     * {@code TerminalSink} reference that is an instance of this class.  State
-     * management, if any, is deferred to the consumer, held by the concrete
-     * sub-classes, that is the final receiver elements.
+     * {@code TerminalSink} reference that is an instance of this class.
      *
      * @param <T> The output type of the stream pipeline
      */
-    private static abstract class ForEachOp<T> implements TerminalOp<T, Void>, TerminalSink<T, Void> {
+    private static abstract class ForEachOp<T>
+            implements TerminalOp<T, Void>, TerminalSink<T, Void> {
         private final boolean ordered;
 
         protected ForEachOp(boolean ordered) {
@@ -143,12 +140,14 @@
         }
 
         @Override
-        public <S> Void evaluateSequential(PipelineHelper<T> helper, Spliterator<S> spliterator) {
+        public <S> Void evaluateSequential(PipelineHelper<T> helper,
+                                           Spliterator<S> spliterator) {
             return helper.wrapAndCopyInto(this, spliterator).get();
         }
 
         @Override
-        public <S> Void evaluateParallel(PipelineHelper<T> helper, Spliterator<S> spliterator) {
+        public <S> Void evaluateParallel(PipelineHelper<T> helper,
+                                         Spliterator<S> spliterator) {
             if (ordered)
                 new ForEachOrderedTask<>(helper, spliterator, this).invoke();
             else
@@ -165,7 +164,7 @@
 
         // Implementations
 
-        /** {@code forEach} with {@code Stream} */
+        /** Implementation class for reference streams */
         private static class OfRef<T> extends ForEachOp<T> {
             final Consumer<? super T> consumer;
 
@@ -180,7 +179,7 @@
             }
         }
 
-        /** {@code forEach} with {@code IntStream} */
+        /** Implementation class for {@code IntStream} */
         private static class OfInt extends ForEachOp<Integer> implements Sink.OfInt {
             final IntConsumer consumer;
 
@@ -200,7 +199,7 @@
             }
         }
 
-        /** {@code forEach} with {@code LongStream} */
+        /** Implementation class for {@code LongStream} */
         private static class OfLong extends ForEachOp<Long> implements Sink.OfLong {
             final LongConsumer consumer;
 
@@ -220,7 +219,7 @@
             }
         }
 
-        /** {@code forEach} with {@code DoubleStream} */
+        /** Implementation class for {@code DoubleStream} */
         private static class OfDouble extends ForEachOp<Double> implements Sink.OfDouble {
             final DoubleConsumer consumer;
 
@@ -274,7 +273,7 @@
                 }
 
                 Spliterator<S> split;
-                if (!AbstractTask.suggestSplit(helper, spliterator, targetSize)
+                if (!AbstractTask.suggestSplit(spliterator, targetSize)
                     || (split = spliterator.trySplit()) == null) {
                     helper.copyInto(sink, spliterator);
                     propagateCompletion();
@@ -289,7 +288,10 @@
         }
     }
 
-    /** A {@code ForkJoinTask} for performing a parallel ordered for-each operation */
+    /**
+     * A {@code ForkJoinTask} for performing a parallel for-each operation
+     * which visits the elements in encounter order
+     */
     private static class ForEachOrderedTask<S, T> extends CountedCompleter<Void> {
         private final PipelineHelper<T> helper;
         private Spliterator<S> spliterator;
@@ -300,7 +302,9 @@
         private final ForEachOrderedTask<S, T> leftPredecessor;
         private Node<T> node;
 
-        protected ForEachOrderedTask(PipelineHelper<T> helper, Spliterator<S> spliterator, Sink<T> action) {
+        protected ForEachOrderedTask(PipelineHelper<T> helper,
+                                     Spliterator<S> spliterator,
+                                     Sink<T> action) {
             super(null);
             this.helper = helper;
             this.spliterator = spliterator;
@@ -332,7 +336,7 @@
         private static<S, T> void doCompute(ForEachOrderedTask<S, T> task) {
             while (true) {
                 Spliterator<S> split;
-                if (!AbstractTask.suggestSplit(task.helper, task.spliterator, task.targetSize)
+                if (!AbstractTask.suggestSplit(task.spliterator, task.targetSize)
                     || (split = task.spliterator.trySplit()) == null) {
                     if (task.getPendingCount() == 0) {
                         task.helper.wrapAndCopyInto(task.action, task.spliterator);
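
The ordered and unordered traversals constructed here back the public forEach and forEachOrdered terminal operations. A minimal sketch of the user-visible difference on a parallel stream, assuming those released Stream methods:

    import java.util.Arrays;
    import java.util.List;

    public class ForEachExample {
        public static void main(String[] args) {
            List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8);

            // Unordered: elements arrive on whatever thread, in whatever order,
            // they become available.
            numbers.parallelStream().forEach(n -> System.out.print(n + " "));
            System.out.println();

            // Ordered: encounter order is preserved, and processing an element
            // happens-before processing the next one.
            numbers.parallelStream().forEachOrdered(n -> System.out.print(n + " "));
            System.out.println();
        }
    }
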
--- a/src/share/classes/java/util/stream/IntPipeline.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/IntPipeline.java	Mon Apr 08 16:19:46 2013 -0700
@@ -55,10 +55,12 @@
      *
      * @param source {@code Supplier<Spliterator>} describing the stream source
      * @param sourceFlags The source flags for the stream source, described in
-     * {@link StreamOpFlag}
+     *        {@link StreamOpFlag}
+     * @param parallel True if the pipeline is parallel
      */
-    IntPipeline(Supplier<? extends Spliterator<Integer>> source, int sourceFlags) {
-        super(source, sourceFlags);
+    IntPipeline(Supplier<? extends Spliterator<Integer>> source,
+                int sourceFlags, boolean parallel) {
+        super(source, sourceFlags, parallel);
     }
 
     /**
@@ -66,10 +68,12 @@
      *
      * @param source {@code Spliterator} describing the stream source
      * @param sourceFlags The source flags for the stream source, described in
-     * {@link StreamOpFlag}
+     *        {@link StreamOpFlag}
+     * @param parallel True if the pipeline is parallel
      */
-    IntPipeline(Spliterator<Integer> source, int sourceFlags) {
-        super(source, sourceFlags);
+    IntPipeline(Spliterator<Integer> source,
+                int sourceFlags, boolean parallel) {
+        super(source, sourceFlags, parallel);
     }
 
     /**
@@ -215,7 +219,7 @@
     }
 
     @Override
-    public final <U> Stream<U> mapToObj(IntFunction<U> mapper) {
+    public final <U> Stream<U> mapToObj(IntFunction<? extends U> mapper) {
         Objects.requireNonNull(mapper);
         return new ReferencePipeline.StatelessOp<Integer, U>(this, StreamShape.INT_VALUE,
                                                              StreamOpFlag.NOT_SORTED | StreamOpFlag.NOT_DISTINCT) {
@@ -287,6 +291,18 @@
     }
 
     @Override
+    public IntStream unordered() {
+        if (!isOrdered())
+            return this;
+        return new StatelessOp<Integer>(this, StreamShape.INT_VALUE, StreamOpFlag.NOT_ORDERED) {
+            @Override
+            Sink<Integer> opWrapSink(int flags, Sink<Integer> sink) {
+                return sink;
+            }
+        };
+    }
+
+    @Override
     public final IntStream filter(IntPredicate predicate) {
         Objects.requireNonNull(predicate);
         return new StatelessOp<Integer>(this, StreamShape.INT_VALUE,
@@ -461,7 +477,8 @@
 
     @Override
     public final int[] toArray() {
-        return ((Node.OfInt) evaluateToArrayNode(Integer[]::new)).asIntArray();
+        return NodeUtils.intFlatten((Node.OfInt) evaluateToArrayNode(Integer[]::new))
+                        .asIntArray();
     }
 
 
@@ -474,9 +491,11 @@
          *
          * @param source {@code Supplier<Spliterator>} describing the stream source
          * @param sourceFlags The source flags for the stream source, described in {@link StreamOpFlag}
+         * @param parallel True if the pipeline is parallel
          */
-        Head(Supplier<? extends Spliterator<Integer>> source, int sourceFlags) {
-            super(source, sourceFlags);
+        Head(Supplier<? extends Spliterator<Integer>> source,
+             int sourceFlags, boolean parallel) {
+            super(source, sourceFlags, parallel);
         }
 
         /**
@@ -484,9 +503,11 @@
          *
          * @param source {@code Spliterator} describing the stream source
          * @param sourceFlags The source flags for the stream source, described in {@link StreamOpFlag}
+         * @param parallel True if the pipeline is parallel
          */
-        Head(Spliterator<Integer> source, int sourceFlags) {
-            super(source, sourceFlags);
+        Head(Spliterator<Integer> source,
+             int sourceFlags, boolean parallel) {
+            super(source, sourceFlags, parallel);
         }
 
         @Override
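
The unordered() operation added here (and in the other pipeline shapes below) is purely an optimization hint: it wraps the pipeline in a no-op stateless stage that clears the ordering flag. A minimal sketch of where that can help, assuming the public IntStream API:

    import java.util.stream.IntStream;

    public class UnorderedExample {
        public static void main(String[] args) {
            // An ordered parallel distinct() must keep the first occurrence of
            // each value; dropping the ordering constraint up front lets the
            // implementation pick a cheaper, order-insensitive strategy.
            int[] distinctValues = IntStream.range(0, 100_000)
                                            .map(i -> i % 1_000)
                                            .parallel()
                                            .unordered()
                                            .distinct()
                                            .toArray();
            System.out.println(distinctValues.length);   // 1000
        }
    }
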
--- a/src/share/classes/java/util/stream/IntStream.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/IntStream.java	Mon Apr 08 16:19:46 2013 -0700
@@ -113,7 +113,7 @@
      *               each element
      * @return the new stream
      */
-    <U> Stream<U> mapToObj(IntFunction<U> mapper);
+    <U> Stream<U> mapToObj(IntFunction<? extends U> mapper);
 
     /**
      * Produces a {@code LongStream} consisting of the results of applying
@@ -218,14 +218,14 @@
      *
      * @apiNote This method exists mainly to support debugging, where you want
      * to see the elements as they flow past a certain point in a pipeline:
-     * <pre>
+     * <pre>{@code
      *     list.stream()
      *         .filter(filteringFunction)
      *         .peek(e -> {System.out.println("Filtered value: " + e); })
      *         .map(mappingFunction)
      *         .peek(e -> {System.out.println("Mapped value: " + e); })
      *         .collect(Collectors.toIntSummaryStatistics());
-     * </pre>
+     * }</pre>
      *
      * @param consumer A <a href="package-summary.html#NonInterference">
      *                 non-interfering</a> action to perform on the elements as
@@ -320,12 +320,12 @@
      * an <a href="package-summary.html#Associativity">associative</a>
      * accumulation function, and return the reduced value.  This is equivalent
      * to:
-     * <pre>
+     * <pre>{@code
      *     int result = identity;
      *     for (int element : this stream)
      *         result = accumulator.apply(result, element)
      *     return result;
-     * </pre>
+     * }</pre>
      *
      * but is not constrained to execute sequentially.
      *
@@ -341,15 +341,15 @@
      * @apiNote Sum, min, max, and average are all special cases of reduction.
      * Summing a stream of numbers can be expressed as:
      *
-     * <pre>
+     * <pre>{@code
      *     int sum = integers.reduce(0, (a, b) -> a+b);
-     * </pre>
+     * }</pre>
      *
      * or more compactly:
      *
-     * <pre>
+     * <pre>{@code
      *     int sum = integers.reduce(0, Integer::sum);
-     * </pre>
+     * }</pre>
      *
      * <p>While this may seem a more roundabout way to perform an aggregation
      * compared to simply mutating a running total in a loop, reduction
@@ -374,7 +374,7 @@
      * <a href="package-summary.html#Associativity">associative</a> accumulation
      * function, and return an {@code OptionalInt} describing the reduced value,
      * if any. This is equivalent to:
-     * <pre>
+     * <pre>{@code
      *     boolean foundAny = false;
      *     int result = null;
      *     for (int element : this stream) {
@@ -385,7 +385,7 @@
      *         else
      *             result = accumulator.apply(result, element)
      *     return foundAny ? OptionalInt.of(result) : OptionalInt.empty();
-     * </pre>
+     * }</pre>
      *
      * but is not constrained to execute sequentially.
      *
@@ -410,12 +410,12 @@
      * such as an {@code ArrayList}, and elements are incorporated by updating
      * the state of the result, rather than by replacing the result.  This
      * produces a result equivalent to:
-     * <pre>
+     * <pre>{@code
      *     R result = resultFactory.get();
      *     for (int element : this stream)
      *         accumulator.accept(result, element);
      *     return result;
-     * </pre>
+     * }</pre>
      *
      * Like {@link #reduce(int, IntBinaryOperator)}, {@code collect} operations
      * can be parallelized without requiring additional synchronization.
@@ -447,9 +447,9 @@
      * Returns the sum of elements in this stream.  This is a special case
      * of a <a href="package-summary.html#MutableReduction">reduction</a>
      * and is equivalent to:
-     * <pre>
+     * <pre>{@code
      *     return reduce(0, Integer::sum);
-     * </pre>
+     * }</pre>
      * @return The sum of elements in this stream
      */
     int sum();
@@ -459,9 +459,9 @@
      * stream, or an empty optional if this stream is empty.  This is a special
      * case of a <a href="package-summary.html#MutableReduction">reduction</a>
      * and is equivalent to:
-     * <pre>
+     * <pre>{@code
      *     return reduce(Integer::min);
-     * </pre>
+     * }</pre>
      * @return The minimal element of this stream, or an empty
      * {@code OptionalInt}
      */
@@ -472,9 +472,9 @@
      * stream, or an empty optional if this stream is empty.  This is a special
      * case of a <a href="package-summary.html#MutableReduction">reduction</a>
      * and is equivalent to:
-     * <pre>
+     * <pre>{@code
      *     return reduce(Integer::max);
-     * </pre>
+     * }</pre>
      * @return The maximal element of this stream, or an empty
      * {@code OptionalInt}
      */
@@ -484,9 +484,9 @@
      * Returns the count of elements in this stream.  This is a special case of
      * a <a href="package-summary.html#MutableReduction">reduction</a> and is
      * equivalent to:
-     * <pre>
+     * <pre>{@code
      *     return mapToLong(e -> 1L).sum();
-     * </pre>
+     * }</pre>
      * @return The count of elements in this stream
      */
     long count();
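
A minimal sketch of the no-identity form of reduce described above, which yields an OptionalInt so that an empty stream can be distinguished from a real result (assuming the IntStream.of and IntStream.empty factories from the released API):

    import java.util.OptionalInt;
    import java.util.stream.IntStream;

    public class OptionalReduceExample {
        public static void main(String[] args) {
            OptionalInt max  = IntStream.of(3, 9, 4).reduce(Integer::max);
            OptionalInt none = IntStream.empty().reduce(Integer::max);

            System.out.println(max.getAsInt());     // 9
            System.out.println(none.isPresent());   // false: no identity, no elements
        }
    }
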
--- a/src/share/classes/java/util/stream/LongPipeline.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/LongPipeline.java	Mon Apr 08 16:19:46 2013 -0700
@@ -55,20 +55,26 @@
      * Constructor for the head of a stream pipeline.
      *
      * @param source {@code Supplier<Spliterator>} describing the stream source
-     * @param sourceFlags The source flags for the stream source, described in {@link StreamOpFlag}
+     * @param sourceFlags The source flags for the stream source, described in
+     *        {@link StreamOpFlag}
+     * @param parallel True if the pipeline is parallel
      */
-    LongPipeline(Supplier<? extends Spliterator<Long>> source, int sourceFlags) {
-        super(source, sourceFlags);
+    LongPipeline(Supplier<? extends Spliterator<Long>> source,
+                 int sourceFlags, boolean parallel) {
+        super(source, sourceFlags, parallel);
     }
 
     /**
      * Constructor for the head of a stream pipeline.
      *
      * @param source {@code Spliterator} describing the stream source
-     * @param sourceFlags The source flags for the stream source, described in {@link StreamOpFlag}
+     * @param sourceFlags The source flags for the stream source, described in
+     *        {@link StreamOpFlag}
+     * @param parallel True if the pipeline is parallel
      */
-    LongPipeline(Spliterator<Long> source, int sourceFlags) {
-        super(source, sourceFlags);
+    LongPipeline(Spliterator<Long> source,
+                 int sourceFlags, boolean parallel) {
+        super(source, sourceFlags, parallel);
     }
 
     /**
@@ -197,7 +203,7 @@
     }
 
     @Override
-    public final <U> Stream<U> mapToObj(LongFunction<U> mapper) {
+    public final <U> Stream<U> mapToObj(LongFunction<? extends U> mapper) {
         Objects.requireNonNull(mapper);
         return new ReferencePipeline.StatelessOp<Long, U>(this, StreamShape.LONG_VALUE,
                                                           StreamOpFlag.NOT_SORTED | StreamOpFlag.NOT_DISTINCT) {
@@ -269,6 +275,18 @@
     }
 
     @Override
+    public LongStream unordered() {
+        if (!isOrdered())
+            return this;
+        return new StatelessOp<Long>(this, StreamShape.LONG_VALUE, StreamOpFlag.NOT_ORDERED) {
+            @Override
+            Sink<Long> opWrapSink(int flags, Sink<Long> sink) {
+                return sink;
+            }
+        };
+    }
+
+    @Override
     public final LongStream filter(LongPredicate predicate) {
         Objects.requireNonNull(predicate);
         return new StatelessOp<Long>(this, StreamShape.LONG_VALUE,
@@ -444,7 +462,7 @@
 
     @Override
     public final long[] toArray() {
-        return ((Node.OfLong) evaluateToArrayNode(Long[]::new)).asLongArray();
+        return NodeUtils.longFlatten((Node.OfLong) evaluateToArrayNode(Long[]::new)).asLongArray();
     }
 
 
@@ -457,9 +475,11 @@
          *
          * @param source {@code Supplier<Spliterator>} describing the stream source
          * @param sourceFlags The source flags for the stream source, described in {@link StreamOpFlag}
+         * @param parallel True if the pipeline is parallel
          */
-        Head(Supplier<? extends Spliterator<Long>> source, int sourceFlags) {
-            super(source, sourceFlags);
+        Head(Supplier<? extends Spliterator<Long>> source,
+             int sourceFlags, boolean parallel) {
+            super(source, sourceFlags, parallel);
         }
 
         /**
@@ -467,9 +487,11 @@
          *
          * @param source {@code Spliterator} describing the stream source
          * @param sourceFlags The source flags for the stream source, described in {@link StreamOpFlag}
+         * @param parallel True if the pipeline is parallel
          */
-        Head(Spliterator<Long> source, int sourceFlags) {
-            super(source, sourceFlags);
+        Head(Spliterator<Long> source,
+             int sourceFlags, boolean parallel) {
+            super(source, sourceFlags, parallel);
         }
 
         @Override
--- a/src/share/classes/java/util/stream/LongStream.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/LongStream.java	Mon Apr 08 16:19:46 2013 -0700
@@ -113,7 +113,7 @@
      *               each element
      * @return the new stream
      */
-    <U> Stream<U> mapToObj(LongFunction<U> mapper);
+    <U> Stream<U> mapToObj(LongFunction<? extends U> mapper);
 
     /**
      * Produces an {@code IntStream} consisting of the results of applying
@@ -218,14 +218,14 @@
      *
      * @apiNote This method exists mainly to support debugging, where you want
      * to see the elements as they flow past a certain point in a pipeline:
-     * <pre>
+     * <pre>{@code
      *     list.stream()
      *         .filter(filteringFunction)
      *         .peek(e -> {System.out.println("Filtered value: " + e); })
      *         .map(mappingFunction)
      *         .peek(e -> {System.out.println("Mapped value: " + e); })
      *         .collect(Collectors.toLongSummaryStatistics());
-     * </pre>
+     * }</pre>
      *
      * @param consumer A <a href="package-summary.html#NonInterference">
      *                 non-interfering</a> action to perform on the elements as
@@ -320,12 +320,12 @@
      * an <a href="package-summary.html#Associativity">associative</a>
      * accumulation function, and return the reduced value.  This is equivalent
      * to:
-     * <pre>
+     * <pre>{@code
      *     long result = identity;
      *     for (long element : this stream)
      *         result = accumulator.apply(result, element)
      *     return result;
-     * </pre>
+     * }</pre>
      *
      * but is not constrained to execute sequentially.
      *
@@ -341,15 +341,15 @@
      * @apiNote Sum, min, max, and average are all special cases of reduction.
      * Summing a stream of numbers can be expressed as:
      *
-     * <pre>
+     * <pre>{@code
      *     long sum = integers.reduce(0, (a, b) -> a+b);
-     * </pre>
+     * }</pre>
      *
      * or more compactly:
      *
-     * <pre>
+     * <pre>{@code
      *     long sum = integers.reduce(0, Long::sum);
-     * </pre>
+     * }</pre>
      *
      * <p>While this may seem a more roundabout way to perform an aggregation
      * compared to simply mutating a running total in a loop, reduction
@@ -374,7 +374,7 @@
      * <a href="package-summary.html#Associativity">associative</a> accumulation
      * function, and return an {@code OptionalLong} describing the reduced value,
      * if any. This is equivalent to:
-     * <pre>
+     * <pre>{@code
      *     boolean foundAny = false;
      *     long result = null;
      *     for (long element : this stream) {
@@ -385,7 +385,7 @@
      *         else
      *             result = accumulator.apply(result, element)
      *     return foundAny ? OptionalLong.of(result) : OptionalLong.empty();
-     * </pre>
+     * }</pre>
      *
      * but is not constrained to execute sequentially.
      *
@@ -410,12 +410,12 @@
      * such as an {@code ArrayList}, and elements are incorporated by updating
      * the state of the result, rather than by replacing the result.  This
      * produces a result equivalent to:
-     * <pre>
+     * <pre>{@code
      *     R result = resultFactory.get();
      *     for (long element : this stream)
      *         accumulator.accept(result, element);
      *     return result;
-     * </pre>
+     * }</pre>
      *
      * Like {@link #reduce(long, LongBinaryOperator)}, {@code collect} operations
      * can be parallelized without requiring additional synchronization.
@@ -447,9 +447,9 @@
      * Returns the sum of elements in this stream.  This is a special case
      * of a <a href="package-summary.html#MutableReduction">reduction</a>
      * and is equivalent to:
-     * <pre>
+     * <pre>{@code
      *     return reduce(0, Long::sum);
-     * </pre>
+     * }</pre>
      * @return The sum of elements in this stream
      */
     long sum();
@@ -459,9 +459,9 @@
      * stream, or an empty optional if this stream is empty.  This is a special
      * case of a <a href="package-summary.html#MutableReduction">reduction</a>
      * and is equivalent to:
-     * <pre>
+     * <pre>{@code
      *     return reduce(Long::min);
-     * </pre>
+     * }</pre>
      * @return The minimal element of this stream, or an empty
      * {@code OptionalLong}
      */
@@ -472,9 +472,9 @@
      * stream, or an empty optional if this stream is empty.  This is a special
      * case of a <a href="package-summary.html#MutableReduction">reduction</a>
      * and is equivalent to:
-     * <pre>
+     * <pre>{@code
      *     return reduce(Long::max);
-     * </pre>
+     * }</pre>
      * @return The maximal element of this stream, or an empty
      * {@code OptionalLong}
      */
@@ -484,9 +484,9 @@
      * Returns the count of elements in this stream.  This is a special case of
      * a <a href="package-summary.html#MutableReduction">reduction</a> and is
      * equivalent to:
-     * <pre>
+     * <pre>{@code
      *     return map(e -> 1L).sum();
-     * </pre>
+     * }</pre>
      * @return The count of elements in this stream
      */
     long count();
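
The three-argument collect described above is a mutable reduction: the result container is updated in place rather than replaced. A minimal sketch, assuming the LongStream.of factory and the released collect(Supplier, ObjLongConsumer, BiConsumer) overload:

    import java.util.ArrayList;
    import java.util.List;
    import java.util.stream.LongStream;

    public class CollectExample {
        public static void main(String[] args) {
            List<Long> boxed = LongStream.of(1, 2, 3, 4, 5)
                .collect(ArrayList::new,                        // resultFactory
                         (list, value) -> list.add(value),      // accumulator mutates the list
                         (left, right) -> left.addAll(right));  // combiner merges partial results

            System.out.println(boxed);   // [1, 2, 3, 4, 5]
        }
    }
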
--- a/src/share/classes/java/util/stream/MatchOps.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/MatchOps.java	Mon Apr 08 16:19:46 2013 -0700
@@ -33,9 +33,9 @@
 import java.util.function.Supplier;
 
 /**
- * Factory for creating instances of a short-circuiting {@code TerminalOp}
- * that evaluates a predicate on the elements of a stream and determines whether
- * all, any or none of those elements match the predicate.
+ * Factory for instances of a short-circuiting {@code TerminalOp} that
+ * implement quantified predicate matching on the elements of a stream.
+ * Supported variants include match-all, match-any, and match-none.
  *
  * @since 1.8
  */
@@ -67,8 +67,7 @@
     }
 
     /**
-     * Constructs a {@code TerminalOp} for the given predicate and quantified
-     * match criteria
+     * Constructs a quantified predicate matcher for a {@code Stream}
      *
      * @param predicate The {@code Predicate} to apply to stream elements
      * @param matchKind The kind of quantified match (all, any, none)
@@ -86,8 +85,6 @@
 
             @Override
             public void accept(T t) {
-                // @@@ assert !stop when SortedOp supports short-circuit on Sink.end
-                //     for sequential operations
                 if (!stop && predicate.test(t) == matchKind.stopOnPredicateMatches) {
                     stop = true;
                     value = matchKind.shortCircuitResult;
@@ -95,7 +92,7 @@
             }
         }
 
-        // @@@ Change to return MatchSink::new when compiler and runtime bugs are fixed
+        // @@@ Workaround for JDK-8011591 -- when fixed, replace s with constructor ref
         Supplier<BooleanTerminalSink<T>> s = new Supplier<BooleanTerminalSink<T>>() {
             @Override
             public BooleanTerminalSink<T> get() {return new MatchSink();}
@@ -104,8 +101,7 @@
     }
 
     /**
-     * Constructs a {@code TerminalOp} for the given predicate and quantified
-     * match criteria for an {@code IntStream}
+     * Constructs a quantified predicate matcher for an {@code IntStream}
      *
      * @param predicate The {@code Predicate} to apply to stream elements
      * @param matchKind The kind of quantified match (all, any, none)
@@ -129,6 +125,7 @@
             }
         }
 
+        // @@@ Workaround for JDK-8011591 -- when fixed, replace s with constructor ref
         Supplier<BooleanTerminalSink<Integer>> s = new Supplier<BooleanTerminalSink<Integer>>() {
             @Override
             public BooleanTerminalSink<Integer> get() {return new MatchSink();}
@@ -137,8 +134,7 @@
     }
 
     /**
-     * Constructs a {@code TerminalOp} for the given predicate and quantified
-     * match criteria for a {@code LongStream}
+     * Constructs a quantified predicate matcher for a {@code LongStream}
      *
      * @param predicate The {@code Predicate} to apply to stream elements
      * @param matchKind The kind of quantified match (all, any, none)
@@ -163,6 +159,7 @@
             }
         }
 
+        // @@@ Workaround for JDK-8011591 -- when fixed, replace s with constructor ref
         Supplier<BooleanTerminalSink<Long>> s = new Supplier<BooleanTerminalSink<Long>>() {
             @Override
             public BooleanTerminalSink<Long> get() {return new MatchSink();}
@@ -171,8 +168,7 @@
     }
 
     /**
-     * Constructs a {@code TerminalOp} for the given predicate and quantified
-     * match criteria for a {@code DoubleStream}
+     * Constructs a quantified predicate matcher for a {@code DoubleStream}
      *
      * @param predicate The {@code Predicate} to apply to stream elements
      * @param matchKind The kind of quantified match (all, any, none)
@@ -197,6 +193,7 @@
             }
         }
 
+        // @@@ Workaround for JDK-8011591 -- when fixed, replace s with constructor ref
         Supplier<BooleanTerminalSink<Double>> s = new Supplier<BooleanTerminalSink<Double>>() {
             @Override
             public BooleanTerminalSink<Double> get() {return new MatchSink();}
@@ -288,14 +285,17 @@
      * @param <S> The type of source elements for the pipeline
      * @param <T> The type of output elements for the pipeline
      */
-    private static final class MatchTask<S, T> extends AbstractShortCircuitTask<S, T, Boolean, MatchTask<S, T>> {
+    private static final class MatchTask<S, T>
+            extends AbstractShortCircuitTask<S, T, Boolean, MatchTask<S, T>> {
         private final MatchOp<T> op;
 
+        /** Constructor for root node */
         MatchTask(MatchOp<T> op, PipelineHelper<T> helper, Spliterator<S> spliterator) {
             super(helper, spliterator);
             this.op = op;
         }
 
+        /** Constructor for non-root node */
         MatchTask(MatchTask<S, T> parent, Spliterator<S> spliterator) {
             super(parent, spliterator);
             this.op = parent.op;
--- a/src/share/classes/java/util/stream/Node.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/Node.java	Mon Apr 08 16:19:46 2013 -0700
@@ -48,12 +48,11 @@
  * <p>A {@code Node} typically does not store the elements directly, but instead
  * mediates access to one or more existing (effectively immutable) data
  * structures such as a {@code Collection}, array, or a set of other
- * {@code Node}s.  {@code Node}s directly representing existing data structures
- * are considered <em>flat</em> (have no children); commonly {@code Node}s are
- * formed into a tree whose shape corresponds to the computation tree that
- * produced the elements that are contained in the leaf nodes.  The use of
- * {@code Node} within the stream framework is largely to avoid copying data
- * unnecessarily during parallel operations.
+ * {@code Node}s.  Commonly {@code Node}s are formed into a tree whose shape
+ * corresponds to the computation tree that produced the elements that are
+ * contained in the leaf nodes.  The use of {@code Node} within the stream
+ * framework is largely to avoid copying data unnecessarily during parallel
+ * operations.
  *
  * @param <T> the type of elements.
  * @since 1.8
@@ -65,7 +64,7 @@
      * {@code Node}.
      *
      * @return a {@code Spliterator} describing the elements contained in this
-     *         {@code Node}.
+     *         {@code Node}
      */
     Spliterator<T> spliterator();
 
@@ -92,24 +91,23 @@
     /**
      * Retrieves the child {@code Node} at a given index.
      *
-     * @implSpec The default implementation throws
+     * @implSpec The default implementation always throws
      * {@code IndexOutOfBoundsException}
      * @param i the index to the child node
      * @return the child node
      * @throws IndexOutOfBoundsException if the index is less than 0 or greater
-     *         than or equal to the
-     * number of child nodes.
+     *         than or equal to the number of child nodes.
      */
     default Node<T> getChild(int i) {
         throw new IndexOutOfBoundsException();
     }
 
     /**
-     * Provide an array view of the contents of this node.
+     * Provides an array view of the contents of this node.
      *
      * <p>Depending on the underlying implementation, this may return a
      * reference to an internal array rather than a copy.  Since the returned
-     * array may be shared, the resulting array should not be modified.  The
+     * array may be shared, the returned array should not be modified.  The
      * {@code generator} function may be consulted to create the array if a new
      * array needs to be created.
      *
@@ -145,10 +143,6 @@
         return StreamShape.REFERENCE;
     }
 
-    default Node<T> flatten(IntFunction<T[]> generator) {
-        return NodeUtils.flatten(this, generator);
-    }
-
     /**
      * Returns the number of elements contained in this node
      *
@@ -164,8 +158,8 @@
     interface Builder<T> extends Sink<T> {
 
         /**
-         * Builds the node.  Should be called after all elements have been pushed
-         * and signalled with an invocation of {@link Sink#end()}.
+         * Builds the node.  Should be called after all elements have been
+         * pushed and signalled with an invocation of {@link Sink#end()}.
          *
          * @return the resulting {@code Node}
          */
@@ -284,7 +278,8 @@
          * at a given offset into the array.  It is the caller's responsibility
          * to ensure there is sufficient room in the array.
          *
-         * @param array the array into which to copy the contents of this {@code Node}
+         * @param array the array into which to copy the contents of this
+         *              {@code Node}
          * @param offset the starting offset within the array
          * @throws IndexOutOfBoundsException if copying would cause access of
          *         data outside array bounds
@@ -301,10 +296,6 @@
             return StreamShape.INT_VALUE;
         }
 
-        @Override
-        default Node.OfInt flatten(IntFunction<Integer[]> generator) {
-            return NodeUtils.intFlatten(this);
-        }
     }
 
     /** Specialized {@code Node} for long elements */
@@ -420,10 +411,6 @@
         }
 
 
-        @Override
-        default Node.OfLong flatten(IntFunction<Long[]> generator) {
-            return NodeUtils.longFlatten(this);
-        }
     }
 
     /** Specialized {@code Node} for double elements */
@@ -540,9 +527,5 @@
             return StreamShape.DOUBLE_VALUE;
         }
 
-        @Override
-        default Node.OfDouble flatten(IntFunction<Double[]> generator) {
-            return NodeUtils.doubleFlatten(this);
-        }
     }
 }
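
As a rough illustration of the tree shape described above, the total element count can be obtained by walking children instead of flattening. This sketch assumes it sits inside java.util.stream (Node is an internal interface) and that getChildCount() and count() accessors exist alongside getChild(int); neither signature appears in this hunk:

    // Hypothetical helper inside java.util.stream; getChildCount() and count()
    // are assumed accessors, only getChild(int) is shown in the diff above.
    final class NodeSizes {
        static <T> long totalSize(Node<T> node) {
            int children = node.getChildCount();   // assumed
            if (children == 0)
                return node.count();               // assumed: element count of a leaf
            long size = 0;
            for (int i = 0; i < children; i++)
                size += totalSize(node.getChild(i));
            return size;
        }

        private NodeSizes() { }
    }
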
--- a/src/share/classes/java/util/stream/NodeUtils.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/NodeUtils.java	Mon Apr 08 16:19:46 2013 -0700
@@ -305,7 +305,7 @@
         private final IntFunction<U[]> generator;
 
         CollectorTask(PipelineHelper<U> helper, IntFunction<U[]> generator, Spliterator<T> spliterator) {
-            super(helper, spliterator, AbstractTask.suggestTargetSize(spliterator.estimateSize()));
+            super(helper, spliterator);
             this.helper = helper;
             this.generator = generator;
         }
@@ -388,7 +388,7 @@
         private static <T, U> void doCompute(SizedCollectorTask<T, U> task) {
             while (true) {
                 Spliterator<T> leftSplit;
-                if (!AbstractTask.suggestSplit(task.helper, task.spliterator, task.targetSize)
+                if (!AbstractTask.suggestSplit(task.spliterator, task.targetSize)
                     || ((leftSplit = task.spliterator.trySplit()) == null)) {
                     if (task.offset + task.length >= Streams.MAX_ARRAY_SIZE)
                         throw new IllegalArgumentException("Stream size exceeds max array size");
@@ -494,7 +494,7 @@
         private final PipelineHelper<Integer> helper;
 
         IntCollectorTask(PipelineHelper<Integer> helper, Spliterator<T> spliterator) {
-            super(helper, spliterator, AbstractTask.suggestTargetSize(spliterator.estimateSize()));
+            super(helper, spliterator);
             this.helper = helper;
         }
 
@@ -574,7 +574,7 @@
         private static <T> void doCompute(IntSizedCollectorTask<T> task) {
             while (true) {
                 Spliterator<T> leftSplit;
-                if (!AbstractTask.suggestSplit(task.helper, task.spliterator, task.targetSize)
+                if (!AbstractTask.suggestSplit(task.spliterator, task.targetSize)
                     || ((leftSplit = task.spliterator.trySplit()) == null)) {
                     if (task.offset + task.length >= Streams.MAX_ARRAY_SIZE)
                         throw new IllegalArgumentException("Stream size exceeds max array size");
@@ -677,7 +677,7 @@
         private final PipelineHelper<Long> helper;
 
         LongCollectorTask(PipelineHelper<Long> helper, Spliterator<T> spliterator) {
-            super(helper, spliterator, AbstractTask.suggestTargetSize(spliterator.estimateSize()));
+            super(helper, spliterator);
             this.helper = helper;
         }
 
@@ -757,7 +757,7 @@
         private static <T> void doCompute(LongSizedCollectorTask<T> task) {
             while (true) {
                 Spliterator<T> leftSplit;
-                if (!AbstractTask.suggestSplit(task.helper, task.spliterator, task.targetSize)
+                if (!AbstractTask.suggestSplit(task.spliterator, task.targetSize)
                     || ((leftSplit = task.spliterator.trySplit()) == null)) {
                     if (task.offset + task.length >= Streams.MAX_ARRAY_SIZE)
                         throw new IllegalArgumentException("Stream size exceeds max array size");
@@ -859,7 +859,7 @@
         private final PipelineHelper<Double> helper;
 
         DoubleCollectorTask(PipelineHelper<Double> helper, Spliterator<T> spliterator) {
-            super(helper, spliterator, AbstractTask.suggestTargetSize(spliterator.estimateSize()));
+            super(helper, spliterator);
             this.helper = helper;
         }
 
@@ -939,7 +939,7 @@
         private static <T> void doCompute(DoubleSizedCollectorTask<T> task) {
             while (true) {
                 Spliterator<T> leftSplit;
-                if (!AbstractTask.suggestSplit(task.helper, task.spliterator, task.targetSize)
+                if (!AbstractTask.suggestSplit(task.spliterator, task.targetSize)
                     || ((leftSplit = task.spliterator.trySplit()) == null)) {
                     if (task.offset + task.length >= Streams.MAX_ARRAY_SIZE)
                         throw new IllegalArgumentException("Stream size exceeds max array size");
--- a/src/share/classes/java/util/stream/PipelineHelper.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/PipelineHelper.java	Mon Apr 08 16:19:46 2013 -0700
@@ -28,27 +28,26 @@
 import java.util.function.IntFunction;
 
 /**
- * Helper class for executing
- * <a href="package-summary.html#StreamPipelines">stream pipelines</a>,
- * capturing all of the information about a stream pipeline (source, output
- * shape, stream flags, parallelism, etc) in one place.
+ * Helper class for executing <a href="package-summary.html#StreamPipelines">
+ * stream pipelines</a>, capturing all of the information about a stream
+ * pipeline (output shape, intermediate operations, stream flags, parallelism,
+ * etc) in one place.
  *
  * @apiNote
- * A stream pipeline consists of a source, zero or more intermediate operations,
- * and a terminal operation.  Execution of the stream pipeline begins when the
- * terminal operation is executed.  A {@code PipelineHelper} describes the
- * portion of a stream pipeline including its source, some or all of its
- * intermediate operations, and certain information about the terminal (or
- * stateful) operation which follows the last intermediate operation described
- * by this {@code PipelineHelper}.  The {@code PipelineHelper} is passed to the
+ * A {@code PipelineHelper} describes the initial segment of a stream pipeline,
+ * including its source and intermediate operations, and may additionally
+ * incorporate information about the terminal (or stateful) operation which
+ * follows the last intermediate operation described by this
+ * {@code PipelineHelper}. The {@code PipelineHelper} is passed to the
  * {@link TerminalOp#evaluateParallel(PipelineHelper, java.util.Spliterator)},
- * {@link TerminalOp#evaluateSequential(PipelineHelper, java.util.Spliterator)}, and
- * {@link AbstractPipeline#opEvaluateParallel(PipelineHelper, java.util.Spliterator,
- * java.util.function.IntFunction)}, methods, which can use the {@code PipelineHelper}
- * to access the source {@code Spliterator} for the pipeline, information about the pipeline
- * such as input shape, output shape, stream flags, and size, and use the helper methods
- * such as {@link #wrapAndCopyInto(Sink, Spliterator)}, {@link #copyInto(Sink, Spliterator)},
- * and {@link #wrapSink(Sink)} to execute pipeline operations.
+ * {@link TerminalOp#evaluateSequential(PipelineHelper, java.util.Spliterator)},
+ * and {@link AbstractPipeline#opEvaluateParallel(PipelineHelper, java.util.Spliterator,
+ * java.util.function.IntFunction)} methods, which can use the
+ * {@code PipelineHelper} to access information about the pipeline such as
+ * input shape, output shape, stream flags, and size, and use the helper methods
+ * such as {@link #wrapAndCopyInto(Sink, Spliterator)},
+ * {@link #copyInto(Sink, Spliterator)}, and {@link #wrapSink(Sink)} to execute
+ * pipeline operations.
  *
  * @param <P_OUT> Type of output elements from the pipeline
  * @since 1.8
@@ -56,12 +55,11 @@
 abstract class PipelineHelper<P_OUT> {
 
     /**
-     * Gets the combined stream and operation flags for the output of the
+     * Gets the combined stream and operation flags for the output of the described
      * pipeline.  This will incorporate stream flags from the stream source, all
      * the intermediate operations and the terminal operation.
      *
-     * @return the combined stream and operation flags for the output of the
-     *         pipeline
+     * @return the combined stream and operation flags
      * @see StreamOpFlag
      */
     abstract int getStreamAndOpFlags();
@@ -92,13 +90,12 @@
      *
      * @implSpec
      * The implementation behaves as if:
-     * <pre>
+     * <pre>{@code
      *     copyInto(wrapSink(sink), spliterator);
-     * </pre>
+     * }</pre>
      *
      * @param sink the {@code Sink} to receive the results
-     * @param spliterator the spliterator describing the portion of the source
-     *        input to process
+     * @param spliterator the spliterator describing the source input to process
      */
     abstract<P_IN, S extends Sink<P_OUT>> S wrapAndCopyInto(S sink, Spliterator<P_IN> spliterator);
 
@@ -155,8 +152,8 @@
      * @param exactSizeIfKnown if >=0 then a builder will be created that has a
      *        fixed capacity of exactly exactSizeIfKnown elements; if < 0 then the
      *        builder has variable capacity.  A fixed capacity builder will fail
-     *        if an element is added and the builder has reached capacity.
-     * @param generator the array generator
+     *        if an element is added after the builder has reached capacity.
+     * @param generator a factory function for array instances
      * @return A {@code Node.Builder} compatible with the output shape of this
      *         {@code PipelineHelper}
      */
@@ -167,10 +164,10 @@
      * Collects all output elements resulting from applying the pipeline stages
      * to the source {@code Spliterator} into a {@code Node}.
      *
-     * @implSpec
+     * @implNote
      * If the pipeline has no intermediate operations and the source is backed
-     * by a {@code Node} then that {@code Node} will be returned or flattened
-     * and then returned. This reduces copying for a pipeline consisting of a
+     * by a {@code Node} then that {@code Node} will be returned (or flattened
+     * and then returned). This reduces copying for a pipeline consisting of a
      * stateful operation followed by a terminal operation that returns an
      * array, such as:
      * <pre>{@code
@@ -182,7 +179,7 @@
      *        {@code Node} returned will contain no children, otherwise the
      *        {@code Node} may represent the root in a tree that reflects the
      *        shape of the computation tree.
-     * @param generator the array generator
+     * @param generator a factory function for array instances
      * @return the {@code Node} containing all output elements
      */
     abstract<P_IN> Node<P_OUT> evaluate(Spliterator<P_IN> spliterator,
--- a/src/share/classes/java/util/stream/ReduceOps.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/ReduceOps.java	Mon Apr 08 16:19:46 2013 -0700
@@ -179,6 +179,13 @@
             public ReducingSink makeSink() {
                 return new ReducingSink();
             }
+
+            @Override
+            public int getOpFlags() {
+                return collector.characteristics().contains(Collector.Characteristics.UNORDERED)
+                       ? StreamOpFlag.NOT_ORDERED
+                       : 0;
+            }
         };
     }
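
The getOpFlags override added above lets a collector's UNORDERED characteristic feed back into the combined pipeline flags as NOT_ORDERED.  At the public API level the characteristic can be inspected directly; the sketch below uses the released JDK 8 three-parameter Collector shape, which differs slightly from this snapshot's two-parameter form:

    import java.util.stream.Collector;
    import java.util.stream.Collectors;

    public class CollectorCharacteristicsExample {
        public static void main(String[] args) {
            Collector<String, ?, ?> toSet = Collectors.toSet();
            // Collectors.toSet() reports UNORDERED, so a reduction built on it
            // does not need to preserve the encounter order of the stream.
            System.out.println(toSet.characteristics()
                    .contains(Collector.Characteristics.UNORDERED)); // true
        }
    }
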
 
--- a/src/share/classes/java/util/stream/ReferencePipeline.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/ReferencePipeline.java	Mon Apr 08 16:19:46 2013 -0700
@@ -58,11 +58,12 @@
      *
      * @param source {@code Supplier<Spliterator>} describing the stream source
      * @param sourceFlags The source flags for the stream source, described in
-     * {@link StreamOpFlag}
+     *        {@link StreamOpFlag}
+     * @param parallel True if the pipeline is parallel
      */
     ReferencePipeline(Supplier<? extends Spliterator<?>> source,
-                      int sourceFlags) {
-        super(source, sourceFlags);
+                      int sourceFlags, boolean parallel) {
+        super(source, sourceFlags, parallel);
     }
 
     /**
@@ -70,10 +71,12 @@
      *
      * @param source {@code Spliterator} describing the stream source
      * @param sourceFlags The source flags for the stream source, described in
-     * {@link StreamOpFlag}
+     *        {@link StreamOpFlag}
+     * @param parallel True if the pipeline is parallel
      */
-    ReferencePipeline(Spliterator<?> source, int sourceFlags) {
-        super(source, sourceFlags);
+    ReferencePipeline(Spliterator<?> source,
+                      int sourceFlags, boolean parallel) {
+        super(source, sourceFlags, parallel);
     }
 
     /**
@@ -138,6 +141,18 @@
     // Stateless intermediate operations from Stream
 
     @Override
+    public Stream<U> unordered() {
+        if (!isOrdered())
+            return this;
+        return new StatelessOp<U, U>(this, StreamShape.REFERENCE, StreamOpFlag.NOT_ORDERED) {
+            @Override
+            Sink<U> opWrapSink(int flags, Sink<U> sink) {
+                return sink;
+            }
+        };
+    }
+
+    @Override
     public final Stream<U> filter(Predicate<? super U> predicate) {
         Objects.requireNonNull(predicate);
         return new StatelessOp<U, U>(this, StreamShape.REFERENCE,
@@ -224,7 +239,7 @@
     }
 
     @Override
-    public final <R> Stream<R> flatMap(Function<U, Stream<? extends R>> mapper) {
+    public final <R> Stream<R> flatMap(Function<? super U, ? extends Stream<? extends R>> mapper) {
         Objects.requireNonNull(mapper);
         // We can do better than this, by polling cancellationRequested when stream is infinite
         return flatMap((U u, Consumer<R> sink) -> mapper.apply(u).sequential().forEach(sink));
@@ -329,7 +344,7 @@
         return SortedOps.makeRef(this, comparator);
     }
 
-    private final Stream<U> slice(long skip, long limit) {
+    private Stream<U> slice(long skip, long limit) {
         return SliceOps.makeRef(this, skip, limit);
     }
 
@@ -380,7 +395,8 @@
         // Runtime checking will be performed when an element is stored in A[], thus if A is not a
         // super type of U an ArrayStoreException will be thrown.
         IntFunction rawGenerator = (IntFunction) generator;
-        return (A[]) evaluateToArrayNode(rawGenerator).asArray(rawGenerator);
+        return (A[]) NodeUtils.flatten(evaluateToArrayNode(rawGenerator), rawGenerator)
+                              .asArray(rawGenerator);
     }
 
     @Override
@@ -430,6 +446,14 @@
 
     @Override
     public final <R> R collect(Collector<? super U, R> collector) {
+        if (isParallel()
+                && (collector.characteristics().contains(Collector.Characteristics.CONCURRENT))
+                && (!isOrdered() || collector.characteristics().contains(Collector.Characteristics.UNORDERED))) {
+            R container = collector.resultSupplier().get();
+            BiFunction<R, ? super U, R> accumulator = collector.accumulator();
+            forEach(u -> accumulator.apply(container, u));
+            return container;
+        }
         return evaluate(ReduceOps.makeRef(collector));
     }
 
@@ -439,19 +463,6 @@
     }
 
     @Override
-    public final <R> R collectUnordered(Collector<? super U, R> collector) {
-        if (collector.characteristics().contains(Collector.Characteristics.CONCURRENT)) {
-            R container = collector.resultSupplier().get();
-            BiFunction<R, ? super U, R> accumulator = collector.accumulator();
-            forEach(u -> accumulator.apply(container, u));
-            return container;
-        }
-        else {
-            return collect(collector);
-        }
-    }
-
-    @Override
     public final Optional<U> max(Comparator<? super U> comparator) {
         return reduce(Comparators.greaterOf(comparator));
     }
@@ -478,8 +489,9 @@
          * @param source {@code Supplier<Spliterator>} describing the stream source
          * @param sourceFlags The source flags for the stream source, described in {@link StreamOpFlag}
          */
-        Head(Supplier<? extends Spliterator<?>> source, int sourceFlags) {
-            super(source, sourceFlags);
+        Head(Supplier<? extends Spliterator<?>> source,
+             int sourceFlags, boolean parallel) {
+            super(source, sourceFlags, parallel);
         }
 
         /**
@@ -488,8 +500,9 @@
          * @param source {@code Spliterator} describing the stream source
          * @param sourceFlags The source flags for the stream source, described in {@link StreamOpFlag}
          */
-        Head(Spliterator<?> source, int sourceFlags) {
-            super(source, sourceFlags);
+        Head(Spliterator<?> source,
+             int sourceFlags, boolean parallel) {
+            super(source, sourceFlags, parallel);
         }
 
         @Override
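
The changes above add an unordered() decorator and allow collect() to skip the partition-and-merge reduction when the collector is CONCURRENT and encounter order does not matter.  A usage-level sketch against the released JDK 8 public API (Collectors.groupingByConcurrent is the released name and may not exist in this snapshot):

    import java.util.Arrays;
    import java.util.List;
    import java.util.concurrent.ConcurrentMap;
    import java.util.stream.Collectors;

    public class ConcurrentCollectExample {
        public static void main(String[] args) {
            List<String> words = Arrays.asList("apple", "avocado", "banana", "cherry");

            // An unordered parallel stream plus a CONCURRENT collector lets the
            // implementation accumulate into one shared container instead of
            // building and merging a container per subtask.
            ConcurrentMap<Character, List<String>> byFirstLetter = words.parallelStream()
                    .unordered()
                    .collect(Collectors.groupingByConcurrent(w -> w.charAt(0)));

            System.out.println(byFirstLetter); // e.g. {a=[apple, avocado], b=[banana], c=[cherry]}
        }
    }
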
--- a/src/share/classes/java/util/stream/Sink.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/Sink.java	Mon Apr 08 16:19:46 2013 -0700
@@ -56,13 +56,13 @@
  * (such as filtering or mapping), and a terminal stage, such as reduction or
  * for-each.  For concreteness, consider the pipeline:
  *
- * <pre>
+ * <pre>{@code
  *     int longestStringLengthStartingWithA
  *         = strings.stream()
  *                  .filter(s -> s.startsWith("A"))
  *                  .mapToInt(String::length)
  *                  .max();
- * </pre>
+ * }</pre>
  *
  * <p>Here, we have three stages, filtering, mapping, and reducing.  The
  * filtering stage consumes strings and emits a subset of those strings; the
@@ -97,31 +97,31 @@
  * intermediate operations will use these chaining wrappers.  For example, the
  * mapping stage in the above example would look like:
  *
- * <pre>
+ * <pre>{@code
  *     IntSink is = new Sink.ChainedReference<U>(sink) {
  *         public void accept(U u) {
  *             downstream.accept(mapper.applyAsInt(u));
  *         }
  *     };
- * </pre>
+ * }</pre>
  *
- * <p>Here, we implement {@code Sink.ChainedReference<U>}, meaning that we expect to
- * receive elements of type {@code U} as input, and pass the downstream sink to
- * the constructor.  Because the next stage expects to receive integers, we must
- * call the {@code accept(int)} method when emitting values to the downstream.
+ * <p>Here, we implement {@code Sink.ChainedReference<U>}, meaning that we expect
+ * to receive elements of type {@code U} as input, and pass the downstream sink
+ * to the constructor.  Because the next stage expects to receive integers, we
+ * must call the {@code accept(int)} method when emitting values to the downstream.
  * The {@code accept()} method applies the mapping function from {@code U} to
  * {@code int} and passes the resulting value to the downstream {@code Sink}.
  *
  * @param <T> Type of elements for value streams
  * @since 1.8
  */
-@FunctionalInterface
 interface Sink<T> extends Consumer<T> {
     /**
-     * Resets the sink state to receive a fresh data set. This is used when a
-     * {@code Sink} is being reused by multiple calculations.
+     * Resets the sink state to receive a fresh data set.  This must be called
+     * before sending any data to the sink.  After calling {@link #end()},
+     * you may call this method to reset the sink for another calculation.
      * @param size The exact size of the data to be pushed downstream, if
-     * known or {@code Long.MAX_VALUE} if unknown or infinite.
+     * known, or {@code -1} if unknown or infinite.
      *
      * <p>Prior to this call, the sink must be in the initial state, and after
      * this call it is in the active state.
@@ -131,8 +131,7 @@
     /**
      * Indicates that all elements have been pushed.  If the {@code Sink} is
      * stateful, it should send any stored state downstream at this time, and
-     * should clear any accumulated state (and associated resources) so that the
-     * sink may be reused for another computation.
+     * should clear any accumulated state (and associated resources).
      *
      * <p>Prior to this call, the sink must be in the active state, and after
      * this call it is returned to the initial state.
@@ -140,8 +139,7 @@
     default void end() {}
 
     /**
-     * Communicates to upstream sources that this {@code Sink} does not
-     * wish to receive any more data
+     * Indicates that this {@code Sink} does not wish to receive any more data.
      *
      * @implSpec The default implementation always returns false
      *
@@ -152,7 +150,7 @@
     }
 
     /**
-     * Accepts an int value
+     * Accepts an int value.
      *
      * @implSpec The default implementation throws IllegalStateException
      *
@@ -163,7 +161,7 @@
     }
 
     /**
-     * Accepts a long value
+     * Accepts a long value.
      * @implSpec The default implementation throws IllegalStateException
      *
      * @throws IllegalStateException If this sink does not accept long values
@@ -173,7 +171,7 @@
     }
 
     /**
-     * Accepts a double value
+     * Accepts a double value.
      * @implSpec The default implementation throws IllegalStateException
      *
      * @throws IllegalStateException If this sink does not accept double values
@@ -187,7 +185,6 @@
      * {@code accept(int)}, and wires {@code accept(Integer)} to bridge to
      * {@code accept(int)}.
      */
-    @FunctionalInterface
     interface OfInt extends Sink<Integer>, IntConsumer {
         @Override
         void accept(int value);
@@ -205,7 +202,6 @@
      * {@code accept(long)}, and wires {@code accept(Long)} to bridge to
      * {@code accept(long)}.
      */
-    @FunctionalInterface
     interface OfLong extends Sink<Long>, LongConsumer {
         @Override
         void accept(long value);
@@ -223,7 +219,6 @@
      * {@code accept(double)}, and wires {@code accept(Double)} to bridge to
      * {@code accept(double)}.
      */
-    @FunctionalInterface
     interface OfDouble extends Sink<Double>, DoubleConsumer {
         @Override
         void accept(double value);
@@ -237,8 +232,8 @@
     }
 
     /**
-     * Abstract {@code Sink} implementation designed for creating chains of
-     * sinks.  The {@code begin} and {@code end}, and
+     * Abstract {@code Sink} implementation for creating chains of
+     * sinks.  The {@code begin}, {@code end}, and
      * {@code cancellationRequested} methods are wired to chain to the
      * downstream {@code Sink}.  This implementation takes a downstream
      * {@code Sink} of unknown input shape and produces a {@code Sink<T>}.  The
@@ -270,7 +265,7 @@
 
     /**
      * Abstract {@code Sink} implementation designed for creating chains of
-     * sinks.  The {@code begin} and {@code end}, and
+     * sinks.  The {@code begin}, {@code end}, and
      * {@code cancellationRequested} methods are wired to chain to the
      * downstream {@code Sink}.  This implementation takes a downstream
      * {@code Sink} of unknown input shape and produces a {@code Sink.OfInt}.
@@ -302,7 +297,7 @@
 
     /**
      * Abstract {@code Sink} implementation designed for creating chains of
-     * sinks.  The {@code begin} and {@code end}, and
+     * sinks.  The {@code begin}, {@code end}, and
      * {@code cancellationRequested} methods are wired to chain to the
      * downstream {@code Sink}.  This implementation takes a downstream
      * {@code Sink} of unknown input shape and produces a {@code Sink.OfLong}.
@@ -334,7 +329,7 @@
 
     /**
      * Abstract {@code Sink} implementation designed for creating chains of
-     * sinks.  The {@code begin} and {@code end}, and
+     * sinks.  The {@code begin}, {@code end}, and
      * {@code cancellationRequested} methods are wired to chain to the
      * downstream {@code Sink}.  This implementation takes a downstream
      * {@code Sink} of unknown input shape and produces a {@code Sink.OfDouble}.
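
The Sink documentation above describes the begin/accept/end protocol and the chained wrappers that intermediate operations build.  A minimal, self-contained model of that protocol is sketched below; MiniSink is a simplified stand-in defined locally for illustration, not the package-private Sink interface:

    import java.util.Arrays;
    import java.util.List;
    import java.util.function.Consumer;

    public class SinkProtocolSketch {
        // Simplified stand-in for the package-private Sink: one abstract
        // accept(T) plus the begin/end lifecycle described above.
        interface MiniSink<T> extends Consumer<T> {
            default void begin(long size) {}
            default void end() {}
        }

        public static void main(String[] args) {
            // Terminal stage: records the string lengths it receives.
            StringBuilder out = new StringBuilder();
            MiniSink<Integer> downstream = i -> out.append(i).append(' ');

            // Chained stage for map(String::length), forwarding begin/end downstream.
            MiniSink<String> mapping = new MiniSink<String>() {
                @Override public void begin(long size) { downstream.begin(size); }
                @Override public void accept(String s) { downstream.accept(s.length()); }
                @Override public void end() { downstream.end(); }
            };

            List<String> data = Arrays.asList("a", "bb", "ccc");
            mapping.begin(data.size());  // initial -> active state
            data.forEach(mapping);       // push the data set
            mapping.end();               // active -> back to initial state
            System.out.println(out);     // 1 2 3
        }
    }
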
--- a/src/share/classes/java/util/stream/SliceOps.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/SliceOps.java	Mon Apr 08 16:19:46 2013 -0700
@@ -31,7 +31,8 @@
 import java.util.function.IntFunction;
 
 /**
- * Factory methods for transforming a stream into a subsequence of itself.
+ * Factory for instances of short-circuiting stateful intermediate operations
+ * that produce subsequences of their input stream.
  *
  * @since 1.8
  */
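
At the public API level, the slice operations produced by this factory back skip and limit.  A brief usage sketch (public Stream API as released in JDK 8):

    import java.util.List;
    import java.util.stream.Collectors;
    import java.util.stream.Stream;

    public class SliceExample {
        public static void main(String[] args) {
            // skip/limit are the stateful, short-circuiting slice operations;
            // limit lets this terminate even though the source is infinite.
            List<Integer> page = Stream.iterate(1, i -> i + 1)
                    .skip(20)
                    .limit(10)
                    .collect(Collectors.toList());
            System.out.println(page); // [21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
        }
    }
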
--- a/src/share/classes/java/util/stream/Stream.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/Stream.java	Mon Apr 08 16:19:46 2013 -0700
@@ -62,7 +62,18 @@
  * stream operations preserve the <a href="package-summary.html#Ordering">
  * encounter order</a> of their source, and terminal operations
  * respect the encounter order of their source, if the source
- * has an encounter order.
+ * has an encounter order.  Provided that the parameters to stream operations
+ * satisfy the <a href="package-summary.html#NonInterference">non-interference
+ * requirements</a>, and excepting differences arising from the absence of
+ * a defined encounter order, the result of a stream pipeline should be
+ * stable across multiple executions of the same operations on the same source.
+ * However, the timing and thread in which side-effects occur (for those
+ * operations which are allowed to produce side-effects, such as
+ * {@link #forEach(Consumer)}), are explicitly nondeterministic for parallel
+ * execution of stream pipelines.
+ *
+ * <p>Unless otherwise noted, passing a {@code null} argument to any stream
+ * method may result in a {@link NullPointerException}.
  *
  * @apiNote
  * Streams are not data structures; they do not manage the storage for their
@@ -70,9 +81,6 @@
  * you can use the {@link #iterator()} or {@link #spliterator()} operations to
  * perform a controlled traversal.
  *
- * <p>Unless otherwise noted, passing a {@code null} argument to any stream
- * method may result in a {@link NullPointerException}.
- *
  * @param <T> Type of elements.
  * @since 1.8
  * @see <a href="package-summary.html">java.util.stream</a>
@@ -164,9 +172,9 @@
      * is a stream of purchase orders, and each purchase order contains a
      * collection of line items, then the following produces a stream of line
      * items:
-     * <pre>
+     * <pre>{@code
      *     orderStream.flatMap(order -> order.getLineItems().stream())...
-     * </pre>
+     * }</pre>
      *
      * <p>This implementation is likely to be less efficient than the other
      * form of {@link #flatMap(FlatMapper)}, and is provided for convenience.
@@ -175,7 +183,7 @@
      *               each element which produces a stream of new values
      * @return the new stream
      */
-    <R> Stream<R> flatMap(Function<T, Stream<? extends R>> mapper);
+    <R> Stream<R> flatMap(Function<? super T, ? extends Stream<? extends R>> mapper);
 
     /**
      * Produces a stream consisting of the results of replacing each
@@ -189,13 +197,13 @@
      * transform, and a {@code Consumer} into which it deposits zero or more
      * values corresponding to that element.  For example, to map a stream of
      * strings into a stream of the characters in those strings, you would do:
-     * <pre>
+     * <pre>{@code
      *     stringStream.flatMap((elt, destination) -> {
-     *                              for (i=0; i &lt; elt.length(); i++)
+     *                              for (int i = 0; i < elt.length(); i++)
      *                                  destination.accept(elt.charAt(i));
      *                          })
      *                 ...
-     * </pre>
+     * }</pre>
      * @implNote
      * This form of {@code flatMap} is usually less convenient to use than the
      * {@link #flatMap(Function)} form, but is often considerably more efficient
@@ -319,14 +327,14 @@
      *
      * @apiNote This method exists mainly to support debugging, where you want
      * to see the elements as they flow past a certain point in a pipeline:
-     * <pre>
+     * <pre>{@code
      *     list.stream()
      *         .filter(filteringFunction)
      *         .peek(e -> {System.out.println("Filtered value: " + e); });
      *         .map(mappingFunction)
      *         .peek(e -> {System.out.println("Mapped value: " + e); });
      *         .collect(Collectors.intoList());
-     * </pre>
+     * }</pre>
      *
      * @param consumer A <a href="package-summary.html#NonInterference">
      *                 non-interfering</a> action to perform on the elements as
@@ -438,12 +446,12 @@
      * an <a href="package-summary.html#Associativity">associative</a>
      * accumulation function, and return the reduced value.  This is equivalent
      * to:
-     * <pre>
+     * <pre>{@code
      *     T result = identity;
      *     for (T element : this stream)
      *         result = accumulator.apply(result, element)
      *     return result;
-     * </pre>
+     * }</pre>
      *
      * but is not constrained to execute sequentially.
      *
@@ -459,15 +467,15 @@
      * @apiNote Sum, min, max, average, and string concatenation are all special
      * cases of reduction. Summing a stream of numbers can be expressed as:
      *
-     * <pre>
+     * <pre>{@code
      *     Integer sum = integers.reduce(0, (a, b) -> a+b);
-     * </pre>
+     * }</pre>
      *
      * or more compactly:
      *
-     * <pre>
+     * <pre>{@code
      *     Integer sum = integers.reduce(0, Integer::sum);
-     * </pre>
+     * }</pre>
      *
      * <p>While this may seem a more roundabout way to perform an aggregation
      * compared to simply mutating a running total in a loop, reduction
@@ -488,7 +496,7 @@
      * <a href="package-summary.html#Associativity">associative</a> accumulation
      * function, and return an {@code Optional} describing the reduced value,
      * if any. This is equivalent to:
-     * <pre>
+     * <pre>{@code
      *     boolean foundAny = false;
      *     T result = null;
      *     for (T element : this stream) {
@@ -499,7 +507,7 @@
      *         else
      *             result = accumulator.apply(result, element)
      *     return foundAny ? Optional.of(result) : Optional.empty();
-     * </pre>
+     * }</pre>
      *
      * but is not constrained to execute sequentially.
      *
@@ -521,12 +529,12 @@
      * Performs a <a href="package-summary.html#Reduction">reduction</a> on the
      * elements of this stream, using the provided identity, accumulation
      * function, and a combining functions.  This is equivalent to:
-     * <pre>
+     * <pre>{@code
      *     U result = identity;
      *     for (T element : this stream)
      *         result = accumulator.apply(result, element)
      *     return result;
-     * </pre>
+     * }</pre>
      *
      * but is not constrained to execute sequentially.
      *
@@ -535,9 +543,9 @@
      * is equal to {@code u}.  Additionally, the {@code combiner} function
      * must be compatible with the {@code accumulator} function; for all
      * {@code u} and {@code t}, the following must hold:
-     * <pre>
+     * <pre>{@code
      *     combiner.apply(u, accumulator.apply(identity, t)) == accumulator.apply(u, t)
-     * </pre>
+     * }</pre>
      *
      * <p>This is a <a href="package-summary.html#StreamOps">terminal operation</a>.
      *
@@ -573,12 +581,12 @@
      * such as an {@code ArrayList}, and elements are incorporated by updating
      * the state of the result, rather than by replacing the result.  This
      * produces a result equivalent to:
-     * <pre>
+     * <pre>{@code
      *     R result = resultFactory.get();
      *     for (T element : this stream)
      *         accumulator.accept(result, element);
      *     return result;
-     * </pre>
+     * }</pre>
      *
      * Like {@link #reduce(Object, BinaryOperator)}, {@code collect} operations
      * can be parallelized without requiring additional synchronization.
@@ -589,17 +597,17 @@
      * @apiNote There are many existing classes in the JDK whose signatures are
      * a good match for use as arguments to {@code collect()}.  For example,
      * the following will accumulate strings into an ArrayList:
-     * <pre>
-     *     List&lt;String> asList = stringStream.collect(ArrayList::new, ArrayList::add, ArrayList::addAll);
-     * </pre>
+     * <pre>{@code
+     *     List<String> asList = stringStream.collect(ArrayList::new, ArrayList::add, ArrayList::addAll);
+     * }</pre>
      *
      * The following will take a stream of strings and concatenates them into a
      * single string:
-     * <pre>
+     * <pre>{@code
      *     String concat = stringStream.collect(StringBuilder::new, StringBuilder::append,
      *                                          StringBuilder::append)
      *                                 .toString();
-     * </pre>
+     * }</pre>
      *
      * @param resultFactory Function that creates a new result container.
      *                      For a parallel execution, this function may be
@@ -639,23 +647,23 @@
      *
      * @apiNote
      * The following will accumulate strings into an ArrayList:
-     * <pre>
-     *     List&lt;String> asList = stringStream.collect(Collectors.toList());
+     * <pre>{@code
+     *     List<String> asList = stringStream.collect(Collectors.toList());
      * }</pre>
      *
      * The following will classify {@code Person} objects by city:
-     * <pre>
-     *     Map&lt;String, Collection&lt;Person>> peopleByCity
+     * <pre>{@code
+     *     Map<String, Collection<Person>> peopleByCity
      *         = personStream.collect(Collectors.groupBy(Person::getCity));
-     * </pre>
+     * }</pre>
      *
      * The following will classify {@code Person} objects by state and city,
      * cascading two {@code Collector}s together:
-     * <pre>
-     *     Map&lt;String, Map&lt;String, Collection&lt;Person>>> peopleByStateAndCity
+     * <pre>{@code
+     *     Map<String, Map<String, Collection<Person>>> peopleByStateAndCity
      *         = personStream.collect(Collectors.groupBy(Person::getState,
      *                                                   Collectors.groupBy(Person::getCity)));
-     * </pre>
+     * }</pre>
      *
      * @param collector The {@code Collector} describing the reduction
      * @param <R> The type of the result
@@ -665,25 +673,6 @@
      */
     <R> R collect(Collector<? super T, R> collector);
 
-    /** Performs a <a href="package-summary.html#MutableReduction">mutable
-     * reduction</a> operation on the elements of this stream using a
-     * {@code Collector} object to describe the reduction, without regard to
-     * encounter order. If the provided {@code Collector} is concurrent, this
-     * implementation may invoke the function returned by
-     * {@link Collector#accumulator()} concurrently on the same result object.
-     * In some cases, implementing a reduction by concurrently modifying a
-     * shared data structure may be more efficient than partitioning and merging.
-     *
-     * <p>This is a <a href="package-summary.html#StreamOps">terminal operation</a>.
-     *
-     * @param collector The {@code Collector} describing the reduction
-     * @param <R> The type of the result
-     * @return The result of the reduction
-     * @see #collect(Supplier, BiConsumer, BiConsumer)
-     * @see Collectors
-     */
-    <R> R collectUnordered(Collector<? super T, R> collector);
-
     /**
      * Returns the maximal element of this stream according to the provided
      * {@code Comparator}.  This is a special case of a
@@ -778,6 +767,7 @@
      *
      * @return An {@code Optional} describing the first element of this stream,
      * or an empty {@code Optional} if the stream is empty
+     * @throws NullPointerException if the element selected is null
      */
     Optional<T> findFirst();
 
@@ -796,6 +786,7 @@
      *
      * @return An {@code Optional} describing some element of this stream, or an
      * empty {@code Optional} if the stream is empty
+     * @throws NullPointerException if the element selected is null
      * @see #findFirst()
      */
     Optional<T> findAny();
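
The reduce documentation above includes the three-argument form taking an identity, an accumulator, and a combiner.  A compact worked example of that form (public API, stable in the released JDK 8):

    import java.util.Arrays;
    import java.util.List;

    public class ReduceExample {
        public static void main(String[] args) {
            List<String> words = Arrays.asList("stream", "reduce", "example");

            // identity = 0; the accumulator folds a String into an int partial
            // result; the combiner merges partial results from parallel subtasks.
            int totalLength = words.stream()
                    .reduce(0, (len, w) -> len + w.length(), Integer::sum);

            System.out.println(totalLength); // 6 + 6 + 7 = 19
        }
    }
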
--- a/src/share/classes/java/util/stream/StreamOpFlag.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/StreamOpFlag.java	Mon Apr 08 16:19:46 2013 -0700
@@ -27,7 +27,6 @@
 import java.util.EnumMap;
 import java.util.Map;
 import java.util.Spliterator;
-import java.util.StringJoiner;
 
 /**
  * Flags corresponding to characteristics of streams and operations. Flags are
@@ -52,7 +51,6 @@
  *       <th>{@code ORDERED}</th>
  *       <th>{@code SIZED}</th>
  *       <th>{@code SHORT_CIRCUIT}</th>
- *       <th>{@code PARALLEL}</th>
  *     </tr>
  *   </thead>
  *   <tbody>
@@ -63,7 +61,6 @@
  *        <td>Y</td>
  *        <td>Y</td>
  *        <td>N</td>
- *        <td>Y</td>
  *      </tr>
  *      <tr>
  *        <th colspan="2" class="tableSubHeadingColor">Intermediate operation</th>
@@ -72,34 +69,32 @@
  *        <td>PCI</td>
  *        <td>PC</td>
  *        <td>PI</td>
- *        <td>PC</td>
  *      </tr>
-  *      <tr>
+ *      <tr>
  *        <th colspan="2" class="tableSubHeadingColor">Terminal operation</th>
  *        <td>N</td>
  *        <td>N</td>
  *        <td>PC</td>
  *        <td>N</td>
  *        <td>PI</td>
- *        <td>N</td>
  *      </tr>
-*   </tbody>
-*   <tfoot>
-*       <tr>
-*         <th class="tableSubHeadingColor" colspan="2">Legend</th>
-*         <th colspan="6" rowspan="7">&nbsp;</th>
-*       </tr>
-*       <tr>
-*         <th class="tableSubHeadingColor">Flag</th>
-*         <th class="tableSubHeadingColor">Meaning</th>
-*         <th colspan="6"></th>
-*       </tr>
-*       <tr><td>Y</td><td>Allowed</td></tr>
-*       <tr><td>N</td><td>Invalid</td></tr>
-*       <tr><td>P</td><td>Preserves</td></tr>
-*       <tr><td>C</td><td>Clears</td></tr>
-*       <tr><td>I</td><td>Injects</td></tr>
-*   </tfoot>
+ *   </tbody>
+ *   <tfoot>
+ *       <tr>
+ *         <th class="tableSubHeadingColor" colspan="2">Legend</th>
+ *         <th colspan="6" rowspan="7">&nbsp;</th>
+ *       </tr>
+ *       <tr>
+ *         <th class="tableSubHeadingColor">Flag</th>
+ *         <th class="tableSubHeadingColor">Meaning</th>
+ *         <th colspan="6"></th>
+ *       </tr>
+ *       <tr><td>Y</td><td>Allowed</td></tr>
+ *       <tr><td>N</td><td>Invalid</td></tr>
+ *       <tr><td>P</td><td>Preserves</td></tr>
+ *       <tr><td>C</td><td>Clears</td></tr>
+ *       <tr><td>I</td><td>Injects</td></tr>
+ *   </tfoot>
  * </table>
  * </div>
  *
@@ -135,7 +130,7 @@
  * correct order.
  *
  * <p>
- * With the exception of {@link #PARALLEL}, stream characteristics can be
+ * With the exception of {@link #SHORT_CIRCUIT}, stream characteristics can be
  * derived from the equivalent {@link java.util.Spliterator} characteristics:
  * {@link java.util.Spliterator#DISTINCT}, {@link java.util.Spliterator#SORTED},
  * {@link java.util.Spliterator#ORDERED}, and
@@ -206,9 +201,6 @@
  *
  * @since 1.8
  */
-// @@@ When a new flag is added what should happen for existing operations?
-//     Need to move to a builder approach used by ops where the masks for the new flag are
-//     taken into account for default behaviour.
 enum StreamOpFlag {
 
     /*
@@ -223,12 +215,12 @@
      * Characteristics belong to certain types, see the Type enum. Bit masks for
      * the types are constructed as per the following table:
      *
-     *                        DISTINCT  SORTED  ORDERED  SIZED  SHORT_CIRCUIT  PARALLEL
-     *          SPLITERATOR      01       01       01      01        00           00
-     *               STREAM      01       01       01      01        00           01
-     *                   OP      11       11       11      10        01           10
-     *          TERMINAL_OP      00       00       10      00        01           00
-     * UPSTREAM_TERMINAL_OP      00       00       10      00        00           00
+     *                        DISTINCT  SORTED  ORDERED  SIZED  SHORT_CIRCUIT
+     *          SPLITERATOR      01       01       01      01        00
+     *               STREAM      01       01       01      01        00
+     *                   OP      11       11       11      10        01
+     *          TERMINAL_OP      00       00       10      00        01
+     * UPSTREAM_TERMINAL_OP      00       00       10      00        00
      *
      * 01 = set/inject
      * 10 = clear
@@ -333,23 +325,12 @@
      */
     // 12, 0x01000000
     SHORT_CIRCUIT(12,
-                  set(Type.OP).set(Type.TERMINAL_OP)),
-
-
-    /**
-     * Characteristic value signifying that the stream is to be evaluated in
-     * parallel rather than sequentially.
-     * <p>
-     * A stream can have this value or an intermediate operation can preserve or
-     * clear this value.
-     */
-    // 13, 0x04000000
-    PARALLEL(13,
-             set(Type.STREAM).clear(Type.OP));
+                  set(Type.OP).set(Type.TERMINAL_OP));
 
     // The following 2 flags are currently undefined and a free for any further
     // stream flags if/when required
     //
+    // 13, 0x04000000
     // 14, 0x10000000
     // 15, 0x40000000
 
@@ -623,16 +604,6 @@
      */
     static final int IS_SHORT_CIRCUIT = SHORT_CIRCUIT.set;
 
-    /**
-     * The bit value to set {@link #PARALLEL}
-     */
-    static final int IS_PARALLEL = PARALLEL.set;
-
-    /**
-     * The bit value to clear {@link #PARALLEL}
-     */
-    static final int NOT_PARALLEL = PARALLEL.clear;
-
     private static int getMask(int flags) {
         return (flags == 0)
                ? FLAG_MASK
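
The implementation comment above describes the two-bits-per-flag encoding, where 01 means set/inject, 10 means clear, and 11 is the preserve mask.  The toy sketch below models that encoding with made-up bit positions; the constants and the combine method are illustrative only and do not mirror the real bit layout or helper names:

    public class TwoBitFlagSketch {
        // Illustrative bit positions only; the real enum assigns its own offsets.
        static final int ORDERED_BIT = 0, SIZED_BIT = 2;

        static int set(int bit)   { return 0b01 << bit; } // 01 = set/inject
        static int clear(int bit) { return 0b10 << bit; } // 10 = clear
        static int mask(int bit)  { return 0b11 << bit; } // 11 = preserve mask

        // Combine upstream flags with an operation's flags: where the operation
        // says nothing (00) the upstream value is preserved, otherwise it wins.
        static int combine(int upstream, int op) {
            int result = upstream;
            for (int bit = 0; bit < 32; bit += 2) {
                int opBits = op & mask(bit);
                if (opBits != 0) {
                    result = (result & ~mask(bit)) | opBits;
                }
            }
            return result;
        }

        public static void main(String[] args) {
            int source = set(ORDERED_BIT) | set(SIZED_BIT); // ordered, sized source
            int filterOp = clear(SIZED_BIT);                // a filter clears SIZED
            int combined = combine(source, filterOp);
            System.out.println((combined & mask(ORDERED_BIT)) == set(ORDERED_BIT)); // true
            System.out.println((combined & mask(SIZED_BIT)) == clear(SIZED_BIT));   // true
        }
    }
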
--- a/src/share/classes/java/util/stream/StreamShape.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/StreamShape.java	Mon Apr 08 16:19:46 2013 -0700
@@ -37,18 +37,14 @@
  * @apiNote
  * This enum is used by implementations to determine compatibility between
  * streams and operations (i.e., if the output shape of a stream is compatible
- * with the input shape of the next operation).  It is also used to reduce the
- * code bloat associated with having multiple specialized stream types for
- * primitives by allowing some code to be largely shape-independent.
+ * with the input shape of the next operation).
  *
  * <p>Some APIs require you to specify both a generic type and a stream shape
- * for input or output elements, such as {@link IntermediateOp} which has both
- * generic type parameters for its input and output types, and getters for the
- * input and output shape.  When representing primitive streams in this way, the
+ * for input or output elements, such as {@link TerminalOp} which has a
+ * generic type parameter for its input type, and a getter for the
+ * input shape.  When representing primitive streams in this way, the
  * generic type parameter should correspond to the wrapper type for that
- * primitive type.  Accordingly, the {@code IntermediateOp} implementing
- * {@link Stream#mapToInt(ToIntFunction)} would have an output type parameter of
- * {@code Integer} and an output shape of @{code INT_VALUE}.
+ * primitive type.
  * @since 1.8
  */
 enum StreamShape {
--- a/src/share/classes/java/util/stream/StreamSpliterators.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/StreamSpliterators.java	Mon Apr 08 16:19:46 2013 -0700
@@ -43,11 +43,19 @@
     /**
      * Abstract wrapping spliterator that binds to the spliterator of a
      * pipeline helper on first operation.
-     * @@@ If Spliterator.SUBSIZED was propagated as a stream flag through
-     * the pipeline this class could be converted to a late-binding spliterator
+     *
+     * <p>This spliterator is not late-binding and will bind to the source
+     * spliterator when first operated on.
+     *
+     * <p>A wrapping spliterator produced from a sequential stream
+     * cannot be split if there are stateful operations present.
      */
     private static abstract class AbstractWrappingSpliterator<P_IN, P_OUT, T_BUFFER extends AbstractSpinedBuffer<P_OUT>>
             implements Spliterator<P_OUT> {
+
+        // @@@ Detect if stateful operations are present or not
+        //     If not then can split otherwise cannot
+
         // True if this spliterator supports splitting
         final boolean isParallel;
 
@@ -177,15 +185,12 @@
             // Get the characteristics from the pipeline
             int c = StreamOpFlag.toCharacteristics(StreamOpFlag.toStreamFlags(ph.getStreamAndOpFlags()));
 
-            // @@@ determining if the source spliterator is SUBSIZED results in
-            // in the wrapping spliterator not being late-binding
-            // to fix this requires that SUBSIZED is mapped to a stream flag
-            // and propagated through the pipeline
-
-            // Mask off the size and uniform characteristics and replace with those of the spliterator
-            // Note that a non-uniform spliterator can change from something with an exact size to an
-            // estimate for a sub-split, for example with HashSet where the size is known at the top
-            // level spliterator but for sub-splits only an estimate is known
+            // Mask off the size and uniform characteristics and replace with
+            // those of the spliterator
+            // Note that a non-uniform spliterator can change from something
+            // with an exact size to an estimate for a sub-split, for example
+            // with HashSet where the size is known at the top level spliterator
+            // but for sub-splits only an estimate is known
             if ((c & Spliterator.SIZED) != 0) {
                 c &= ~(Spliterator.SIZED | Spliterator.SUBSIZED);
                 c |= (spliterator.characteristics() & Spliterator.SIZED & Spliterator.SUBSIZED);
@@ -195,6 +200,14 @@
         }
 
         @Override
+        public Comparator<? super P_OUT> getComparator() {
+            if (hasCharacteristics(SORTED)) {
+                return null;
+            }
+            throw new IllegalStateException();
+        }
+
+        @Override
         public final String toString() {
             return getClass().getName() + "[" + spliterator + "]";
         }
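
The getComparator override added above follows the java.util.Spliterator contract: a SORTED spliterator sorted by natural order reports a null comparator, and anything not SORTED must throw IllegalStateException.  The same contract can be observed on public sorted and unsorted sources:

    import java.util.Arrays;
    import java.util.Spliterator;
    import java.util.TreeSet;

    public class GetComparatorContract {
        public static void main(String[] args) {
            Spliterator<Integer> sorted = new TreeSet<>(Arrays.asList(3, 1, 2)).spliterator();

            // A SORTED spliterator using natural order signals it with a null
            // comparator rather than an exception.
            System.out.println(sorted.hasCharacteristics(Spliterator.SORTED)); // true
            System.out.println(sorted.getComparator());                        // null

            Spliterator<Integer> unsorted = Arrays.asList(3, 1, 2).spliterator();
            try {
                unsorted.getComparator(); // not SORTED -> IllegalStateException
            } catch (IllegalStateException expected) {
                System.out.println("not sorted");
            }
        }
    }
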
--- a/src/share/classes/java/util/stream/Streams.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/Streams.java	Mon Apr 08 16:19:46 2013 -0700
@@ -112,7 +112,8 @@
                                       int characteristics) {
         Objects.requireNonNull(supplier);
         return new ReferencePipeline.Head<>(supplier,
-                                            StreamOpFlag.fromCharacteristics(characteristics) & ~StreamOpFlag.IS_PARALLEL);
+                                            StreamOpFlag.fromCharacteristics(characteristics),
+                                            false);
     }
 
     /**
@@ -145,7 +146,8 @@
                                               int characteristics) {
         Objects.requireNonNull(supplier);
         return new ReferencePipeline.Head<>(supplier,
-                                            StreamOpFlag.fromCharacteristics(characteristics) | StreamOpFlag.IS_PARALLEL);
+                                            StreamOpFlag.fromCharacteristics(characteristics),
+                                            true);
     }
 
     /**
@@ -173,7 +175,8 @@
     public static<T> Stream<T> stream(Spliterator<T> spliterator) {
         Objects.requireNonNull(spliterator);
         return new ReferencePipeline.Head<>(spliterator,
-                                            StreamOpFlag.fromCharacteristics(spliterator) & ~StreamOpFlag.IS_PARALLEL);
+                                            StreamOpFlag.fromCharacteristics(spliterator),
+                                            false);
     }
 
     /**
@@ -201,7 +204,8 @@
     public static<T> Stream<T> parallelStream(Spliterator<T> spliterator) {
         Objects.requireNonNull(spliterator);
         return new ReferencePipeline.Head<>(spliterator,
-                                            StreamOpFlag.fromCharacteristics(spliterator) | StreamOpFlag.IS_PARALLEL);
+                                            StreamOpFlag.fromCharacteristics(spliterator),
+                                            true);
     }
 
     // IntStream construction
@@ -243,7 +247,8 @@
     public static IntStream intStream(Supplier<? extends Spliterator.OfInt> supplier,
                                       int characteristics) {
         return new IntPipeline.Head<>(supplier,
-                                      StreamOpFlag.fromCharacteristics(characteristics) & ~StreamOpFlag.IS_PARALLEL);
+                                      StreamOpFlag.fromCharacteristics(characteristics),
+                                      false);
     }
 
     /**
@@ -274,7 +279,8 @@
     public static IntStream intParallelStream(Supplier<? extends Spliterator.OfInt> supplier,
                                               int characteristics) {
         return new IntPipeline.Head<>(supplier,
-                                      StreamOpFlag.fromCharacteristics(characteristics) | StreamOpFlag.IS_PARALLEL);
+                                      StreamOpFlag.fromCharacteristics(characteristics),
+                                      true);
     }
 
     /**
@@ -300,7 +306,9 @@
      * @return A new sequential {@code IntStream}
      */
     public static IntStream intStream(Spliterator.OfInt spliterator) {
-        return new IntPipeline.Head<>(spliterator, spliterator.characteristics() & ~StreamOpFlag.IS_PARALLEL);
+        return new IntPipeline.Head<>(spliterator,
+                                      StreamOpFlag.fromCharacteristics(spliterator),
+                                      false);
     }
 
     /**
@@ -327,7 +335,8 @@
      */
     public static IntStream intParallelStream(Spliterator.OfInt spliterator) {
         return new IntPipeline.Head<>(spliterator,
-                                      StreamOpFlag.fromCharacteristics(spliterator.characteristics()) | StreamOpFlag.IS_PARALLEL);
+                                      StreamOpFlag.fromCharacteristics(spliterator),
+                                      true);
     }
 
     // LongStream construction
@@ -369,7 +378,8 @@
     public static LongStream longStream(Supplier<? extends Spliterator.OfLong> supplier,
                                         int characteristics) {
         return new LongPipeline.Head<>(supplier,
-                                       StreamOpFlag.fromCharacteristics(characteristics) & ~StreamOpFlag.IS_PARALLEL);
+                                       StreamOpFlag.fromCharacteristics(characteristics),
+                                       false);
     }
 
     /**
@@ -400,7 +410,8 @@
     public static LongStream longParallelStream(Supplier<? extends Spliterator.OfLong> supplier,
                                                 int characteristics) {
         return new LongPipeline.Head<>(supplier,
-                                       StreamOpFlag.fromCharacteristics(characteristics) | StreamOpFlag.IS_PARALLEL);
+                                       StreamOpFlag.fromCharacteristics(characteristics),
+                                       true);
     }
 
     /**
@@ -427,7 +438,8 @@
      */
     public static LongStream longStream(Spliterator.OfLong spliterator) {
         return new LongPipeline.Head<>(spliterator,
-                                       StreamOpFlag.fromCharacteristics(spliterator.characteristics()) & ~StreamOpFlag.IS_PARALLEL);
+                                       StreamOpFlag.fromCharacteristics(spliterator),
+                                       false);
     }
 
     /**
@@ -454,7 +466,8 @@
      */
     public static LongStream longParallelStream(Spliterator.OfLong spliterator) {
         return new LongPipeline.Head<>(spliterator,
-                                       StreamOpFlag.fromCharacteristics(spliterator.characteristics()) | StreamOpFlag.IS_PARALLEL);
+                                       StreamOpFlag.fromCharacteristics(spliterator),
+                                       true);
     }
 
     // DoubleStream construction
@@ -496,7 +509,8 @@
     public static DoubleStream doubleStream(Supplier<? extends Spliterator.OfDouble> supplier,
                                             int characteristics) {
         return new DoublePipeline.Head<>(supplier,
-                                         StreamOpFlag.fromCharacteristics(characteristics) & ~StreamOpFlag.IS_PARALLEL);
+                                         StreamOpFlag.fromCharacteristics(characteristics),
+                                         false);
     }
 
     /**
@@ -527,7 +541,8 @@
     public static DoubleStream doubleParallelStream(Supplier<? extends Spliterator.OfDouble> supplier,
                                                     int characteristics) {
         return new DoublePipeline.Head<>(supplier,
-                                         StreamOpFlag.fromCharacteristics(characteristics) | StreamOpFlag.IS_PARALLEL);
+                                         StreamOpFlag.fromCharacteristics(characteristics),
+                                         true);
     }
 
     /**
@@ -554,7 +569,8 @@
      */
     public static DoubleStream doubleStream(Spliterator.OfDouble spliterator) {
         return new DoublePipeline.Head<>(spliterator,
-                                         StreamOpFlag.fromCharacteristics(spliterator.characteristics()) & ~StreamOpFlag.IS_PARALLEL);
+                                         StreamOpFlag.fromCharacteristics(spliterator),
+                                         false);
     }
 
     /**
@@ -581,7 +597,8 @@
      */
     public static DoubleStream doubleParallelStream(Spliterator.OfDouble spliterator) {
         return new DoublePipeline.Head<>(spliterator,
-                                         StreamOpFlag.fromCharacteristics(spliterator.characteristics()) | StreamOpFlag.IS_PARALLEL);
+                                         StreamOpFlag.fromCharacteristics(spliterator),
+                                         true);
     }
 
     // Infinite Stream generators
@@ -849,9 +866,9 @@
      * <p>
      * @implSpec
      * The implementation behaves as if:
-     * <pre>
-     *     longRange(start, end, start &lt;= end ? 1 : -1);
-     * </pre>
+     * <pre>{@code
+     *     longRange(start, end, start <= end ? 1 : -1);
+     * }</pre>
      *
      * @param start the (inclusive) initial value
      * @param end the exclusive upper bound
@@ -874,9 +891,9 @@
      * <p>
      * An equivalent sequence of increasing values can be produced,
      * sequentially, using a {@code for} loop as follows:
-     * <pre>
-     *     for (long i = start; i &lt; end ; i += step) { ... }
-     * </pre>
+     * <pre>{@code
+     *     for (long i = start; i < end ; i += step) { ... }
+     * }</pre>
      *
      * @param start the (inclusive) initial value
      * @param end the exclusive upper bound
@@ -977,9 +994,9 @@
      * <p>
      * @implSpec
      * The implementation behaves as if:
-     * <pre>
-     *     doubleRange(start, end, start &lt;= end ? 1.0 : -1.0);
-     * </pre>
+     * <pre>{@code
+     *     doubleRange(start, end, start <= end ? 1.0 : -1.0);
+     * }</pre>
      *
      * @param start the (inclusive) initial value
      * @param end the exclusive upper bound
@@ -1004,19 +1021,19 @@
      * <p>
      * An equivalent sequence of increasing values can be produced,
      * sequentially, using a {@code for} loop as follows:
-     * <pre>
+     * <pre>{@code
      *     long size = (long) Math.ceil((start - end) / step);
      *     long i = 0
      *     for (double v = start; i < size; i++, v = start + step * i) {
      *         ...
      *     }
-     * </pre>
+     * }</pre>
      * A stream of equivalent values can be produced as follows:
-     * <pre>
+     * <pre>{@code
      *     long size = (long) Math.ceil((start - end) / step);
      *     DoubleStream ds = Streams.longStream(0, size).doubles()
      *         .map(i -> start + step * i);
-     * </pre>
+     * }</pre>
      *
      * @param start the (inclusive) initial value
      * @param end the exclusive upper bound
@@ -1298,7 +1315,9 @@
 
         @Override
         public Comparator<? super Integer> getComparator() {
-            return null;
+            if (step > 0)
+                return null;
+            throw new IllegalStateException();
         }
 
         @Override
@@ -1373,7 +1392,9 @@
 
         @Override
         public Comparator<? super Long> getComparator() {
-            return null;
+            if (step > 0)
+                return null;
+            throw new IllegalStateException();
         }
 
         @Override
@@ -1460,7 +1481,9 @@
 
         @Override
         public Comparator<? super Double> getComparator() {
-            return null;
+            if (step > 0)
+                return null;
+            throw new IllegalStateException();
         }
 
         @Override
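
The construction methods above now take an explicit parallel flag instead of encoding parallelism as a stream flag.  In the released JDK 8 the corresponding public entry point is StreamSupport.stream(spliterator, parallel); the Streams factory shown in this snapshot is internal, so the sketch below uses the released API:

    import java.util.Arrays;
    import java.util.List;
    import java.util.stream.Stream;
    import java.util.stream.StreamSupport;

    public class StreamFromSpliteratorExample {
        public static void main(String[] args) {
            List<String> source = Arrays.asList("a", "b", "c");

            // The boolean argument selects sequential (false) or parallel (true)
            // execution, mirroring the new 'parallel' constructor parameter.
            Stream<String> sequential = StreamSupport.stream(source.spliterator(), false);
            Stream<String> parallel   = StreamSupport.stream(source.spliterator(), true);

            System.out.println(sequential.count() + " " + parallel.count()); // 3 3
        }
    }
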
--- a/src/share/classes/java/util/stream/TerminalOp.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/TerminalOp.java	Mon Apr 08 16:19:46 2013 -0700
@@ -52,21 +52,21 @@
     default StreamShape inputShape() { return StreamShape.REFERENCE; }
 
     /**
-     * Gets the properties of the operation.  Terminal operations may set a
+     * Gets the stream flags of the operation.  Terminal operations may set a
      * limited subset of the stream flags defined in {@link StreamOpFlag}, and
      * these flags are combined with the previously combined stream and
      * intermediate operation flags for the pipeline.
      *
      * @implSpec The default implementation returns zero
-     * @return the properties of the operation
-     * @see {@link StreamOpFlag}
+     * @return the stream flags for this operation
+     * @see StreamOpFlag
      */
     default int getOpFlags() { return 0; }
 
     /**
      * Performs a parallel evaluation of the operation using the specified
-     * {@code PipelineHelper}, which describes the stream source and upstream
-     * intermediate operations.
+     * {@code PipelineHelper}, which describes the upstream intermediate
+     * operations.
      *
      * @implSpec The default performs a sequential evaluation of the operation
      * using the specified {@code PipelineHelper}
@@ -84,8 +84,8 @@
 
     /**
      * Performs a sequential evaluation of the operation using the specified
-     * {@code PipelineHelper}, which describes the stream source and upstream
-     * intermediate operations.
+     * {@code PipelineHelper}, which describes the upstream intermediate
+     * operations.
      *
      * @param helper the pipeline helper
      * @param spliterator the source spliterator
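
TerminalOp's default evaluateParallel simply falls back to the sequential evaluation.  A self-contained sketch of that default-to-sequential pattern, using a simplified local interface (MiniTerminalOp is an illustration, not the package-private type):

    import java.util.Arrays;
    import java.util.List;
    import java.util.function.BinaryOperator;

    public class DefaultParallelSketch {
        // Simplified stand-in: a terminal operation evaluated over a List source.
        interface MiniTerminalOp<T, R> {
            R evaluateSequential(List<T> source);

            // Default parallel evaluation simply delegates to the sequential
            // path, which is the fallback behavior documented above.
            default R evaluateParallel(List<T> source) {
                return evaluateSequential(source);
            }
        }

        static <T> MiniTerminalOp<T, T> reduceOp(T identity, BinaryOperator<T> op) {
            return source -> {
                T result = identity;
                for (T t : source) {
                    result = op.apply(result, t);
                }
                return result;
            };
        }

        public static void main(String[] args) {
            MiniTerminalOp<Integer, Integer> sum = reduceOp(0, Integer::sum);
            System.out.println(sum.evaluateParallel(Arrays.asList(1, 2, 3, 4))); // 10
        }
    }
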
--- a/src/share/classes/java/util/stream/TerminalSink.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/TerminalSink.java	Mon Apr 08 16:19:46 2013 -0700
@@ -27,8 +27,8 @@
 import java.util.function.Supplier;
 
 /**
- * A Sink which accumulates state as elements are accepted, and allows a result
- * to be retrieved after the computation is finished.
+ * A {@link Sink} which accumulates state as elements are accepted, and allows
+ * a result to be retrieved after the computation is finished.
  *
  * @param <T> The type of elements to be accepted
  * @param <R> The type of the result
--- a/src/share/classes/java/util/stream/Tripwire.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/Tripwire.java	Mon Apr 08 16:19:46 2013 -0700
@@ -24,8 +24,10 @@
  */
 package java.util.stream;
 
-import java.util.logging.Level;
-import java.util.logging.Logger;
+import sun.util.logging.PlatformLogger;
+
+import java.security.AccessController;
+import java.security.PrivilegedAction;
 
 /**
  * Utility class for detecting inadvertent uses of boxing in
@@ -47,20 +49,21 @@
     private static final String TRIPWIRE_PROPERTY = "org.openjdk.java.util.stream.tripwire";
 
     /** Should debugging checks be enabled? */
-    static final boolean ENABLED = true;
-//            = Boolean.getBoolean(TRIPWIRE_PROPERTY);
+    static final boolean ENABLED = AccessController.doPrivileged(
+            (PrivilegedAction<Boolean>) () -> Boolean.getBoolean(TRIPWIRE_PROPERTY));
 
     private Tripwire() { }
 
     /**
-     * Produces a log warning, using {@code Logger.getLogger(className)}, using
-     * the supplied message.  The class name of {@code trippingClass} will be
-     * used as the first parameter to the message.
+     * Produces a log warning, using {@code PlatformLogger.getLogger(className)},
+     * using the supplied message.  The class name of {@code trippingClass} will
+     * be used as the first parameter to the message.
      *
      * @param trippingClass Name of the class generating the message
-     * @param msg A message format string of the type expected by {@link Logger}
+     * @param msg A message format string of the type expected by
+     * {@link PlatformLogger}
      */
     static void trip(Class<?> trippingClass, String msg) {
-        Logger.getLogger(trippingClass.getName()).log(Level.WARNING, msg, trippingClass.getName());
+        PlatformLogger.getLogger(trippingClass.getName()).warning(msg, trippingClass.getName());
     }
 }
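
The tripwire flag is now read from a system property inside a privileged action instead of being hard-coded to true.  Note that Boolean.getBoolean(name) reads and parses a system property (it does not parse an arbitrary string), so the flag is switched on with a -D option.  A small sketch with a made-up property name:

    import java.security.AccessController;
    import java.security.PrivilegedAction;

    public class PropertyFlagExample {
        // Hypothetical property name, for illustration only.  Enable with:
        //   java -Dexample.debug.enabled=true PropertyFlagExample
        private static final String PROP = "example.debug.enabled";

        static final boolean ENABLED = AccessController.doPrivileged(
                (PrivilegedAction<Boolean>) () -> Boolean.getBoolean(PROP));

        public static void main(String[] args) {
            System.out.println("debug enabled: " + ENABLED);
        }
    }
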
--- a/src/share/classes/java/util/stream/package-info.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/src/share/classes/java/util/stream/package-info.java	Mon Apr 08 16:19:46 2013 -0700
@@ -26,11 +26,11 @@
 /**
  * Classes to support functional-style operations on streams of values, as in the following:
  *
- * <pre>
+ * <pre>{@code
  *     int sumOfWeights = blocks.stream().filter(b -> b.getColor() == RED)
  *                                       .mapToInt(b -> b.getWeight())
  *                                       .sum();
- * </pre>
+ * }</pre>
  *
  * <p>Here we use {@code blocks}, which might be a {@code Collection}, as a source for a stream,
  * and then perform a filter-map-reduce ({@code sum()} is an example of a {@link #Reduction reduction}
@@ -79,14 +79,14 @@
  * return to the data source, or select a new data source, to get a new stream. For example,
  * obtaining the sum of weights of all red blocks, and then of all blue blocks, requires a
  * filter-map-reduce on two different streams:
- * <pre>
+ * <pre>{@code
  *     int sumOfRedWeights  = blocks.stream().filter(b -> b.getColor() == RED)
  *                                           .mapToInt(b -> b.getWeight())
  *                                           .sum();
  *     int sumOfBlueWeights = blocks.stream().filter(b -> b.getColor() == BLUE)
  *                                           .mapToInt(b -> b.getWeight())
  *                                           .sum();
- * </pre>
+ * }</pre>
  *
  * <p>However, there are other techniques that allow you to obtain both results in a single
  * pass if multiple traversal is impractical or inefficient.  TODO provide link
@@ -143,11 +143,11 @@
  * result. The set of operations on serial and parallel streams is identical. To execute the
  * "sum of weights of blocks" query in parallel, we would do:
  *
- * <pre>
+ * <pre>{@code
  *     int sumOfWeights = blocks.parallelStream().filter(b -> b.getColor() == RED)
  *                                               .mapToInt(b -> b.getWeight())
  *                                               .sum();
- * </pre>
+ * }</pre>
  *
  * <p>The only difference between the serial and parallel versions of this example code is
  * the creation of the initial {@code Stream}.  Whether a {@code Stream} will execute in serial
@@ -163,26 +163,42 @@
  *
  * <h3><a name="Ordering">Ordering</a></h3>
  *
- * <p>Streams may or may not have an <em>encounter order</em>.  Whether or not there is an
- * encounter order depends on the source, the intermediate operations, and the terminal
- * operation.  Certain stream sources (such as {@code List} or arrays) are intrinsically ordered,
- * whereas others (such as {@code HashSet}) are not.  Some intermediate operations may impose
- * an encounter order on an otherwise unordered stream, such as
- * {@link java.util.stream.Stream#sorted()}.  Some intermediate operations may remove the
- * constraint of ordering, rendering unordered a previously ordered stream.  Finally, some
- * terminal operations may ignore encounter order, such as {@link java.util.stream.Stream#forEach},
- * and others may have optimized implementations for the case where there is no defined
- * encounter order.
+ * <p>Streams may or may not have an <em>encounter order</em>.  Whether or not
+ * there is an encounter order depends on the source, the intermediate
+ * operations, and the terminal operation.  Certain stream sources (such as
+ * {@code List} or arrays) are intrinsically ordered, whereas others (such as
+ * {@code HashSet}) are not.  Some intermediate operations may impose an
+ * encounter order on an otherwise unordered stream, such as
+ * {@link java.util.stream.Stream#sorted()}, and others may render an ordered
+ * stream unordered (such as {@link java.util.stream.Stream#unordered()}).
+ * Some terminal operations may ignore encounter order, such as
+ * {@link java.util.stream.Stream#forEach}.
  *
- * <p>If a Stream is ordered, most operations are constrained to operate on the elements in their
- * encounter order; if the source of a stream is a {@code List} containing {@code [1, 2, 3]},
- * then the result of executing {@code map(x -> x*2)} must be {@code [2, 4, 6]}.  However, if
- * the source has no defined encounter order, than any permutation of the values {@code [2, 4, 6]}
- * would be a valid result. Many operations can still be efficiently parallelized even under
- * ordering constraints, but some (such as duplicate removal) may be more efficient without
+ * <p>If a Stream is ordered, most operations are constrained to operate on the
+ * elements in their encounter order; if the source of a stream is a {@code List}
+ * containing {@code [1, 2, 3]}, then the result of executing {@code map(x -> x*2)}
+ * must be {@code [2, 4, 6]}.  However, if the source has no defined encounter
+ * order, than any permutation of the values {@code [2, 4, 6]} would be a valid
+ * result. Many operations can still be efficiently parallelized even under
  * ordering constraints.
  *
- * TODO Interaction between ordering and concurrency
+ * <p>For sequential streams, ordering is only relevant to the determinism
+ * of operations performed repeatedly on the same source.  (An {@code ArrayList}
+ * is constrained to iterate elements in order; a {@code HashSet} is not, and
+ * repeated iteration might produce a different order.)
+ *
+ * <p>For parallel streams, relaxing the ordering constraint can enable
+ * optimized implementations for some operations.  For example, duplicate
+ * removal on an ordered stream must completely process the first partition
+ * before it can return any elements from a subsequent partition, even if those
+ * elements are available earlier.  On the other hand, without the constraint of
+ * ordering, duplicate removal can be done more efficiently with a set backed
+ * by a shared {@code ConcurrentHashMap}.  There will be cases where the stream
+ * is structurally ordered (the source is ordered and the intermediate
+ * operations are order-preserving), but the user does not particularly care
+ * about the encounter order.  In such cases, explicitly de-ordering the stream
+ * with the {@link java.util.stream.Stream#unordered()} method may result in
+ * improved parallel performance for some stateful or terminal operations.
  *
  * <h2><a name="Non-Interference">Non-interference</h2>
  *
@@ -208,10 +224,10 @@
  * stream operations are <em>stateful</em>.  A stateful lambda (or other object implementing the
  * appropriate functional interface) is one whose result depends on any state which might change
  * during the execution of the stream pipeline.  An example of a stateful lambda is:
- * <pre>
+ * <pre>{@code
  *     Set<Integer> seen = Collections.synchronizedSet(new HashSet<>());
  *     stream.parallel().map(e -> { if (seen.add(e)) return 0; else return e; })...
- * </pre>
+ * }</pre>
 * Here, if the mapping operation is performed in parallel, the results for the same input
  * could vary from run to run, due to thread scheduling differences, whereas, with a stateless
  * lambda expression the results would always be the same.
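+ * A stateless counterpart (an illustrative sketch; the mapping function shown
+ * here is arbitrary) depends only on its input element, so its results do not
+ * vary with thread scheduling:
+ * <pre>{@code
+ *     stream.parallel().map(e -> e.toString())...
+ * }</pre>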
@@ -228,24 +244,24 @@
  * from each block before summing up the weights.)
  *
  * <p>Of course, such operations can be readily implemented as simple sequential loops, as in:
- * <pre>
+ * <pre>{@code
  *    int sum = 0;
  *    for (int x : numbers) {
  *       sum += x;
  *    }
- * </pre>
+ * }</pre>
  * However, there may be a significant advantage to preferring a {@link Stream#reduce reduce operation}
  * over a mutative accumulation such as the above -- a properly constructed reduce operation is
  * inherently parallelizable, so long as the {@link BinaryOperator} has the right characteristics,
  * specifically that it is <a href="#Associativity">associative</a>.  For example, given a
  * stream of numbers for which we want to find the sum, we can write:
- * <pre>
+ * <pre>{@code
  *    int sum = numbers.reduce(0, (x,y) -> x+y);
- * </pre>
+ * }</pre>
  * or more succinctly:
- * <pre>
+ * <pre>{@code
  *    int sum = numbers.reduce(0, Integer::sum);
- * </pre>
+ * }</pre>
  *
  * <p>(The primitive specializations of {@link java.util.stream.Stream}, such as
  * {@link java.util.stream.IntStream}, even have convenience methods for common reductions,
@@ -264,19 +280,19 @@
  * <p>The "blocks" examples shown earlier shows how reduction combines with other operations
  * to replace for loops with bulk operations.  If {@code blocks} is a collection of {@code Block}
  * objects, which have a {@code getWeight} method, we can find the heaviest block with:
- * <pre>
+ * <pre>{@code
  *     OptionalInt heaviest = blocks.stream()
  *                                  .mapToInt(Block::getWeight)
  *                                  .reduce(Integer::max);
- * </pre>
+ * }</pre>
  *
  * <p>In its more general form, a {@code reduce} operation on elements of type {@code T}
  * yielding a result of type {@code U} requires three parameters:
- * <pre>
- * &lt;U> U reduce(U identity,
- *                 BiFunction&lt;U, ? super T, U> accumlator,
- *                 BinaryOperator&lt;U> combiner);
- * </pre>
+ * <pre>{@code
+ * <U> U reduce(U identity,
+ *              BiFunction<U, ? super T, U> accumulator,
+ *              BinaryOperator<U> combiner);
+ * }</pre>
  * Here, the <em>identity</em> element is both an initial seed for the reduction, and a default
  * result if there are no elements. The <em>accumulator</em> function takes a partial result and
 * the next element, and produces a new partial result. The <em>combiner</em> function combines
@@ -288,11 +304,11 @@
  * example using the more general form, {@code 0} would be the identity element, while
  * {@code Integer::sum} would be both the accumulator and combiner. For the sum-of-weights
  * example, this could be re-cast as:
- * <pre>
+ * <pre>{@code
  *     int sumOfWeights = blocks.stream().reduce(0,
 *                                               (sum, b) -> sum + b.getWeight(),
  *                                               Integer::sum);
- * </pre>
+ * }</pre>
  * though the map-reduce form is more readable and generally preferable.  The generalized form
  * is provided for cases where significant work can be optimized away by combining mapping and
  * reducing into a single function.
@@ -302,9 +318,9 @@
  * to {@code u}. Additionally, the {@code combiner} function must be
  * <a href="#Associativity">associative</a> and must be compatible with the {@code accumulator}
  * function; for all {@code u} and {@code t}, the following must hold:
- * <pre>
+ * <pre>{@code
  *     combiner.apply(u, accumulator.apply(identity, t)) == accumulator.apply(u, t)
- * </pre>
+ * }</pre>
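+ *
+ * <p>For instance, a sketch of a hypothetical sum-of-string-lengths reduction
+ * (identity {@code 0}) satisfies this condition, since both sides evaluate to
+ * {@code u + t.length()}:
+ * <pre>{@code
+ *     // hypothetical accumulator and combiner, shown only to illustrate the condition
+ *     BiFunction<Integer, String, Integer> accumulator = (sum, s) -> sum + s.length();
+ *     BinaryOperator<Integer> combiner = Integer::sum;
+ *     // combiner.apply(u, accumulator.apply(0, t))  evaluates to  u + t.length()
+ *     // accumulator.apply(u, t)                     evaluates to  u + t.length()
+ * }</pre>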
  *
  * <h3><a name="MutableReduction">Mutable Reduction</a></h3>
  *
@@ -315,9 +331,9 @@
  *
  * <p>For example, if we wanted to take a stream of strings and concatenate them into a single
  * long string, we <em>could</em> achieve this with ordinary reduction:
- * <pre>
+ * <pre>{@code
 *     String concatenated = strings.reduce("", String::concat);
- * </pre>
+ * }</pre>
  *
  * We would get the desired result, and it would even work in parallel.  However, we might not
  * be happy about the performance!  Such an implementation would do a great deal of string
@@ -333,33 +349,33 @@
  * container by incorporating a new element, and a combining function that can take two
  * result containers and merge their contents.  The form of this is very similar to the general
  * form of ordinary reduction:
- * <pre>
- * &lt;R> R collect(Supplier&lt;R> resultFactory,
- *                  BiConsumer&lt;R, ? super T> accumulator,
- *                  BiConsumer&lt;R, R> combiner);
- * </pre>
+ * <pre>{@code
+ * <R> R collect(Supplier<R> resultFactory,
+ *               BiConsumer<R, ? super T> accumulator,
+ *               BiConsumer<R, R> combiner);
+ * }</pre>
 * As with {@code reduce()}, the benefit of expressing {@code collect} in this abstract way is
  * that it is directly amenable to parallelization: we can accumulate partial results in parallel
  * and then combine them.  For example, to collect the string representations of the elements
  * in a stream into an {@code ArrayList}, we could write the obvious sequential for-each form:
- * <pre>
- *     ArrayList&lt;String> strings = new ArrayList&lt;>();
+ * <pre>{@code
+ *     ArrayList<String> strings = new ArrayList<>();
  *     for (T element : stream) {
  *         strings.add(element.toString());
  *     }
- * </pre>
+ * }</pre>
  * Or we could use a parallelizable collect form:
- * <pre>
- *     ArrayList&lt;String> strings = stream.collect(() -> new ArrayList&lt;>(),
- *                                                   (c, e) -> c.add(e.toString()),
- *                                                   (c1, c2) -> c1.addAll(c2));
- * </pre>
+ * <pre>{@code
+ *     ArrayList<String> strings = stream.collect(() -> new ArrayList<>(),
+ *                                                (c, e) -> c.add(e.toString()),
+ *                                                (c1, c2) -> c1.addAll(c2));
+ * }</pre>
 * or, noting that we have buried a mapping operation inside the accumulator function, more
  * succinctly as:
- * <pre>
- *     ArrayList&lt;String> strings = stream.map(Object::toString)
- *                                          .collect(ArrayList::new, ArrayList::add, ArrayList::addAll);
- * </pre>
+ * <pre>{@code
+ *     ArrayList<String> strings = stream.map(Object::toString)
+ *                                       .collect(ArrayList::new, ArrayList::add, ArrayList::addAll);
+ * }</pre>
  * Here, our supplier is just the {@link ArrayList#ArrayList() ArrayList constructor}, the
  * accumulator adds the stringified element to an {@code ArrayList}, and the combiner simply
  * uses {@link ArrayList#addAll addAll} to copy the strings from one container into the other.
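+ *
+ * <p>The same pattern applies to the string-concatenation example above (a
+ * sketch, assuming a {@code Stream<String> strings} source): a
+ * {@code StringBuilder} serves as the mutable result container, avoiding the
+ * copying that made the ordinary reduction expensive:
+ * <pre>{@code
+ *     String concatenated = strings.collect(StringBuilder::new,      // result container
+ *                                           StringBuilder::append,   // accumulate an element
+ *                                           StringBuilder::append)   // merge two containers
+ *                                  .toString();
+ * }</pre>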
@@ -367,34 +383,34 @@
 * <p>As with the regular reduction operation, the ability to parallelize only comes if an
 * <a href="#Associativity">associativity</a> condition is met. The {@code combiner} is associative
  * if for result containers {@code r1}, {@code r2}, and {@code r3}:
- * <pre>
+ * <pre>{@code
  *    combiner.accept(r1, r2);
  *    combiner.accept(r1, r3);
- * </pre>
+ * }</pre>
  * is equivalent to
- * <pre>
+ * <pre>{@code
  *    combiner.accept(r2, r3);
  *    combiner.accept(r1, r2);
- * </pre>
+ * }</pre>
  * where equivalence means that {@code r1} is left in the same state (according to the meaning
  * of {@link Object#equals equals} for the element types). Similarly, the {@code resultFactory}
  * must act as an <em>identity</em> with respect to the {@code combiner} so that for any result
  * container {@code r}:
- * <pre>
+ * <pre>{@code
  *     combiner.accept(r, resultFactory.get());
- * </pre>
+ * }</pre>
  * does not modify the state of {@code r} (again according to the meaning of
  * {@link Object#equals equals}). Finally, the {@code accumulator} and {@code combiner} must be
  * compatible such that for a result container {@code r} and element {@code t}:
- * <pre>
+ * <pre>{@code
  *    r2 = resultFactory.get();
  *    accumulator.accept(r2, t);
  *    combiner.accept(r, r2);
- * </pre>
+ * }</pre>
  * is equivalent to:
- * <pre>
+ * <pre>{@code
  *    accumulator.accept(r,t);
- * </pre>
+ * }</pre>
  * where equivalence means that {@code r} is left in the same state (again according to the
  * meaning of {@link Object#equals equals}).
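+ *
+ * <p>As a sketch, the {@code ArrayList}-collecting example above meets these
+ * conditions: for any result list {@code r} and element {@code t},
+ * <pre>{@code
+ *     ArrayList<String> r2 = new ArrayList<>();   // resultFactory.get()
+ *     r2.add(t.toString());                       // accumulator.accept(r2, t)
+ *     r.addAll(r2);                               // combiner.accept(r, r2)
+ * }</pre>
+ * leaves {@code r} in the same state as adding {@code t.toString()} to
+ * {@code r} directly.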
  *
@@ -404,27 +420,84 @@
  * method that simply takes a {@code Collector} and returns the resulting container.
  * The above example for collecting strings into a {@code List} can be rewritten using a
  * standard {@code Collector} as:
- * <pre>
- *     ArrayList&lt;String> strings = stream.map(Object::toString)
- *                                          .collect(Collectors.toList());
- * </pre>
+ * <pre>{@code
+ *     ArrayList<String> strings = stream.map(Object::toString)
+ *                                       .collect(Collectors.toList());
+ * }</pre>
+ *
+ * <h3><a name="ConcurrentReduction">Reduction, Concurrency, and Ordering</a></h3>
+ *
+ * With some complex reduction operations, for example a collect that produces a
+ * {@code Map}, such as:
+ * <pre>{@code
+ *     Map<Buyer, List<Transaction>> salesByBuyer
+ *         = txns.parallelStream()
+ *               .collect(Collectors.groupingBy(Transaction::getBuyer));
+ * }</pre>
+ * (where {@link java.util.stream.Collectors#groupingBy} is a utility function
+ * that returns a {@link Collector} for grouping sets of elements based on some key)
+ * it may actually be counterproductive to perform the operation in parallel.
+ * This is because the combining step (merging one {@code Map} into another by key)
+ * can be expensive for some {@code Map} implementations.
+ *
+ * <p>Suppose, however, that the result container used in this reduction
+ * was a concurrently modifiable collection -- such as a
+ * {@link java.util.concurrent.ConcurrentHashMap ConcurrentHashMap}. In that case,
+ * the parallel invocations of the accumulator could actually deposit their results
+ * concurrently into the same shared result container, eliminating the need for the combiner to
+ * merge distinct result containers. This potentially provides a boost
+ * to the parallel execution performance. We call this a <em>concurrent</em> reduction.
+ *
+ * <p>A {@link Collector} that supports concurrent reduction is marked with the
+ * {@link java.util.stream.Collector.Characteristics#CONCURRENT} characteristic.
+ * Having a concurrent collector is a necessary condition for performing a 
+ * concurrent reduction, but that alone is not sufficient. If you imagine multiple
+ * accumulators depositing results into a shared container, the order in which 
+ * results are deposited is non-deterministic. Consequently, a concurrent reduction
+ * is only possible if ordering is not important for the stream being processed.
+ * The {@link java.util.stream.Stream#collect(Collector)}
+ * implementation will only perform a concurrent reduction if 
+ * <ul>
+ * <li>The stream is parallel;</li>
+ * <li>The collector has the
+ * {@link java.util.stream.Collector.Characteristics#CONCURRENT} characteristic;
+ * and</li>
+ * <li>Either the stream is unordered, or the collector has the
+ * {@link java.util.stream.Collector.Characteristics#UNORDERED} characteristic.</li>
+ * </ul>
+ * For example:
+ * <pre>{@code
+ *     Map<Buyer, List<Transaction>> salesByBuyer
+ *         = txns.parallelStream()
+ *               .unordered()
+ *               .collect(groupingByConcurrent(Transaction::getBuyer));
+ * }</pre>
+ * (where {@link java.util.stream.Collectors#groupingByConcurrent} is the concurrent companion
+ * to {@code groupingBy}).
+ *
+ * <p>Note that if it is important that the elements for a given key appear in the
+ * order they appear in the source, then we cannot use a concurrent reduction,
+ * as ordering is one of the casualties of concurrent insertion.  We would then
+ * be constrained to implement either a sequential reduction or a merge-based
+ * parallel reduction.
  *
  * <a name="Associativity"><h2>Associativity</h2></a>
  *
  * An operator or function {@code op} is <em>associative</em> if the following holds:
- * <pre>
+ * <pre>{@code
  *     (a op b) op c == a op (b op c)
- * </pre>
+ * }</pre>
  * The importance of this to parallel evaluation can be seen if we expand this to four terms:
- * <pre>
+ * <pre>{@code
  *     a op b op c op d == (a op b) op (c op d)
- * </pre>
+ * }</pre>
  * So we can evaluate {@code (a op b)} in parallel with {@code (c op d)} and then invoke {@code op} on
  * the results.
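+ * As a concrete sketch, integer addition is associative, so either grouping of
+ * a four-term sum gives the same result, whereas subtraction is not:
+ * <pre>{@code
+ *     int left  = (1 + 2) + (3 + 4);   // 10
+ *     int right = 1 + (2 + (3 + 4));   // 10
+ *     // but ((1 - 2) - 3) == -4, while (1 - (2 - 3)) == 2
+ * }</pre>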
  * TODO what does associative mean for mutative combining functions?
+ * FIXME: we described mutative associativity above.
  *
+ * <h2><a name="StreamSources">Stream sources</a></h2>
  * TODO where does this section go?
- * <h2><a name="StreamSources">Stream sources</a></h2>
  *
  * XXX - change to section to stream construction gradually introducing more
  *       complex ways to construct
@@ -463,22 +536,22 @@
 * might not be reflected and a {@code ConcurrentModificationException} may be thrown.
  *
  * <p>For example, consider the following code:
- * <pre>
- *     List&lt;String> l = new ArrayList(Arrays.asList("one", "two"));
- *     Stream&lt;String> sl = l.stream();
+ * <pre>{@code
+ *     List<String> l = new ArrayList<>(Arrays.asList("one", "two"));
+ *     Stream<String> sl = l.stream();
  *     l.add("three");
  *     String s = sl.collect(toStringJoiner(" ")).toString();
- * </pre>
+ * }</pre>
 * First a list is created consisting of two strings: "one" and "two". Then a stream is created from that list.
 * Next the list is modified by adding a third string: "three".  Finally the elements of the stream are collected
 * and joined together.  Since the list was modified before the terminal {@code collect} operation commenced,
 * the result will be a string of "one two three". However, if the list is modified after the terminal operation
  * commences, as in:
- * <pre>
- *     List&lt;String> l = new ArrayList(Arrays.asList("one", "two"));
- *     Stream&lt;String> sl = l.stream();
+ * <pre>{@code
+ *     List<String> l = new ArrayList<>(Arrays.asList("one", "two"));
+ *     Stream<String> sl = l.stream();
 *     String s = sl.peek(e -> l.add("BAD LAMBDA")).collect(toStringJoiner(" ")).toString();
- * </pre>
+ * }</pre>
  * then a {@code ConcurrentModificationException} will be thrown since the {@code peek} operation will attempt
  * to add the string "BAD LAMBDA" to the list after the terminal operation has commenced.
  *
@@ -486,3 +559,6 @@
  */
 
 package java.util.stream;
+
+import java.util.function.BiConsumer;
+import java.util.function.Supplier;
\ No newline at end of file
--- a/test-ng/agent/conf/serialize.list	Mon Apr 08 15:57:12 2013 -0700
+++ b/test-ng/agent/conf/serialize.list	Mon Apr 08 16:19:46 2013 -0700
@@ -31,3 +31,25 @@
 org/openjdk/tests/java/util/stream/TabulatorsTest
 org/openjdk/tests/java/util/stream/TabulatorsTest\$.*Assertion
 org/openjdk/tests/java/util/stream/StreamLinkTest
+org.openjdk.tests.java.util.stream.SpliteratorLateBindingFailFastTest\$SpliteratorDataBuilder.*
+org.openjdk.tests.java.util.stream.SpliteratorTraversingAndSplittingTest\$SpliteratorDataBuilder.*
+java.util.HashMap\$Values
+java.util.LinkedHashMap\$KeyIterator
+java.util.HashMap\$Entry
+java.util.LinkedList\$Node
+java.util.TreeMap\$Entry
+java.util.HashMap\$EntrySet
+java.util.HashMap\$KeySet
+java.util.IdentityHashMap\$KeySet
+java.util.LinkedHashMap\$ValueIterator
+java.util.TreeMap\$KeySet
+java.util.TreeMap\$EntrySet
+java.util.TreeMap\$Values
+java.util.WeakHashMap
+java.util.WeakHashMap\$EntrySet
+java.util.WeakHashMap\$KeySet
+java.util.WeakHashMap\$Values
+java.util.LinkedHashMap\$EntryIterator
+java.util.IdentityHashMap\$EntrySet
+java.util.IdentityHashMap\$Values
+
--- a/test-ng/agent/make/build.xml	Mon Apr 08 15:57:12 2013 -0700
+++ b/test-ng/agent/make/build.xml	Mon Apr 08 16:19:46 2013 -0700
@@ -3,18 +3,18 @@
 
   <!-- set global properties for this build -->
   <property name="build.sysclasspath" value="ignore"/>
+  <property name="build.dir" value="../../../build/test-ng/agent" />
   <property name="src"      value="${basedir}/src"/>
-  <property name="build"    value="${basedir}/build"/>
-  <property name="dist"     value="${basedir}/dist"/>
   <property name="make"     value="${basedir}/make"/>
-    <property name="conf"     value="${basedir}/conf"/>
-  <property name="classes"  value="${build}/classes"/>
+  <property name="conf"     value="${basedir}/conf"/>
+  <property name="classes"  value="${build.dir}/classes"/>
+  <property name="dist"     value="${build.dir}/dist"/>
 
   <target name="init">
     <!-- Create the time stamp -->
     <tstamp/>
     <!-- Create the build directory structure used by compile -->
-    <mkdir dir="${build}"/>
+    <mkdir dir="${build.dir}"/>
     <mkdir dir="${dist}"/>
     <mkdir dir="${classes}"/>
     <echo message="${java.home}"/>
@@ -25,7 +25,7 @@
     <javac 
 	source="1.7"
 	srcdir="${src}"
-	destdir="${build}/classes"
+	destdir="${build.dir}/classes"
 	verbose="no"
 	debug="on"
     >
--- a/test-ng/agent/src/com/oracle/lambda/Agent.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/test-ng/agent/src/com/oracle/lambda/Agent.java	Mon Apr 08 16:19:46 2013 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -38,7 +38,7 @@
             throws IOException, NoSuchFieldException, IllegalAccessException {
         // If run without a config file, all we do is set up the shutdown hook
         if (agentArgs != null) {
-            final SerializationInjector si = new SerializationInjector(agentArgs);
+            final SerializationInjector si = new SerializationInjector();
             instrumentation.addTransformer(new ClassFileTransformer() {
                 @Override
                 public byte[] transform(final ClassLoader cl, String string,
--- a/test-ng/agent/src/com/oracle/lambda/Main.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/test-ng/agent/src/com/oracle/lambda/Main.java	Mon Apr 08 16:19:46 2013 -0700
@@ -58,9 +58,9 @@
 public class Main {
 
     static void usage() {
-        System.out.println("usage: -o output_dir -f path_to_list path_to/SomeClass.class");
-        System.out.println("usage: -o output_dir -f path_to_list path_to_classes");
-        System.out.println("usage: -o output_dir -s -f path_to_list path_to_jar");
+        System.out.println("usage: -o output_dir path_to/SomeClass.class");
+        System.out.println("usage: -o output_dir path_to_classes");
+        System.out.println("usage: -o output_dir -s path_to_jar");
         System.exit(1);
     }
 
@@ -77,7 +77,6 @@
         }
         File outDir = null;
         File inFile = null;
-        File confFile = null;
         boolean serializeOnly = false;
         for (int i = 0 ; i < args.length ; i++) {
             switch (args[i]) {
@@ -85,10 +84,6 @@
                     i++;
                     outDir = new File(args[i]);
                     break;
-                case "-f":
-                    i++;
-                    confFile = new File(args[i]);
-                    break;
                 case "-s":
                     serializeOnly = true;
                     break;
@@ -117,7 +112,7 @@
             Pattern pattern = Pattern.compile(".*");
             EnumSet<SerializationInjector.Options> options
                     = serializeOnly ? EnumSet.of(SerializationInjector.Options.SERIALIZE_ONLY) : EnumSet.noneOf(SerializationInjector.Options.class);
-            doJar(confFile, inFile, outDir, cl, pattern, options);
+            doJar(inFile, outDir, cl, pattern, options);
         } else {
             outDir.mkdirs();
             URL[] urls = {inFile.getParentFile().toURI().toURL()};
@@ -140,11 +135,11 @@
         return baos.toByteArray();
     }
 
-    static void doJar(File confFile, File inFile, File outDir, ClassLoader cl, Pattern pattern,
+    static void doJar(File inFile, File outDir, ClassLoader cl, Pattern pattern,
                       EnumSet<SerializationInjector.Options> options) throws IOException {
         JarFile jf = new JarFile(inFile);
         byte[] buffer;
-        SerializationInjector si = new SerializationInjector(confFile.getAbsolutePath());
+        SerializationInjector si = new SerializationInjector();
         FileSystem fs = null;
         boolean jarOut = outDir.getName().endsWith(".jar");
         Set<PosixFilePermission> perms = PosixFilePermissions.fromString("rwxrwxrwx");
--- a/test-ng/agent/src/com/oracle/lambda/SerializationInjector.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/test-ng/agent/src/com/oracle/lambda/SerializationInjector.java	Mon Apr 08 16:19:46 2013 -0700
@@ -24,7 +24,6 @@
 
 import java.io.BufferedOutputStream;
 import java.io.File;
-import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.PrintStream;
@@ -37,7 +36,6 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Properties;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.logging.Level;
 import java.util.logging.Logger;
@@ -53,13 +51,21 @@
 
 import static java.nio.file.StandardOpenOption.*;
 
+
 public class SerializationInjector {
     static final String PREFIX = "com.oracle.lambda";
     static final String INCLUDES = "java.util.*|org.openjdk.tests.java.*"; // also should include java.util...
     static final String SERIALIZE = "";
-    static PrintStream outputStream;
-    static boolean shouldClose = false;
-    static boolean storeStackTraces = true;
+
+    // properties that can be set
+    static final String INCLUDE_FILES  = PREFIX + ".include.files";
+    static final String SERIALIZE_FILE = PREFIX + ".serialize.file";
+    static final String STORE_FRAMES   = PREFIX + ".store.frames";
+    static final String LOGFILE        = PREFIX + ".log.file";
+    static final String DEBUG          = PREFIX + ".debug";
+    static final String NROUNDS        = PREFIX + ".nrounds";
+
+    static boolean storeFrames = true;
     static boolean deserializationWorkaround;
 
     final DebugPrint debug;
@@ -69,84 +75,27 @@
 
     public enum Options { SERIALIZE_ONLY, FORCE_SERIALIZE }
 
-    public SerializationInjector() {
-        includePattern = Pattern.compile(INCLUDES);
-        serializePattern = Pattern.compile(SERIALIZE);
-        outputStream = System.out;
-        debug = new DebugPrint(outputStream, false);
+    public SerializationInjector() throws IOException {
+        includePattern   = propToPattern(INCLUDE_FILES, INCLUDES);
+        serializePattern = fileToPattern(SERIALIZE_FILE, SERIALIZE);
+        debug = new DebugPrint();
         TestLambdaSerialization.initializeShutDownHook();
-        storeStackTraces = false;
+        storeFrames = Boolean.getBoolean(STORE_FRAMES);
         deserializationWorkaround = false;
         serializeOnly = false;
     }
-
-    public SerializationInjector(String confFile) throws IOException {
-        if (confFile != null) {
-            Properties props = new Properties();
-            try (FileInputStream fis = new FileInputStream(confFile)) {
-                props.load(fis);
-            } catch (IOException ex) {
-                System.err.println("Error: " + ex.getMessage());
-                throw new Error(ex);
+    final Pattern propToPattern(String propKey, String defValue) {
+        return Pattern.compile(System.getProperty(propKey, defValue));
+    }
+    final Pattern fileToPattern(String propKey, String defValue) throws IOException {
+        String fname = System.getProperty(propKey, null);
+        if (fname != null) {
+            File pFile = new File(fname);
+            if (pFile.canRead()) {
+                return fileToPattern(pFile);
             }
-            props.list(System.err);
-            String include = props.getProperty(PREFIX + ".include.files", INCLUDES);
-            includePattern = Pattern.compile(include);
-            String injectSerializeName = props.getProperty(PREFIX + ".serialize.file", null);
-            storeStackTraces = props.getProperty(PREFIX + ".storeStackTraces", null) != null;
-            deserializationWorkaround = props.getProperty(PREFIX +
-                    ".deserialization.workaround", "false").equals("true");
-
-            System.err.println("Serialization file: ");
-            if (injectSerializeName != null) {
-                System.err.println(injectSerializeName);
-                serializePattern = fileToPattern(injectSerializeName);
-            } else {
-                // if there is no serialization file listed, try to see if there
-                // is one in the agent/conf directory it is usually the same dir
-                // as the props file, othewise use the default patterns.
-                File serializeFile = new File(new File(confFile).getParentFile(), "serialize.list");
-                if (serializeFile.exists()) {
-                    System.err.println(serializeFile.getAbsolutePath());
-                    serializePattern = fileToPattern(serializeFile);
-                } else {
-                    System.err.println("default pattern");
-                    serializePattern = Pattern.compile(SERIALIZE);
-                }
-            }
-            String output = props.getProperty(PREFIX + ".log.file", "out");
-            switch (output) {
-                case "out":
-                    outputStream = System.out;
-                    break;
-                case "err":
-                    outputStream = System.err;
-                    break;
-                default:
-                    File f = new File(output);
-                    if (f.exists()) {
-                        File dir = f.getParentFile();
-                        f = File.createTempFile("agent", ".log", dir);
-                    }
-                    FileOutputStream fos = new FileOutputStream(f);
-                    outputStream = new PrintStream(new BufferedOutputStream(fos));
-                    shouldClose = true;
-            }
-            debug = new DebugPrint(outputStream,
-            props.getProperty(PREFIX + ".debug") != null);
-        } else {
-            includePattern = Pattern.compile(INCLUDES);
-            serializePattern = Pattern.compile(SERIALIZE);
-            outputStream = System.out;
-            debug = new DebugPrint(outputStream, false);
-            storeStackTraces = false;
-            deserializationWorkaround = false;
         }
-        TestLambdaSerialization.initializeShutDownHook();
-        serializeOnly = false;
-    }
-    final Pattern fileToPattern(String filename) throws IOException {
-        return fileToPattern(new File(filename));
+        return Pattern.compile(defValue);
     }
     final Pattern fileToPattern(File inFile) throws IOException {
         List<String> serList = Files.readAllLines(inFile.toPath(),
@@ -169,6 +118,7 @@
         byte[] xBuffer = classBuffer;
         ClassReader classReader = new ClassReader(xBuffer);
         String cname = classReader.getClassName();
+
         if (options.contains(Options.FORCE_SERIALIZE) || serializePattern.matcher(cname).matches()) {
             debug.println("Implementing Serialization: " + cname);
             xBuffer = injectSerialization(xBuffer, cl);
@@ -294,13 +244,13 @@
                     public void visitInvokeDynamicInsn(String callsite,
                             String sig, Handle handle, Object... bsmArgs) {
                         // instrument  only! lambda objects
-                        final boolean injectSand = handle.getName().equals("metaFactory") ||
+                        final boolean needsTransform = handle.getName().equals("metaFactory");
+                        final boolean injectSand = needsTransform ||
                                 handle.getName().equals("altMetaFactory");
                         if (!injectSand) {
                             super.visitInvokeDynamicInsn(callsite, sig, handle, bsmArgs);
                             return;
                         }
-                        final boolean needsTransform = handle.getName().equals("metaFactory");
                         debug.println("indy:" + mid + ", locals = " + locals);
                         debug.println("  fixing: " + callsite + ":" + sig);
                         Handle nHandle = handle;
@@ -387,6 +337,7 @@
                     foundSer = foundSer || x.contains(sername);
                 }
                 if (!foundSer) {
+                    debug.println("Injecting serialization: " + name);
                     int n = interfaces.length;
                     interfaces = Arrays.copyOf(interfaces, n + 1);
                     interfaces[n] = sername;
@@ -395,6 +346,7 @@
                 }
                 super.visit(version, access, name, signature, superName, interfaces);
                 if (injectNoArg && (access & ACC_INTERFACE) == 0) {
+                    debug.println("Injecting <init>: " + name);
                     MethodVisitor ctor = visitMethod(ACC_PUBLIC, "<init>", "()V", null, null);
                     ctor.visitCode();
                     ctor.visitVarInsn(ALOAD, 0);
@@ -407,15 +359,35 @@
         };
     }
 }
-
 class DebugPrint {
     final PrintStream out;
     final boolean mustPrint;
-    DebugPrint(PrintStream out, boolean print) {
-        this.out = out;
-        boolean prop = Boolean.getBoolean(SerializationInjector.PREFIX + ".debug");
-        mustPrint = print ? true : prop;
+    final boolean shouldClose;
+
+    DebugPrint() throws IOException {
+        mustPrint = Boolean.getBoolean(SerializationInjector.DEBUG);
+        String output = System.getProperty(SerializationInjector.LOGFILE, "out");
+        switch (output) {
+            case "out":
+                out = System.out;
+                shouldClose = false;
+                break;
+            case "err":
+                out = System.err;
+                shouldClose = false;
+                break;
+            default:
+                File f = new File(output);
+                if (f.exists()) {
+                    File dir = f.getParentFile();
+                    f = File.createTempFile("agent", ".log", dir);
+                }
+                FileOutputStream fos = new FileOutputStream(f);
+                out = new PrintStream(new BufferedOutputStream(fos));
+                shouldClose = true;
+        }
     }
+
     void println(String s) {
         if (mustPrint)
             out.println(s);
--- a/test-ng/agent/src/com/oracle/lambda/TestLambdaSerialization.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/test-ng/agent/src/com/oracle/lambda/TestLambdaSerialization.java	Mon Apr 08 16:19:46 2013 -0700
@@ -47,11 +47,11 @@
             Collections.synchronizedMap(new HashMap<String, SerializableStatus>());
     private static final CounterSet serCounters = new CounterSet();
     private static final CounterSet deserCounters = new CounterSet();
+    private static final int nRounds =
+            Integer.parseInt(System.getProperty(SerializationInjector.NROUNDS, "1"));
 
     public static void printStats() {
-        PrintStream rpt = SerializationInjector.outputStream;
-        if (rpt == null)
-            rpt = System.out;
+        PrintStream rpt = System.out;
         try {
             rpt.println("Lambda Serialization Test Status:");
             rpt.println("  serializations attempted:       " + serCounters.attempted);
@@ -81,20 +81,22 @@
                 }
             }
         } finally {
-            if (SerializationInjector.shouldClose) {
-                rpt.close();
-            } else {
-                rpt.flush();
-            }
+            rpt.flush();
         }
     }
 
     public static Object serializeOnly(Object lambdaObj, Object... args) {
-        return serializeAndDeserialize0(true, lambdaObj, args);
+        Object lObj = lambdaObj;
+        for (int i = 0 ; i < nRounds; i++)
+            lObj = serializeAndDeserialize0(true, lObj, args);
+        return lObj;
     }
 
     public static Object serializeAndDeserialize(Object lambdaObj, Object... args) {
-        return serializeAndDeserialize0(false, lambdaObj, args);
+        Object lObj = lambdaObj;
+        for (int i = 0; i < nRounds; i++)
+            lObj = serializeAndDeserialize0(false, lObj, args);
+        return lObj;
     }
 
     private static Object serializeAndDeserialize0(boolean serializeonly,
@@ -107,7 +109,7 @@
                 SerializableStatus s = nonSerializableClasses.get(name);
                 if (s == null) {
                     s = new SerializableStatus("NonSerializableObject", name,
-                            SerializationInjector.storeStackTraces
+                            SerializationInjector.storeFrames
                                 ? new Exception(name) : null);
                     nonSerializableClasses.put(name, s);
 
@@ -127,27 +129,25 @@
                 oos.close();
                 serBytes = bos.toByteArray();
                 serCounters.succeeded.incrementAndGet();
-            }
-            catch (java.io.NotSerializableException e) {
+            } catch (java.io.NotSerializableException e) {
                 serCounters.failed.incrementAndGet();
                 String name = e.getMessage();
                 SerializableStatus s = nonSerializableClasses.get(name);
                 if (s == null) {
                     s = new SerializableStatus("NonSerializableObject", name,
-                                               SerializationInjector.storeStackTraces ? e : null);
+                                               SerializationInjector.storeFrames ? e : null);
                     nonSerializableClasses.put(name, s);
                 } else {
                     s.incrementCount();
                 }
                 return lambdaObj;
-            }
-            catch (RuntimeException|IOException|Error e) {
+            } catch (RuntimeException | IOException | Error e) {
                 serCounters.failed.incrementAndGet();
                 String cname = lambdaObj.getClass().getName();
                 SerializableStatus s = serializableButFailedClasses.get(cname);
                 if (s == null) {
                     serializableButFailedClasses.put(cname, new SerializableStatus("LambdaObject",
-                            lambdaObj, SerializationInjector.storeStackTraces ? e : null));
+                            lambdaObj, SerializationInjector.storeFrames ? e : null));
                 } else {
                     s.incrementCount();
                 }
@@ -168,17 +168,16 @@
             Object nlambdaObj = ois.readObject();
             deserCounters.succeeded.incrementAndGet();
             return nlambdaObj;
-        }
-        catch (IOException | ClassNotFoundException e) {
+        } catch (Exception e) {
             deserCounters.failed.incrementAndGet();
             String cname = lambdaObj.getClass().getName();
             SerializableStatus s = deserFailures.get(cname);
             if (s == null) {
                 deserFailures.put(cname, new SerializableStatus("NonDeserializableObject",
-                                                                lambdaObj, SerializationInjector.storeStackTraces ? e : null));
+                        lambdaObj, SerializationInjector.storeFrames ? e : null));
+            } else {
+                s.incrementCount();
             }
-            else
-                s.incrementCount();
             return lambdaObj;
         }
     }
--- a/test-ng/bootlib/java/util/stream/CollectorOps.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/test-ng/bootlib/java/util/stream/CollectorOps.java	Mon Apr 08 16:19:46 2013 -0700
@@ -32,15 +32,11 @@
 public final class CollectorOps {
     private CollectorOps() { }
 
-    public static <E_IN> StatefulOp<E_IN> sequentialCollector() {
-        return new StatefulCollector<>(StreamOpFlag.NOT_PARALLEL, StreamShape.REFERENCE);
-    }
-
-    public static <E_IN> StatefulOp<E_IN> parallelCollector() {
+    public static <E_IN> StatefulTestOp<E_IN> collector() {
         return new StatefulCollector<>(0, StreamShape.REFERENCE);
     }
 
-    public static class StatefulCollector<E_IN> implements StatefulOp<E_IN> {
+    public static class StatefulCollector<E_IN> implements StatefulTestOp<E_IN> {
         private final int opFlags;
         private final StreamShape inputShape;
 
@@ -65,7 +61,7 @@
         }
 
         @Override
-        public Sink<E_IN> opWrapSink(int flags, Sink<E_IN> sink) {
+        public Sink<E_IN> opWrapSink(int flags, boolean parallel, Sink<E_IN> sink) {
             return sink;
         }
 
--- a/test-ng/bootlib/java/util/stream/FlagDeclaringOp.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/test-ng/bootlib/java/util/stream/FlagDeclaringOp.java	Mon Apr 08 16:19:46 2013 -0700
@@ -29,7 +29,7 @@
  *
  */
 @SuppressWarnings({"rawtypes", "unchecked"})
-public class FlagDeclaringOp<T> implements IntermediateOp<T, T> {
+public class FlagDeclaringOp<T> implements StatelessTestOp<T, T> {
     private final int flags;
     private final StreamShape shape;
 
@@ -58,7 +58,7 @@
     }
 
     @Override
-    public Sink<T> opWrapSink(int flags, Sink sink) {
+    public Sink<T> opWrapSink(int flags, boolean parallel, Sink sink) {
         return sink;
     }
 }
--- a/test-ng/bootlib/java/util/stream/IntermediateOp.java	Mon Apr 08 15:57:12 2013 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,235 +0,0 @@
-/*
- * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package java.util.stream;
-
-import java.util.Spliterator;
-import java.util.function.IntFunction;
-
-/**
- * An operation in a stream pipeline that takes a stream as input and produces
- * a stream, possibly of a different type, as output.  An intermediate operation
- * has an input type and an output type, reflected in its type parameters
- * {@code E_IN} and {@code E_OUT}, and, an associated input shape and
- * output shape.  An intermediate operation also has a set of <em>operation
- * flags</em> that describes how it transforms characteristics of the stream
- * (such as sortedness or size; see {@link StreamOpFlag}).
- *
- * <p>Intermediate operations are implemented in terms of <em>sink transforms
- * </em>; given a {@code Sink} for the output type of the operation, produce a
- * {@code Sink} for the input type of the operation, which, when fed with
- * values, has the effect of implementing the desired operation on the input
- * values and feeding them to the output sink.
- *
- * <p>Some intermediate operations are <em>stateful</em>.  This means that the
- * sinks they produce as a result of the above wrapping may maintain state from
- * processing earlier elements.  Stateful intermediate operations must implement
- * the {@link StatefulOp} interface.  Statefulness has an effect on how the
- * operation can be parallelized.  Stateless operations parallelize trivially
- * because they are homomorphisms under concatenation:
- *
- * <pre>
- *     statelessOp(a || b) = statelessOp(a) || statelessOp(b)
- * </pre>
- *
- * where {@code ||} denotes concatenation.  Stateful operations may still be
- * parallelizable, but are not amenable to the automatic parallelization of
- * stateless operations.  Accordingly, a stateful operation must provide its own
- * parallel execution implementation
- * ({@link IntermediateOp#opEvaluateParallel(PipelineHelper, java.util.Spliterator, java.util.function.IntFunction)}).
- *
- * @apiNote
- * As an example, consider the stream pipeline:
- * <pre>
- *     int oldestBob = people.stream()
- *                            .filter(p -> p.getFirstName.equals("Bob"))
- *                            .mapToInt(p -> p.getAge())
- *                            .max();
- * </pre>
- *
- * <p>This pipeline has two intermediate operations, filter and map.  The
- * filtering operation has input and output types of {@code Person} (with input
- * and output shape of {@code REFERENCE}), and the mapping operation has an
- * input type of {@code Person} and an output type of {@code Integer} (with
- * shape {@code INT_VALUE}.)  When we construct a sink chain, the mapping
- * operation will be asked to transform a {@code Sink.OfInt} which computes the
- * maximum value into a {@code Sink} which accepts {@code Person} objects, and
- * whose behavior is to take the supplied {@code Person}, call {@code getAge()}
- * on it, and pass the resulting value to the downstream sink.  This sink
- * transform might be implement as:
- *
- * <pre>
- *     new Sink.ChainedReference<U>(sink) {
- *         public void accept(U u) {
- *             downstream.accept(mappingFunction.applyAsInt(u));
- *         }
- *     }
- * </pre>
- *
- * @param <E_IN>  Type of input elements to the operation
- * @param <E_OUT> Type of output elements to the operation
- * @see TerminalOp
- * @see StatefulOp
- * @since 1.8
- */
-interface IntermediateOp<E_IN, E_OUT> {
-
-    @SuppressWarnings({"rawtypes", "unchecked"})
-    public static<T> AbstractPipeline chain(AbstractPipeline upstream,
-                                            IntermediateOp<?, T> op) {
-        if (op instanceof StatefulOp)
-            return StatefulOp.chain(upstream, (StatefulOp) op);
-        switch (op.outputShape()) {
-            case REFERENCE:
-                return new ReferencePipeline.StatelessOp<Object, T>(upstream, op.inputShape(), op.opGetFlags()) {
-                    public Sink opWrapSink(int flags, Sink<T> sink) {
-                        return op.opWrapSink(flags, sink);
-                    }
-                };
-            case INT_VALUE:
-                return new IntPipeline.StatelessOp<Object>(upstream, op.inputShape(), op.opGetFlags()) {
-                    public Sink opWrapSink(int flags, Sink sink) {
-                        return op.opWrapSink(flags, sink);
-                    }
-                };
-            case LONG_VALUE:
-                return new LongPipeline.StatelessOp<Object>(upstream, op.inputShape(), op.opGetFlags()) {
-                    @Override
-                    Sink opWrapSink(int flags, Sink sink) {
-                        return op.opWrapSink(flags, sink);
-                    }
-                };
-            case DOUBLE_VALUE:
-                return new DoublePipeline.StatelessOp<Object>(upstream, op.inputShape(), op.opGetFlags()) {
-                    @Override
-                    Sink opWrapSink(int flags, Sink sink) {
-                        return op.opWrapSink(flags, sink);
-                    }
-                };
-            default: throw new IllegalStateException(op.outputShape().toString());
-        }
-    }
-
-
-    /**
-     * Gets the shape of the input type of this operation
-     *
-     * @implSpec The default returns {@code StreamShape.REFERENCE}
-     * @return Shape of the input type of this operation
-     */
-    default StreamShape inputShape() { return StreamShape.REFERENCE; }
-
-    /**
-     * Gets the shape of the output type of this operation
-     *
-     * @implSpec The default returns {@code StreamShape.REFERENCE}
-     * @return Shape of the output type of this operation
-     */
-    default StreamShape outputShape() { return StreamShape.REFERENCE; }
-
-    /**
-     * Gets the operation flags of this operation.
-     *
-     * @implSpec The default returns {@code 0}
-     * @return a bitmap describing the operation flags of this operation
-     * @see StreamOpFlag
-     */
-    default int opGetFlags() { return 0; }
-
-    /**
-     * Returns whether this operation is stateful or not.  If it is stateful,
-     * then the method
-     * {@link #opEvaluateParallel(PipelineHelper, java.util.Spliterator, java.util.function.IntFunction)}
-     * must be overridden.
-     *
-     * @implSpec The default implementation returns {@code false}.
-     * @return {@code true} if this operation is stateful
-     */
-    default boolean opIsStateful() { return false; }
-
-    /**
-     * Accepts a {@code Sink} which will receive the results of this operation,
-     * and return a {@code Sink} which accepts elements of the input type of
-     * this operation and which performs the operation, passing the results to
-     * the provided {@code Sink}.
-     *
-     * <p>The implementation may use the {@code flags} parameter to optimize the
-     * sink wrapping.  For example, if the input is already {@code DISTINCT},
-     * the implementation for the {@code Stream#distinct()} method could just
-     * return the sink it was passed.
-     *
-     * @param flags The combined stream and operation flags up to, but not
-     *        including, this operation.
-     * @param sink elements will be sent to this sink after the processing.
-     * @return a sink which will accept elements and perform the operation upon
-     *         each element, passing the results (if any) to the provided
-     *         {@code Sink}.
-     */
-    Sink<E_IN> opWrapSink(int flags, Sink<E_OUT> sink);
-
-    /**
-     * Performs a parallel evaluation of the operation using the specified
-     * {@code PipelineHelper} which describes the stream source and upstream
-     * intermediate operations.  Only called on stateful operations.  If
-     * {@link #opIsStateful()} returns true then implementations must override the
-     * default implementation.
-     *
-     * @implSpec The default implementation throws an
-     * {@link UnsupportedOperationException}
-     *
-     * @param helper the pipeline helper
-     * @param spliterator the source {@code Spliterator}
-     * @param generator the array generator
-     * @return a {@code Node} describing the result of the evaluation
-     */
-    default <P_IN> Node<E_OUT> opEvaluateParallel(PipelineHelper<E_OUT> helper,
-                                                  Spliterator<P_IN> spliterator,
-                                                  IntFunction<E_OUT[]> generator) {
-        throw new UnsupportedOperationException("Parallel evaluation is not supported");
-    }
-
-    /**
-     * Returns a {@code Spliterator} describing a parallel evaluation of the operation using
-     * the specified {@code PipelineHelper} which describes the stream source and upstream
-     * intermediate operations.  Only called on stateful operations.  It is not necessary
-     * (though acceptable) to do a full computation of the result here; it is preferable, if
-     * possible, to describe the result via a lazily evaluated spliterator.
-     *
-     * @implSpec The default implementation behaves as if:
-     * <pre>{@code
-     *     return evaluateParallel(helper, i -> (E_OUT[]) new Object[i]).spliterator();
-     * }</pre>
-     * and is suitable for implementations that cannot do better than a full synchronous
-     * evaluation.
-     *
-     * @param helper the pipeline helper
-     * @param spliterator the source {@code Spliterator}
-     * @return a {@code Spliterator} describing the result of the evaluation
-     */
-    @SuppressWarnings("unchecked")
-    default <P_IN> Spliterator<E_OUT> opEvaluateParallelLazy(PipelineHelper<E_OUT> helper,
-                                                             Spliterator<P_IN> spliterator) {
-        return opEvaluateParallel(helper, spliterator, i -> (E_OUT[]) new Object[i]).spliterator();
-    }
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-ng/bootlib/java/util/stream/IntermediateTestOp.java	Mon Apr 08 16:19:46 2013 -0700
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package java.util.stream;
+
+/**
+ * A base type for test operations
+ */
+interface IntermediateTestOp<E_IN, E_OUT> {
+
+    @SuppressWarnings({"rawtypes", "unchecked"})
+    public static<T> AbstractPipeline chain(AbstractPipeline upstream,
+                                            IntermediateTestOp<?, T> op) {
+        if (op instanceof StatelessTestOp)
+            return StatelessTestOp.chain(upstream, (StatelessTestOp) op);
+
+        if (op instanceof StatefulTestOp)
+            return StatefulTestOp.chain(upstream, (StatefulTestOp) op);
+
+        throw new IllegalStateException("Unknown test op type: " + op.getClass().getName());
+    }
+}
--- a/test-ng/bootlib/java/util/stream/LambdaTestHelpers.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/test-ng/bootlib/java/util/stream/LambdaTestHelpers.java	Mon Apr 08 16:19:46 2013 -0700
@@ -449,7 +449,6 @@
         if (StreamOpFlag.SIZED.isKnown(flags)) sj.add("IS_SIZED");
         if (StreamOpFlag.SORTED.isKnown(flags)) sj.add("IS_SORTED");
         if (StreamOpFlag.SHORT_CIRCUIT.isKnown(flags)) sj.add("IS_SHORT_CIRCUIT");
-        if (StreamOpFlag.PARALLEL.isKnown(flags)) sj.add("IS_PARALLEL");
         return sj.toString();
     }
 }
--- a/test-ng/bootlib/java/util/stream/OpTestCase.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/test-ng/bootlib/java/util/stream/OpTestCase.java	Mon Apr 08 16:19:46 2013 -0700
@@ -78,25 +78,48 @@
 
         boolean isParallel();
 
-        abstract <T, U, S_IN extends BaseStream<T, S_IN>, S_OUT extends BaseStream<U, S_OUT>> void run(TestData<T, S_IN> data, Consumer<U> b, Function<S_IN, S_OUT> m);
+        abstract <T, U, S_IN extends BaseStream<T, S_IN>, S_OUT extends BaseStream<U, S_OUT>>
+        void run(TestData<T, S_IN> data, Consumer<U> b, Function<S_IN, S_OUT> m);
     }
 
-    public <T, U, S_IN extends BaseStream<T, S_IN>, S_OUT extends BaseStream<U, S_OUT>> Collection<U> exerciseOps(TestData<T, S_IN> data, Function<S_IN, S_OUT> m) {
+    public <T, U, S_IN extends BaseStream<T, S_IN>, S_OUT extends BaseStream<U, S_OUT>>
+    Collection<U> exerciseOps(TestData<T, S_IN> data, Function<S_IN, S_OUT> m) {
         return withData(data).stream(m).exercise();
     }
 
-    public <T, U, S_OUT extends BaseStream<U, S_OUT>> Collection<U> exerciseOps(Collection<T> data, Function<Stream<T>, S_OUT> m) {
+    // Run multiple versions of exercise(), returning the result of the first, and asserting that others return the same result
+    // If the first version is s -> s.foo(), can be used with s -> s.mapToInt(i -> i).foo().mapToObj(i -> i) to test all shape variants
+    @SafeVarargs
+    public final<T, U, S_IN extends BaseStream<T, S_IN>, S_OUT extends BaseStream<U, S_OUT>>
+    Collection<U> exerciseOpsMulti(TestData<T, S_IN> data,
+                                   Function<S_IN, S_OUT>... ms) {
+        Collection<U> result = null;
+        for (Function<S_IN, S_OUT> m : ms) {
+            if (result == null)
+                result = withData(data).stream(m).exercise();
+            else {
+                Collection<U> r2 = withData(data).stream(m).exercise();
+                assertEquals(result, r2);
+            }
+        }
+        return result;
+    }
+
+    public <T, U, S_OUT extends BaseStream<U, S_OUT>>
+    Collection<U> exerciseOps(Collection<T> data, Function<Stream<T>, S_OUT> m) {
         StreamTestData.CollectionData<T> data1 = new StreamTestData.CollectionData<>("Collection of type " + data.getClass().getName(), data);
         return withData(data1).stream(m).exercise();
     }
 
-    public <T, U, S_OUT extends BaseStream<U, S_OUT>, I extends Iterable<U>> Collection<U> exerciseOps(Collection<T> data, Function<Stream<T>, S_OUT> m, I expected) {
+    public <T, U, S_OUT extends BaseStream<U, S_OUT>, I extends Iterable<U>>
+    Collection<U> exerciseOps(Collection<T> data, Function<Stream<T>, S_OUT> m, I expected) {
         StreamTestData.CollectionData<T> data1 = new StreamTestData.CollectionData<>("Collection of type " + data.getClass().getName(), data);
         return withData(data1).stream(m).expectedResult(expected).exercise();
     }
 
     @SuppressWarnings("unchecked")
-    public <U, S_OUT extends BaseStream<U, S_OUT>> Collection<U> exerciseOps(int[] data, Function<IntStream, S_OUT> m) {
+    public <U, S_OUT extends BaseStream<U, S_OUT>>
+    Collection<U> exerciseOps(int[] data, Function<IntStream, S_OUT> m) {
         return withData(new IntStreamTestData.ArrayData("int array", data)).stream(m).exercise();
     }
 
@@ -118,7 +141,8 @@
             this.data = Objects.requireNonNull(data);
         }
 
-        public <U, S_OUT extends BaseStream<U, S_OUT>> ExerciseDataStreamBuilder<T, U, S_IN, S_OUT> ops(IntermediateOp... ops) {
+        public <U, S_OUT extends BaseStream<U, S_OUT>>
+        ExerciseDataStreamBuilder<T, U, S_IN, S_OUT> ops(IntermediateTestOp... ops) {
             return new ExerciseDataStreamBuilder<>(data, (S_IN s) -> (S_OUT) chain(s, ops));
         }
 
@@ -128,7 +152,7 @@
         }
 
         public <U, S_OUT extends BaseStream<U, S_OUT>> ExerciseDataStreamBuilder<T, U, S_IN, S_OUT>
-        stream(Function<S_IN, S_OUT> m, IntermediateOp<U, U> additionalOp) {
+        stream(Function<S_IN, S_OUT> m, IntermediateTestOp<U, U> additionalOp) {
             return new ExerciseDataStreamBuilder<>(data, s -> (S_OUT) chain(m.apply(s), additionalOp));
         }
 
@@ -187,8 +211,8 @@
 
         public ExerciseDataStreamBuilder<T, U, S_IN, S_OUT> expectedResult(int[] expectedResult) {
             List l = new ArrayList();
-            for (int i = 0; i < expectedResult.length; i++) {
-                l.add(expectedResult[i]);
+            for (int anExpectedResult : expectedResult) {
+                l.add(anExpectedResult);
             }
             refResult = l;
             return this;
@@ -196,8 +220,8 @@
 
         public ExerciseDataStreamBuilder<T, U, S_IN, S_OUT> expectedResult(long[] expectedResult) {
             List l = new ArrayList();
-            for (int i = 0; i < expectedResult.length; i++) {
-                l.add(expectedResult[i]);
+            for (long anExpectedResult : expectedResult) {
+                l.add(anExpectedResult);
             }
             refResult = l;
             return this;
@@ -205,8 +229,8 @@
 
         public ExerciseDataStreamBuilder<T, U, S_IN, S_OUT> expectedResult(double[] expectedResult) {
             List l = new ArrayList();
-            for (int i = 0; i < expectedResult.length; i++) {
-                l.add(expectedResult[i]);
+            for (double anExpectedResult : expectedResult) {
+                l.add(anExpectedResult);
             }
             refResult = l;
             return this;
@@ -272,6 +296,15 @@
 
         // Build method
 
+        private long count(StreamShape shape, BaseStream s) {
+            switch (shape) {
+                case REFERENCE:    return ((Stream) s).count();
+                case INT_VALUE:    return ((IntStream) s).count();
+                case LONG_VALUE:   return ((LongStream) s).count();
+                case DOUBLE_VALUE: return ((DoubleStream) s).count();
+                default: throw new IllegalStateException("Unknown shape: " + shape);
+            }
+        }
 
         public Collection<U> exercise() {
             if (refResult == null) {
@@ -281,6 +314,9 @@
                 Node<U> refNodeResult = ((AbstractPipeline<?, U, ?>) sOut).evaluateToArrayNode(size -> (U[]) new Object[size]);
                 refResult = LambdaTestHelpers.toBoxedList(refNodeResult.spliterator());
                 after.accept(data);
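+                // Cross-check: an independent evaluation of the same pipeline via count()
+                // must agree with the size of the reference result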
+                S_OUT anotherCopy = m.apply(data.stream());
+                long count = count(((AbstractPipeline) anotherCopy).getOutputShape(), anotherCopy);
+                assertEquals(count, refNodeResult.count());
             }
 
             List<Error> errors = new ArrayList<>();
@@ -332,10 +368,10 @@
 
     static enum TerminalTestScenario {
         SINGLE_SEQUENTIAL,
-        SINGLE_SEQUENTIAL_PULL,
+        SINGLE_SEQUENTIAL_SHORT_CIRCUIT,
         SINGLE_PARALLEL,
         ALL_SEQUENTIAL,
-        ALL_SEQUENTIAL_PULL,
+        ALL_SEQUENTIAL_SHORT_CIRCUIT,
         ALL_PARALLEL,
         ALL_PARALLEL_SEQUENTIAL,
     }
@@ -419,21 +455,24 @@
             if (refResult == null) {
                 // Sequentially collect the output that will be input to the terminal op
                 refResult = terminalF.apply((S_OUT) createPipeline(shape, node.spliterator(),
-                                                                   StreamOpFlag.IS_ORDERED | StreamOpFlag.IS_SIZED));
+                                                                   StreamOpFlag.IS_ORDERED | StreamOpFlag.IS_SIZED,
+                                                                   false));
             } else if (testSet.contains(TerminalTestScenario.SINGLE_SEQUENTIAL)) {
                 S_OUT source = (S_OUT) createPipeline(shape, node.spliterator(),
-                                                      StreamOpFlag.IS_ORDERED | StreamOpFlag.IS_SIZED);
+                                                      StreamOpFlag.IS_ORDERED | StreamOpFlag.IS_SIZED,
+                                                      false);
                 BiConsumer<R, R> asserter = sequentialEqualityAsserter.apply(source);
                 R result = terminalF.apply(source);
                 LambdaTestHelpers.launderAssertion(() -> asserter.accept(refResult, result),
                                                    () -> String.format("Single sequential: %s != %s", refResult, result));
             }
 
-            if (testSet.contains(TerminalTestScenario.SINGLE_SEQUENTIAL_PULL)) {
+            if (testSet.contains(TerminalTestScenario.SINGLE_SEQUENTIAL_SHORT_CIRCUIT)) {
                 S_OUT source = (S_OUT) createPipeline(shape, node.spliterator(),
-                                                      StreamOpFlag.IS_ORDERED | StreamOpFlag.IS_SIZED);
-                // Force pull mode
-                source = (S_OUT) chain(source, new PullOnlyOp<U>(shape));
+                                                      StreamOpFlag.IS_ORDERED | StreamOpFlag.IS_SIZED,
+                                                      false);
+                // Force short-circuit
+                source = (S_OUT) chain(source, new ShortCircuitOp<U>(shape));
                 BiConsumer<R, R> asserter = sequentialEqualityAsserter.apply(source);
                 R result = terminalF.apply(source);
                 LambdaTestHelpers.launderAssertion(() -> asserter.accept(refResult, result),
@@ -442,7 +481,8 @@
 
             if (testSet.contains(TerminalTestScenario.SINGLE_PARALLEL)) {
                 S_OUT source = (S_OUT) createPipeline(shape, node.spliterator(),
-                                                      StreamOpFlag.IS_ORDERED | StreamOpFlag.IS_SIZED | StreamOpFlag.IS_PARALLEL);
+                                                      StreamOpFlag.IS_ORDERED | StreamOpFlag.IS_SIZED,
+                                                      true);
                 BiConsumer<R, R> asserter = parallelEqualityAsserter.apply(source);
                 R result = terminalF.apply(source);
                 LambdaTestHelpers.launderAssertion(() -> asserter.accept(refResult, result),
@@ -450,8 +490,7 @@
             }
 
             if (testSet.contains(TerminalTestScenario.ALL_SEQUENTIAL)) {
-                // This may push or pull depending on the terminal op implementation
-
+                // This may use forEach or tryAdvance depending on the terminal op implementation
                 S_OUT source = streamF.apply(data.stream());
                 BiConsumer<R, R> asserter = sequentialEqualityAsserter.apply(source);
                 R result = terminalF.apply(source);
@@ -459,10 +498,10 @@
                                                    () -> String.format("All sequential: %s != %s", refResult, result));
             }
 
-            if (testSet.contains(TerminalTestScenario.ALL_SEQUENTIAL_PULL)) {
+            if (testSet.contains(TerminalTestScenario.ALL_SEQUENTIAL_SHORT_CIRCUIT)) {
                 S_OUT source = streamF.apply(data.stream());
-                // Force pull mode
-                source = (S_OUT) chain(source, new PullOnlyOp<U>(shape));
+                // Force short-circuit
+                source = (S_OUT) chain(source, new ShortCircuitOp<U>(shape));
                 BiConsumer<R, R> asserter = sequentialEqualityAsserter.apply(source);
                 R result = terminalF.apply(source);
                 LambdaTestHelpers.launderAssertion(() -> asserter.accept(refResult, result),
@@ -488,12 +527,12 @@
             return refResult;
         }
 
-        AbstractPipeline createPipeline(StreamShape shape, Spliterator s, int flags) {
+        AbstractPipeline createPipeline(StreamShape shape, Spliterator s, int flags, boolean parallel) {
             switch (shape) {
-                case REFERENCE:    return new ReferencePipeline.Head<>(s, flags);
-                case INT_VALUE:    return new IntPipeline.Head(s, flags);
-                case LONG_VALUE:   return new LongPipeline.Head(s, flags);
-                case DOUBLE_VALUE: return new DoublePipeline.Head(s, flags);
+                case REFERENCE:    return new ReferencePipeline.Head<>(s, flags, parallel);
+                case INT_VALUE:    return new IntPipeline.Head(s, flags, parallel);
+                case LONG_VALUE:   return new LongPipeline.Head(s, flags, parallel);
+                case DOUBLE_VALUE: return new DoublePipeline.Head(s, flags, parallel);
                 default: throw new IllegalStateException("Unknown shape: " + shape);
             }
         }
@@ -521,24 +560,24 @@
     //
 
     @SuppressWarnings({"rawtypes", "unchecked"})
-    private static <T> AbstractPipeline<?, T, ?> chain(AbstractPipeline upstream, IntermediateOp<?, T> op) {
-        return (AbstractPipeline<?, T, ?>) IntermediateOp.chain(upstream, op);
+    private static <T> AbstractPipeline<?, T, ?> chain(AbstractPipeline upstream, IntermediateTestOp<?, T> op) {
+        return (AbstractPipeline<?, T, ?>) IntermediateTestOp.chain(upstream, op);
     }
 
     @SuppressWarnings({"rawtypes", "unchecked"})
-    private static AbstractPipeline<?, ?, ?> chain(AbstractPipeline pipe, IntermediateOp... ops) {
-        for (IntermediateOp op : ops)
+    private static AbstractPipeline<?, ?, ?> chain(AbstractPipeline pipe, IntermediateTestOp... ops) {
+        for (IntermediateTestOp op : ops)
             pipe = chain(pipe, op);
         return pipe;
     }
 
     @SuppressWarnings("rawtypes")
-    private static <T> AbstractPipeline<?, T, ?> chain(BaseStream pipe, IntermediateOp<?, T> op) {
+    private static <T> AbstractPipeline<?, T, ?> chain(BaseStream pipe, IntermediateTestOp<?, T> op) {
         return chain((AbstractPipeline) pipe, op);
     }
 
     @SuppressWarnings("rawtypes")
-    public static AbstractPipeline<?, ?, ?> chain(BaseStream pipe, IntermediateOp... ops) {
+    public static AbstractPipeline<?, ?, ?> chain(BaseStream pipe, IntermediateTestOp... ops) {
         return chain((AbstractPipeline) pipe, ops);
     }
 
@@ -555,6 +594,10 @@
 
         //
 
+        default boolean isOrdered() {
+            return spliterator().hasCharacteristics(Spliterator.ORDERED);
+        }
+
         StreamShape getShape();
 
         default <A extends Collection<? super T>> A into(A target) {
@@ -569,15 +612,15 @@
         S parallelStream();
     }
 
-    private class PullOnlyOp<T> implements IntermediateOp<T,T> {
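+    // Stateless test op that wraps the sink unchanged; chained onto a pipeline to force the
+    // short-circuit evaluation path in the terminal test scenarios above.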
+    private class ShortCircuitOp<T> implements StatelessTestOp<T,T> {
         private final StreamShape shape;
 
-        private PullOnlyOp(StreamShape shape) {
+        private ShortCircuitOp(StreamShape shape) {
             this.shape = shape;
         }
 
         @Override
-        public Sink<T> opWrapSink(int flags, Sink<T> sink) {
+        public Sink<T> opWrapSink(int flags, boolean parallel, Sink<T> sink) {
             return sink;
         }
 
--- a/test-ng/bootlib/java/util/stream/SpliteratorTestHelper.java	Mon Apr 08 15:57:12 2013 -0700
+++ b/test-ng/bootlib/java/util/stream/SpliteratorTestHelper.java	Mon Apr 08 16:19:46 2013 -0700
@@ -26,12 +26,17 @@
 
 import java.util.ArrayDeque;
 import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
 import java.util.Deque;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.Spliterator;
 import java.util.function.*;
 
 import static org.testng.Assert.*;
+import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.fail;
 
 /**
@@ -119,47 +124,85 @@
 
     static <T, S extends Spliterator<T>> void testSpliterator(Supplier<S> supplier,
                                                               UnaryOperator<Consumer<T>> boxingAdapter) {
-        testTryAdvanceAgainstForEach("testTryAdvanceAgainstForEach", supplier, boxingAdapter);
-        testMixedTryAdvanceForEach("testMixedTryAdvanceForEach", supplier, boxingAdapter);
-        testSplitAfterFullTraversal("testSplitAfterFullTraversal", supplier, boxingAdapter);
-        testSplitOnce("testSplitOnce", supplier, boxingAdapter);
-        testSplitSixDeep("testSplitSixDeep", supplier, boxingAdapter);
-        testSplitUntilNull("testSplitUntilNull", supplier, boxingAdapter);
+        ArrayList<T> fromForEach = new ArrayList<>();
+        Spliterator<T> spliterator = supplier.get();
+        Consumer<T> addToFromForEach = boxingAdapter.apply(fromForEach::add);
+        spliterator.forEachRemaining(addToFromForEach);
+
+        Collection<T> exp = Collections.unmodifiableList(fromForEach);
+
+        testForEach(exp, supplier, boxingAdapter);
+        testTryAdvance(exp, supplier, boxingAdapter);
+        testMixedTryAdvanceForEach(exp, supplier, boxingAdapter);
+        testSplitAfterFullTraversal(supplier, boxingAdapter);
+        testSplitOnce(exp, supplier, boxingAdapter);
+        testSplitSixDeep(exp, supplier, boxingAdapter);
+        testSplitUntilNull(exp, supplier, boxingAdapter);
     }
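+    // Hypothetical usage (illustrative only): for an already-boxed source the boxing adapter
+    // is the identity, e.g.
+    //
+    //     List<Integer> l = Arrays.asList(1, 2, 3, 4);
+    //     testSpliterator(l::spliterator, c -> c);
+    //
+    // A primitive spliterator test would instead supply an adapter appropriate to its
+    // primitive consumer type.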
 
-    static <T, S extends Spliterator<T>> void testTryAdvanceAgainstForEach(String name,
-                                                                           Supplier<S> supplier,
-                                                                           UnaryOperator<Consumer<T>> boxingAdapter) {
-        long sizeIfKnown = supplier.get().getExactSizeIfKnown();
+    //
+
+    private static <T, S extends Spliterator<T>> void testForEach(
+            Collection<T> exp,
+            Supplier<S> supplier,
+            UnaryOperator<Consumer<T>> boxingAdapter) {
+        S spliterator = supplier.get();
+        long sizeIfKnown = spliterator.getExactSizeIfKnown();
+        boolean isOrdered = spliterator.hasCharacteristics(Spliterator.ORDERED);
+
+        ArrayList<T> fromForEach = new ArrayList<>();
+        spliterator = supplier.get();
+        Consumer<T> addToFromForEach = boxingAdapter.apply(fromForEach::add);
+        spliterator.forEachRemaining(addToFromForEach);
+
+        // Assert that forEach now produces no elements
+        spliterator.forEachRemaining(boxingAdapter.apply(e -> fail("Spliterator.forEach produced an element after spliterator exhausted: " + e)));
+        // Assert that tryAdvance now produces no elements
+        spliterator.tryAdvance(boxingAdapter.apply(e -> fail("Spliterator.tryAdvance produced an element after spliterator exhausted: " + e)));
+
+        // assert that size, tryAdvance, and forEach are consistent
+        if (sizeIfKnown >= 0) {
+            assertEquals(sizeIfKnown, exp.size());
+        }
+        assertEquals(fromForEach.size(), exp.size());
+
+        assertContents(fromForEach, exp, isOrdered);
+    }
+
+    private static <T, S extends Spliterator<T>> void testTryAdvance(
+            Collection<T> exp,
+            Supplier<S> supplier,
+            UnaryOperator<Consumer<T>> boxingAdapter) {
+        S spliterator = supplier.get();
+        long sizeIfKnown = spliterator.getExactSizeIfKnown();
+        boolean isOrdered = spliterator.hasCharacteristics(Spliterator.ORDERED);
+
+        spliterator = supplier.get();
         ArrayList<T> fromTryAdvance = new ArrayList<>();
-
-        S spliterator = supplier.get();
         Consumer<T> addToFromTryAdvance = boxingAdapter.apply(fromTryAdvance::add);
         while (spliterator.tryAdvance(addToFromTryAdvance)) { }
 
-        ArrayList<T> fromForEach = new ArrayList<>();
-        Consumer<T> addToFromForEach = boxingAdapter.apply(fromForEach::add);
-        supplier.get().forEachRemaining(addToFromForEach);
+        // Assert that forEach now produces no elements
+        spliterator.forEachRemaining(boxingAdapter.apply(e -> fail("Spliterator.forEach produced an element after spliterator exhausted: " + e)));
+        // Assert that tryAdvance now produces no elements
+        spliterator.tryAdvance(boxingAdapter.apply(e -> fail("Spliterator.tryAdvance produced an element after spliterator exhausted: " + e)));
 
         // assert that size, tryAdvance, and forEach are consistent
         if (sizeIfKnown >= 0) {
-            assertEquals(sizeIfKnown, fromForEach.size());
-            assertEquals(sizeIfKnown, fromTryAdvance.size());
+            assertEquals(sizeIfKnown, exp.size());
         }
-        assertEquals(fromForEach, fromTryAdvance);
+        assertEquals(fromTryAdvance.size(), exp.size());
+
+        assertContents(fromTryAdvance, exp, isOrdered);
     }
 
-    static <T, S extends Spliterator<T>> void testMixedTryAdvanceForEach(String name,
-                                                                         Supplier<S> supplier,
-                                                                         UnaryOperator<Consumer<T>> boxingAdapter) {
-        long sizeIfKnown = supplier.get().getExactSizeIfKnown();
-        ArrayList<T> fromTryAdvance = new ArrayList<>();
-
-        Spliterator<T> spliterator = supplier.get();
-        Consumer<T> addToFromTryAdvance = boxingAdapter.apply(fromTryAdvance::add);
-        while (spliterator.tryAdvance(addToFromTryAdvance)) { }
-        // Assert that forEach now produces no elements
-        spliterator.forEachRemaining(boxingAdapter.apply(e -> fail("Spliterator.forEach produced an element after spliterator exhausted: " + e)));
+    private static <T, S extends Spliterator<T>> void testMixedTryAdvanceForEach(
+            Collection<T> exp,
+            Supplier<S> supplier,
+            UnaryOperator<Consumer<T>> boxingAdapter) {
+        S spliterator = supplier.get();
+        long sizeIfKnown = spliterator.getExactSizeIfKnown();
+        boolean isOrdered = spliterator.hasCharacteristics(Spliterator.ORDERED);
 
         // tryAdvance first few elements, then forEach rest
         ArrayList<T> dest = new ArrayList<>();
@@ -168,17 +211,27 @@
         for (int i = 0; i < 10 && spliterator.tryAdvance(addToDest); i++) { }
         spliterator.forEachRemaining(addToDest);
 
+        // Assert that forEach now produces no elements
+        spliterator.forEachRemaining(boxingAdapter.apply(e -> fail("Spliterator.forEach produced an element after spliterator exhausted: " + e)));
+        // Assert that tryAdvance now produces no elements
         spliterator.tryAdvance(boxingAdapter.apply(e -> fail("Spliterator.tryAdvance produced an element after spliterator exhausted: " + e)));
 
-        if (sizeIfKnown >= 0)
+        if (sizeIfKnown >= 0) {
             assertEquals(sizeIfKnown, dest.size());
-        assertEquals(fromTryAdvance, dest);
+        }
+        assertEquals(dest.size(), exp.size());
+
+        assertContents(dest, exp, isOrdered);
     }
 
-    static <T, S extends Spliterator<T>> void testSplitAfterFullTraversal(String name,
-                                                                          Supplier<S> supplier,
-                                                                          UnaryOperator<Consumer<T>> boxingAdapter) {
+    private static <T, S extends Spliterator<T>> void testSplitAfterFullTraversal(
+            Supplier<S> supplier,
+            UnaryOperator<Consumer<T>> boxingAdapter) {
         // Full traversal using tryAdvance
         Spliterator<T> spliterator = supplier.get();
         while (spliterator.tryAdvance(boxingAdapter.apply(e -> { }))) { }
@@ -201,15 +254,13 @@
         assertNull(split);
     }
 
-    static <T, S extends Spliterator<T>> void testSplitOnce(String name,
-                                                            Supplier<S> supplier,
-                                                            UnaryOperator<Consumer<T>> boxingAdapter) {
-        long sizeIfKnown = supplier.get().getExactSizeIfKnown();
-        ArrayList<T> fromTryAdvance = new ArrayList<>();
-
-        Spliterator<T> spliterator = supplier.get();
-        Consumer<T> addToFromTryAdvance = boxingAdapter.apply(fromTryAdvance::add);
-        while (spliterator.tryAdvance(addToFromTryAdvance)) { }
+    private static <T, S extends Spliterator<T>> void testSplitOnce(
+            Collection<T> exp,
+            Supplier<S> supplier,
+            UnaryOperator<Consumer<T>> boxingAdapter) {
+        S spliterator = supplier.get();
+        long sizeIfKnown = spliterator.getExactSizeIfKnown();
+        boolean isOrdered = spliterator.hasCharacteristics(Spliterator.ORDERED);
 
         ArrayList<T> fromSplit = new ArrayList<>();
         Spliterator<T> s1 = supplier.get();
@@ -226,43 +277,37 @@
             if (s1Size >= 0 && s2Size >= 0)
                 assertEquals(sizeIfKnown, s1Size + s2Size);
         }
-        assertEquals(fromSplit, fromTryAdvance);
+        assertContents(fromSplit, exp, isOrdered);
     }
 
-    static <T, S extends Spliterator<T>> void testSplitSixDeep(String name,
-                                                               Supplier<S> supplier,
-                                                               UnaryOperator<Consumer<T>> boxingAdapter) {
-        long sizeIfKnown = supplier.get().getExactSizeIfKnown();
-        ArrayList<T> fromTryAdvance = new ArrayList<>();
-
-        Spliterator<T> spliterator = supplier.get();
-        Consumer<T> addToFromTryAdvance = boxingAdapter.apply(fromTryAdvance::add);
-        while (spliterator.tryAdvance(addToFromTryAdvance)) { }
-
-        if (sizeIfKnown != -1)
-            assertEquals(sizeIfKnown, fromTryAdvance.size());
+    private static <T, S extends Spliterator<T>> void testSplitSixDeep(
+            Collection<T> exp,
+            Supplier<S> supplier,
+            UnaryOperator<Consumer<T>> boxingAdapter) {
+        S spliterator = supplier.get();
+        boolean isOrdered = spliterator.hasCharacteristics(Spliterator.ORDERED);
 
         for (int depth=0; depth < 6; depth++) {
-            ArrayList<T> dest = new ArrayList<>();
+            List<T> dest = new ArrayList<>();
             spliterator = supplier.get();
 
             assertSpliterator(spliterator);
 
             // verify splitting with forEach
             visit(depth, 0, dest, spliterator, boxingAdapter, spliterator.characteristics(), false);
-            assertEquals(fromTryAdvance, dest);
+            assertContents(dest, exp, isOrdered);
 
             // verify splitting with tryAdvance
             dest.clear();
             spliterator = supplier.get();
             visit(depth, 0, dest, spliterator, boxingAdapter, spliterator.characteristics(), true);
-            assertEquals(fromTryAdvance, dest);
+            assertContents(dest, exp, isOrdered);
         }
     }
 
     private static <T, S extends Spliterator<T>> void visit(int depth, int curLevel,
-                              List<T> dest, S spliterator, UnaryOperator<Consumer<T>> boxingAdapter,
-                              int rootCharacteristics, boolean useTryAdvance) {
+                                                            List<T> dest, S spliterator, UnaryOperator<Consumer<T>> boxingAdapter,
+                                                            int rootCharacteristics, boolean useTryAdvance) {
         if (curLevel < depth) {
             long beforeSize = spliterator.getExactSizeIfKnown();
             Spliterator<T> split = spliterator.trySplit();
@@ -315,23 +360,22 @@
         }
     }
 
-    static <T, S extends Spliterator<T>> void testSplitUntilNull(String name,
-                                                                 Supplier<S> supplier,
-                                                                 UnaryOperator<Consumer<T>> boxingAdapter) {
-        List<T> root = new ArrayList<>();
-        supplier.get().forEachRemaining(boxingAdapter.apply(root::add));
+    private static <T, S extends Spliterator<T>> void testSplitUntilNull(
+            Collection<T> exp,
+            Supplier<S> supplier,
+            UnaryOperator<Consumer<T>> boxingAdapter) {
+        Spliterator<T> s = supplier.get();
+        boolean isOrdered = s.hasCharacteristics(Spliterator.ORDERED);
+        assertSpliterator(s);
 
         List<T> splits = new ArrayList<>();
         Consumer<T> c = boxingAdapter.apply(splits::add);
 
-        Spliterator<T> s = supplier.get();
-        assertSpliterator(s);
-
         testSplitUntilNull(new SplitNode<T>(c, s));
-        assertEquals(splits, root);
+        assertContents(splits, exp, isOrdered);
     }
 
-    static class SplitNode<T> {
+    private static class SplitNode<T> {
         // Constant for every node
         final Consumer<T> c;
         final int rootCharacteristics;
@@ -354,11 +398,11 @@
     }
 
     /**
-     * Set the maximum stack capacity to 8M. This should be more than enough to detect a bad spliterator
+     * Set the maximum count of stack modifications to 1 << 18 (0.25M). This should be more than enough to detect a bad spliterator
      * while not unduly disrupting test infrastructure given the test data sizes that are used are small.
      * Note that j.u.c.ForkJoinPool sets the max queue size to 64M (1 << 26).
      */
-    static final int MAXIMUM_STACK_CAPACITY = 1 << 21; // 2M
+    private static final int MAXIMUM_STACK_CAPACITY = 1 << 18; // 0.25M
 
     private static <T> void testSplitUntilNull(SplitNode<T> e) {
         // Use an explicit stack to avoid a StackOverflowException when testing a Spliterator
@@ -367,8 +411,9 @@
         Deque<SplitNode<T>> stack = new ArrayDeque<>();
         stack.push(e);
 
+        int iteration = 0;
         while (!stack.isEmpty()) {
-            assertTrue(stack.size() < MAXIMUM_STACK_CAPACITY, "Stack exceeded size of 2 MB");
+            assertTrue(iteration++ < MAXIMUM_STACK_CAPACITY, "Exceeded maximum stack modification count of 1 << 18");
 
             e = stack.pop();
             Spliterator<T> parentAndRightSplit = e.s;
@@ -391,13 +436,13 @@
                 assertTrue(leftSplit.estimateSize() < parentEstimateSize,
                            String.format("Left split size estimate %d >= parent split size estimate %d", leftSplit.estimateSize(), parentEstimateSize));
                 assertTrue(parentAndRightSplit.estimateSize() < parentEstimateSize,
-                            String.format("Right split size estimate %d >= parent split size estimate %d", leftSplit.estimateSize(), parentEstimateSize));
+                           String.format("Right split size estimate %d >= parent split size estimate %d", leftSplit.estimateSize(), parentEstimateSize));
             }
             else {
                 assertTrue(leftSplit.estimateSize() <= parentEstimateSize,
-                    String.format("Left split size estimate %d > parent split size estimate %d", leftSplit.estimateSize(), parentEstimateSize));
+                           String.format("Left split size estimate %d > parent split size estimate %d", leftSplit.estimateSize(), parentEstimateSize));
                 assertTrue(parentAndRightSplit.estimateSize() <= parentEstimateSize,
-                    String.format("Right split size estimate %d > parent split size estimate %d", leftSplit.estimateSize(), parentEstimateSize));
+                           String.format("Right split size estimate %d > parent split size estimate %d", leftSplit.estimateSize(), parentEstimateSize));
             }
 
             long leftSize = leftSplit.getExactSizeIfKnown();
@@ -429,5 +474,34 @@
             assertTrue(s.estimateSize() != Long.MAX_VALUE);
             assertTrue(s.getExactSizeIfKnown() >= 0);
         }
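+        // getComparator() should throw IllegalStateException exactly when the spliterator
+        // does not report the SORTED characteristic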
+        try {
+            s.getComparator();
+            assertTrue(s.hasCharacteristics(Spliterator.SORTED));
+        } catch (IllegalStateException e) {
+            assertFalse(s.hasCharacteristics(Spliterator.SORTED));
+        }
     }
+
+    private static<T> void assertContents(Collection<T> actual, Collection<T> expected, boolean isOrdered) {
+        if (isOrdered) {
+            assertEquals(actual, expected);
+        }
+        else {
+            assertContentsUnordered(actual, expected);
+        }
+    }
+
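+    // Unordered equality compares element multisets, e.g. [1, 2, 2] and [2, 2, 1] both map
+    // to {1=1, 2=2}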
+    private static<T> void assertContentsUnordered(Iterable<T> actual, Iterable<T> expected) {
+        assertEquals(toBoxedMultiset(actual), toBoxedMultiset(expected));
+    }
+
+    private static <T> Map<T, Integer> toBoxedMultiset(Iterable<T> c) {
+        Map<T, Integer> result = new HashMap<>();
+        c.forEach(e -> {
+            if (result.containsKey(e)) result.put(e, result.get(e) + 1);
+            else result.put(e, 1);
+        });
+        return result;
+    }
+
 }
--- a/test-ng/bootlib/java/util/stream/StatefulOp.java	Mon Apr 08 15:57:12 2013 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package java.util.stream;
-
-import java.util.Spliterator;
-import java.util.function.IntFunction;
-
-/**
- * A stateful intermediate stream operation ({@link IntermediateOp}).
- * <em>Stateful</em> means that state is accumulated as elements are processed.
- * Examples of stateful operations are sorting, extracting a subsequence of the
- * input, or removing duplicates.  Statefulness has an effect on how the
- * operation can be parallelized.  Stateless operations parallelize trivially
- * because they are homomorphisms under concatenation:
- *
- * <pre>
- *     statelessOp(a || b) = statelessOp(a) || statelessOp(b)
- * </pre>
- *
- * where {@code ||} denotes concatenation.  Stateful operations may still be
- * parallelizable, but are not amenable to the automatic parallelization of
- * stateless operations.  Accordingly, a stateful operation must provide its
- * own parallel execution implementation
- * ({@link IntermediateOp#opEvaluateParallel(PipelineHelper, java.util.Spliterator, java.util.function.IntFunction)})
- * as well as {@link IntermediateOp#opWrapSink(int, Sink)}.
- *
- * @param <E> Type of input and output elements.
- *
- * @see IntermediateOp
- * @see TerminalOp
- * @since 1.8
- */
-interface StatefulOp<E> extends IntermediateOp<E, E> {
-
-    @SuppressWarnings({"rawtypes", "unchecked"})
-    public static<T> AbstractPipeline chain(AbstractPipeline upstream,
-                                            StatefulOp op) {
-        switch (op.outputShape()) {
-            case REFERENCE:
-                return new ReferencePipeline.StatefulOp<Object, T>(upstream, op.inputShape(), op.opGetFlags()) {
-                    @Override
-                    Sink opWrapSink(int flags, Sink sink) {