changeset 17356:a6e4d9522919

Merge
author amurillo
date Thu, 21 Jul 2016 20:09:20 -0700
parents 03e2fb699fdc 96288b884a04
children 22ba0da84fee
files test/ProblemList.txt test/java/util/jar/JarFile/mrjar/MultiReleaseJarIterators.java
diffstat 272 files changed, 8396 insertions(+), 4988 deletions(-) [+]
line wrap: on
line diff
--- a/.hgtags	Thu Jul 21 16:29:17 2016 +0200
+++ b/.hgtags	Thu Jul 21 20:09:20 2016 -0700
@@ -370,3 +370,4 @@
 073ab1d4edf5590cf1af7b6d819350c14e425c1a jdk-9+125
 6fda66a5bdf2da8994032b9da2078a4137f4d954 jdk-9+126
 7a97b89ba83077ca62e4aa5a05437adc8f315343 jdk-9+127
+9446c534f0222b4eecfd9d9e25ab37c4fd4400a5 jdk-9+128
--- a/src/java.base/macosx/classes/java/lang/ClassLoaderHelper.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/macosx/classes/java/lang/ClassLoaderHelper.java	Thu Jul 21 20:09:20 2016 -0700
@@ -22,6 +22,7 @@
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */
+
 package java.lang;
 
 import java.io.File;
--- a/src/java.base/macosx/native/libjava/java_props_macosx.c	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/macosx/native/libjava/java_props_macosx.c	Thu Jul 21 20:09:20 2016 -0700
@@ -177,8 +177,14 @@
         OSVerStruct (*procInfoFn)(id rec, SEL sel) = (OSVerStruct(*)(id, SEL))objc_msgSend_stret;
         OSVerStruct osVer = procInfoFn([NSProcessInfo processInfo],
                                        @selector(operatingSystemVersion));
-        NSString *nsVerStr = [NSString stringWithFormat:@"%ld.%ld.%ld",
-                (long)osVer.majorVersion, (long)osVer.minorVersion, (long)osVer.patchVersion];
+        NSString *nsVerStr;
+        if (osVer.patchVersion == 0) { // Omit trailing ".0"
+            nsVerStr = [NSString stringWithFormat:@"%ld.%ld",
+                    (long)osVer.majorVersion, (long)osVer.minorVersion];
+        } else {
+            nsVerStr = [NSString stringWithFormat:@"%ld.%ld.%ld",
+                    (long)osVer.majorVersion, (long)osVer.minorVersion, (long)osVer.patchVersion];
+        }
         // Copy out the char*
         osVersionCStr = strdup([nsVerStr UTF8String]);
     }
--- a/src/java.base/share/classes/com/sun/crypto/provider/AESCipher.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/com/sun/crypto/provider/AESCipher.java	Thu Jul 21 20:09:20 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -172,6 +172,11 @@
      */
     private final int fixedKeySize; // in bytes, -1 if no restriction
 
+    /*
+     * Needed to enforce that ISE is thrown when updateAAD is called after update in GCM mode.
+     */
+    private boolean updateCalled;
+
     /**
      * Creates an instance of AES cipher with default ECB mode and
      * PKCS5Padding.
@@ -304,6 +309,7 @@
     protected void engineInit(int opmode, Key key, SecureRandom random)
         throws InvalidKeyException {
         checkKeySize(key, fixedKeySize);
+        updateCalled = false;
         core.init(opmode, key, random);
     }
 
@@ -336,6 +342,7 @@
                               SecureRandom random)
         throws InvalidKeyException, InvalidAlgorithmParameterException {
         checkKeySize(key, fixedKeySize);
+        updateCalled = false;
         core.init(opmode, key, params, random);
     }
 
@@ -344,6 +351,7 @@
                               SecureRandom random)
         throws InvalidKeyException, InvalidAlgorithmParameterException {
         checkKeySize(key, fixedKeySize);
+        updateCalled = false;
         core.init(opmode, key, params, random);
     }
 
@@ -368,6 +376,7 @@
      */
     protected byte[] engineUpdate(byte[] input, int inputOffset,
                                   int inputLen) {
+        updateCalled = true;
         return core.update(input, inputOffset, inputLen);
     }
 
@@ -397,6 +406,7 @@
     protected int engineUpdate(byte[] input, int inputOffset, int inputLen,
                                byte[] output, int outputOffset)
         throws ShortBufferException {
+        updateCalled = true;
         return core.update(input, inputOffset, inputLen, output,
                            outputOffset);
     }
@@ -433,7 +443,9 @@
      */
     protected byte[] engineDoFinal(byte[] input, int inputOffset, int inputLen)
         throws IllegalBlockSizeException, BadPaddingException {
-        return core.doFinal(input, inputOffset, inputLen);
+        byte[] out = core.doFinal(input, inputOffset, inputLen);
+        updateCalled = false;
+        return out;
     }
 
     /**
@@ -476,8 +488,10 @@
                                 byte[] output, int outputOffset)
         throws IllegalBlockSizeException, ShortBufferException,
                BadPaddingException {
-        return core.doFinal(input, inputOffset, inputLen, output,
-                            outputOffset);
+        int outLen = core.doFinal(input, inputOffset, inputLen, output,
+                                  outputOffset);
+        updateCalled = false;
+        return outLen;
     }
 
     /**
@@ -574,6 +588,9 @@
      */
     @Override
     protected void engineUpdateAAD(byte[] src, int offset, int len) {
+        if (core.getMode() == CipherCore.GCM_MODE && updateCalled) {
+            throw new IllegalStateException("AAD must be supplied before encryption/decryption starts");
+        }
         core.updateAAD(src, offset, len);
     }
 
@@ -606,6 +623,9 @@
      */
     @Override
     protected void engineUpdateAAD(ByteBuffer src) {
+        if (core.getMode() == CipherCore.GCM_MODE && updateCalled) {
+            throw new IllegalStateException("AAD must be supplied before encryption/decryption starts");
+        }
         if (src != null) {
             int aadLen = src.limit() - src.position();
             if (aadLen != 0) {
--- a/src/java.base/share/classes/com/sun/crypto/provider/CipherCore.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/com/sun/crypto/provider/CipherCore.java	Thu Jul 21 20:09:20 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -124,7 +124,7 @@
     private static final int PCBC_MODE = 4;
     private static final int CTR_MODE = 5;
     private static final int CTS_MODE = 6;
-    private static final int GCM_MODE = 7;
+    static final int GCM_MODE = 7;
 
     /*
      * variables used for performing the GCM (key+iv) uniqueness check.
@@ -196,7 +196,7 @@
             cipher = new CounterMode(rawImpl);
             unitBytes = 1;
             padding = null;
-        }  else if (modeUpperCase.startsWith("GCM")) {
+        }  else if (modeUpperCase.equals("GCM")) {
             // can only be used for block ciphers w/ 128-bit block size
             if (blockSize != 16) {
                 throw new NoSuchAlgorithmException
@@ -223,6 +223,15 @@
         }
     }
 
+    /**
+     * Returns the mode of this cipher.
+     *
+     * @return the parsed cipher mode
+     */
+    int getMode() {
+        return cipherMode;
+    }
+
     private static int getNumOfUnit(String mode, int offset, int blockSize)
         throws NoSuchAlgorithmException {
         int result = blockSize; // use blockSize as default value
--- a/src/java.base/share/classes/com/sun/crypto/provider/GaloisCounterMode.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/com/sun/crypto/provider/GaloisCounterMode.java	Thu Jul 21 20:09:20 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -49,6 +49,16 @@
     static int DEFAULT_TAG_LEN = AES_BLOCK_SIZE;
     static int DEFAULT_IV_LEN = 12; // in bytes
 
+    // In NIST SP 800-38D, GCM input size is limited to be no longer
+    // than (2^36 - 32) bytes. Otherwise, the counter will wrap
+    // around and lead to a leak of plaintext.
+    // However, given the current GCM spec requirement that recovered
+    // text can only be returned after successful tag verification,
+    // we must limit the data size to the maximum size of a
+    // Java byte array, i.e. Integer.MAX_VALUE, since all data
+    // can only be returned by the doFinal(...) call.
+    private static final int MAX_BUF_SIZE = Integer.MAX_VALUE;
+
     // buffer for AAD data; if null, meaning update has been called
     private ByteArrayOutputStream aadBuffer = new ByteArrayOutputStream();
     private int sizeOfAAD = 0;
@@ -89,9 +99,13 @@
         }
     }
 
-    // ivLen in bits
-    private static byte[] getLengthBlock(int ivLen) {
+    private static byte[] getLengthBlock(int ivLenInBytes) {
+        long ivLen = ((long)ivLenInBytes) << 3;
         byte[] out = new byte[AES_BLOCK_SIZE];
+        out[8] = (byte)(ivLen >>> 56);
+        out[9] = (byte)(ivLen >>> 48);
+        out[10] = (byte)(ivLen >>> 40);
+        out[11] = (byte)(ivLen >>> 32);
         out[12] = (byte)(ivLen >>> 24);
         out[13] = (byte)(ivLen >>> 16);
         out[14] = (byte)(ivLen >>> 8);
@@ -99,13 +113,22 @@
         return out;
     }
 
-    // aLen and cLen both in bits
-    private static byte[] getLengthBlock(int aLen, int cLen) {
+    private static byte[] getLengthBlock(int aLenInBytes, int cLenInBytes) {
+        long aLen = ((long)aLenInBytes) << 3;
+        long cLen = ((long)cLenInBytes) << 3;
         byte[] out = new byte[AES_BLOCK_SIZE];
+        out[0] = (byte)(aLen >>> 56);
+        out[1] = (byte)(aLen >>> 48);
+        out[2] = (byte)(aLen >>> 40);
+        out[3] = (byte)(aLen >>> 32);
         out[4] = (byte)(aLen >>> 24);
         out[5] = (byte)(aLen >>> 16);
         out[6] = (byte)(aLen >>> 8);
         out[7] = (byte)aLen;
+        out[8] = (byte)(cLen >>> 56);
+        out[9] = (byte)(cLen >>> 48);
+        out[10] = (byte)(cLen >>> 40);
+        out[11] = (byte)(cLen >>> 32);
         out[12] = (byte)(cLen >>> 24);
         out[13] = (byte)(cLen >>> 16);
         out[14] = (byte)(cLen >>> 8);
@@ -142,13 +165,20 @@
             } else {
                 g.update(iv);
             }
-            byte[] lengthBlock = getLengthBlock(iv.length*8);
+            byte[] lengthBlock = getLengthBlock(iv.length);
             g.update(lengthBlock);
             j0 = g.digest();
         }
         return j0;
     }
 
+    private static void checkDataLength(int processed, int len) {
+        if (processed > MAX_BUF_SIZE - len) {
+            throw new ProviderException("SunJCE provider only supports " +
+                "input size up to " + MAX_BUF_SIZE + " bytes");
+        }
+    }
+
     GaloisCounterMode(SymmetricCipher embeddedCipher) {
         super(embeddedCipher);
         aadBuffer = new ByteArrayOutputStream();
@@ -319,20 +349,22 @@
 
     // Feed the AAD data to GHASH, pad if necessary
     void processAAD() {
-        if (aadBuffer != null && aadBuffer.size() > 0) {
-            byte[] aad = aadBuffer.toByteArray();
-            sizeOfAAD = aad.length;
+        if (aadBuffer != null) {
+            if (aadBuffer.size() > 0) {
+                byte[] aad = aadBuffer.toByteArray();
+                sizeOfAAD = aad.length;
+
+                int lastLen = aad.length % AES_BLOCK_SIZE;
+                if (lastLen != 0) {
+                    ghashAllToS.update(aad, 0, aad.length - lastLen);
+                    byte[] padded = expandToOneBlock(aad, aad.length - lastLen,
+                                                     lastLen);
+                    ghashAllToS.update(padded);
+                } else {
+                    ghashAllToS.update(aad);
+                }
+            }
             aadBuffer = null;
-
-            int lastLen = aad.length % AES_BLOCK_SIZE;
-            if (lastLen != 0) {
-                ghashAllToS.update(aad, 0, aad.length - lastLen);
-                byte[] padded = expandToOneBlock(aad, aad.length - lastLen,
-                                                 lastLen);
-                ghashAllToS.update(padded);
-            } else {
-                ghashAllToS.update(aad);
-            }
         }
     }
 
@@ -384,6 +416,9 @@
         if ((len % blockSize) != 0) {
              throw new ProviderException("Internal error in input buffering");
         }
+
+        checkDataLength(processed, len);
+
         processAAD();
         if (len > 0) {
             gctrPAndC.update(in, inOfs, len, out, outOfs);
@@ -405,17 +440,23 @@
      */
     int encryptFinal(byte[] in, int inOfs, int len, byte[] out, int outOfs)
         throws IllegalBlockSizeException, ShortBufferException {
+        if (len > MAX_BUF_SIZE - tagLenBytes) {
+            throw new ShortBufferException
+                ("Can't fit both data and tag into one buffer");
+        }
         if (out.length - outOfs < (len + tagLenBytes)) {
             throw new ShortBufferException("Output buffer too small");
         }
 
+        checkDataLength(processed, len);
+
         processAAD();
         if (len > 0) {
             doLastBlock(in, inOfs, len, out, outOfs, true);
         }
 
         byte[] lengthBlock =
-            getLengthBlock(sizeOfAAD*8, processed*8);
+            getLengthBlock(sizeOfAAD, processed);
         ghashAllToS.update(lengthBlock);
         byte[] s = ghashAllToS.digest();
         byte[] sOut = new byte[s.length];
@@ -447,6 +488,9 @@
         if ((len % blockSize) != 0) {
              throw new ProviderException("Internal error in input buffering");
         }
+
+        checkDataLength(ibuffer.size(), len);
+
         processAAD();
 
         if (len > 0) {
@@ -481,10 +525,21 @@
         if (len < tagLenBytes) {
             throw new AEADBadTagException("Input too short - need tag");
         }
+        // doing this check here also catches the potential integer overflow
+        // in the subsequent output buffer capacity check.
+        checkDataLength(ibuffer.size(), (len - tagLenBytes));
+
         if (out.length - outOfs < ((ibuffer.size() + len) - tagLenBytes)) {
             throw new ShortBufferException("Output buffer too small");
         }
+
         processAAD();
+
+        // get the trailing tag bytes from 'in'
+        byte[] tag = new byte[tagLenBytes];
+        System.arraycopy(in, inOfs + len - tagLenBytes, tag, 0, tagLenBytes);
+        len -= tagLenBytes;
+
         if (len != 0) {
             ibuffer.write(in, inOfs, len);
         }
@@ -495,17 +550,12 @@
         len = in.length;
         ibuffer.reset();
 
-        byte[] tag = new byte[tagLenBytes];
-        // get the trailing tag bytes from 'in'
-        System.arraycopy(in, len - tagLenBytes, tag, 0, tagLenBytes);
-        len -= tagLenBytes;
-
         if (len > 0) {
             doLastBlock(in, inOfs, len, out, outOfs, false);
         }
 
         byte[] lengthBlock =
-            getLengthBlock(sizeOfAAD*8, processed*8);
+            getLengthBlock(sizeOfAAD, processed);
         ghashAllToS.update(lengthBlock);
 
         byte[] s = ghashAllToS.digest();
--- a/src/java.base/share/classes/com/sun/security/ntlm/NTLM.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/com/sun/security/ntlm/NTLM.java	Thu Jul 21 20:09:20 2016 -0700
@@ -169,7 +169,7 @@
 
         byte[] readSecurityBuffer(int offset) throws NTLMException {
             int pos = readInt(offset+4);
-            if (pos == 0) return null;
+            if (pos == 0) return new byte[0];
             try {
                 return Arrays.copyOfRange(
                         internal, pos, pos + readShort(offset));
--- a/src/java.base/share/classes/com/sun/security/ntlm/Server.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/com/sun/security/ntlm/Server.java	Thu Jul 21 20:09:20 2016 -0700
@@ -1,4 +1,3 @@
-
 /*
  * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
--- a/src/java.base/share/classes/java/lang/Class.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/lang/Class.java	Thu Jul 21 20:09:20 2016 -0700
@@ -238,15 +238,11 @@
 
             TypeVariable<?>[] typeparms = component.getTypeParameters();
             if (typeparms.length > 0) {
-                boolean first = true;
-                sb.append('<');
+                StringJoiner sj = new StringJoiner(",", "<", ">");
                 for(TypeVariable<?> typeparm: typeparms) {
-                    if (!first)
-                        sb.append(',');
-                    sb.append(typeparm.getTypeName());
-                    first = false;
+                    sj.add(typeparm.getTypeName());
                 }
-                sb.append('>');
+                sb.append(sj.toString());
             }
 
             for (int i = 0; i < arrayDepth; i++)
--- a/src/java.base/share/classes/java/lang/Math.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/lang/Math.java	Thu Jul 21 20:09:20 2016 -0700
@@ -1613,6 +1613,8 @@
      * @return (<i>a</i>&nbsp;&times;&nbsp;<i>b</i>&nbsp;+&nbsp;<i>c</i>)
      * computed, as if with unlimited range and precision, and rounded
      * once to the nearest {@code double} value
+     *
+     * @since 9
      */
     // @HotSpotIntrinsicCandidate
     public static double fma(double a, double b, double c) {
@@ -1728,6 +1730,8 @@
      * @return (<i>a</i>&nbsp;&times;&nbsp;<i>b</i>&nbsp;+&nbsp;<i>c</i>)
      * computed, as if with unlimited range and precision, and rounded
      * once to the nearest {@code float} value
+     *
+     * @since 9
      */
     // @HotSpotIntrinsicCandidate
     public static float fma(float a, float b, float c) {
--- a/src/java.base/share/classes/java/lang/Runtime.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/lang/Runtime.java	Thu Jul 21 20:09:20 2016 -0700
@@ -945,7 +945,7 @@
     }
 
     /**
-     * A representation of a version string for an implemenation of the
+     * A representation of a version string for an implementation of the
      * Java&nbsp;SE Platform.  A version string contains a version number
      * optionally followed by pre-release and build information.
      *
@@ -1058,10 +1058,10 @@
      * <p> When comparing two version strings, the value of {@code $OPT}, if
      * present, may or may not be significant depending on the chosen
      * comparison method.  The comparison methods {@link #compareTo(Version)
-     * compareTo()} and {@link #compareToIgnoreOpt(Version)
-     * compareToIgnoreOpt()} should be used consistently with the
+     * compareTo()} and {@link #compareToIgnoreOptional(Version)
+     * compareToIgnoreOptional()} should be used consistently with the
      * corresponding methods {@link #equals(Object) equals()} and {@link
-     * #equalsIgnoreOpt(Object) equalsIgnoreOpt()}.  </p>
+     * #equalsIgnoreOptional(Object) equalsIgnoreOptional()}.  </p>
      *
      * <p> A <em>short version string</em>, {@code $SVSTR}, often useful in
      * less formal contexts, is a version number optionally followed by a
@@ -1249,7 +1249,7 @@
          * @throws  NullPointerException
          *          If the given object is {@code null}
          */
-        public int compareToIgnoreOpt(Version ob) {
+        public int compareToIgnoreOptional(Version ob) {
             return compare(ob, true);
         }
 
@@ -1270,7 +1270,7 @@
                 return ret;
 
             if (!ignoreOpt)
-                return compareOpt(ob);
+                return compareOptional(ob);
 
             return 0;
         }
@@ -1325,7 +1325,7 @@
             return 0;
         }
 
-        private int compareOpt(Version ob) {
+        private int compareOptional(Version ob) {
             Optional<String> oOpt = ob.optional();
             if (!optional.isPresent()) {
                 if (oOpt.isPresent())
@@ -1384,7 +1384,7 @@
          */
         @Override
         public boolean equals(Object ob) {
-            boolean ret = equalsIgnoreOpt(ob);
+            boolean ret = equalsIgnoreOptional(ob);
             if (!ret)
                 return false;
 
@@ -1407,7 +1407,7 @@
          *          ignoring the optinal build information
          *
          */
-        public boolean equalsIgnoreOpt(Object ob) {
+        public boolean equalsIgnoreOptional(Object ob) {
             if (this == ob)
                 return true;
             if (!(ob instanceof Version))
--- a/src/java.base/share/classes/java/lang/StrictMath.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/lang/StrictMath.java	Thu Jul 21 20:09:20 2016 -0700
@@ -1276,6 +1276,8 @@
      * @return (<i>a</i>&nbsp;&times;&nbsp;<i>b</i>&nbsp;+&nbsp;<i>c</i>)
      * computed, as if with unlimited range and precision, and rounded
      * once to the nearest {@code double} value
+     *
+     * @since 9
      */
     public static double fma(double a, double b, double c) {
         return Math.fma(a, b, c);
@@ -1328,6 +1330,8 @@
      * @return (<i>a</i>&nbsp;&times;&nbsp;<i>b</i>&nbsp;+&nbsp;<i>c</i>)
      * computed, as if with unlimited range and precision, and rounded
      * once to the nearest {@code float} value
+     *
+     * @since 9
      */
     public static float fma(float a, float b, float c) {
         return Math.fma(a, b, c);
--- a/src/java.base/share/classes/java/lang/invoke/DirectMethodHandle.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/lang/invoke/DirectMethodHandle.java	Thu Jul 21 20:09:20 2016 -0700
@@ -155,7 +155,7 @@
     private static LambdaForm preparedLambdaForm(MemberName m) {
         assert(m.isInvocable()) : m;  // call preparedFieldLambdaForm instead
         MethodType mtype = m.getInvocationType().basicType();
-        assert(!m.isMethodHandleInvoke() || "invokeBasic".equals(m.getName())) : m;
+        assert(!m.isMethodHandleInvoke()) : m;
         int which;
         switch (m.getReferenceKind()) {
         case REF_invokeVirtual:    which = LF_INVVIRTUAL;    break;
--- a/src/java.base/share/classes/java/lang/invoke/LambdaForm.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/lang/invoke/LambdaForm.java	Thu Jul 21 20:09:20 2016 -0700
@@ -1049,7 +1049,7 @@
             this.member = member;
             this.resolvedHandle = resolvedHandle;
              // The following assert is almost always correct, but will fail for corner cases, such as PrivateInvokeTest.
-             //assert(!isInvokeBasic());
+             //assert(!isInvokeBasic(member));
         }
         NamedFunction(MethodType basicInvokerType) {
             assert(basicInvokerType == basicInvokerType.basicType()) : basicInvokerType;
@@ -1060,13 +1060,13 @@
                 // necessary to pass BigArityTest
                 this.member = Invokers.invokeBasicMethod(basicInvokerType);
             }
-            assert(isInvokeBasic());
+            assert(isInvokeBasic(member));
         }
 
-        private boolean isInvokeBasic() {
+        private static boolean isInvokeBasic(MemberName member) {
             return member != null &&
-                   member.isMethodHandleInvoke() &&
-                   "invokeBasic".equals(member.getName());
+                   member.getDeclaringClass() == MethodHandle.class &&
+                  "invokeBasic".equals(member.getName());
         }
 
         // The next 2 constructors are used to break circular dependencies on MH.invokeStatic, etc.
@@ -1204,7 +1204,7 @@
             assert(mh.type().basicType() == MethodType.genericMethodType(arity).changeReturnType(rtype))
                     : Arrays.asList(mh, rtype, arity);
             MemberName member = mh.internalMemberName();
-            if (member != null && member.getName().equals("invokeBasic") && member.isMethodHandleInvoke()) {
+            if (isInvokeBasic(member)) {
                 assert(arity > 0);
                 assert(a[0] instanceof MethodHandle);
                 MethodHandle mh2 = (MethodHandle) a[0];
--- a/src/java.base/share/classes/java/lang/invoke/MemberName.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/lang/invoke/MemberName.java	Thu Jul 21 20:09:20 2016 -0700
@@ -346,7 +346,6 @@
     }
 
     /** Utility method to query if this member is a method handle invocation (invoke or invokeExact).
-     *  Also returns true for the non-public MH.invokeBasic.
      */
     public boolean isMethodHandleInvoke() {
         final int bits = MH_INVOKE_MODS &~ Modifier.PUBLIC;
@@ -361,7 +360,6 @@
         switch (name) {
         case "invoke":
         case "invokeExact":
-        case "invokeBasic":  // internal sig-poly method
             return true;
         default:
             return false;
--- a/src/java.base/share/classes/java/lang/invoke/MethodHandles.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/lang/invoke/MethodHandles.java	Thu Jul 21 20:09:20 2016 -0700
@@ -951,8 +951,6 @@
                 return invoker(type);
             if ("invokeExact".equals(name))
                 return exactInvoker(type);
-            if ("invokeBasic".equals(name))
-                return basicInvoker(type);
             assert(!MemberName.isMethodHandleInvokeName(name));
             return null;
         }
@@ -3268,6 +3266,16 @@
      */
     public static
     MethodHandle dropArguments(MethodHandle target, int pos, List<Class<?>> valueTypes) {
+        return dropArguments0(target, pos, copyTypes(valueTypes));
+    }
+
+    private static List<Class<?>> copyTypes(List<Class<?>> types) {
+        Object[] a = types.toArray();
+        return Arrays.asList(Arrays.copyOf(a, a.length, Class[].class));
+    }
+
+    private static
+    MethodHandle dropArguments0(MethodHandle target, int pos, List<Class<?>> valueTypes) {
         MethodType oldType = target.type();  // get NPE
         int dropped = dropArgumentChecks(oldType, pos, valueTypes);
         MethodType newType = oldType.insertParameterTypes(pos, valueTypes);
@@ -3348,6 +3356,7 @@
     // private version which allows caller some freedom with error handling
     private static MethodHandle dropArgumentsToMatch(MethodHandle target, int skip, List<Class<?>> newTypes, int pos,
                                       boolean nullOnFailure) {
+        newTypes = copyTypes(newTypes);
         List<Class<?>> oldTypes = target.type().parameterList();
         int match = oldTypes.size();
         if (skip != 0) {
@@ -3379,11 +3388,11 @@
         // target: ( S*[skip],        M*[match]  )
         MethodHandle adapter = target;
         if (add > 0) {
-            adapter = dropArguments(adapter, skip+ match, addTypes);
+            adapter = dropArguments0(adapter, skip+ match, addTypes);
         }
         // adapter: (S*[skip],        M*[match], A*[add] )
         if (pos > 0) {
-            adapter = dropArguments(adapter, skip, newTypes.subList(0, pos));
+            adapter = dropArguments0(adapter, skip, newTypes.subList(0, pos));
        }
         // adapter: (S*[skip], P*[pos], M*[match], A*[add] )
         return adapter;
@@ -3787,7 +3796,7 @@
         int filterValues = filterType.parameterCount();
         if (filterValues == 0
                 ? (rtype != void.class)
-                : (rtype != filterType.parameterType(0)))
+                : (rtype != filterType.parameterType(0) || filterValues != 1))
             throw newIllegalArgumentException("target and filter types do not match", targetType, filterType);
     }
 
@@ -4290,7 +4299,7 @@
                 step.set(i, dropArgumentsToMatch(identityOrVoid(t), 0, commonParameterSequence, i));
             }
             if (pred.get(i) == null) {
-                pred.set(i, dropArguments(constant(boolean.class, true), 0, commonParameterSequence));
+                pred.set(i, dropArguments0(constant(boolean.class, true), 0, commonParameterSequence));
             }
             if (fini.get(i) == null) {
                 fini.set(i, empty(methodType(t, commonParameterSequence)));
@@ -4315,7 +4324,7 @@
         return hs.stream().map(h -> {
             int pc = h.type().parameterCount();
             int tpsize = targetParams.size();
-            return pc < tpsize ? dropArguments(h, pc, targetParams.subList(pc, tpsize)) : h;
+            return pc < tpsize ? dropArguments0(h, pc, targetParams.subList(pc, tpsize)) : h;
         }).collect(Collectors.toList());
     }
 
--- a/src/java.base/share/classes/java/lang/module/ModulePath.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/lang/module/ModulePath.java	Thu Jul 21 20:09:20 2016 -0700
@@ -52,6 +52,7 @@
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipFile;
 
@@ -420,7 +421,7 @@
         // scan the entries in the JAR file to locate the .class and service
         // configuration file
         Map<Boolean, Set<String>> map =
-            jf.stream()
+            versionedStream(jf)
               .map(JarEntry::getName)
               .filter(s -> (s.endsWith(".class") ^ s.startsWith(SERVICES_PREFIX)))
               .collect(Collectors.partitioningBy(s -> s.endsWith(".class"),
@@ -503,8 +504,21 @@
         return mn;
     }
 
+    private Stream<JarEntry> versionedStream(JarFile jf) {
+        if (jf.isMultiRelease()) {
+            // a stream of JarEntries whose names are base names and whose
+            // contents are from the corresponding versioned entries in
+            // a multi-release jar file
+            return jf.stream().map(JarEntry::getName)
+                    .filter(name -> !name.startsWith("META-INF/versions/"))
+                    .map(jf::getJarEntry);
+        } else {
+            return jf.stream();
+        }
+    }
+
     private Set<String> jarPackages(JarFile jf) {
-        return jf.stream()
+        return versionedStream(jf)
             .filter(e -> e.getName().endsWith(".class"))
             .map(e -> toPackageName(e.getName()))
             .filter(pkg -> pkg.length() > 0)   // module-info
--- a/src/java.base/share/classes/java/lang/module/package-info.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/lang/module/package-info.java	Thu Jul 21 20:09:20 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, 2016 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
--- a/src/java.base/share/classes/java/lang/reflect/Executable.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/lang/reflect/Executable.java	Thu Jul 21 20:09:20 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,7 @@
 import java.lang.annotation.*;
 import java.util.Map;
 import java.util.Objects;
+import java.util.StringJoiner;
 
 import jdk.internal.misc.SharedSecrets;
 import sun.reflect.annotation.AnnotationParser;
@@ -86,15 +87,6 @@
                getDeclaringClass());
     }
 
-    void separateWithCommas(Class<?>[] types, StringBuilder sb) {
-        for (int j = 0; j < types.length; j++) {
-            sb.append(types[j].getTypeName());
-            if (j < (types.length - 1))
-                sb.append(",");
-        }
-
-    }
-
     void printModifiersIfNonzero(StringBuilder sb, int mask, boolean isDefault) {
         int mod = getModifiers() & mask;
 
@@ -121,13 +113,20 @@
 
             printModifiersIfNonzero(sb, modifierMask, isDefault);
             specificToStringHeader(sb);
+            sb.append('(');
+            StringJoiner sj = new StringJoiner(",");
+            for (Class<?> parameterType : parameterTypes) {
+                sj.add(parameterType.getTypeName());
+            }
+            sb.append(sj.toString());
+            sb.append(')');
 
-            sb.append('(');
-            separateWithCommas(parameterTypes, sb);
-            sb.append(')');
             if (exceptionTypes.length > 0) {
-                sb.append(" throws ");
-                separateWithCommas(exceptionTypes, sb);
+                StringJoiner joiner = new StringJoiner(",", " throws ", "");
+                for (Class<?> exceptionType : exceptionTypes) {
+                    joiner.add(exceptionType.getTypeName());
+                }
+                sb.append(joiner.toString());
             }
             return sb.toString();
         } catch (Exception e) {
@@ -149,42 +148,34 @@
 
             TypeVariable<?>[] typeparms = getTypeParameters();
             if (typeparms.length > 0) {
-                boolean first = true;
-                sb.append('<');
+                StringJoiner sj = new StringJoiner(",", "<", "> ");
                 for(TypeVariable<?> typeparm: typeparms) {
-                    if (!first)
-                        sb.append(',');
-                    // Class objects can't occur here; no need to test
-                    // and call Class.getName().
-                    sb.append(typeparm.toString());
-                    first = false;
+                    sj.add(typeparm.getTypeName());
                 }
-                sb.append("> ");
+                sb.append(sj.toString());
             }
 
             specificToGenericStringHeader(sb);
 
             sb.append('(');
+            StringJoiner sj = new StringJoiner(",");
             Type[] params = getGenericParameterTypes();
             for (int j = 0; j < params.length; j++) {
                 String param = params[j].getTypeName();
                 if (isVarArgs() && (j == params.length - 1)) // replace T[] with T...
                     param = param.replaceFirst("\\[\\]$", "...");
-                sb.append(param);
-                if (j < (params.length - 1))
-                    sb.append(',');
+                sj.add(param);
             }
+            sb.append(sj.toString());
             sb.append(')');
-            Type[] exceptions = getGenericExceptionTypes();
-            if (exceptions.length > 0) {
-                sb.append(" throws ");
-                for (int k = 0; k < exceptions.length; k++) {
-                    sb.append((exceptions[k] instanceof Class)?
-                              ((Class)exceptions[k]).getName():
-                              exceptions[k].toString());
-                    if (k < (exceptions.length - 1))
-                        sb.append(',');
+
+            Type[] exceptionTypes = getGenericExceptionTypes();
+            if (exceptionTypes.length > 0) {
+                StringJoiner joiner = new StringJoiner(",", " throws ", "");
+                for (Type exceptionType : exceptionTypes) {
+                    joiner.add(exceptionType.getTypeName());
                 }
+                sb.append(joiner.toString());
             }
             return sb.toString();
         } catch (Exception e) {
--- a/src/java.base/share/classes/java/nio/file/Files.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/nio/file/Files.java	Thu Jul 21 20:09:20 2016 -0700
@@ -3290,8 +3290,8 @@
      * a size of {@code 0}. All bytes in the byte array are written to the file.
      * The method ensures that the file is closed when all bytes have been
      * written (or an I/O error or other runtime exception is thrown). If an I/O
-     * error occurs then it may do so after the file has created or truncated,
-     * or after some bytes have been written to the file.
+     * error occurs then it may do so after the file has been created or
+     * truncated, or after some bytes have been written to the file.
      *
      * <p> <b>Usage example</b>: By default the method creates a new file or
      * overwrites an existing file. Suppose you instead want to append bytes
@@ -3360,7 +3360,8 @@
      * a size of {@code 0}. The method ensures that the file is closed when all
      * lines have been written (or an I/O error or other runtime exception is
      * thrown). If an I/O error occurs then it may do so after the file has
-     * created or truncated, or after some bytes have been written to the file.
+     * been created or truncated, or after some bytes have been written to the
+     * file.
      *
      * @param   path
      *          the path to the file
--- a/src/java.base/share/classes/java/security/ProtectionDomain.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/security/ProtectionDomain.java	Thu Jul 21 20:09:20 2016 -0700
@@ -132,7 +132,7 @@
 
     /* the PermissionCollection is static (pre 1.4 constructor)
        or dynamic (via a policy refresh) */
-    private boolean staticPermissions;
+    private final boolean staticPermissions;
 
     /*
      * An object used as a key when the ProtectionDomain is stored in a Map.
@@ -143,8 +143,12 @@
      * Creates a new ProtectionDomain with the given CodeSource and
      * Permissions. If the permissions object is not null, then
      *  {@code setReadOnly()} will be called on the passed in
-     * Permissions object. The only permissions granted to this domain
-     * are the ones specified; the current Policy will not be consulted.
+     * Permissions object.
+     * <p>
+     * The permissions granted to this domain are static, i.e.
+     * invoking the {@link #staticPermissionsOnly()} method returns true.
+     * They contain only the ones passed to this constructor and
+     * the current Policy will not be consulted.
      *
      * @param codesource the codesource associated with this domain
      * @param permissions the permissions granted to this domain
@@ -170,9 +174,11 @@
      * Permissions, ClassLoader and array of Principals. If the
      * permissions object is not null, then {@code setReadOnly()}
      * will be called on the passed in Permissions object.
-     * The permissions granted to this domain are dynamic; they include
-     * both the static permissions passed to this constructor, and any
-     * permissions granted to this domain by the current Policy at the
+     * <p>
+     * The permissions granted to this domain are dynamic, i.e.
+     * invoking the {@link #staticPermissionsOnly()} method returns false.
+     * They include both the static permissions passed to this constructor,
+     * and any permissions granted to this domain by the current Policy at the
      * time a permission is checked.
      * <p>
      * This constructor is typically used by
@@ -256,6 +262,19 @@
     }
 
     /**
+     * Returns true if this domain contains only static permissions
+     * and does not check the current {@code Policy} at the time of
+     * permission checking.
+     *
+     * @return true if this domain contains only static permissions.
+     *
+     * @since 9
+     */
+    public final boolean staticPermissionsOnly() {
+        return this.staticPermissions;
+    }
+
+    /**
      * Check and see if this ProtectionDomain implies the permissions
      * expressed in the Permission object.
      * <p>
@@ -263,25 +282,19 @@
      * ProtectionDomain was constructed with a static set of permissions
      * or it was bound to a dynamically mapped set of permissions.
      * <p>
-     * If the ProtectionDomain was constructed to a
-     * {@link #ProtectionDomain(CodeSource, PermissionCollection)
-     * statically bound} PermissionCollection then the permission will
-     * only be checked against the PermissionCollection supplied at
-     * construction.
+     * If the {@link #staticPermissionsOnly()} method returns
+     * true, then the permission will only be checked against the
+     * PermissionCollection supplied at construction.
      * <p>
-     * However, if the ProtectionDomain was constructed with
-     * the constructor variant which supports
-     * {@link #ProtectionDomain(CodeSource, PermissionCollection,
-     * ClassLoader, java.security.Principal[]) dynamically binding}
-     * permissions, then the permission will be checked against the
-     * combination of the PermissionCollection supplied at construction and
+     * Otherwise, the permission will be checked against the combination
+     * of the PermissionCollection supplied at construction and
      * the current Policy binding.
      *
-     * @param permission the Permission object to check.
+     * @param perm the Permission object to check.
      *
-     * @return true if "permission" is implicit to this ProtectionDomain.
+     * @return true if {@code perm} is implied by this ProtectionDomain.
      */
-    public boolean implies(Permission permission) {
+    public boolean implies(Permission perm) {
 
         if (hasAllPerm) {
             // internal permission collection already has AllPermission -
@@ -290,10 +303,10 @@
         }
 
         if (!staticPermissions &&
-            Policy.getPolicyNoCheck().implies(this, permission))
+            Policy.getPolicyNoCheck().implies(this, perm))
             return true;
         if (permissions != null)
-            return permissions.implies(permission);
+            return permissions.implies(perm);
 
         return false;
     }
--- a/src/java.base/share/classes/java/security/Provider.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/security/Provider.java	Thu Jul 21 20:09:20 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1996, 2016, Oracle and/or its affiliates. All rights reserved
+ * Copyright (c) 1996, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
--- a/src/java.base/share/classes/java/util/Queue.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/util/Queue.java	Thu Jul 21 20:09:20 2016 -0700
@@ -124,7 +124,6 @@
  * always well-defined for queues with the same elements but different
  * ordering properties.
  *
- *
  * <p>This interface is a member of the
  * <a href="{@docRoot}/../technotes/guides/collections/index.html">
  * Java Collections Framework</a>.
--- a/src/java.base/share/classes/java/util/concurrent/CompletableFuture.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/util/concurrent/CompletableFuture.java	Thu Jul 21 20:09:20 2016 -0700
@@ -35,6 +35,8 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.util.concurrent.locks.LockSupport;
 import java.util.function.BiConsumer;
 import java.util.function.BiFunction;
@@ -149,26 +151,29 @@
      * applies across normal vs exceptional outcomes, sync vs async
      * actions, binary triggers, and various forms of completions.
      *
-     * Non-nullness of field result (set via CAS) indicates done.  An
-     * AltResult is used to box null as a result, as well as to hold
-     * exceptions.  Using a single field makes completion simple to
-     * detect and trigger.  Encoding and decoding is straightforward
-     * but adds to the sprawl of trapping and associating exceptions
-     * with targets.  Minor simplifications rely on (static) NIL (to
-     * box null results) being the only AltResult with a null
-     * exception field, so we don't usually need explicit comparisons.
-     * Even though some of the generics casts are unchecked (see
-     * SuppressWarnings annotations), they are placed to be
-     * appropriate even if checked.
+     * Non-nullness of volatile field "result" indicates done.  It may
+     * be set directly if known to be thread-confined, else via CAS.
+     * An AltResult is used to box null as a result, as well as to
+     * hold exceptions.  Using a single field makes completion simple
+     * to detect and trigger.  Result encoding and decoding is
+     * straightforward but tedious and adds to the sprawl of trapping
+     * and associating exceptions with targets.  Minor simplifications
+     * rely on (static) NIL (to box null results) being the only
+     * AltResult with a null exception field, so we don't usually need
+     * explicit comparisons.  Even though some of the generics casts
+     * are unchecked (see SuppressWarnings annotations), they are
+     * placed to be appropriate even if checked.
      *
      * Dependent actions are represented by Completion objects linked
      * as Treiber stacks headed by field "stack". There are Completion
-     * classes for each kind of action, grouped into single-input
-     * (UniCompletion), two-input (BiCompletion), projected
-     * (BiCompletions using either (not both) of two inputs), shared
-     * (CoCompletion, used by the second of two sources), zero-input
-     * source actions, and Signallers that unblock waiters. Class
-     * Completion extends ForkJoinTask to enable async execution
+     * classes for each kind of action, grouped into:
+     * - single-input (UniCompletion),
+     * - two-input (BiCompletion),
+     * - projected (BiCompletions using exactly one of two inputs),
+     * - shared (CoCompletion, used by the second of two sources),
+     * - zero-input source actions,
+     * - Signallers that unblock waiters.
+     * Class Completion extends ForkJoinTask to enable async execution
      * (adding no space overhead because we exploit its "tag" methods
      * to maintain claims). It is also declared as Runnable to allow
      * usage with arbitrary executors.
@@ -184,7 +189,7 @@
      *   encounter layers of adapters in common usages.
      *
      * * Boolean CompletableFuture method x(...) (for example
-     *   uniApply) takes all of the arguments needed to check that an
+     *   biApply) takes all of the arguments needed to check that an
      *   action is triggerable, and then either runs the action or
      *   arranges its async execution by executing its Completion
      *   argument, if present. The method returns true if known to be
@@ -194,24 +199,29 @@
      *   method with its held arguments, and on success cleans up.
      *   The mode argument allows tryFire to be called twice (SYNC,
      *   then ASYNC); the first to screen and trap exceptions while
-     *   arranging to execute, and the second when called from a
-     *   task. (A few classes are not used async so take slightly
-     *   different forms.)  The claim() callback suppresses function
-     *   invocation if already claimed by another thread.
+     *   arranging to execute, and the second when called from a task.
+     *   (A few classes are not used async so take slightly different
+     *   forms.)  The claim() callback suppresses function invocation
+     *   if already claimed by another thread.
+     *
+     * * Some classes (for example UniApply) have separate handling
+     *   code for when known to be thread-confined ("now" methods) and
+     *   for when shared (in tryFire), for efficiency.
      *
      * * CompletableFuture method xStage(...) is called from a public
-     *   stage method of CompletableFuture x. It screens user
+     *   stage method of CompletableFuture f. It screens user
      *   arguments and invokes and/or creates the stage object.  If
-     *   not async and x is already complete, the action is run
-     *   immediately.  Otherwise a Completion c is created, pushed to
-     *   x's stack (unless done), and started or triggered via
-     *   c.tryFire.  This also covers races possible if x completes
-     *   while pushing.  Classes with two inputs (for example BiApply)
-     *   deal with races across both while pushing actions.  The
-     *   second completion is a CoCompletion pointing to the first,
-     *   shared so that at most one performs the action.  The
-     *   multiple-arity methods allOf and anyOf do this pairwise to
-     *   form trees of completions.
+     *   not async and already triggerable, the action is run
+     *   immediately.  Otherwise a Completion c is created, and
+     *   submitted to the executor if triggerable, or pushed onto f's
+     *   stack if not.  Completion actions are started via c.tryFire.
+     *   We recheck after pushing to a source future's stack to cover
+     *   possible races if the source completes while pushing.
+     *   Classes with two inputs (for example BiApply) deal with races
+     *   across both while pushing actions.  The second completion is
+     *   a CoCompletion pointing to the first, shared so that at most
+     *   one performs the action.  The multiple-arity methods allOf
+     *   and anyOf do this pairwise to form trees of completions.
      *
      * Note that the generic type parameters of methods vary according
      * to whether "this" is a source, dependent, or completion.
@@ -236,29 +246,30 @@
      * pointing back to its sources. So we null out fields as soon as
      * possible.  The screening checks needed anyway harmlessly ignore
      * null arguments that may have been obtained during races with
-     * threads nulling out fields.  We also try to unlink fired
-     * Completions from stacks that might never be popped (see method
-     * postFire).  Completion fields need not be declared as final or
-     * volatile because they are only visible to other threads upon
-     * safe publication.
+     * threads nulling out fields.  We also try to unlink non-isLive
+     * (fired or cancelled) Completions from stacks that might
+     * otherwise never be popped: Method cleanStack always unlinks non
+     * isLive completions from the head of stack; others may
+     * occasionally remain if racing with other cancellations or
+     * removals.
+     *
+     * Completion fields need not be declared as final or volatile
+     * because they are only visible to other threads upon safe
+     * publication.
      */
 
     volatile Object result;       // Either the result or boxed AltResult
     volatile Completion stack;    // Top of Treiber stack of dependent actions
 
     final boolean internalComplete(Object r) { // CAS from null to r
-        return U.compareAndSwapObject(this, RESULT, null, r);
-    }
-
-    final boolean casStack(Completion cmp, Completion val) {
-        return U.compareAndSwapObject(this, STACK, cmp, val);
+        return RESULT.compareAndSet(this, null, r);
     }
 
     /** Returns true if successfully pushed c onto stack. */
     final boolean tryPushStack(Completion c) {
         Completion h = stack;
-        lazySetNext(c, h);
-        return U.compareAndSwapObject(this, STACK, h, c);
+        NEXT.set(c, h);         // CAS piggyback
+        return STACK.compareAndSet(this, h, c);
     }
 
     /** Unconditionally pushes c onto stack, retrying if necessary. */
@@ -278,8 +289,7 @@
 
     /** Completes with the null value, unless already completed. */
     final boolean completeNull() {
-        return U.compareAndSwapObject(this, RESULT, null,
-                                      NIL);
+        return RESULT.compareAndSet(this, null, NIL);
     }
 
     /** Returns the encoding of the given non-exceptional value. */
@@ -289,8 +299,7 @@
 
     /** Completes with a non-exceptional result, unless already completed. */
     final boolean completeValue(T t) {
-        return U.compareAndSwapObject(this, RESULT, null,
-                                      (t == null) ? NIL : t);
+        return RESULT.compareAndSet(this, null, (t == null) ? NIL : t);
     }
 
     /**
@@ -304,8 +313,7 @@
 
     /** Completes with an exceptional result, unless already completed. */
     final boolean completeThrowable(Throwable x) {
-        return U.compareAndSwapObject(this, RESULT, null,
-                                      encodeThrowable(x));
+        return RESULT.compareAndSet(this, null, encodeThrowable(x));
     }
 
     /**
@@ -332,8 +340,7 @@
      * existing CompletionException.
      */
     final boolean completeThrowable(Throwable x, Object r) {
-        return U.compareAndSwapObject(this, RESULT, null,
-                                      encodeThrowable(x, r));
+        return RESULT.compareAndSet(this, null, encodeThrowable(x, r));
     }
 
     /**
@@ -351,10 +358,11 @@
      */
     static Object encodeRelay(Object r) {
         Throwable x;
-        return (((r instanceof AltResult) &&
-                 (x = ((AltResult)r).ex) != null &&
-                 !(x instanceof CompletionException)) ?
-                new AltResult(new CompletionException(x)) : r);
+        if (r instanceof AltResult
+            && (x = ((AltResult)r).ex) != null
+            && !(x instanceof CompletionException))
+            r = new AltResult(new CompletionException(x));
+        return r;
     }
 
     /**
@@ -362,14 +370,13 @@
      * If exceptional, r is first coerced to a CompletionException.
      */
     final boolean completeRelay(Object r) {
-        return U.compareAndSwapObject(this, RESULT, null,
-                                      encodeRelay(r));
+        return RESULT.compareAndSet(this, null, encodeRelay(r));
     }
 
     /**
      * Reports result using Future.get conventions.
      */
-    private static <T> T reportGet(Object r)
+    private static Object reportGet(Object r)
         throws InterruptedException, ExecutionException {
         if (r == null) // by convention below, null means interrupted
             throw new InterruptedException();
@@ -384,14 +391,13 @@
                 x = cause;
             throw new ExecutionException(x);
         }
-        @SuppressWarnings("unchecked") T t = (T) r;
-        return t;
+        return r;
     }
 
     /**
      * Decodes outcome to return result or throw unchecked exception.
      */
-    private static <T> T reportJoin(Object r) {
+    private static Object reportJoin(Object r) {
         if (r instanceof AltResult) {
             Throwable x;
             if ((x = ((AltResult)r).ex) == null)
@@ -402,8 +408,7 @@
                 throw (CompletionException)x;
             throw new CompletionException(x);
         }
-        @SuppressWarnings("unchecked") T t = (T) r;
-        return t;
+        return r;
     }
 
     /* ------------- Async task preliminaries -------------- */
@@ -449,12 +454,6 @@
     static final int ASYNC  =  1;
     static final int NESTED = -1;
 
-    /**
-     * Spins before blocking in waitingGet
-     */
-    static final int SPINS = (Runtime.getRuntime().availableProcessors() > 1 ?
-                              1 << 8 : 0);
-
     /* ------------- Base Completion classes and operations -------------- */
 
     @SuppressWarnings("serial")
@@ -479,10 +478,6 @@
         public final void setRawResult(Void v) {}
     }
 
-    static void lazySetNext(Completion c, Completion next) {
-        U.putObjectRelease(c, NEXT, next);
-    }
-
     /**
      * Pops and tries to trigger all reachable dependents.  Call only
      * when known to be done.
@@ -497,40 +492,47 @@
         while ((h = f.stack) != null ||
                (f != this && (h = (f = this).stack) != null)) {
             CompletableFuture<?> d; Completion t;
-            if (f.casStack(h, t = h.next)) {
+            if (STACK.compareAndSet(f, h, t = h.next)) {
                 if (t != null) {
                     if (f != this) {
                         pushStack(h);
                         continue;
                     }
-                    h.next = null;    // detach
+                    NEXT.compareAndSet(h, t, null); // try to detach
                 }
                 f = (d = h.tryFire(NESTED)) == null ? this : d;
             }
         }
     }
 
-    /** Traverses stack and unlinks dead Completions. */
+    /** Traverses stack and unlinks one or more dead Completions, if found. */
     final void cleanStack() {
-        for (Completion p = null, q = stack; q != null;) {
+        Completion p = stack;
+        // ensure head of stack live
+        for (boolean unlinked = false;;) {
+            if (p == null)
+                return;
+            else if (p.isLive()) {
+                if (unlinked)
+                    return;
+                else
+                    break;
+            }
+            else if (STACK.weakCompareAndSetVolatile(this, p, (p = p.next)))
+                unlinked = true;
+            else
+                p = stack;
+        }
+        // try to unlink first non-live
+        for (Completion q = p.next; q != null;) {
             Completion s = q.next;
             if (q.isLive()) {
                 p = q;
                 q = s;
-            }
-            else if (p == null) {
-                casStack(q, s);
-                q = stack;
-            }
-            else {
-                p.next = s;
-                if (p.isLive())
-                    q = s;
-                else {
-                    p = null;  // restart
-                    q = stack;
-                }
-            }
+            } else if (NEXT.weakCompareAndSetVolatile(p, q, s))
+                break;
+            else
+                q = p.next;
         }
     }
 
@@ -568,11 +570,20 @@
         final boolean isLive() { return dep != null; }
     }
 
-    /** Pushes the given completion (if it exists) unless done. */
-    final void push(UniCompletion<?,?> c) {
+    /**
+     * Pushes the given completion unless it completes while trying.
+     * Caller should first check that result is null.
+     */
+    final void unipush(Completion c) {
         if (c != null) {
-            while (result == null && !tryPushStack(c))
-                lazySetNext(c, null); // clear on failure
+            while (!tryPushStack(c)) {
+                if (result != null) {
+                    NEXT.set(c, null);
+                    break;
+                }
+            }
+            if (result != null)
+                c.tryFire(SYNC);
         }
     }
 
@@ -583,9 +594,10 @@
      */
     final CompletableFuture<T> postFire(CompletableFuture<?> a, int mode) {
         if (a != null && a.stack != null) {
-            if (a.result == null)
+            Object r;
+            if ((r = a.result) == null)
                 a.cleanStack();
-            else if (mode >= 0)
+            if (mode >= 0 && (r != null || a.result != null))
                 a.postComplete();
         }
         if (result != null && stack != null) {
@@ -607,48 +619,65 @@
         }
         final CompletableFuture<V> tryFire(int mode) {
             CompletableFuture<V> d; CompletableFuture<T> a;
-            if ((d = dep) == null ||
-                !d.uniApply(a = src, fn, mode > 0 ? null : this))
+            Object r; Throwable x; Function<? super T,? extends V> f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (r = a.result) == null)
                 return null;
+            tryComplete: if (d.result == null) {
+                if (r instanceof AltResult) {
+                    if ((x = ((AltResult)r).ex) != null) {
+                        d.completeThrowable(x, r);
+                        break tryComplete;
+                    }
+                    r = null;
+                }
+                try {
+                    if (mode <= 0 && !claim())
+                        return null;
+                    else {
+                        @SuppressWarnings("unchecked") T t = (T) r;
+                        d.completeValue(f.apply(t));
+                    }
+                } catch (Throwable ex) {
+                    d.completeThrowable(ex);
+                }
+            }
             dep = null; src = null; fn = null;
             return d.postFire(a, mode);
         }
     }
 
-    final <S> boolean uniApply(CompletableFuture<S> a,
-                               Function<? super S,? extends T> f,
-                               UniApply<S,T> c) {
-        Object r; Throwable x;
-        if (a == null || (r = a.result) == null || f == null)
-            return false;
-        tryComplete: if (result == null) {
-            if (r instanceof AltResult) {
-                if ((x = ((AltResult)r).ex) != null) {
-                    completeThrowable(x, r);
-                    break tryComplete;
-                }
-                r = null;
-            }
-            try {
-                if (c != null && !c.claim())
-                    return false;
-                @SuppressWarnings("unchecked") S s = (S) r;
-                completeValue(f.apply(s));
-            } catch (Throwable ex) {
-                completeThrowable(ex);
-            }
-        }
-        return true;
-    }
-
     private <V> CompletableFuture<V> uniApplyStage(
         Executor e, Function<? super T,? extends V> f) {
         if (f == null) throw new NullPointerException();
+        Object r;
+        if ((r = result) != null)
+            return uniApplyNow(r, e, f);
         CompletableFuture<V> d = newIncompleteFuture();
-        if (e != null || !d.uniApply(this, f, null)) {
-            UniApply<T,V> c = new UniApply<T,V>(e, d, this, f);
-            push(c);
-            c.tryFire(SYNC);
+        unipush(new UniApply<T,V>(e, d, this, f));
+        return d;
+    }
+
+    private <V> CompletableFuture<V> uniApplyNow(
+        Object r, Executor e, Function<? super T,? extends V> f) {
+        Throwable x;
+        CompletableFuture<V> d = newIncompleteFuture();
+        if (r instanceof AltResult) {
+            if ((x = ((AltResult)r).ex) != null) {
+                d.result = encodeThrowable(x, r);
+                return d;
+            }
+            r = null;
+        }
+        try {
+            if (e != null) {
+                e.execute(new UniApply<T,V>(null, d, this, f));
+            } else {
+                @SuppressWarnings("unchecked") T t = (T) r;
+                d.result = d.encodeValue(f.apply(t));
+            }
+        } catch (Throwable ex) {
+            d.result = encodeThrowable(ex);
         }
         return d;
     }
@@ -662,48 +691,67 @@
         }
         final CompletableFuture<Void> tryFire(int mode) {
             CompletableFuture<Void> d; CompletableFuture<T> a;
-            if ((d = dep) == null ||
-                !d.uniAccept(a = src, fn, mode > 0 ? null : this))
+            Object r; Throwable x; Consumer<? super T> f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (r = a.result) == null)
                 return null;
+            tryComplete: if (d.result == null) {
+                if (r instanceof AltResult) {
+                    if ((x = ((AltResult)r).ex) != null) {
+                        d.completeThrowable(x, r);
+                        break tryComplete;
+                    }
+                    r = null;
+                }
+                try {
+                    if (mode <= 0 && !claim())
+                        return null;
+                    else {
+                        @SuppressWarnings("unchecked") T t = (T) r;
+                        f.accept(t);
+                        d.completeNull();
+                    }
+                } catch (Throwable ex) {
+                    d.completeThrowable(ex);
+                }
+            }
             dep = null; src = null; fn = null;
             return d.postFire(a, mode);
         }
     }
 
-    final <S> boolean uniAccept(CompletableFuture<S> a,
-                                Consumer<? super S> f, UniAccept<S> c) {
-        Object r; Throwable x;
-        if (a == null || (r = a.result) == null || f == null)
-            return false;
-        tryComplete: if (result == null) {
-            if (r instanceof AltResult) {
-                if ((x = ((AltResult)r).ex) != null) {
-                    completeThrowable(x, r);
-                    break tryComplete;
-                }
-                r = null;
-            }
-            try {
-                if (c != null && !c.claim())
-                    return false;
-                @SuppressWarnings("unchecked") S s = (S) r;
-                f.accept(s);
-                completeNull();
-            } catch (Throwable ex) {
-                completeThrowable(ex);
-            }
-        }
-        return true;
-    }
-
     private CompletableFuture<Void> uniAcceptStage(Executor e,
                                                    Consumer<? super T> f) {
         if (f == null) throw new NullPointerException();
+        Object r;
+        if ((r = result) != null)
+            return uniAcceptNow(r, e, f);
         CompletableFuture<Void> d = newIncompleteFuture();
-        if (e != null || !d.uniAccept(this, f, null)) {
-            UniAccept<T> c = new UniAccept<T>(e, d, this, f);
-            push(c);
-            c.tryFire(SYNC);
+        unipush(new UniAccept<T>(e, d, this, f));
+        return d;
+    }
+
+    private CompletableFuture<Void> uniAcceptNow(
+        Object r, Executor e, Consumer<? super T> f) {
+        Throwable x;
+        CompletableFuture<Void> d = newIncompleteFuture();
+        if (r instanceof AltResult) {
+            if ((x = ((AltResult)r).ex) != null) {
+                d.result = encodeThrowable(x, r);
+                return d;
+            }
+            r = null;
+        }
+        try {
+            if (e != null) {
+                e.execute(new UniAccept<T>(null, d, this, f));
+            } else {
+                @SuppressWarnings("unchecked") T t = (T) r;
+                f.accept(t);
+                d.result = NIL;
+            }
+        } catch (Throwable ex) {
+            d.result = encodeThrowable(ex);
         }
         return d;
     }
@@ -717,42 +765,56 @@
         }
         final CompletableFuture<Void> tryFire(int mode) {
             CompletableFuture<Void> d; CompletableFuture<T> a;
-            if ((d = dep) == null ||
-                !d.uniRun(a = src, fn, mode > 0 ? null : this))
+            Object r; Throwable x; Runnable f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (r = a.result) == null)
                 return null;
+            if (d.result == null) {
+                if (r instanceof AltResult && (x = ((AltResult)r).ex) != null)
+                    d.completeThrowable(x, r);
+                else
+                    try {
+                        if (mode <= 0 && !claim())
+                            return null;
+                        else {
+                            f.run();
+                            d.completeNull();
+                        }
+                    } catch (Throwable ex) {
+                        d.completeThrowable(ex);
+                    }
+            }
             dep = null; src = null; fn = null;
             return d.postFire(a, mode);
         }
     }
 
-    final boolean uniRun(CompletableFuture<?> a, Runnable f, UniRun<?> c) {
-        Object r; Throwable x;
-        if (a == null || (r = a.result) == null || f == null)
-            return false;
-        if (result == null) {
-            if (r instanceof AltResult && (x = ((AltResult)r).ex) != null)
-                completeThrowable(x, r);
-            else
-                try {
-                    if (c != null && !c.claim())
-                        return false;
-                    f.run();
-                    completeNull();
-                } catch (Throwable ex) {
-                    completeThrowable(ex);
-                }
-        }
-        return true;
+    private CompletableFuture<Void> uniRunStage(Executor e, Runnable f) {
+        if (f == null) throw new NullPointerException();
+        Object r;
+        if ((r = result) != null)
+            return uniRunNow(r, e, f);
+        CompletableFuture<Void> d = newIncompleteFuture();
+        unipush(new UniRun<T>(e, d, this, f));
+        return d;
     }
 
-    private CompletableFuture<Void> uniRunStage(Executor e, Runnable f) {
-        if (f == null) throw new NullPointerException();
+    private CompletableFuture<Void> uniRunNow(Object r, Executor e, Runnable f) {
+        Throwable x;
         CompletableFuture<Void> d = newIncompleteFuture();
-        if (e != null || !d.uniRun(this, f, null)) {
-            UniRun<T> c = new UniRun<T>(e, d, this, f);
-            push(c);
-            c.tryFire(SYNC);
-        }
+        if (r instanceof AltResult && (x = ((AltResult)r).ex) != null)
+            d.result = encodeThrowable(x, r);
+        else
+            try {
+                if (e != null) {
+                    e.execute(new UniRun<T>(null, d, this, f));
+                } else {
+                    f.run();
+                    d.result = NIL;
+                }
+            } catch (Throwable ex) {
+                d.result = encodeThrowable(ex);
+            }
         return d;
     }
 
@@ -766,20 +828,20 @@
         }
         final CompletableFuture<T> tryFire(int mode) {
             CompletableFuture<T> d; CompletableFuture<T> a;
-            if ((d = dep) == null ||
-                !d.uniWhenComplete(a = src, fn, mode > 0 ? null : this))
+            Object r; BiConsumer<? super T, ? super Throwable> f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (r = a.result) == null
+                || !d.uniWhenComplete(r, f, mode > 0 ? null : this))
                 return null;
             dep = null; src = null; fn = null;
             return d.postFire(a, mode);
         }
     }
 
-    final boolean uniWhenComplete(CompletableFuture<T> a,
+    final boolean uniWhenComplete(Object r,
                                   BiConsumer<? super T,? super Throwable> f,
                                   UniWhenComplete<T> c) {
-        Object r; T t; Throwable x = null;
-        if (a == null || (r = a.result) == null || f == null)
-            return false;
+        T t; Throwable x = null;
         if (result == null) {
             try {
                 if (c != null && !c.claim())
@@ -811,10 +873,17 @@
         Executor e, BiConsumer<? super T, ? super Throwable> f) {
         if (f == null) throw new NullPointerException();
         CompletableFuture<T> d = newIncompleteFuture();
-        if (e != null || !d.uniWhenComplete(this, f, null)) {
-            UniWhenComplete<T> c = new UniWhenComplete<T>(e, d, this, f);
-            push(c);
-            c.tryFire(SYNC);
+        Object r;
+        if ((r = result) == null)
+            unipush(new UniWhenComplete<T>(e, d, this, f));
+        else if (e == null)
+            d.uniWhenComplete(r, f, null);
+        else {
+            try {
+                e.execute(new UniWhenComplete<T>(null, d, this, f));
+            } catch (Throwable ex) {
+                d.result = encodeThrowable(ex);
+            }
         }
         return d;
     }
@@ -829,20 +898,20 @@
         }
         final CompletableFuture<V> tryFire(int mode) {
             CompletableFuture<V> d; CompletableFuture<T> a;
-            if ((d = dep) == null ||
-                !d.uniHandle(a = src, fn, mode > 0 ? null : this))
+            Object r; BiFunction<? super T, Throwable, ? extends V> f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (r = a.result) == null
+                || !d.uniHandle(r, f, mode > 0 ? null : this))
                 return null;
             dep = null; src = null; fn = null;
             return d.postFire(a, mode);
         }
     }
 
-    final <S> boolean uniHandle(CompletableFuture<S> a,
+    final <S> boolean uniHandle(Object r,
                                 BiFunction<? super S, Throwable, ? extends T> f,
                                 UniHandle<S,T> c) {
-        Object r; S s; Throwable x;
-        if (a == null || (r = a.result) == null || f == null)
-            return false;
+        S s; Throwable x;
         if (result == null) {
             try {
                 if (c != null && !c.claim())
@@ -867,10 +936,17 @@
         Executor e, BiFunction<? super T, Throwable, ? extends V> f) {
         if (f == null) throw new NullPointerException();
         CompletableFuture<V> d = newIncompleteFuture();
-        if (e != null || !d.uniHandle(this, f, null)) {
-            UniHandle<T,V> c = new UniHandle<T,V>(e, d, this, f);
-            push(c);
-            c.tryFire(SYNC);
+        Object r;
+        if ((r = result) == null)
+            unipush(new UniHandle<T,V>(e, d, this, f));
+        else if (e == null)
+            d.uniHandle(r, f, null);
+        else {
+            try {
+                e.execute(new UniHandle<T,V>(null, d, this, f));
+            } catch (Throwable ex) {
+                d.result = encodeThrowable(ex);
+            }
         }
         return d;
     }
@@ -885,19 +961,20 @@
         final CompletableFuture<T> tryFire(int mode) { // never ASYNC
             // assert mode != ASYNC;
             CompletableFuture<T> d; CompletableFuture<T> a;
-            if ((d = dep) == null || !d.uniExceptionally(a = src, fn, this))
+            Object r; Function<? super Throwable, ? extends T> f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (r = a.result) == null
+                || !d.uniExceptionally(r, f, this))
                 return null;
             dep = null; src = null; fn = null;
             return d.postFire(a, mode);
         }
     }
 
-    final boolean uniExceptionally(CompletableFuture<T> a,
+    final boolean uniExceptionally(Object r,
                                    Function<? super Throwable, ? extends T> f,
                                    UniExceptionally<T> c) {
-        Object r; Throwable x;
-        if (a == null || (r = a.result) == null || f == null)
-            return false;
+        Throwable x;
         if (result == null) {
             try {
                 if (r instanceof AltResult && (x = ((AltResult)r).ex) != null) {
@@ -917,47 +994,38 @@
         Function<Throwable, ? extends T> f) {
         if (f == null) throw new NullPointerException();
         CompletableFuture<T> d = newIncompleteFuture();
-        if (!d.uniExceptionally(this, f, null)) {
-            UniExceptionally<T> c = new UniExceptionally<T>(d, this, f);
-            push(c);
-            c.tryFire(SYNC);
-        }
+        Object r;
+        if ((r = result) == null)
+            unipush(new UniExceptionally<T>(d, this, f));
+        else
+            d.uniExceptionally(r, f, null);
         return d;
     }
 
     @SuppressWarnings("serial")
-    static final class UniRelay<T> extends UniCompletion<T,T> { // for Compose
+    static final class UniRelay<T> extends UniCompletion<T,T> {
         UniRelay(CompletableFuture<T> dep, CompletableFuture<T> src) {
             super(null, dep, src);
         }
         final CompletableFuture<T> tryFire(int mode) {
-            CompletableFuture<T> d; CompletableFuture<T> a;
-            if ((d = dep) == null || !d.uniRelay(a = src))
+            CompletableFuture<T> d; CompletableFuture<T> a; Object r;
+            if ((d = dep) == null
+                || (a = src) == null || (r = a.result) == null)
                 return null;
+            if (d.result == null)
+                d.completeRelay(r);
             src = null; dep = null;
             return d.postFire(a, mode);
         }
     }
 
-    final boolean uniRelay(CompletableFuture<T> a) {
-        Object r;
-        if (a == null || (r = a.result) == null)
-            return false;
-        if (result == null) // no need to claim
-            completeRelay(r);
-        return true;
-    }
-
     private CompletableFuture<T> uniCopyStage() {
         Object r;
         CompletableFuture<T> d = newIncompleteFuture();
         if ((r = result) != null)
-            d.completeRelay(r);
-        else {
-            UniRelay<T> c = new UniRelay<T>(d, this);
-            push(c);
-            c.tryFire(SYNC);
-        }
+            d.result = encodeRelay(r);
+        else
+            unipush(new UniRelay<T>(d, this));
         return d;
     }
 
@@ -966,9 +1034,7 @@
         if ((r = result) != null)
             return new MinimalStage<T>(encodeRelay(r));
         MinimalStage<T> d = new MinimalStage<T>();
-        UniRelay<T> c = new UniRelay<T>(d, this);
-        push(c);
-        c.tryFire(SYNC);
+        unipush(new UniRelay<T>(d, this));
         return d;
     }
 
@@ -982,54 +1048,48 @@
         }
         final CompletableFuture<V> tryFire(int mode) {
             CompletableFuture<V> d; CompletableFuture<T> a;
-            if ((d = dep) == null ||
-                !d.uniCompose(a = src, fn, mode > 0 ? null : this))
+            Function<? super T, ? extends CompletionStage<V>> f;
+            Object r; Throwable x;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (r = a.result) == null)
                 return null;
+            tryComplete: if (d.result == null) {
+                if (r instanceof AltResult) {
+                    if ((x = ((AltResult)r).ex) != null) {
+                        d.completeThrowable(x, r);
+                        break tryComplete;
+                    }
+                    r = null;
+                }
+                try {
+                    if (mode <= 0 && !claim())
+                        return null;
+                    @SuppressWarnings("unchecked") T t = (T) r;
+                    CompletableFuture<V> g = f.apply(t).toCompletableFuture();
+                    if ((r = g.result) != null)
+                        d.completeRelay(r);
+                    else {
+                        g.unipush(new UniRelay<V>(d, g));
+                        if (d.result == null)
+                            return null;
+                    }
+                } catch (Throwable ex) {
+                    d.completeThrowable(ex);
+                }
+            }
             dep = null; src = null; fn = null;
             return d.postFire(a, mode);
         }
     }
 
-    final <S> boolean uniCompose(
-        CompletableFuture<S> a,
-        Function<? super S, ? extends CompletionStage<T>> f,
-        UniCompose<S,T> c) {
-        Object r; Throwable x;
-        if (a == null || (r = a.result) == null || f == null)
-            return false;
-        tryComplete: if (result == null) {
-            if (r instanceof AltResult) {
-                if ((x = ((AltResult)r).ex) != null) {
-                    completeThrowable(x, r);
-                    break tryComplete;
-                }
-                r = null;
-            }
-            try {
-                if (c != null && !c.claim())
-                    return false;
-                @SuppressWarnings("unchecked") S s = (S) r;
-                CompletableFuture<T> g = f.apply(s).toCompletableFuture();
-                if (g.result == null || !uniRelay(g)) {
-                    UniRelay<T> copy = new UniRelay<T>(this, g);
-                    g.push(copy);
-                    copy.tryFire(SYNC);
-                    if (result == null)
-                        return false;
-                }
-            } catch (Throwable ex) {
-                completeThrowable(ex);
-            }
-        }
-        return true;
-    }
-
     private <V> CompletableFuture<V> uniComposeStage(
         Executor e, Function<? super T, ? extends CompletionStage<V>> f) {
         if (f == null) throw new NullPointerException();
+        CompletableFuture<V> d = newIncompleteFuture();
         Object r, s; Throwable x;
-        CompletableFuture<V> d = newIncompleteFuture();
-        if (e == null && (r = result) != null) {
+        if ((r = result) == null)
+            unipush(new UniCompose<T,V>(e, d, this, f));
+        else if (e == null) {
             if (r instanceof AltResult) {
                 if ((x = ((AltResult)r).ex) != null) {
                     d.result = encodeThrowable(x, r);
@@ -1041,21 +1101,20 @@
                 @SuppressWarnings("unchecked") T t = (T) r;
                 CompletableFuture<V> g = f.apply(t).toCompletableFuture();
                 if ((s = g.result) != null)
-                    d.completeRelay(s);
+                    d.result = encodeRelay(s);
                 else {
-                    UniRelay<V> c = new UniRelay<V>(d, g);
-                    g.push(c);
-                    c.tryFire(SYNC);
+                    g.unipush(new UniRelay<V>(d, g));
                 }
-                return d;
             } catch (Throwable ex) {
                 d.result = encodeThrowable(ex);
-                return d;
             }
         }
-        UniCompose<T,V> c = new UniCompose<T,V>(e, d, this, f);
-        push(c);
-        c.tryFire(SYNC);
+        else
+            try {
+                e.execute(new UniCompose<T,V>(null, d, this, f));
+            } catch (Throwable ex) {
+                d.result = encodeThrowable(ex);
+            }
         return d;
     }
 
@@ -1085,21 +1144,28 @@
         }
         final boolean isLive() {
             BiCompletion<?,?,?> c;
-            return (c = base) != null && c.dep != null;
+            return (c = base) != null
+                // && c.isLive()
+                && c.dep != null;
         }
     }
 
-    /** Pushes completion to this and b unless both done. */
+    /**
+     * Pushes completion to this and b unless both done.
+     * Caller should first check that either result or b.result is null.
+     */
     final void bipush(CompletableFuture<?> b, BiCompletion<?,?,?> c) {
         if (c != null) {
-            Object r;
-            while ((r = result) == null && !tryPushStack(c))
-                lazySetNext(c, null); // clear on failure
-            if (b != null && b != this && b.result == null) {
-                Completion q = (r != null) ? c : new CoCompletion(c);
-                while (b.result == null && !b.tryPushStack(q))
-                    lazySetNext(q, null); // clear on failure
+            while (result == null) {
+                if (tryPushStack(c)) {
+                    if (b.result == null)
+                        b.unipush(new CoCompletion(c));
+                    else if (result != null)
+                        c.tryFire(SYNC);
+                    return;
+                }
             }
+            b.unipush(c);
         }
     }
 
@@ -1107,9 +1173,10 @@
     final CompletableFuture<T> postFire(CompletableFuture<?> a,
                                         CompletableFuture<?> b, int mode) {
         if (b != null && b.stack != null) { // clean second source
-            if (b.result == null)
+            Object r;
+            if ((r = b.result) == null)
                 b.cleanStack();
-            else if (mode >= 0)
+            if (mode >= 0 && (r != null || b.result != null))
                 b.postComplete();
         }
         return postFire(a, mode);
@@ -1127,22 +1194,21 @@
             CompletableFuture<V> d;
             CompletableFuture<T> a;
             CompletableFuture<U> b;
-            if ((d = dep) == null ||
-                !d.biApply(a = src, b = snd, fn, mode > 0 ? null : this))
+            Object r, s; BiFunction<? super T,? super U,? extends V> f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (r = a.result) == null
+                || (b = snd) == null || (s = b.result) == null
+                || !d.biApply(r, s, f, mode > 0 ? null : this))
                 return null;
             dep = null; src = null; snd = null; fn = null;
             return d.postFire(a, b, mode);
         }
     }
 
-    final <R,S> boolean biApply(CompletableFuture<R> a,
-                                CompletableFuture<S> b,
+    final <R,S> boolean biApply(Object r, Object s,
                                 BiFunction<? super R,? super S,? extends T> f,
                                 BiApply<R,S,T> c) {
-        Object r, s; Throwable x;
-        if (a == null || (r = a.result) == null ||
-            b == null || (s = b.result) == null || f == null)
-            return false;
+        Throwable x;
         tryComplete: if (result == null) {
             if (r instanceof AltResult) {
                 if ((x = ((AltResult)r).ex) != null) {
@@ -1174,15 +1240,20 @@
     private <U,V> CompletableFuture<V> biApplyStage(
         Executor e, CompletionStage<U> o,
         BiFunction<? super T,? super U,? extends V> f) {
-        CompletableFuture<U> b;
+        CompletableFuture<U> b; Object r, s;
         if (f == null || (b = o.toCompletableFuture()) == null)
             throw new NullPointerException();
         CompletableFuture<V> d = newIncompleteFuture();
-        if (e != null || !d.biApply(this, b, f, null)) {
-            BiApply<T,U,V> c = new BiApply<T,U,V>(e, d, this, b, f);
-            bipush(b, c);
-            c.tryFire(SYNC);
-        }
+        if ((r = result) == null || (s = b.result) == null)
+            bipush(b, new BiApply<T,U,V>(e, d, this, b, f));
+        else if (e == null)
+            d.biApply(r, s, f, null);
+        else
+            try {
+                e.execute(new BiApply<T,U,V>(null, d, this, b, f));
+            } catch (Throwable ex) {
+                d.result = encodeThrowable(ex);
+            }
         return d;
     }
 
@@ -1198,22 +1269,21 @@
             CompletableFuture<Void> d;
             CompletableFuture<T> a;
             CompletableFuture<U> b;
-            if ((d = dep) == null ||
-                !d.biAccept(a = src, b = snd, fn, mode > 0 ? null : this))
+            Object r, s; BiConsumer<? super T,? super U> f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (r = a.result) == null
+                || (b = snd) == null || (s = b.result) == null
+                || !d.biAccept(r, s, f, mode > 0 ? null : this))
                 return null;
             dep = null; src = null; snd = null; fn = null;
             return d.postFire(a, b, mode);
         }
     }
 
-    final <R,S> boolean biAccept(CompletableFuture<R> a,
-                                 CompletableFuture<S> b,
+    final <R,S> boolean biAccept(Object r, Object s,
                                  BiConsumer<? super R,? super S> f,
                                  BiAccept<R,S> c) {
-        Object r, s; Throwable x;
-        if (a == null || (r = a.result) == null ||
-            b == null || (s = b.result) == null || f == null)
-            return false;
+        Throwable x;
         tryComplete: if (result == null) {
             if (r instanceof AltResult) {
                 if ((x = ((AltResult)r).ex) != null) {
@@ -1246,15 +1316,20 @@
     private <U> CompletableFuture<Void> biAcceptStage(
         Executor e, CompletionStage<U> o,
         BiConsumer<? super T,? super U> f) {
-        CompletableFuture<U> b;
+        CompletableFuture<U> b; Object r, s;
         if (f == null || (b = o.toCompletableFuture()) == null)
             throw new NullPointerException();
         CompletableFuture<Void> d = newIncompleteFuture();
-        if (e != null || !d.biAccept(this, b, f, null)) {
-            BiAccept<T,U> c = new BiAccept<T,U>(e, d, this, b, f);
-            bipush(b, c);
-            c.tryFire(SYNC);
-        }
+        if ((r = result) == null || (s = b.result) == null)
+            bipush(b, new BiAccept<T,U>(e, d, this, b, f));
+        else if (e == null)
+            d.biAccept(r, s, f, null);
+        else
+            try {
+                e.execute(new BiAccept<T,U>(null, d, this, b, f));
+            } catch (Throwable ex) {
+                d.result = encodeThrowable(ex);
+            }
         return d;
     }
 
@@ -1262,8 +1337,7 @@
     static final class BiRun<T,U> extends BiCompletion<T,U,Void> {
         Runnable fn;
         BiRun(Executor executor, CompletableFuture<Void> dep,
-              CompletableFuture<T> src,
-              CompletableFuture<U> snd,
+              CompletableFuture<T> src, CompletableFuture<U> snd,
               Runnable fn) {
             super(executor, dep, src, snd); this.fn = fn;
         }
@@ -1271,25 +1345,25 @@
             CompletableFuture<Void> d;
             CompletableFuture<T> a;
             CompletableFuture<U> b;
-            if ((d = dep) == null ||
-                !d.biRun(a = src, b = snd, fn, mode > 0 ? null : this))
+            Object r, s; Runnable f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (r = a.result) == null
+                || (b = snd) == null || (s = b.result) == null
+                || !d.biRun(r, s, f, mode > 0 ? null : this))
                 return null;
             dep = null; src = null; snd = null; fn = null;
             return d.postFire(a, b, mode);
         }
     }
 
-    final boolean biRun(CompletableFuture<?> a, CompletableFuture<?> b,
-                        Runnable f, BiRun<?,?> c) {
-        Object r, s; Throwable x;
-        if (a == null || (r = a.result) == null ||
-            b == null || (s = b.result) == null || f == null)
-            return false;
+    final boolean biRun(Object r, Object s, Runnable f, BiRun<?,?> c) {
+        Throwable x; Object z;
         if (result == null) {
-            if (r instanceof AltResult && (x = ((AltResult)r).ex) != null)
-                completeThrowable(x, r);
-            else if (s instanceof AltResult && (x = ((AltResult)s).ex) != null)
-                completeThrowable(x, s);
+            if ((r instanceof AltResult
+                 && (x = ((AltResult)(z = r)).ex) != null) ||
+                (s instanceof AltResult
+                 && (x = ((AltResult)(z = s)).ex) != null))
+                completeThrowable(x, z);
             else
                 try {
                     if (c != null && !c.claim())
@@ -1305,52 +1379,52 @@
 
     private CompletableFuture<Void> biRunStage(Executor e, CompletionStage<?> o,
                                                Runnable f) {
-        CompletableFuture<?> b;
+        CompletableFuture<?> b; Object r, s;
         if (f == null || (b = o.toCompletableFuture()) == null)
             throw new NullPointerException();
         CompletableFuture<Void> d = newIncompleteFuture();
-        if (e != null || !d.biRun(this, b, f, null)) {
-            BiRun<T,?> c = new BiRun<>(e, d, this, b, f);
-            bipush(b, c);
-            c.tryFire(SYNC);
-        }
+        if ((r = result) == null || (s = b.result) == null)
+            bipush(b, new BiRun<>(e, d, this, b, f));
+        else if (e == null)
+            d.biRun(r, s, f, null);
+        else
+            try {
+                e.execute(new BiRun<>(null, d, this, b, f));
+            } catch (Throwable ex) {
+                d.result = encodeThrowable(ex);
+            }
         return d;
     }
 
     @SuppressWarnings("serial")
     static final class BiRelay<T,U> extends BiCompletion<T,U,Void> { // for And
         BiRelay(CompletableFuture<Void> dep,
-                CompletableFuture<T> src,
-                CompletableFuture<U> snd) {
+                CompletableFuture<T> src, CompletableFuture<U> snd) {
             super(null, dep, src, snd);
         }
         final CompletableFuture<Void> tryFire(int mode) {
             CompletableFuture<Void> d;
             CompletableFuture<T> a;
             CompletableFuture<U> b;
-            if ((d = dep) == null || !d.biRelay(a = src, b = snd))
+            Object r, s, z; Throwable x;
+            if ((d = dep) == null
+                || (a = src) == null || (r = a.result) == null
+                || (b = snd) == null || (s = b.result) == null)
                 return null;
+            if (d.result == null) {
+                if ((r instanceof AltResult
+                     && (x = ((AltResult)(z = r)).ex) != null) ||
+                    (s instanceof AltResult
+                     && (x = ((AltResult)(z = s)).ex) != null))
+                    d.completeThrowable(x, z);
+                else
+                    d.completeNull();
+            }
             src = null; snd = null; dep = null;
             return d.postFire(a, b, mode);
         }
     }
 
-    boolean biRelay(CompletableFuture<?> a, CompletableFuture<?> b) {
-        Object r, s; Throwable x;
-        if (a == null || (r = a.result) == null ||
-            b == null || (s = b.result) == null)
-            return false;
-        if (result == null) {
-            if (r instanceof AltResult && (x = ((AltResult)r).ex) != null)
-                completeThrowable(x, r);
-            else if (s instanceof AltResult && (x = ((AltResult)s).ex) != null)
-                completeThrowable(x, s);
-            else
-                completeNull();
-        }
-        return true;
-    }
-
     /** Recursively constructs a tree of completions. */
     static CompletableFuture<Void> andTree(CompletableFuture<?>[] cfs,
                                            int lo, int hi) {
@@ -1358,39 +1432,44 @@
         if (lo > hi) // empty
             d.result = NIL;
         else {
-            CompletableFuture<?> a, b;
+            CompletableFuture<?> a, b; Object r, s, z; Throwable x;
             int mid = (lo + hi) >>> 1;
             if ((a = (lo == mid ? cfs[lo] :
                       andTree(cfs, lo, mid))) == null ||
                 (b = (lo == hi ? a : (hi == mid+1) ? cfs[hi] :
                       andTree(cfs, mid+1, hi))) == null)
                 throw new NullPointerException();
-            if (!d.biRelay(a, b)) {
-                BiRelay<?,?> c = new BiRelay<>(d, a, b);
-                a.bipush(b, c);
-                c.tryFire(SYNC);
-            }
+            if ((r = a.result) == null || (s = b.result) == null)
+                a.bipush(b, new BiRelay<>(d, a, b));
+            else if ((r instanceof AltResult
+                      && (x = ((AltResult)(z = r)).ex) != null) ||
+                     (s instanceof AltResult
+                      && (x = ((AltResult)(z = s)).ex) != null))
+                d.result = encodeThrowable(x, z);
+            else
+                d.result = NIL;
         }
         return d;
     }
 
     /* ------------- Projected (Ored) BiCompletions -------------- */
 
-    /** Pushes completion to this and b unless either done. */
+    /**
+     * Pushes completion to this and b unless either done.
+     * Caller should first check that result and b.result are both null.
+     */
     final void orpush(CompletableFuture<?> b, BiCompletion<?,?,?> c) {
         if (c != null) {
-            while ((b == null || b.result == null) && result == null) {
-                if (tryPushStack(c)) {
-                    if (b != null && b != this && b.result == null) {
-                        Completion q = new CoCompletion(c);
-                        while (result == null && b.result == null &&
-                               !b.tryPushStack(q))
-                            lazySetNext(q, null); // clear on failure
-                    }
+            while (!tryPushStack(c)) { // retry until c is pushed onto this future's stack
+                if (result != null) { // this future got a result meanwhile: stop retrying
+                    NEXT.set(c, null); // clear c's next link after the failed push
                     break;
                 }
-                lazySetNext(c, null); // clear on failure
             }
+            if (result != null) // this future already has a result
+                c.tryFire(SYNC); // fire c directly in SYNC mode
+            else
+                b.unipush(new CoCompletion(c)); // also register c on b, wrapped in a CoCompletion
         }
     }
 
@@ -1398,8 +1477,7 @@
     static final class OrApply<T,U extends T,V> extends BiCompletion<T,U,V> {
         Function<? super T,? extends V> fn;
         OrApply(Executor executor, CompletableFuture<V> dep,
-                CompletableFuture<T> src,
-                CompletableFuture<U> snd,
+                CompletableFuture<T> src, CompletableFuture<U> snd,
                 Function<? super T,? extends V> fn) {
             super(executor, dep, src, snd); this.fn = fn;
         }
@@ -1407,54 +1485,46 @@
             CompletableFuture<V> d;
             CompletableFuture<T> a;
             CompletableFuture<U> b;
-            if ((d = dep) == null ||
-                !d.orApply(a = src, b = snd, fn, mode > 0 ? null : this))
+            Object r; Throwable x; Function<? super T,? extends V> f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (b = snd) == null
+                || ((r = a.result) == null && (r = b.result) == null))
                 return null;
+            tryComplete: if (d.result == null) {
+                try {
+                    if (mode <= 0 && !claim())
+                        return null;
+                    if (r instanceof AltResult) {
+                        if ((x = ((AltResult)r).ex) != null) {
+                            d.completeThrowable(x, r);
+                            break tryComplete;
+                        }
+                        r = null;
+                    }
+                    @SuppressWarnings("unchecked") T t = (T) r;
+                    d.completeValue(f.apply(t));
+                } catch (Throwable ex) {
+                    d.completeThrowable(ex);
+                }
+            }
             dep = null; src = null; snd = null; fn = null;
             return d.postFire(a, b, mode);
         }
     }
 
-    final <R,S extends R> boolean orApply(CompletableFuture<R> a,
-                                          CompletableFuture<S> b,
-                                          Function<? super R, ? extends T> f,
-                                          OrApply<R,S,T> c) {
-        Object r; Throwable x;
-        if (a == null || b == null ||
-            ((r = a.result) == null && (r = b.result) == null) || f == null)
-            return false;
-        tryComplete: if (result == null) {
-            try {
-                if (c != null && !c.claim())
-                    return false;
-                if (r instanceof AltResult) {
-                    if ((x = ((AltResult)r).ex) != null) {
-                        completeThrowable(x, r);
-                        break tryComplete;
-                    }
-                    r = null;
-                }
-                @SuppressWarnings("unchecked") R rr = (R) r;
-                completeValue(f.apply(rr));
-            } catch (Throwable ex) {
-                completeThrowable(ex);
-            }
-        }
-        return true;
-    }
-
     private <U extends T,V> CompletableFuture<V> orApplyStage(
-        Executor e, CompletionStage<U> o,
-        Function<? super T, ? extends V> f) {
+        Executor e, CompletionStage<U> o, Function<? super T, ? extends V> f) {
         CompletableFuture<U> b;
         if (f == null || (b = o.toCompletableFuture()) == null)
             throw new NullPointerException();
+
+        Object r; CompletableFuture<? extends T> z;
+        if ((r = (z = this).result) != null ||
+            (r = (z = b).result) != null) // z = whichever source already has a result; this is checked first
+            return z.uniApplyNow(r, e, f); // hand that result to uniApplyNow; no OrApply is created
+
         CompletableFuture<V> d = newIncompleteFuture();
-        if (e != null || !d.orApply(this, b, f, null)) {
-            OrApply<T,U,V> c = new OrApply<T,U,V>(e, d, this, b, f);
-            orpush(b, c);
-            c.tryFire(SYNC);
-        }
+        orpush(b, new OrApply<T,U,V>(e, d, this, b, f)); // neither source had a result above: register an OrApply via orpush
         return d;
     }
 
@@ -1462,8 +1532,7 @@
     static final class OrAccept<T,U extends T> extends BiCompletion<T,U,Void> {
         Consumer<? super T> fn;
         OrAccept(Executor executor, CompletableFuture<Void> dep,
-                 CompletableFuture<T> src,
-                 CompletableFuture<U> snd,
+                 CompletableFuture<T> src, CompletableFuture<U> snd,
                  Consumer<? super T> fn) {
             super(executor, dep, src, snd); this.fn = fn;
         }
@@ -1471,54 +1540,47 @@
             CompletableFuture<Void> d;
             CompletableFuture<T> a;
             CompletableFuture<U> b;
-            if ((d = dep) == null ||
-                !d.orAccept(a = src, b = snd, fn, mode > 0 ? null : this))
+            Object r; Throwable x; Consumer<? super T> f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (b = snd) == null
+                || ((r = a.result) == null && (r = b.result) == null))
                 return null;
+            tryComplete: if (d.result == null) {
+                try {
+                    if (mode <= 0 && !claim())
+                        return null;
+                    if (r instanceof AltResult) {
+                        if ((x = ((AltResult)r).ex) != null) {
+                            d.completeThrowable(x, r);
+                            break tryComplete;
+                        }
+                        r = null;
+                    }
+                    @SuppressWarnings("unchecked") T t = (T) r;
+                    f.accept(t);
+                    d.completeNull();
+                } catch (Throwable ex) {
+                    d.completeThrowable(ex);
+                }
+            }
             dep = null; src = null; snd = null; fn = null;
             return d.postFire(a, b, mode);
         }
     }
 
-    final <R,S extends R> boolean orAccept(CompletableFuture<R> a,
-                                           CompletableFuture<S> b,
-                                           Consumer<? super R> f,
-                                           OrAccept<R,S> c) {
-        Object r; Throwable x;
-        if (a == null || b == null ||
-            ((r = a.result) == null && (r = b.result) == null) || f == null)
-            return false;
-        tryComplete: if (result == null) {
-            try {
-                if (c != null && !c.claim())
-                    return false;
-                if (r instanceof AltResult) {
-                    if ((x = ((AltResult)r).ex) != null) {
-                        completeThrowable(x, r);
-                        break tryComplete;
-                    }
-                    r = null;
-                }
-                @SuppressWarnings("unchecked") R rr = (R) r;
-                f.accept(rr);
-                completeNull();
-            } catch (Throwable ex) {
-                completeThrowable(ex);
-            }
-        }
-        return true;
-    }
-
     private <U extends T> CompletableFuture<Void> orAcceptStage(
         Executor e, CompletionStage<U> o, Consumer<? super T> f) {
         CompletableFuture<U> b;
         if (f == null || (b = o.toCompletableFuture()) == null)
             throw new NullPointerException();
+
+        Object r; CompletableFuture<? extends T> z;
+        if ((r = (z = this).result) != null ||
+            (r = (z = b).result) != null) // z = whichever source already has a result; this is checked first
+            return z.uniAcceptNow(r, e, f); // hand that result to uniAcceptNow; no OrAccept is created
+
         CompletableFuture<Void> d = newIncompleteFuture();
-        if (e != null || !d.orAccept(this, b, f, null)) {
-            OrAccept<T,U> c = new OrAccept<T,U>(e, d, this, b, f);
-            orpush(b, c);
-            c.tryFire(SYNC);
-        }
+        orpush(b, new OrAccept<T,U>(e, d, this, b, f)); // neither source had a result above: register an OrAccept via orpush
         return d;
     }
 
@@ -1526,8 +1588,7 @@
     static final class OrRun<T,U> extends BiCompletion<T,U,Void> {
         Runnable fn;
         OrRun(Executor executor, CompletableFuture<Void> dep,
-              CompletableFuture<T> src,
-              CompletableFuture<U> snd,
+              CompletableFuture<T> src, CompletableFuture<U> snd,
               Runnable fn) {
             super(executor, dep, src, snd); this.fn = fn;
         }
@@ -1535,95 +1596,84 @@
             CompletableFuture<Void> d;
             CompletableFuture<T> a;
             CompletableFuture<U> b;
-            if ((d = dep) == null ||
-                !d.orRun(a = src, b = snd, fn, mode > 0 ? null : this))
+            Object r; Throwable x; Runnable f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (b = snd) == null
+                || ((r = a.result) == null && (r = b.result) == null))
                 return null;
+            if (d.result == null) {
+                try {
+                    if (mode <= 0 && !claim())
+                        return null;
+                    else if (r instanceof AltResult
+                        && (x = ((AltResult)r).ex) != null)
+                        d.completeThrowable(x, r);
+                    else {
+                        f.run();
+                        d.completeNull();
+                    }
+                } catch (Throwable ex) {
+                    d.completeThrowable(ex);
+                }
+            }
             dep = null; src = null; snd = null; fn = null;
             return d.postFire(a, b, mode);
         }
     }
 
-    final boolean orRun(CompletableFuture<?> a, CompletableFuture<?> b,
-                        Runnable f, OrRun<?,?> c) {
-        Object r; Throwable x;
-        if (a == null || b == null ||
-            ((r = a.result) == null && (r = b.result) == null) || f == null)
-            return false;
-        if (result == null) {
-            try {
-                if (c != null && !c.claim())
-                    return false;
-                if (r instanceof AltResult && (x = ((AltResult)r).ex) != null)
-                    completeThrowable(x, r);
-                else {
-                    f.run();
-                    completeNull();
-                }
-            } catch (Throwable ex) {
-                completeThrowable(ex);
-            }
-        }
-        return true;
-    }
-
     private CompletableFuture<Void> orRunStage(Executor e, CompletionStage<?> o,
                                                Runnable f) {
         CompletableFuture<?> b;
         if (f == null || (b = o.toCompletableFuture()) == null)
             throw new NullPointerException();
+
+        Object r; CompletableFuture<?> z;
+        if ((r = (z = this).result) != null ||
+            (r = (z = b).result) != null) // z = whichever source already has a result; this is checked first
+            return z.uniRunNow(r, e, f); // hand that result to uniRunNow; no OrRun is created
+
         CompletableFuture<Void> d = newIncompleteFuture();
-        if (e != null || !d.orRun(this, b, f, null)) {
-            OrRun<T,?> c = new OrRun<>(e, d, this, b, f);
-            orpush(b, c);
-            c.tryFire(SYNC);
-        }
+        orpush(b, new OrRun<>(e, d, this, b, f)); // neither source had a result above: register an OrRun via orpush
         return d;
     }
 
     @SuppressWarnings("serial")
     static final class OrRelay<T,U> extends BiCompletion<T,U,Object> { // for Or
-        OrRelay(CompletableFuture<Object> dep, CompletableFuture<T> src,
-                CompletableFuture<U> snd) {
+        OrRelay(CompletableFuture<Object> dep,
+                CompletableFuture<T> src, CompletableFuture<U> snd) {
             super(null, dep, src, snd);
         }
         final CompletableFuture<Object> tryFire(int mode) {
             CompletableFuture<Object> d;
             CompletableFuture<T> a;
             CompletableFuture<U> b;
-            if ((d = dep) == null || !d.orRelay(a = src, b = snd))
+            Object r;
+            if ((d = dep) == null
+                || (a = src) == null || (b = snd) == null
+                || ((r = a.result) == null && (r = b.result) == null)) // neither source has a result yet
                 return null;
+            d.completeRelay(r); // r is src's result if present, otherwise snd's
             src = null; snd = null; dep = null;
             return d.postFire(a, b, mode);
         }
     }
 
-    final boolean orRelay(CompletableFuture<?> a, CompletableFuture<?> b) {
-        Object r;
-        if (a == null || b == null ||
-            ((r = a.result) == null && (r = b.result) == null))
-            return false;
-        if (result == null)
-            completeRelay(r);
-        return true;
-    }
-
     /** Recursively constructs a tree of completions. */
     static CompletableFuture<Object> orTree(CompletableFuture<?>[] cfs,
                                             int lo, int hi) {
         CompletableFuture<Object> d = new CompletableFuture<Object>();
         if (lo <= hi) {
-            CompletableFuture<?> a, b;
+            CompletableFuture<?> a, b; Object r;
             int mid = (lo + hi) >>> 1;
             if ((a = (lo == mid ? cfs[lo] :
                       orTree(cfs, lo, mid))) == null ||
                 (b = (lo == hi ? a : (hi == mid+1) ? cfs[hi] :
                       orTree(cfs, mid+1, hi))) == null)
                 throw new NullPointerException();
-            if (!d.orRelay(a, b)) {
-                OrRelay<?,?> c = new OrRelay<>(d, a, b);
-                a.orpush(b, c);
-                c.tryFire(SYNC);
-            }
+            if ((r = a.result) != null || (r = b.result) != null) // EITHER input done; a's result preferred, as in OrRelay.tryFire
+                d.result = encodeRelay(r); // d is still private to this call, so a plain write is enough
+            else
+                a.orpush(b, new OrRelay<>(d, a, b)); // both results were null above, as orpush requires
         }
         return d;
     }
@@ -1640,7 +1690,7 @@
 
         public final Void getRawResult() { return null; }
         public final void setRawResult(Void v) {}
-        public final boolean exec() { run(); return true; }
+        public final boolean exec() { run(); return false; }
 
         public void run() {
             CompletableFuture<T> d; Supplier<? extends T> f;
@@ -1676,7 +1726,7 @@
 
         public final Void getRawResult() { return null; }
         public final void setRawResult(Void v) {}
-        public final boolean exec() { run(); return true; }
+        public final boolean exec() { run(); return false; }
 
         public void run() {
             CompletableFuture<Void> d; Runnable f;
@@ -1760,15 +1810,13 @@
     private Object waitingGet(boolean interruptible) {
         Signaller q = null;
         boolean queued = false;
-        int spins = SPINS;
         Object r;
         while ((r = result) == null) {
-            if (spins > 0) {
-                if (ThreadLocalRandom.nextSecondarySeed() >= 0)
-                    --spins;
+            if (q == null) {
+                q = new Signaller(interruptible, 0L, 0L);
+                if (Thread.currentThread() instanceof ForkJoinWorkerThread)
+                    ForkJoinPool.helpAsyncBlocker(defaultExecutor(), q);
             }
-            else if (q == null)
-                q = new Signaller(interruptible, 0L, 0L);
             else if (!queued)
                 queued = tryPushStack(q);
             else {
@@ -1781,16 +1829,14 @@
                     break;
             }
         }
-        if (q != null) {
+        if (q != null && queued) {
             q.thread = null;
-            if (q.interrupted) {
-                if (interruptible)
-                    cleanStack();
-                else
-                    Thread.currentThread().interrupt();
-            }
+            if (!interruptible && q.interrupted)
+                Thread.currentThread().interrupt();
+            if (r == null)
+                cleanStack();
         }
-        if (r != null)
+        if (r != null || (r = result) != null)
             postComplete();
         return r;
     }
@@ -1808,9 +1854,12 @@
             Signaller q = null;
             boolean queued = false;
             Object r;
-            while ((r = result) == null) { // similar to untimed, without spins
-                if (q == null)
+            while ((r = result) == null) { // similar to untimed
+                if (q == null) {
                     q = new Signaller(true, nanos, deadline);
+                    if (Thread.currentThread() instanceof ForkJoinWorkerThread)
+                        ForkJoinPool.helpAsyncBlocker(defaultExecutor(), q);
+                }
                 else if (!queued)
                     queued = tryPushStack(q);
                 else if (q.nanos <= 0L)
@@ -1825,12 +1874,13 @@
                         break;
                 }
             }
-            if (q != null)
+            if (q != null && queued) {
                 q.thread = null;
-            if (r != null)
+                if (r == null)
+                    cleanStack();
+            }
+            if (r != null || (r = result) != null)
                 postComplete();
-            else
-                cleanStack();
             if (r != null || (q != null && q.interrupted))
                 return r;
         }
@@ -1942,9 +1992,12 @@
      * @throws InterruptedException if the current thread was interrupted
      * while waiting
      */
+    @SuppressWarnings("unchecked")
     public T get() throws InterruptedException, ExecutionException {
         Object r;
-        return reportGet((r = result) == null ? waitingGet(true) : r);
+        if ((r = result) == null) // no result yet
+            r = waitingGet(true); // wait with interruptible = true
+        return (T) reportGet(r); // reportGet's return value is cast to T (unchecked)
     }
 
     /**
@@ -1960,11 +2013,14 @@
      * while waiting
      * @throws TimeoutException if the wait timed out
      */
+    @SuppressWarnings("unchecked")
     public T get(long timeout, TimeUnit unit)
         throws InterruptedException, ExecutionException, TimeoutException {
+        long nanos = unit.toNanos(timeout); // convert first, before result is read
         Object r;
-        long nanos = unit.toNanos(timeout);
-        return reportGet((r = result) == null ? timedGet(nanos) : r);
+        if ((r = result) == null) // no result yet
+            r = timedGet(nanos); // wait, bounded by the timeout in nanoseconds
+        return (T) reportGet(r); // reportGet's return value is cast to T (unchecked)
     }
 
     /**
@@ -1981,9 +2037,12 @@
      * @throws CompletionException if this future completed
      * exceptionally or a completion computation threw an exception
      */
+    @SuppressWarnings("unchecked")
     public T join() {
         Object r;
-        return reportJoin((r = result) == null ? waitingGet(false) : r);
+        if ((r = result) == null) // no result yet
+            r = waitingGet(false); // wait with interruptible = false
+        return (T) reportJoin(r); // reportJoin's return value is cast to T (unchecked)
     }
 
     /**
@@ -1996,9 +2055,10 @@
      * @throws CompletionException if this future completed
      * exceptionally or a completion computation threw an exception
      */
+    @SuppressWarnings("unchecked")
     public T getNow(T valueIfAbsent) {
         Object r;
-        return ((r = result) == null) ? valueIfAbsent : reportJoin(r);
+        return ((r = result) == null) ? valueIfAbsent : (T) reportJoin(r); // never waits: fallback value when result is still null
     }
 
     /**
@@ -2775,19 +2835,16 @@
             throw new UnsupportedOperationException(); }
     }
 
-    // Unsafe mechanics
-    private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-    private static final long RESULT;
-    private static final long STACK;
-    private static final long NEXT;
+    // VarHandle mechanics
+    private static final VarHandle RESULT;
+    private static final VarHandle STACK;
+    private static final VarHandle NEXT;
     static {
         try {
-            RESULT = U.objectFieldOffset
-                (CompletableFuture.class.getDeclaredField("result"));
-            STACK = U.objectFieldOffset
-                (CompletableFuture.class.getDeclaredField("stack"));
-            NEXT = U.objectFieldOffset
-                (Completion.class.getDeclaredField("next"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            RESULT = l.findVarHandle(CompletableFuture.class, "result", Object.class);
+            STACK = l.findVarHandle(CompletableFuture.class, "stack", Completion.class);
+            NEXT = l.findVarHandle(Completion.class, "next", Completion.class);
         } catch (ReflectiveOperationException e) {
             throw new Error(e);
         }
--- a/src/java.base/share/classes/java/util/concurrent/ConcurrentHashMap.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/util/concurrent/ConcurrentHashMap.java	Thu Jul 21 20:09:20 2016 -0700
@@ -68,6 +68,7 @@
 import java.util.function.ToLongBiFunction;
 import java.util.function.ToLongFunction;
 import java.util.stream.Stream;
+import jdk.internal.misc.Unsafe;
 
 /**
  * A hash table supporting full concurrency of retrievals and
@@ -747,7 +748,7 @@
     /* ---------------- Table element access -------------- */
 
     /*
-     * Volatile access methods are used for table elements as well as
+     * Atomic access methods are used for table elements as well as
      * elements of in-progress next table while resizing.  All uses of
      * the tab arguments must be null checked by callers.  All callers
      * also paranoically precheck that tab's length is not zero (or an
@@ -757,14 +758,12 @@
      * errors by users, these checks must operate on local variables,
      * which accounts for some odd-looking inline assignments below.
      * Note that calls to setTabAt always occur within locked regions,
-     * and so in principle require only release ordering, not
-     * full volatile semantics, but are currently coded as volatile
-     * writes to be conservative.
+     * and so require only release ordering.
      */
 
     @SuppressWarnings("unchecked")
     static final <K,V> Node<K,V> tabAt(Node<K,V>[] tab, int i) {
-        return (Node<K,V>)U.getObjectVolatile(tab, ((long)i << ASHIFT) + ABASE);
+        return (Node<K,V>)U.getObjectAcquire(tab, ((long)i << ASHIFT) + ABASE); // acquire-mode read at offset (i << ASHIFT) + ABASE
     }
 
     static final <K,V> boolean casTabAt(Node<K,V>[] tab, int i,
@@ -773,7 +772,7 @@
     }
 
     static final <K,V> void setTabAt(Node<K,V>[] tab, int i, Node<K,V> v) {
-        U.putObjectVolatile(tab, ((long)i << ASHIFT) + ABASE, v);
+        U.putObjectRelease(tab, ((long)i << ASHIFT) + ABASE, v); // release-mode write at offset (i << ASHIFT) + ABASE
     }
 
     /* ---------------- Fields -------------- */
@@ -3298,7 +3297,7 @@
             return true;
         }
 
-        private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
+        private static final Unsafe U = Unsafe.getUnsafe();
         private static final long LOCKSTATE;
         static {
             try {
@@ -6341,7 +6340,7 @@
     }
 
     // Unsafe mechanics
-    private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
+    private static final Unsafe U = Unsafe.getUnsafe();
     private static final long SIZECTL;
     private static final long TRANSFERINDEX;
     private static final long BASECOUNT;
--- a/src/java.base/share/classes/java/util/concurrent/ConcurrentLinkedDeque.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/util/concurrent/ConcurrentLinkedDeque.java	Thu Jul 21 20:09:20 2016 -0700
@@ -35,6 +35,8 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.util.AbstractCollection;
 import java.util.Arrays;
 import java.util.Collection;
@@ -292,64 +294,23 @@
         volatile Node<E> prev;
         volatile E item;
         volatile Node<E> next;
+    }
 
-        Node() {  // default constructor for NEXT_TERMINATOR, PREV_TERMINATOR
-        }
-
-        /**
-         * Constructs a new node.  Uses relaxed write because item can
-         * only be seen after publication via casNext or casPrev.
-         */
-        Node(E item) {
-            U.putObject(this, ITEM, item);
-        }
-
-        boolean casItem(E cmp, E val) {
-            return U.compareAndSwapObject(this, ITEM, cmp, val);
-        }
-
-        void lazySetNext(Node<E> val) {
-            U.putObjectRelease(this, NEXT, val);
-        }
-
-        boolean casNext(Node<E> cmp, Node<E> val) {
-            return U.compareAndSwapObject(this, NEXT, cmp, val);
-        }
-
-        void lazySetPrev(Node<E> val) {
-            U.putObjectRelease(this, PREV, val);
-        }
-
-        boolean casPrev(Node<E> cmp, Node<E> val) {
-            return U.compareAndSwapObject(this, PREV, cmp, val);
-        }
-
-        // Unsafe mechanics
-
-        private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-        private static final long PREV;
-        private static final long ITEM;
-        private static final long NEXT;
-
-        static {
-            try {
-                PREV = U.objectFieldOffset
-                    (Node.class.getDeclaredField("prev"));
-                ITEM = U.objectFieldOffset
-                    (Node.class.getDeclaredField("item"));
-                NEXT = U.objectFieldOffset
-                    (Node.class.getDeclaredField("next"));
-            } catch (ReflectiveOperationException e) {
-                throw new Error(e);
-            }
-        }
+    /**
+     * Returns a new node holding item.  Uses relaxed write because item
+     * can only be seen after piggy-backing publication via CAS.
+     */
+    static <E> Node<E> newNode(E item) {
+        Node<E> node = new Node<E>(); // prev, item and next all start out null
+        ITEM.set(node, item); // the relaxed write; ITEM is presumably the VarHandle for Node.item (declared outside this hunk)
+        return node;
     }
 
     /**
      * Links e as first element.
      */
     private void linkFirst(E e) {
-        final Node<E> newNode = new Node<E>(Objects.requireNonNull(e));
+        final Node<E> newNode = newNode(Objects.requireNonNull(e));
 
         restartFromHead:
         for (;;)
@@ -363,13 +324,13 @@
                     continue restartFromHead;
                 else {
                     // p is first node
-                    newNode.lazySetNext(p); // CAS piggyback
-                    if (p.casPrev(null, newNode)) {
+                    NEXT.set(newNode, p); // CAS piggyback
+                    if (PREV.compareAndSet(p, null, newNode)) {
                         // Successful CAS is the linearization point
                         // for e to become an element of this deque,
                         // and for newNode to become "live".
-                        if (p != h) // hop two nodes at a time
-                            casHead(h, newNode);  // Failure is OK.
+                        if (p != h) // hop two nodes at a time; failure is OK
+                            HEAD.weakCompareAndSetVolatile(this, h, newNode);
                         return;
                     }
                     // Lost CAS race to another thread; re-read prev
@@ -381,7 +342,7 @@
      * Links e as last element.
      */
     private void linkLast(E e) {
-        final Node<E> newNode = new Node<E>(Objects.requireNonNull(e));
+        final Node<E> newNode = newNode(Objects.requireNonNull(e));
 
         restartFromTail:
         for (;;)
@@ -395,13 +356,13 @@
                     continue restartFromTail;
                 else {
                     // p is last node
-                    newNode.lazySetPrev(p); // CAS piggyback
-                    if (p.casNext(null, newNode)) {
+                    PREV.set(newNode, p); // CAS piggyback
+                    if (NEXT.compareAndSet(p, null, newNode)) {
                         // Successful CAS is the linearization point
                         // for e to become an element of this deque,
                         // and for newNode to become "live".
-                        if (p != t) // hop two nodes at a time
-                            casTail(t, newNode);  // Failure is OK.
+                        if (p != t) // hop two nodes at a time; failure is OK
+                            TAIL.weakCompareAndSetVolatile(this, t, newNode);
                         return;
                     }
                     // Lost CAS race to another thread; re-read next
@@ -516,8 +477,8 @@
                 updateTail(); // Ensure x is not reachable from tail
 
                 // Finally, actually gc-unlink
-                x.lazySetPrev(isFirst ? prevTerminator() : x);
-                x.lazySetNext(isLast  ? nextTerminator() : x);
+                PREV.setRelease(x, isFirst ? prevTerminator() : x);
+                NEXT.setRelease(x, isLast  ? nextTerminator() : x);
             }
         }
     }
@@ -531,7 +492,8 @@
         // assert first.item == null;
         for (Node<E> o = null, p = next, q;;) {
             if (p.item != null || (q = p.next) == null) {
-                if (o != null && p.prev != p && first.casNext(next, p)) {
+                if (o != null && p.prev != p &&
+                    NEXT.compareAndSet(first, next, p)) {
                     skipDeletedPredecessors(p);
                     if (first.prev == null &&
                         (p.next == null || p.item != null) &&
@@ -541,8 +503,8 @@
                         updateTail(); // Ensure o is not reachable from tail
 
                         // Finally, actually gc-unlink
-                        o.lazySetNext(o);
-                        o.lazySetPrev(prevTerminator());
+                        NEXT.setRelease(o, o);
+                        PREV.setRelease(o, prevTerminator());
                     }
                 }
                 return;
@@ -565,7 +527,8 @@
         // assert last.item == null;
         for (Node<E> o = null, p = prev, q;;) {
             if (p.item != null || (q = p.prev) == null) {
-                if (o != null && p.next != p && last.casPrev(prev, p)) {
+                if (o != null && p.next != p &&
+                    PREV.compareAndSet(last, prev, p)) {
                     skipDeletedSuccessors(p);
                     if (last.next == null &&
                         (p.prev == null || p.item != null) &&
@@ -575,8 +538,8 @@
                         updateTail(); // Ensure o is not reachable from tail
 
                         // Finally, actually gc-unlink
-                        o.lazySetPrev(o);
-                        o.lazySetNext(nextTerminator());
+                        PREV.setRelease(o, o);
+                        NEXT.setRelease(o, nextTerminator());
                     }
                 }
                 return;
@@ -607,7 +570,7 @@
                     (q = (p = q).prev) == null) {
                     // It is possible that p is PREV_TERMINATOR,
                     // but if so, the CAS is guaranteed to fail.
-                    if (casHead(h, p))
+                    if (HEAD.compareAndSet(this, h, p))
                         return;
                     else
                         continue restartFromHead;
@@ -637,7 +600,7 @@
                     (q = (p = q).next) == null) {
                     // It is possible that p is NEXT_TERMINATOR,
                     // but if so, the CAS is guaranteed to fail.
-                    if (casTail(t, p))
+                    if (TAIL.compareAndSet(this, t, p))
                         return;
                     else
                         continue restartFromTail;
@@ -675,7 +638,7 @@
             }
 
             // found active CAS target
-            if (prev == p || x.casPrev(prev, p))
+            if (prev == p || PREV.compareAndSet(x, prev, p))
                 return;
 
         } while (x.item != null || x.next == null);
@@ -706,7 +669,7 @@
             }
 
             // found active CAS target
-            if (next == p || x.casNext(next, p))
+            if (next == p || NEXT.compareAndSet(x, next, p))
                 return;
 
         } while (x.item != null || x.prev == null);
@@ -751,7 +714,7 @@
                 else if (p == h
                          // It is possible that p is PREV_TERMINATOR,
                          // but if so, the CAS is guaranteed to fail.
-                         || casHead(h, p))
+                         || HEAD.compareAndSet(this, h, p))
                     return p;
                 else
                     continue restartFromHead;
@@ -776,7 +739,7 @@
                 else if (p == t
                          // It is possible that p is NEXT_TERMINATOR,
                          // but if so, the CAS is guaranteed to fail.
-                         || casTail(t, p))
+                         || TAIL.compareAndSet(this, t, p))
                     return p;
                 else
                     continue restartFromTail;
@@ -802,7 +765,7 @@
      * Constructs an empty deque.
      */
     public ConcurrentLinkedDeque() {
-        head = tail = new Node<E>(null);
+        head = tail = new Node<E>();
     }
 
     /**
@@ -818,12 +781,12 @@
         // Copy c into a private chain of Nodes
         Node<E> h = null, t = null;
         for (E e : c) {
-            Node<E> newNode = new Node<E>(Objects.requireNonNull(e));
+            Node<E> newNode = newNode(Objects.requireNonNull(e));
             if (h == null)
                 h = t = newNode;
             else {
-                t.lazySetNext(newNode);
-                newNode.lazySetPrev(t);
+                NEXT.set(t, newNode);
+                PREV.set(newNode, t);
                 t = newNode;
             }
         }
@@ -836,12 +799,12 @@
     private void initHeadTail(Node<E> h, Node<E> t) {
         if (h == t) {
             if (h == null)
-                h = t = new Node<E>(null);
+                h = t = new Node<E>();
             else {
                 // Avoid edge case of a single Node with non-null item.
-                Node<E> newNode = new Node<E>(null);
-                t.lazySetNext(newNode);
-                newNode.lazySetPrev(t);
+                Node<E> newNode = new Node<E>();
+                NEXT.set(t, newNode);
+                PREV.set(newNode, t);
                 t = newNode;
             }
         }
@@ -934,7 +897,7 @@
     public E pollFirst() {
         for (Node<E> p = first(); p != null; p = succ(p)) {
             E item = p.item;
-            if (item != null && p.casItem(item, null)) {
+            if (item != null && ITEM.compareAndSet(p, item, null)) {
                 unlink(p);
                 return item;
             }
@@ -945,7 +908,7 @@
     public E pollLast() {
         for (Node<E> p = last(); p != null; p = pred(p)) {
             E item = p.item;
-            if (item != null && p.casItem(item, null)) {
+            if (item != null && ITEM.compareAndSet(p, item, null)) {
                 unlink(p);
                 return item;
             }
@@ -1031,7 +994,8 @@
         Objects.requireNonNull(o);
         for (Node<E> p = first(); p != null; p = succ(p)) {
             E item = p.item;
-            if (item != null && o.equals(item) && p.casItem(item, null)) {
+            if (item != null && o.equals(item) &&
+                ITEM.compareAndSet(p, item, null)) {
                 unlink(p);
                 return true;
             }
@@ -1055,7 +1019,8 @@
         Objects.requireNonNull(o);
         for (Node<E> p = last(); p != null; p = pred(p)) {
             E item = p.item;
-            if (item != null && o.equals(item) && p.casItem(item, null)) {
+            if (item != null && o.equals(item) &&
+                ITEM.compareAndSet(p, item, null)) {
                 unlink(p);
                 return true;
             }
@@ -1159,12 +1124,12 @@
         // Copy c into a private chain of Nodes
         Node<E> beginningOfTheEnd = null, last = null;
         for (E e : c) {
-            Node<E> newNode = new Node<E>(Objects.requireNonNull(e));
+            Node<E> newNode = newNode(Objects.requireNonNull(e));
             if (beginningOfTheEnd == null)
                 beginningOfTheEnd = last = newNode;
             else {
-                last.lazySetNext(newNode);
-                newNode.lazySetPrev(last);
+                NEXT.set(last, newNode);
+                PREV.set(newNode, last);
                 last = newNode;
             }
         }
@@ -1184,16 +1149,16 @@
                     continue restartFromTail;
                 else {
                     // p is last node
-                    beginningOfTheEnd.lazySetPrev(p); // CAS piggyback
-                    if (p.casNext(null, beginningOfTheEnd)) {
+                    PREV.set(beginningOfTheEnd, p); // CAS piggyback
+                    if (NEXT.compareAndSet(p, null, beginningOfTheEnd)) {
                         // Successful CAS is the linearization point
                         // for all elements to be added to this deque.
-                        if (!casTail(t, last)) {
+                        if (!TAIL.weakCompareAndSetVolatile(this, t, last)) {
                             // Try a little harder to update tail,
                             // since we may be adding many elements.
                             t = tail;
                             if (last.next == null)
-                                casTail(t, last);
+                                TAIL.weakCompareAndSetVolatile(this, t, last);
                         }
                         return true;
                     }
@@ -1586,41 +1551,38 @@
         Node<E> h = null, t = null;
         for (Object item; (item = s.readObject()) != null; ) {
             @SuppressWarnings("unchecked")
-            Node<E> newNode = new Node<E>((E) item);
+            Node<E> newNode = newNode((E) item);
             if (h == null)
                 h = t = newNode;
             else {
-                t.lazySetNext(newNode);
-                newNode.lazySetPrev(t);
+                NEXT.set(t, newNode);
+                PREV.set(newNode, t);
                 t = newNode;
             }
         }
         initHeadTail(h, t);
     }
 
-    private boolean casHead(Node<E> cmp, Node<E> val) {
-        return U.compareAndSwapObject(this, HEAD, cmp, val);
-    }
-
-    private boolean casTail(Node<E> cmp, Node<E> val) {
-        return U.compareAndSwapObject(this, TAIL, cmp, val);
-    }
-
-    // Unsafe mechanics
-
-    private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-    private static final long HEAD;
-    private static final long TAIL;
+    // VarHandle mechanics
+    private static final VarHandle HEAD;
+    private static final VarHandle TAIL;
+    private static final VarHandle PREV;
+    private static final VarHandle NEXT;
+    private static final VarHandle ITEM;
     static {
         PREV_TERMINATOR = new Node<Object>();
         PREV_TERMINATOR.next = PREV_TERMINATOR;
         NEXT_TERMINATOR = new Node<Object>();
         NEXT_TERMINATOR.prev = NEXT_TERMINATOR;
         try {
-            HEAD = U.objectFieldOffset
-                (ConcurrentLinkedDeque.class.getDeclaredField("head"));
-            TAIL = U.objectFieldOffset
-                (ConcurrentLinkedDeque.class.getDeclaredField("tail"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            HEAD = l.findVarHandle(ConcurrentLinkedDeque.class, "head",
+                                   Node.class);
+            TAIL = l.findVarHandle(ConcurrentLinkedDeque.class, "tail",
+                                   Node.class);
+            PREV = l.findVarHandle(Node.class, "prev", Node.class);
+            NEXT = l.findVarHandle(Node.class, "next", Node.class);
+            ITEM = l.findVarHandle(Node.class, "item", Object.class);
         } catch (ReflectiveOperationException e) {
             throw new Error(e);
         }
--- a/src/java.base/share/classes/java/util/concurrent/ConcurrentLinkedQueue.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/util/concurrent/ConcurrentLinkedQueue.java	Thu Jul 21 20:09:20 2016 -0700
@@ -35,6 +35,8 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.util.AbstractQueue;
 import java.util.Arrays;
 import java.util.Collection;
@@ -166,9 +168,8 @@
      * this is merely an optimization.
      *
      * When constructing a Node (before enqueuing it) we avoid paying
-     * for a volatile write to item by using Unsafe.putObject instead
-     * of a normal write.  This allows the cost of enqueue to be
-     * "one-and-a-half" CASes.
+     * for a volatile write to item.  This allows the cost of enqueue
+     * to be "one-and-a-half" CASes.
      *
      * Both head and tail may or may not point to a Node with a
      * non-null item.  If the queue is empty, all items must of course
@@ -178,33 +179,21 @@
      * optimization.
      */
 
-    private static class Node<E> {
+    static final class Node<E> {
         volatile E item;
         volatile Node<E> next;
     }
 
     /**
      * Returns a new node holding item.  Uses relaxed write because item
-     * can only be seen after piggy-backing publication via casNext.
+     * can only be seen after piggy-backing publication via CAS.
      */
     static <E> Node<E> newNode(E item) {
         Node<E> node = new Node<E>();
-        U.putObject(node, ITEM, item);
+        ITEM.set(node, item);
         return node;
     }
 
-    static <E> boolean casItem(Node<E> node, E cmp, E val) {
-        return U.compareAndSwapObject(node, ITEM, cmp, val);
-    }
-
-    static <E> void lazySetNext(Node<E> node, Node<E> val) {
-        U.putObjectRelease(node, NEXT, val);
-    }
-
-    static <E> boolean casNext(Node<E> node, Node<E> cmp, Node<E> val) {
-        return U.compareAndSwapObject(node, NEXT, cmp, val);
-    }
-
     /**
      * A node from which the first live (non-deleted) node (if any)
      * can be reached in O(1) time.
@@ -256,7 +245,7 @@
             if (h == null)
                 h = t = newNode;
             else {
-                lazySetNext(t, newNode);
+                NEXT.set(t, newNode);
                 t = newNode;
             }
         }
@@ -286,8 +275,8 @@
      */
     final void updateHead(Node<E> h, Node<E> p) {
         // assert h != null && p != null && (h == p || h.item == null);
-        if (h != p && casHead(h, p))
-            lazySetNext(h, h);
+        if (h != p && HEAD.compareAndSet(this, h, p))
+            NEXT.setRelease(h, h);
     }
 
     /**
@@ -314,12 +303,12 @@
             Node<E> q = p.next;
             if (q == null) {
                 // p is last node
-                if (casNext(p, null, newNode)) {
+                if (NEXT.compareAndSet(p, null, newNode)) {
                     // Successful CAS is the linearization point
                     // for e to become an element of this queue,
                     // and for newNode to become "live".
-                    if (p != t) // hop two nodes at a time
-                        casTail(t, newNode);  // Failure is OK.
+                    if (p != t) // hop two nodes at a time; failure is OK
+                        TAIL.weakCompareAndSetVolatile(this, t, newNode);
                     return true;
                 }
                 // Lost CAS race to another thread; re-read next
@@ -342,7 +331,7 @@
             for (Node<E> h = head, p = h, q;;) {
                 E item = p.item;
 
-                if (item != null && casItem(p, item, null)) {
+                if (item != null && ITEM.compareAndSet(p, item, null)) {
                     // Successful CAS is the linearization point
                     // for item to be removed from this queue.
                     if (p != h) // hop two nodes at a time
@@ -483,12 +472,12 @@
                         next = succ(p);
                         continue;
                     }
-                    removed = casItem(p, item, null);
+                    removed = ITEM.compareAndSet(p, item, null);
                 }
 
                 next = succ(p);
                 if (pred != null && next != null) // unlink
-                    casNext(pred, p, next);
+                    NEXT.weakCompareAndSetVolatile(pred, p, next);
                 if (removed)
                     return true;
             }
@@ -520,7 +509,7 @@
             if (beginningOfTheEnd == null)
                 beginningOfTheEnd = last = newNode;
             else {
-                lazySetNext(last, newNode);
+                NEXT.set(last, newNode);
                 last = newNode;
             }
         }
@@ -532,15 +521,15 @@
             Node<E> q = p.next;
             if (q == null) {
                 // p is last node
-                if (casNext(p, null, beginningOfTheEnd)) {
+                if (NEXT.compareAndSet(p, null, beginningOfTheEnd)) {
                     // Successful CAS is the linearization point
                     // for all elements to be added to this queue.
-                    if (!casTail(t, last)) {
+                    if (!TAIL.weakCompareAndSetVolatile(this, t, last)) {
                         // Try a little harder to update tail,
                         // since we may be adding many elements.
                         t = tail;
                         if (last.next == null)
-                            casTail(t, last);
+                            TAIL.weakCompareAndSetVolatile(this, t, last);
                     }
                     return true;
                 }
@@ -744,7 +733,7 @@
                 }
                 // unlink deleted nodes
                 if ((q = succ(p)) != null)
-                    casNext(pred, p, q);
+                    NEXT.compareAndSet(pred, p, q);
             }
         }
 
@@ -801,7 +790,7 @@
             if (h == null)
                 h = t = newNode;
             else {
-                lazySetNext(t, newNode);
+                NEXT.set(t, newNode);
                 t = newNode;
             }
         }
@@ -919,31 +908,20 @@
         return new CLQSpliterator<E>(this);
     }
 
-    private boolean casTail(Node<E> cmp, Node<E> val) {
-        return U.compareAndSwapObject(this, TAIL, cmp, val);
-    }
-
-    private boolean casHead(Node<E> cmp, Node<E> val) {
-        return U.compareAndSwapObject(this, HEAD, cmp, val);
-    }
-
-    // Unsafe mechanics
-
-    private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-    private static final long HEAD;
-    private static final long TAIL;
-    private static final long ITEM;
-    private static final long NEXT;
+    // VarHandle mechanics
+    private static final VarHandle HEAD;
+    private static final VarHandle TAIL;
+    private static final VarHandle ITEM;
+    private static final VarHandle NEXT;
     static {
         try {
-            HEAD = U.objectFieldOffset
-                (ConcurrentLinkedQueue.class.getDeclaredField("head"));
-            TAIL = U.objectFieldOffset
-                (ConcurrentLinkedQueue.class.getDeclaredField("tail"));
-            ITEM = U.objectFieldOffset
-                (Node.class.getDeclaredField("item"));
-            NEXT = U.objectFieldOffset
-                (Node.class.getDeclaredField("next"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            HEAD = l.findVarHandle(ConcurrentLinkedQueue.class, "head",
+                                   Node.class);
+            TAIL = l.findVarHandle(ConcurrentLinkedQueue.class, "tail",
+                                   Node.class);
+            ITEM = l.findVarHandle(Node.class, "item", Object.class);
+            NEXT = l.findVarHandle(Node.class, "next", Node.class);
         } catch (ReflectiveOperationException e) {
             throw new Error(e);
         }
--- a/src/java.base/share/classes/java/util/concurrent/ConcurrentSkipListMap.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/util/concurrent/ConcurrentSkipListMap.java	Thu Jul 21 20:09:20 2016 -0700
@@ -35,6 +35,8 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.io.Serializable;
 import java.util.AbstractCollection;
 import java.util.AbstractMap;
@@ -401,7 +403,7 @@
      * compareAndSet head node.
      */
     private boolean casHead(HeadIndex<K,V> cmp, HeadIndex<K,V> val) {
-        return U.compareAndSwapObject(this, HEAD, cmp, val);
+        return HEAD.compareAndSet(this, cmp, val);
     }
 
     /* ---------------- Nodes -------------- */
@@ -444,14 +446,14 @@
          * compareAndSet value field.
          */
         boolean casValue(Object cmp, Object val) {
-            return U.compareAndSwapObject(this, VALUE, cmp, val);
+            return VALUE.compareAndSet(this, cmp, val);
         }
 
         /**
          * compareAndSet next field.
          */
         boolean casNext(Node<K,V> cmp, Node<K,V> val) {
-            return U.compareAndSwapObject(this, NEXT, cmp, val);
+            return NEXT.compareAndSet(this, cmp, val);
         }
 
         /**
@@ -532,20 +534,16 @@
             return new AbstractMap.SimpleImmutableEntry<K,V>(key, vv);
         }
 
-        // Unsafe mechanics
-
-        private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-        private static final long VALUE;
-        private static final long NEXT;
-
+        // VarHandle mechanics
+        private static final VarHandle VALUE;
+        private static final VarHandle NEXT;
         static {
             try {
-                VALUE = U.objectFieldOffset
-                    (Node.class.getDeclaredField("value"));
-                NEXT = U.objectFieldOffset
-                    (Node.class.getDeclaredField("next"));
+                MethodHandles.Lookup l = MethodHandles.lookup();
+                VALUE = l.findVarHandle(Node.class, "value", Object.class);
+                NEXT = l.findVarHandle(Node.class, "next", Node.class);
             } catch (ReflectiveOperationException e) {
                 throw new Error(e);
             }
         }
     }
@@ -577,7 +575,7 @@
          * compareAndSet right field.
          */
         final boolean casRight(Index<K,V> cmp, Index<K,V> val) {
-            return U.compareAndSwapObject(this, RIGHT, cmp, val);
+            return RIGHT.compareAndSet(this, cmp, val);
         }
 
         /**
@@ -613,13 +611,12 @@
             return node.value != null && casRight(succ, succ.right);
         }
 
-        // Unsafe mechanics
-        private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-        private static final long RIGHT;
+        // VarHandle mechanics
+        private static final VarHandle RIGHT;
         static {
             try {
-                RIGHT = U.objectFieldOffset
-                    (Index.class.getDeclaredField("right"));
+                MethodHandles.Lookup l = MethodHandles.lookup();
+                RIGHT = l.findVarHandle(Index.class, "right", Index.class);
             } catch (ReflectiveOperationException e) {
                 throw new Error(e);
             }
@@ -3607,13 +3604,13 @@
         }
     }
 
-    // Unsafe mechanics
-    private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-    private static final long HEAD;
+    // VarHandle mechanics
+    private static final VarHandle HEAD;
     static {
         try {
-            HEAD = U.objectFieldOffset
-                (ConcurrentSkipListMap.class.getDeclaredField("head"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            HEAD = l.findVarHandle(ConcurrentSkipListMap.class, "head",
+                                   HeadIndex.class);
         } catch (ReflectiveOperationException e) {
             throw new Error(e);
         }
--- a/src/java.base/share/classes/java/util/concurrent/ConcurrentSkipListSet.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/util/concurrent/ConcurrentSkipListSet.java	Thu Jul 21 20:09:20 2016 -0700
@@ -35,6 +35,8 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.util.AbstractSet;
 import java.util.Collection;
 import java.util.Collections;
@@ -507,15 +509,16 @@
 
     // Support for resetting map in clone
     private void setMap(ConcurrentNavigableMap<E,Object> map) {
-        U.putObjectVolatile(this, MAP, map);
+        MAP.setVolatile(this, map);
     }
 
-    private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-    private static final long MAP;
+    // VarHandle mechanics
+    private static final VarHandle MAP;
     static {
         try {
-            MAP = U.objectFieldOffset
-                (ConcurrentSkipListSet.class.getDeclaredField("m"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            MAP = l.findVarHandle(ConcurrentSkipListSet.class, "m",
+                                  ConcurrentNavigableMap.class);
         } catch (ReflectiveOperationException e) {
             throw new Error(e);
         }
--- a/src/java.base/share/classes/java/util/concurrent/CopyOnWriteArrayList.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/util/concurrent/CopyOnWriteArrayList.java	Thu Jul 21 20:09:20 2016 -0700
@@ -34,6 +34,7 @@
 
 package java.util.concurrent;
 
+import java.lang.reflect.Field;
 import java.util.AbstractList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -1541,17 +1542,21 @@
         }
     }
 
-    // Support for resetting lock while deserializing
+    /** Initializes the lock; for use when deserializing or cloning. */
     private void resetLock() {
-        U.putObjectVolatile(this, LOCK, new Object());
-    }
-    private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-    private static final long LOCK;
-    static {
+        Field lockField = java.security.AccessController.doPrivileged(
+            (java.security.PrivilegedAction<Field>) () -> {
+                try {
+                    Field f = CopyOnWriteArrayList.class
+                        .getDeclaredField("lock");
+                    f.setAccessible(true);
+                    return f;
+                } catch (ReflectiveOperationException e) {
+                    throw new Error(e);
+                }});
         try {
-            LOCK = U.objectFieldOffset
-                (CopyOnWriteArrayList.class.getDeclaredField("lock"));
-        } catch (ReflectiveOperationException e) {
+            lockField.set(this, new Object());
+        } catch (IllegalAccessException e) {
             throw new Error(e);
         }
     }
--- a/src/java.base/share/classes/java/util/concurrent/CountedCompleter.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/util/concurrent/CountedCompleter.java	Thu Jul 21 20:09:20 2016 -0700
@@ -35,6 +35,9 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
+
 /**
  * A {@link ForkJoinTask} with a completion action performed when
  * triggered and there are no remaining pending actions.
@@ -524,7 +527,7 @@
      * @param delta the value to add
      */
     public final void addToPendingCount(int delta) {
-        U.getAndAddInt(this, PENDING, delta);
+        PENDING.getAndAdd(this, delta);
     }
 
     /**
@@ -536,7 +539,7 @@
      * @return {@code true} if successful
      */
     public final boolean compareAndSetPendingCount(int expected, int count) {
-        return U.compareAndSwapInt(this, PENDING, expected, count);
+        return PENDING.compareAndSet(this, expected, count);
     }
 
     /**
@@ -548,7 +551,7 @@
     public final int decrementPendingCountUnlessZero() {
         int c;
         do {} while ((c = pending) != 0 &&
-                     !U.compareAndSwapInt(this, PENDING, c, c - 1));
+                     !PENDING.weakCompareAndSetVolatile(this, c, c - 1));
         return c;
     }
 
@@ -581,7 +584,7 @@
                     return;
                 }
             }
-            else if (U.compareAndSwapInt(a, PENDING, c, c - 1))
+            else if (PENDING.weakCompareAndSetVolatile(a, c, c - 1))
                 return;
         }
     }
@@ -604,7 +607,7 @@
                     return;
                 }
             }
-            else if (U.compareAndSwapInt(a, PENDING, c, c - 1))
+            else if (PENDING.weakCompareAndSetVolatile(a, c, c - 1))
                 return;
         }
     }
@@ -649,7 +652,7 @@
         for (int c;;) {
             if ((c = pending) == 0)
                 return this;
-            else if (U.compareAndSwapInt(this, PENDING, c, c - 1))
+            else if (PENDING.weakCompareAndSetVolatile(this, c, c - 1))
                 return null;
         }
     }
@@ -753,13 +756,13 @@
      */
     protected void setRawResult(T t) { }
 
-    // Unsafe mechanics
-    private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-    private static final long PENDING;
+    // VarHandle mechanics
+    private static final VarHandle PENDING;
     static {
         try {
-            PENDING = U.objectFieldOffset
-                (CountedCompleter.class.getDeclaredField("pending"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            PENDING = l.findVarHandle(CountedCompleter.class, "pending", int.class);
+
         } catch (ReflectiveOperationException e) {
             throw new Error(e);
         }
--- a/src/java.base/share/classes/java/util/concurrent/Exchanger.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/util/concurrent/Exchanger.java	Thu Jul 21 20:09:20 2016 -0700
@@ -36,6 +36,10 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
+import java.util.concurrent.locks.LockSupport;
+
 /**
  * A synchronization point at which threads can pair and swap elements
  * within pairs.  Each thread presents some object on entry to the
@@ -155,9 +159,7 @@
      * a value that is enough for common platforms.  Additionally,
      * extra care elsewhere is taken to avoid other false/unintended
      * sharing and to enhance locality, including adding padding (via
-     * @Contended) to Nodes, embedding "bound" as an Exchanger field,
-     * and reworking some park/unpark mechanics compared to
-     * LockSupport versions.
+     * @Contended) to Nodes and embedding "bound" as an Exchanger field.
      *
      * The arena starts out with only one used slot. We expand the
      * effective arena size by tracking collisions; i.e., failed CASes
@@ -234,12 +236,12 @@
      * because most of the logic relies on reads of fields that are
      * maintained as local variables so can't be nicely factored --
      * mainly, here, bulky spin->yield->block/cancel code), and
-     * heavily dependent on intrinsics (Unsafe) to use inlined
+     * heavily dependent on intrinsics (VarHandles) to use inlined
      * embedded CAS and related memory access operations (that tend
      * not to be as readily inlined by dynamic compilers when they are
      * hidden behind other methods that would more nicely name and
      * encapsulate the intended effects). This includes the use of
-     * putXRelease to clear fields of the per-thread Nodes between
+     * setRelease to clear fields of the per-thread Nodes between
      * uses. Note that field Node.item is not declared as volatile
      * even though it is read by releasing threads, because they only
      * do so after CAS operations that must precede access, and all
@@ -252,10 +254,10 @@
      */
 
     /**
-     * The byte distance (as a shift value) between any two used slots
-     * in the arena.  1 << ASHIFT should be at least cacheline size.
+     * The index distance (as a shift value) between any two used slots
+     * in the arena, spacing them out to avoid false sharing.
      */
-    private static final int ASHIFT = 7;
+    private static final int ASHIFT = 5;
 
     /**
      * The maximum supported arena index. The maximum allocatable
@@ -356,27 +358,31 @@
      */
     private final Object arenaExchange(Object item, boolean timed, long ns) {
         Node[] a = arena;
+        int alen = a.length;
         Node p = participant.get();
         for (int i = p.index;;) {                      // access slot at i
-            int b, m, c; long j;                       // j is raw array offset
-            Node q = (Node)U.getObjectVolatile(a, j = (i << ASHIFT) + ABASE);
-            if (q != null && U.compareAndSwapObject(a, j, q, null)) {
+            int b, m, c;
+            int j = (i << ASHIFT) + ((1 << ASHIFT) - 1);
+            if (j < 0 || j >= alen)
+                j = alen - 1;
+            Node q = (Node)AA.getAcquire(a, j);
+            if (q != null && AA.compareAndSet(a, j, q, null)) {
                 Object v = q.item;                     // release
                 q.match = item;
                 Thread w = q.parked;
                 if (w != null)
-                    U.unpark(w);
+                    LockSupport.unpark(w);
                 return v;
             }
             else if (i <= (m = (b = bound) & MMASK) && q == null) {
                 p.item = item;                         // offer
-                if (U.compareAndSwapObject(a, j, null, p)) {
+                if (AA.compareAndSet(a, j, null, p)) {
                     long end = (timed && m == 0) ? System.nanoTime() + ns : 0L;
                     Thread t = Thread.currentThread(); // wait
                     for (int h = p.hash, spins = SPINS;;) {
                         Object v = p.match;
                         if (v != null) {
-                            U.putObjectRelease(p, MATCH, null);
+                            MATCH.setRelease(p, null);
                             p.item = null;             // clear for next use
                             p.hash = h;
                             return v;
@@ -389,22 +395,24 @@
                                      (--spins & ((SPINS >>> 1) - 1)) == 0)
                                 Thread.yield();        // two yields per wait
                         }
-                        else if (U.getObjectVolatile(a, j) != p)
+                        else if (AA.getAcquire(a, j) != p)
                             spins = SPINS;       // releaser hasn't set match yet
                         else if (!t.isInterrupted() && m == 0 &&
                                  (!timed ||
                                   (ns = end - System.nanoTime()) > 0L)) {
-                            U.putObject(t, BLOCKER, this); // emulate LockSupport
                             p.parked = t;              // minimize window
-                            if (U.getObjectVolatile(a, j) == p)
-                                U.park(false, ns);
+                            if (AA.getAcquire(a, j) == p) {
+                                if (ns == 0L)
+                                    LockSupport.park(this);
+                                else
+                                    LockSupport.parkNanos(this, ns);
+                            }
                             p.parked = null;
-                            U.putObject(t, BLOCKER, null);
                         }
-                        else if (U.getObjectVolatile(a, j) == p &&
-                                 U.compareAndSwapObject(a, j, p, null)) {
+                        else if (AA.getAcquire(a, j) == p &&
+                                 AA.compareAndSet(a, j, p, null)) {
                             if (m != 0)                // try to shrink
-                                U.compareAndSwapInt(this, BOUND, b, b + SEQ - 1);
+                                BOUND.compareAndSet(this, b, b + SEQ - 1);
                             p.item = null;
                             p.hash = h;
                             i = p.index >>>= 1;        // descend
@@ -426,7 +434,7 @@
                     i = (i != m || m == 0) ? m : m - 1;
                 }
                 else if ((c = p.collides) < m || m == FULL ||
-                         !U.compareAndSwapInt(this, BOUND, b, b + SEQ + 1)) {
+                         !BOUND.compareAndSet(this, b, b + SEQ + 1)) {
                     p.collides = c + 1;
                     i = (i == 0) ? m : i - 1;          // cyclically traverse
                 }
@@ -455,24 +463,24 @@
 
         for (Node q;;) {
             if ((q = slot) != null) {
-                if (U.compareAndSwapObject(this, SLOT, q, null)) {
+                if (SLOT.compareAndSet(this, q, null)) {
                     Object v = q.item;
                     q.match = item;
                     Thread w = q.parked;
                     if (w != null)
-                        U.unpark(w);
+                        LockSupport.unpark(w);
                     return v;
                 }
                 // create arena on contention, but continue until slot null
                 if (NCPU > 1 && bound == 0 &&
-                    U.compareAndSwapInt(this, BOUND, 0, SEQ))
+                    BOUND.compareAndSet(this, 0, SEQ))
                     arena = new Node[(FULL + 2) << ASHIFT];
             }
             else if (arena != null)
                 return null; // caller must reroute to arenaExchange
             else {
                 p.item = item;
-                if (U.compareAndSwapObject(this, SLOT, null, p))
+                if (SLOT.compareAndSet(this, null, p))
                     break;
                 p.item = null;
             }
@@ -495,19 +503,21 @@
                 spins = SPINS;
             else if (!t.isInterrupted() && arena == null &&
                      (!timed || (ns = end - System.nanoTime()) > 0L)) {
-                U.putObject(t, BLOCKER, this);
                 p.parked = t;
-                if (slot == p)
-                    U.park(false, ns);
+                if (slot == p) {
+                    if (ns == 0L)
+                        LockSupport.park(this);
+                    else
+                        LockSupport.parkNanos(this, ns);
+                }
                 p.parked = null;
-                U.putObject(t, BLOCKER, null);
             }
-            else if (U.compareAndSwapObject(this, SLOT, p, null)) {
+            else if (SLOT.compareAndSet(this, p, null)) {
                 v = timed && ns <= 0L && !t.isInterrupted() ? TIMED_OUT : null;
                 break;
             }
         }
-        U.putObjectRelease(p, MATCH, null);
+        MATCH.setRelease(p, null);
         p.item = null;
         p.hash = h;
         return v;
@@ -556,8 +566,9 @@
     @SuppressWarnings("unchecked")
     public V exchange(V x) throws InterruptedException {
         Object v;
+        Node[] a;
         Object item = (x == null) ? NULL_ITEM : x; // translate null args
-        if ((arena != null ||
+        if (((a = arena) != null ||
              (v = slotExchange(item, false, 0L)) == null) &&
             ((Thread.interrupted() || // disambiguates null return
               (v = arenaExchange(item, false, 0L)) == null)))
@@ -623,31 +634,18 @@
         return (v == NULL_ITEM) ? null : (V)v;
     }
 
-    // Unsafe mechanics
-    private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-    private static final long BOUND;
-    private static final long SLOT;
-    private static final long MATCH;
-    private static final long BLOCKER;
-    private static final int ABASE;
+    // VarHandle mechanics
+    private static final VarHandle BOUND;
+    private static final VarHandle SLOT;
+    private static final VarHandle MATCH;
+    private static final VarHandle AA;
     static {
         try {
-            BOUND = U.objectFieldOffset
-                (Exchanger.class.getDeclaredField("bound"));
-            SLOT = U.objectFieldOffset
-                (Exchanger.class.getDeclaredField("slot"));
-
-            MATCH = U.objectFieldOffset
-                (Node.class.getDeclaredField("match"));
-
-            BLOCKER = U.objectFieldOffset
-                (Thread.class.getDeclaredField("parkBlocker"));
-
-            int scale = U.arrayIndexScale(Node[].class);
-            if ((scale & (scale - 1)) != 0 || scale > (1 << ASHIFT))
-                throw new Error("Unsupported array scale");
-            // ABASE absorbs padding in front of element 0
-            ABASE = U.arrayBaseOffset(Node[].class) + (1 << ASHIFT);
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            BOUND = l.findVarHandle(Exchanger.class, "bound", int.class);
+            SLOT = l.findVarHandle(Exchanger.class, "slot", Node.class);
+            MATCH = l.findVarHandle(Node.class, "match", Object.class);
+            AA = MethodHandles.arrayElementVarHandle(Node[].class);
         } catch (ReflectiveOperationException e) {
             throw new Error(e);
         }
--- a/src/java.base/share/classes/java/util/concurrent/ForkJoinPool.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/util/concurrent/ForkJoinPool.java	Thu Jul 21 20:09:20 2016 -0700
@@ -36,6 +36,8 @@
 package java.util.concurrent;
 
 import java.lang.Thread.UncaughtExceptionHandler;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.security.AccessControlContext;
 import java.security.Permissions;
 import java.security.ProtectionDomain;
@@ -44,7 +46,11 @@
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
-import java.util.concurrent.locks.ReentrantLock;
+import java.util.function.Predicate;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.CountedCompleter;
+import java.util.concurrent.ForkJoinTask;
+import java.util.concurrent.ForkJoinWorkerThread;
 import java.util.concurrent.locks.LockSupport;
 
 /**
@@ -81,7 +87,9 @@
  * However, no such adjustments are guaranteed in the face of blocked
  * I/O or other unmanaged synchronization. The nested {@link
  * ManagedBlocker} interface enables extension of the kinds of
- * synchronization accommodated.
+ * synchronization accommodated. The default policies may be
+ * overridden using a constructor with parameters corresponding to
+ * those documented in class {@link ThreadPoolExecutor}.
  *
  * <p>In addition to execution and lifecycle control methods, this
  * class provides status check methods (for example
@@ -162,7 +170,6 @@
  * @since 1.7
  * @author Doug Lea
  */
-@jdk.internal.vm.annotation.Contended
 public class ForkJoinPool extends AbstractExecutorService {
 
     /*
@@ -229,10 +236,9 @@
      *        (CAS slot to null))
      *           increment base and return task;
      *
-     * There are several variants of each of these; for example most
-     * versions of poll pre-screen the CAS by rechecking that the base
-     * has not changed since reading the slot, and most methods only
-     * attempt the CAS if base appears not to be equal to top.
+     * There are several variants of each of these. In particular,
+     * almost all uses of poll occur within scan operations that also
+     * interleave contention tracking (with associated code sprawl).
      *
      * Memory ordering.  See "Correct and Efficient Work-Stealing for
      * Weak Memory Models" by Le, Pop, Cohen, and Nardelli, PPoPP 2013
@@ -264,10 +270,7 @@
      * thief chooses a different random victim target to try next. So,
      * in order for one thief to progress, it suffices for any
      * in-progress poll or new push on any empty queue to
-     * complete. (This is why we normally use method pollAt and its
-     * variants that try once at the apparent base index, else
-     * consider alternative actions, rather than method poll, which
-     * retries.)
+     * complete.
      *
      * This approach also enables support of a user mode in which
      * local task processing is in FIFO, not LIFO order, simply by
@@ -282,16 +285,13 @@
      * choosing existing queues, and may be randomly repositioned upon
      * contention with other submitters.  In essence, submitters act
      * like workers except that they are restricted to executing local
-     * tasks that they submitted (or in the case of CountedCompleters,
-     * others with the same root task).  Insertion of tasks in shared
-     * mode requires a lock but we use only a simple spinlock (using
-     * field qlock), because submitters encountering a busy queue move
-     * on to try or create other queues -- they block only when
-     * creating and registering new queues. Because it is used only as
-     * a spinlock, unlocking requires only a "releasing" store (using
-     * putIntRelease).  The qlock is also used during termination
-     * detection, in which case it is forced to a negative
-     * non-lockable value.
+     * tasks that they submitted.  Insertion of tasks in shared mode
+     * requires a lock but we use only a simple spinlock (using field
+     * phase), because submitters encountering a busy queue move to a
+     * different position to use or create other queues -- they block
+     * only when creating and registering new queues. Because it is
+     * used only as a spinlock, unlocking requires only a "releasing"
+     * store (using setRelease).
      *
      * Management
      * ==========
@@ -305,42 +305,34 @@
      * There are only a few properties that we can globally track or
      * maintain, so we pack them into a small number of variables,
      * often maintaining atomicity without blocking or locking.
-     * Nearly all essentially atomic control state is held in two
+     * Nearly all essentially atomic control state is held in a few
      * volatile variables that are by far most often read (not
-     * written) as status and consistency checks. (Also, field
-     * "config" holds unchanging configuration state.)
+     * written) as status and consistency checks. We pack as much
+     * information into them as we can.
      *
      * Field "ctl" contains 64 bits holding information needed to
-     * atomically decide to add, inactivate, enqueue (on an event
-     * queue), dequeue, and/or re-activate workers.  To enable this
+     * atomically decide to add, enqueue (on an event queue), and
+     * dequeue and release workers.  To enable this
      * packing, we restrict maximum parallelism to (1<<15)-1 (which is
      * far in excess of normal operating range) to allow ids, counts,
      * and their negations (used for thresholding) to fit into 16bit
      * subfields.
      *
-     * Field "runState" holds lifetime status, atomically and
-     * monotonically setting STARTED, SHUTDOWN, STOP, and finally
-     * TERMINATED bits.
-     *
-     * Field "auxState" is a ReentrantLock subclass that also
-     * opportunistically holds some other bookkeeping fields accessed
-     * only when locked.  It is mainly used to lock (infrequent)
-     * updates to workQueues.  The auxState instance is itself lazily
-     * constructed (see tryInitialize), requiring a double-check-style
-     * bootstrapping use of field runState, and locking a private
-     * static.
+     * Field "mode" holds configuration parameters as well as lifetime
+     * status, atomically and monotonically setting SHUTDOWN, STOP,
+     * and finally TERMINATED bits.
      *
      * Field "workQueues" holds references to WorkQueues.  It is
-     * updated (only during worker creation and termination) under the
-     * lock, but is otherwise concurrently readable, and accessed
-     * directly. We also ensure that reads of the array reference
-     * itself never become too stale (for example, re-reading before
-     * each scan). To simplify index-based operations, the array size
-     * is always a power of two, and all readers must tolerate null
-     * slots. Worker queues are at odd indices. Shared (submission)
-     * queues are at even indices, up to a maximum of 64 slots, to
-     * limit growth even if array needs to expand to add more
-     * workers. Grouping them together in this way simplifies and
+     * updated (only during worker creation and termination) under
+     * lock (using field workerNamePrefix as lock), but is otherwise
+     * concurrently readable, and accessed directly. We also ensure
+     * that uses of the array reference itself never become too stale
+     * in case of resizing.  To simplify index-based operations, the
+     * array size is always a power of two, and all readers must
+     * tolerate null slots. Worker queues are at odd indices. Shared
+     * (submission) queues are at even indices, up to a maximum of 64
+     * slots, to limit growth even if array needs to expand to add
+     * more workers. Grouping them together in this way simplifies and
      * speeds up task scanning.
      *
      * All worker thread creation is on-demand, triggered by task
@@ -360,30 +352,37 @@
      * workers unless there appear to be tasks available.  On the
      * other hand, we must quickly prod them into action when new
      * tasks are submitted or generated. In many usages, ramp-up time
-     * to activate workers is the main limiting factor in overall
-     * performance, which is compounded at program start-up by JIT
-     * compilation and allocation. So we streamline this as much as
-     * possible.
+     * is the main limiting factor in overall performance, which is
+     * compounded at program start-up by JIT compilation and
+     * allocation. So we streamline this as much as possible.
      *
-     * The "ctl" field atomically maintains active and total worker
-     * counts as well as a queue to place waiting threads so they can
-     * be located for signalling. Active counts also play the role of
-     * quiescence indicators, so are decremented when workers believe
-     * that there are no more tasks to execute. The "queue" is
-     * actually a form of Treiber stack.  A stack is ideal for
-     * activating threads in most-recently used order. This improves
+     * The "ctl" field atomically maintains total worker and
+     * "released" worker counts, plus the head of the available worker
+     * queue (actually stack, represented by the lower 32bit subfield
+     * of ctl).  Released workers are those known to be scanning for
+     * and/or running tasks. Unreleased ("available") workers are
+     * recorded in the ctl stack. These workers are made available for
+     * signalling by enqueuing in ctl (see method runWorker).  The
+     * "queue" is a form of Treiber stack. This is ideal for
+     * activating threads in most-recently used order, and improves
      * performance and locality, outweighing the disadvantages of
      * being prone to contention and inability to release a worker
-     * unless it is topmost on stack.  We block/unblock workers after
-     * pushing on the idle worker stack (represented by the lower
-     * 32bit subfield of ctl) when they cannot find work.  The top
-     * stack state holds the value of the "scanState" field of the
-     * worker: its index and status, plus a version counter that, in
-     * addition to the count subfields (also serving as version
-     * stamps) provide protection against Treiber stack ABA effects.
+     * unless it is topmost on stack.  To avoid missed signal problems
+     * inherent in any wait/signal design, available workers rescan
+     * for (and if found run) tasks after enqueuing.  Normally their
+     * release status will be updated while doing so, but the released
+     * worker ctl count may underestimate the number of active
+     * threads. (However, it is still possible to determine quiescence
+     * via a validation traversal -- see isQuiescent).  After an
+     * unsuccessful rescan, available workers are blocked until
+     * signalled (see signalWork).  The top stack state holds the
+     * value of the "phase" field of the worker: its index and status,
+     * plus a version counter that, in addition to the count subfields
+     * (also serving as version stamps) provide protection against
+     * Treiber stack ABA effects.
      *
-     * Creating workers. To create a worker, we pre-increment total
-     * count (serving as a reservation), and attempt to construct a
+     * Creating workers. To create a worker, we pre-increment counts
+     * (serving as a reservation), and attempt to construct a
      * ForkJoinWorkerThread via its factory. Upon construction, the
      * new thread invokes registerWorker, where it constructs a
      * WorkQueue and is assigned an index in the workQueues array
@@ -405,16 +404,15 @@
      * submission queues for existing external threads (see
      * externalPush).
      *
-     * WorkQueue field scanState is used by both workers and the pool
-     * to manage and track whether a worker is UNSIGNALLED (possibly
-     * blocked waiting for a signal).  When a worker is inactivated,
-     * its scanState field is set, and is prevented from executing
-     * tasks, even though it must scan once for them to avoid queuing
-     * races. Note that scanState updates lag queue CAS releases so
-     * usage requires care. When queued, the lower 16 bits of
-     * scanState must hold its pool index. So we place the index there
-     * upon initialization (see registerWorker) and otherwise keep it
-     * there or restore it when necessary.
+     * WorkQueue field "phase" is used by both workers and the pool to
+     * manage and track whether a worker is UNSIGNALLED (possibly
+     * blocked waiting for a signal).  When a worker is enqueued its
+     * phase field is set. Note that phase field updates lag queue CAS
+     * releases so usage requires care -- seeing a negative phase does
+     * not guarantee that the worker is available. When queued, the
+     * lower 16 bits of phase must hold its pool index. So we
+     * place the index there upon initialization (see registerWorker)
+     * and otherwise keep it there or restore it when necessary.
      *
      * The ctl field also serves as the basis for memory
      * synchronization surrounding activation. This uses a more
@@ -423,15 +421,14 @@
      * if to its current value).  This would be extremely costly. So
      * we relax it in several ways: (1) Producers only signal when
      * their queue is empty. Other workers propagate this signal (in
-     * method scan) when they find tasks. (2) Workers only enqueue
-     * after scanning (see below) and not finding any tasks.  (3)
-     * Rather than CASing ctl to its current value in the common case
-     * where no action is required, we reduce write contention by
-     * equivalently prefacing signalWork when called by an external
-     * task producer using a memory access with full-volatile
-     * semantics or a "fullFence". (4) For internal task producers we
-     * rely on the fact that even if no other workers awaken, the
-     * producer itself will eventually see the task and execute it.
+     * method scan) when they find tasks; to further reduce flailing,
+     * each worker signals only one other per activation. (2) Workers
+     * only enqueue after scanning (see below) and not finding any
+     * tasks.  (3) Rather than CASing ctl to its current value in the
+     * common case where no action is required, we reduce write
+     * contention by equivalently prefacing signalWork when called by
+     * an external task producer using a memory access with
+     * full-volatile semantics or a "fullFence".
      *
      * Almost always, too many signals are issued. A task producer
      * cannot in general tell if some existing worker is in the midst
@@ -443,64 +440,40 @@
      * and bookkeeping bottlenecks during ramp-up, ramp-down, and small
      * computations involving only a few workers.
      *
-     * Scanning. Method scan() performs top-level scanning for tasks.
-     * Each scan traverses (and tries to poll from) each queue in
-     * pseudorandom permutation order by randomly selecting an origin
-     * index and a step value.  (The pseudorandom generator need not
-     * have high-quality statistical properties in the long term, but
-     * just within computations; We use 64bit and 32bit Marsaglia
-     * XorShifts, which are cheap and suffice here.)  Scanning also
-     * employs contention reduction: When scanning workers fail a CAS
-     * polling for work, they soon restart with a different
-     * pseudorandom scan order (thus likely retrying at different
-     * intervals). This improves throughput when many threads are
-     * trying to take tasks from few queues.  Scans do not otherwise
-     * explicitly take into account core affinities, loads, cache
-     * localities, etc, However, they do exploit temporal locality
-     * (which usually approximates these) by preferring to re-poll (up
-     * to POLL_LIMIT times) from the same queue after a successful
-     * poll before trying others.  Restricted forms of scanning occur
-     * in methods helpComplete and findNonEmptyStealQueue, and take
-     * similar but simpler forms.
-     *
-     * Deactivation and waiting. Queuing encounters several intrinsic
-     * races; most notably that an inactivating scanning worker can
-     * miss seeing a task produced during a scan.  So when a worker
-     * cannot find a task to steal, it inactivates and enqueues, and
-     * then rescans to ensure that it didn't miss one, reactivating
-     * upon seeing one with probability approximately proportional to
-     * probability of a miss.  (In most cases, the worker will be
-     * signalled before self-signalling, avoiding cascades of multiple
-     * signals for the same task).
-     *
-     * Workers block (in method awaitWork) using park/unpark;
-     * advertising the need for signallers to unpark by setting their
-     * "parker" fields.
+     * Scanning. Method runWorker performs top-level scanning for
+     * tasks.  Each scan traverses and tries to poll from each queue
+     * starting at a random index and circularly stepping. Scans are
+     * not performed in ideal random permutation order, to reduce
+     * cacheline contention.  The pseudorandom generator need not have
+     * high-quality statistical properties in the long term, but just
+     * within computations. We use Marsaglia XorShifts (often via
+     * ThreadLocalRandom.nextSecondarySeed), which are cheap and
+     * suffice. Scanning also employs contention reduction: When
+     * scanning workers fail to extract an apparently existing task,
+     * they soon restart at a different pseudorandom index.  This
+     * improves throughput when many threads are trying to take tasks
+     * from few queues, which can be common in some usages.  Scans do
+     * not otherwise explicitly take into account core affinities,
+     * loads, cache localities, etc. However, they do exploit temporal
+     * locality (which usually approximates these) by preferring to
+     * re-poll (at most #workers times) from the same queue after a
+     * successful poll before trying others.
      *
      * Trimming workers. To release resources after periods of lack of
      * use, a worker starting to wait when the pool is quiescent will
-     * time out and terminate (see awaitWork) if the pool has remained
-     * quiescent for period given by IDLE_TIMEOUT_MS, increasing the
-     * period as the number of threads decreases, eventually removing
-     * all workers.
+     * time out and terminate (see method scan) if the pool has
+     * remained quiescent for period given by field keepAlive.
      *
      * Shutdown and Termination. A call to shutdownNow invokes
      * tryTerminate to atomically set a runState bit. The calling
      * thread, as well as every other worker thereafter terminating,
-     * helps terminate others by setting their (qlock) status,
-     * cancelling their unprocessed tasks, and waking them up, doing
-     * so repeatedly until stable. Calls to non-abrupt shutdown()
-     * preface this by checking whether termination should commence.
-     * This relies primarily on the active count bits of "ctl"
-     * maintaining consensus -- tryTerminate is called from awaitWork
-     * whenever quiescent. However, external submitters do not take
-     * part in this consensus.  So, tryTerminate sweeps through queues
-     * (until stable) to ensure lack of in-flight submissions and
-     * workers about to process them before triggering the "STOP"
-     * phase of termination. (Note: there is an intrinsic conflict if
-     * helpQuiescePool is called when shutdown is enabled. Both wait
-     * for quiescence, but tryTerminate is biased to not trigger until
-     * helpQuiescePool completes.)
+     * helps terminate others by cancelling their unprocessed tasks,
+     * and waking them up, doing so repeatedly until stable. Calls to
+     * non-abrupt shutdown() preface this by checking whether
+     * termination should commence by sweeping through queues (until
+     * stable) to ensure lack of in-flight submissions and workers
+     * about to process them before triggering the "STOP" phase of
+     * termination.
      *
      * Joining Tasks
      * =============
@@ -508,12 +481,12 @@
      * Any of several actions may be taken when one worker is waiting
      * to join a task stolen (or always held) by another.  Because we
      * are multiplexing many tasks on to a pool of workers, we can't
-     * just let them block (as in Thread.join).  We also cannot just
-     * reassign the joiner's run-time stack with another and replace
-     * it later, which would be a form of "continuation", that even if
-     * possible is not necessarily a good idea since we may need both
-     * an unblocked task and its continuation to progress.  Instead we
-     * combine two tactics:
+     * always just let them block (as in Thread.join).  We also cannot
+     * just reassign the joiner's run-time stack with another and
+     * replace it later, which would be a form of "continuation", that
+     * even if possible is not necessarily a good idea since we may
+     * need both an unblocked task and its continuation to progress.
+     * Instead we combine two tactics:
      *
      *   Helping: Arranging for the joiner to execute some task that it
      *      would be running if the steal had not occurred.
@@ -526,79 +499,43 @@
      * helping a hypothetical compensator: If we can readily tell that
      * a possible action of a compensator is to steal and execute the
      * task being joined, the joining thread can do so directly,
-     * without the need for a compensation thread (although at the
-     * expense of larger run-time stacks, but the tradeoff is
-     * typically worthwhile).
+     * without the need for a compensation thread.
      *
      * The ManagedBlocker extension API can't use helping so relies
      * only on compensation in method awaitBlocker.
      *
-     * The algorithm in helpStealer entails a form of "linear
-     * helping".  Each worker records (in field currentSteal) the most
-     * recent task it stole from some other worker (or a submission).
-     * It also records (in field currentJoin) the task it is currently
-     * actively joining. Method helpStealer uses these markers to try
-     * to find a worker to help (i.e., steal back a task from and
-     * execute it) that could hasten completion of the actively joined
-     * task.  Thus, the joiner executes a task that would be on its
-     * own local deque had the to-be-joined task not been stolen. This
-     * is a conservative variant of the approach described in Wagner &
-     * Calder "Leapfrogging: a portable technique for implementing
-     * efficient futures" SIGPLAN Notices, 1993
-     * (http://portal.acm.org/citation.cfm?id=155354). It differs in
-     * that: (1) We only maintain dependency links across workers upon
-     * steals, rather than use per-task bookkeeping.  This sometimes
-     * requires a linear scan of workQueues array to locate stealers,
-     * but often doesn't because stealers leave hints (that may become
-     * stale/wrong) of where to locate them.  It is only a hint
-     * because a worker might have had multiple steals and the hint
-     * records only one of them (usually the most current).  Hinting
-     * isolates cost to when it is needed, rather than adding to
-     * per-task overhead.  (2) It is "shallow", ignoring nesting and
-     * potentially cyclic mutual steals.  (3) It is intentionally
-     * racy: field currentJoin is updated only while actively joining,
-     * which means that we miss links in the chain during long-lived
-     * tasks, GC stalls etc (which is OK since blocking in such cases
-     * is usually a good idea).  (4) We bound the number of attempts
-     * to find work using checksums and fall back to suspending the
-     * worker and if necessary replacing it with another.
+     * The algorithm in awaitJoin entails a form of "linear helping".
+     * Each worker records (in field source) the id of the queue from
+     * which it last stole a task.  The scan in method awaitJoin uses
+     * these markers to try to find a worker to help (i.e., steal back
+     * a task from and execute it) that could hasten completion of the
+     * actively joined task.  Thus, the joiner executes a task that
+     * would be on its own local deque if the to-be-joined task had
+     * not been stolen. This is a conservative variant of the approach
+     * described in Wagner & Calder "Leapfrogging: a portable
+     * technique for implementing efficient futures" SIGPLAN Notices,
+     * 1993 (http://portal.acm.org/citation.cfm?id=155354). It differs
+     * mainly in that we only record queue ids, not full dependency
+     * links.  This requires a linear scan of the workQueues array to
+     * locate stealers, but isolates cost to when it is needed, rather
+     * than adding to per-task overhead. Searches can fail to locate
+     * stealers when GC stalls and the like delay recording sources.
+     * Further, even when accurately identified, stealers might not
+     * ever produce a task that the joiner can in turn help with. So,
+     * compensation is tried upon failure to find tasks to run.
      *
-     * Helping actions for CountedCompleters do not require tracking
-     * currentJoins: Method helpComplete takes and executes any task
-     * with the same root as the task being waited on (preferring
-     * local pops to non-local polls). However, this still entails
-     * some traversal of completer chains, so is less efficient than
-     * using CountedCompleters without explicit joins.
-     *
-     * Compensation does not aim to keep exactly the target
+     * Compensation does not by default aim to keep exactly the target
      * parallelism number of unblocked threads running at any given
      * time. Some previous versions of this class employed immediate
      * compensations for any blocked join. However, in practice, the
      * vast majority of blockages are transient byproducts of GC and
      * other JVM or OS activities that are made worse by replacement.
-     * Currently, compensation is attempted only after validating that
-     * all purportedly active threads are processing tasks by checking
-     * field WorkQueue.scanState, which eliminates most false
-     * positives.  Also, compensation is bypassed (tolerating fewer
-     * threads) in the most common case in which it is rarely
-     * beneficial: when a worker with an empty queue (thus no
-     * continuation tasks) blocks on a join and there still remain
-     * enough threads to ensure liveness.
-     *
-     * Spare threads are removed as soon as they notice that the
-     * target parallelism level has been exceeded, in method
-     * tryDropSpare. (Method scan arranges returns for rechecks upon
-     * each probe via the "bound" parameter.)
-     *
-     * The compensation mechanism may be bounded.  Bounds for the
-     * commonPool (see COMMON_MAX_SPARES) better enable JVMs to cope
-     * with programming errors and abuse before running out of
-     * resources to do so. In other cases, users may supply factories
-     * that limit thread construction. The effects of bounding in this
-     * pool (like all others) is imprecise.  Total worker counts are
-     * decremented when threads deregister, not when they exit and
-     * resources are reclaimed by the JVM and OS. So the number of
-     * simultaneously live threads may transiently exceed bounds.
+     * Rather than impose arbitrary policies, we allow users to
+     * override the default of only adding threads upon apparent
+     * starvation.  The compensation mechanism may also be bounded.
+     * Bounds for the commonPool (see COMMON_MAX_SPARES) better enable
+     * JVMs to cope with programming errors and abuse before running
+     * out of resources to do so.
      *
      * Common Pool
      * ===========
@@ -606,9 +543,7 @@
      * The static common pool always exists after static
      * initialization.  Since it (or any other created pool) need
      * never be used, we minimize initial construction overhead and
-     * footprint to the setup of about a dozen fields, with no nested
-     * allocation. Most bootstrapping occurs within method
-     * externalSubmit during the first submission to the pool.
+     * footprint to the setup of about a dozen fields.
      *
      * When external threads submit to the common pool, they can
      * perform subtask processing (see externalHelpComplete and
@@ -628,28 +563,22 @@
      * InnocuousForkJoinWorkerThread when there is a SecurityManager
      * present. These workers have no permissions set, do not belong
      * to any user-defined ThreadGroup, and erase all ThreadLocals
-     * after executing any top-level task (see WorkQueue.runTask).
-     * The associated mechanics (mainly in ForkJoinWorkerThread) may
-     * be JVM-dependent and must access particular Thread class fields
-     * to achieve this effect.
+     * after executing any top-level task (see
+     * WorkQueue.afterTopLevelExec).  The associated mechanics (mainly
+     * in ForkJoinWorkerThread) may be JVM-dependent and must access
+     * particular Thread class fields to achieve this effect.
      *
      * Style notes
      * ===========
      *
-     * Memory ordering relies mainly on Unsafe intrinsics that carry
-     * the further responsibility of explicitly performing null- and
-     * bounds- checks otherwise carried out implicitly by JVMs.  This
-     * can be awkward and ugly, but also reflects the need to control
+     * Memory ordering relies mainly on VarHandles.  This can be
+     * awkward and ugly, but also reflects the need to control
      * outcomes across the unusual cases that arise in very racy code
-     * with very few invariants. So these explicit checks would exist
-     * in some form anyway.  All fields are read into locals before
-     * use, and null-checked if they are references.  This is usually
-     * done in a "C"-like style of listing declarations at the heads
-     * of methods or blocks, and using inline assignments on first
-     * encounter.  Array bounds-checks are usually performed by
-     * masking with array.length-1, which relies on the invariant that
-     * these arrays are created with positive lengths, which is itself
-     * paranoically checked. Nearly all explicit checks lead to
+     * with very few invariants. All fields are read into locals
+     * before use, and null-checked if they are references.  This is
+     * usually done in a "C"-like style of listing declarations at the
+     * heads of methods or blocks, and using inline assignments on
+     * first encounter.  Nearly all explicit checks lead to
      * bypass/return, not exception throws, because they may
      * legitimately arise due to cancellation/revocation during
      * shutdown.
@@ -701,10 +630,17 @@
     public static interface ForkJoinWorkerThreadFactory {
         /**
          * Returns a new worker thread operating in the given pool.
+         * Returning null or throwing an exception may result in tasks
+         * never being executed.  If this method throws an exception,
+         * it is relayed to the caller of the method (for example
+         * {@code execute}) causing attempted thread creation. If this
+         * method returns null or throws an exception, it is not
+         * retried until the next attempted creation (for example
+         * another call to {@code execute}).
          *
          * @param pool the pool this thread works in
          * @return the new worker thread, or {@code null} if the request
-         *         to create a thread is rejected
+         *         to create a thread is rejected.
          * @throws NullPointerException if the pool is null
          */
         public ForkJoinWorkerThread newThread(ForkJoinPool pool);
@@ -721,56 +657,35 @@
         }
     }
 
-    /**
-     * Class for artificial tasks that are used to replace the target
-     * of local joins if they are removed from an interior queue slot
-     * in WorkQueue.tryRemoveAndExec. We don't need the proxy to
-     * actually do anything beyond having a unique identity.
-     */
-    private static final class EmptyTask extends ForkJoinTask<Void> {
-        private static final long serialVersionUID = -7721805057305804111L;
-        EmptyTask() { status = ForkJoinTask.NORMAL; } // force done
-        public final Void getRawResult() { return null; }
-        public final void setRawResult(Void x) {}
-        public final boolean exec() { return true; }
-    }
-
-    /**
-     * Additional fields and lock created upon initialization.
-     */
-    private static final class AuxState extends ReentrantLock {
-        private static final long serialVersionUID = -6001602636862214147L;
-        volatile long stealCount;     // cumulative steal count
-        long indexSeed;               // index bits for registerWorker
-        AuxState() {}
-    }
-
     // Constants shared across ForkJoinPool and WorkQueue
 
     // Bounds
+    static final int SWIDTH       = 16;            // width of short
     static final int SMASK        = 0xffff;        // short bits == max index
     static final int MAX_CAP      = 0x7fff;        // max #workers - 1
-    static final int EVENMASK     = 0xfffe;        // even short bits
     static final int SQMASK       = 0x007e;        // max 64 (even) slots
 
-    // Masks and units for WorkQueue.scanState and ctl sp subfield
+    // Masks and units for WorkQueue.phase and ctl sp subfield
     static final int UNSIGNALLED  = 1 << 31;       // must be negative
     static final int SS_SEQ       = 1 << 16;       // version count
+    static final int QLOCK        = 1;             // must be 1
 
-    // Mode bits for ForkJoinPool.config and WorkQueue.config
-    static final int MODE_MASK    = 0xffff << 16;  // top half of int
-    static final int SPARE_WORKER = 1 << 17;       // set if tc > 0 on creation
-    static final int UNREGISTERED = 1 << 18;       // to skip some of deregister
-    static final int FIFO_QUEUE   = 1 << 31;       // must be negative
-    static final int LIFO_QUEUE   = 0;             // for clarity
-    static final int IS_OWNED     = 1;             // low bit 0 if shared
+    // Mode bits and sentinels, some also used in WorkQueue id and source fields
+    static final int OWNED        = 1;             // queue has owner thread
+    static final int FIFO         = 1 << 16;       // fifo queue or access mode
+    static final int SHUTDOWN     = 1 << 18;
+    static final int TERMINATED   = 1 << 19;
+    static final int STOP         = 1 << 31;       // must be negative
+    static final int QUIET        = 1 << 30;       // not scanning or working
+    static final int DORMANT      = QUIET | UNSIGNALLED;
 
     /**
-     * The maximum number of task executions from the same queue
-     * before checking other queues, bounding unfairness and impact of
-     * infinite user task recursion.  Must be a power of two minus 1.
+     * The maximum number of local polls from the same queue before
+     * checking others. This is a safeguard against infinitely unfair
+     * looping under unbounded user task recursion, and must be larger
+     * than plausible cases of intentional bounded task recursion.
      */
-    static final int POLL_LIMIT = (1 << 10) - 1;
+    static final int POLL_LIMIT = 1 << 10;
 
     /**
      * Queues supporting work-stealing as well as external task
@@ -805,23 +720,16 @@
         static final int MAXIMUM_QUEUE_CAPACITY = 1 << 26; // 64M
 
         // Instance fields
-
-        volatile int scanState;    // versioned, negative if inactive
-        int stackPred;             // pool stack (ctl) predecessor
+        volatile int phase;        // versioned, negative: queued, 1: locked
+        int stackPred;             // pool stack (ctl) predecessor link
         int nsteals;               // number of steals
-        int hint;                  // randomization and stealer index hint
-        int config;                // pool index and mode
-        volatile int qlock;        // 1: locked, < 0: terminate; else 0
+        int id;                    // index, mode, tag
+        volatile int source;       // source queue id, or sentinel
         volatile int base;         // index of next slot for poll
         int top;                   // index of next slot for push
         ForkJoinTask<?>[] array;   // the elements (initially unallocated)
         final ForkJoinPool pool;   // the containing pool (may be null)
         final ForkJoinWorkerThread owner; // owning thread or null if shared
-        volatile Thread parker;    // == owner during call to park; else null
-        volatile ForkJoinTask<?> currentJoin; // task being joined in awaitJoin
-
-        @jdk.internal.vm.annotation.Contended("group2") // segregate
-        volatile ForkJoinTask<?> currentSteal; // nonnull when running some task
 
         WorkQueue(ForkJoinPool pool, ForkJoinWorkerThread owner) {
             this.pool = pool;
@@ -834,7 +742,7 @@
          * Returns an exportable index (used by ForkJoinWorkerThread).
          */
         final int getPoolIndex() {
-            return (config & 0xffff) >>> 1; // ignore odd/even tag bit
+            return (id & 0xffff) >>> 1; // ignore odd/even tag bit
         }
 
         /**
@@ -851,13 +759,14 @@
          * near-empty queue has at least one unclaimed task.
          */
         final boolean isEmpty() {
-            ForkJoinTask<?>[] a; int n, al, s;
-            return ((n = base - (s = top)) >= 0 || // possibly one task
+            ForkJoinTask<?>[] a; int n, al, b;
+            return ((n = (b = base) - top) >= 0 || // possibly one task
                     (n == -1 && ((a = array) == null ||
                                  (al = a.length) == 0 ||
-                                 a[(al - 1) & (s - 1)] == null)));
+                                 a[(al - 1) & b] == null)));
         }
 
+
         /**
          * Pushes a task. Call only by owner in unshared queues.
          *
@@ -865,17 +774,17 @@
          * @throws RejectedExecutionException if array cannot be resized
          */
         final void push(ForkJoinTask<?> task) {
-            U.storeFence();              // ensure safe publication
-            int s = top, al, d; ForkJoinTask<?>[] a;
+            int s = top; ForkJoinTask<?>[] a; int al, d;
             if ((a = array) != null && (al = a.length) > 0) {
-                a[(al - 1) & s] = task;  // relaxed writes OK
+                int index = (al - 1) & s;
+                ForkJoinPool p = pool;
                 top = s + 1;
-                ForkJoinPool p = pool;
+                QA.setRelease(a, index, task);
                 if ((d = base - s) == 0 && p != null) {
-                    U.fullFence();
+                    VarHandle.fullFence();
                     p.signalWork();
                 }
-                else if (al + d == 1)
+                else if (d + al == 1)
                     growArray();
             }
         }
@@ -887,24 +796,24 @@
          */
         final ForkJoinTask<?>[] growArray() {
             ForkJoinTask<?>[] oldA = array;
-            int size = oldA != null ? oldA.length << 1 : INITIAL_QUEUE_CAPACITY;
+            int oldSize = oldA != null ? oldA.length : 0;
+            int size = oldSize > 0 ? oldSize << 1 : INITIAL_QUEUE_CAPACITY;
             if (size < INITIAL_QUEUE_CAPACITY || size > MAXIMUM_QUEUE_CAPACITY)
                 throw new RejectedExecutionException("Queue capacity exceeded");
             int oldMask, t, b;
             ForkJoinTask<?>[] a = array = new ForkJoinTask<?>[size];
-            if (oldA != null && (oldMask = oldA.length - 1) > 0 &&
+            if (oldA != null && (oldMask = oldSize - 1) > 0 &&
                 (t = top) - (b = base) > 0) {
                 int mask = size - 1;
                 do { // emulate poll from old array, push to new array
                     int index = b & oldMask;
-                    long offset = ((long)index << ASHIFT) + ABASE;
                     ForkJoinTask<?> x = (ForkJoinTask<?>)
-                        U.getObjectVolatile(oldA, offset);
+                        QA.getAcquire(oldA, index);
                     if (x != null &&
-                        U.compareAndSwapObject(oldA, offset, x, null))
+                        QA.compareAndSet(oldA, index, x, null))
                         a[b & mask] = x;
                 } while (++b != t);
-                U.storeFence();
+                VarHandle.releaseFence();
             }
             return a;
         }
@@ -917,33 +826,12 @@
             int b = base, s = top, al, i; ForkJoinTask<?>[] a;
             if ((a = array) != null && b != s && (al = a.length) > 0) {
                 int index = (al - 1) & --s;
-                long offset = ((long)index << ASHIFT) + ABASE;
                 ForkJoinTask<?> t = (ForkJoinTask<?>)
-                    U.getObject(a, offset);
+                    QA.get(a, index);
                 if (t != null &&
-                    U.compareAndSwapObject(a, offset, t, null)) {
+                    QA.compareAndSet(a, index, t, null)) {
                     top = s;
-                    return t;
-                }
-            }
-            return null;
-        }
-
-        /**
-         * Takes a task in FIFO order if b is base of queue and a task
-         * can be claimed without contention. Specialized versions
-         * appear in ForkJoinPool methods scan and helpStealer.
-         */
-        final ForkJoinTask<?> pollAt(int b) {
-            ForkJoinTask<?>[] a; int al;
-            if ((a = array) != null && (al = a.length) > 0) {
-                int index = (al - 1) & b;
-                long offset = ((long)index << ASHIFT) + ABASE;
-                ForkJoinTask<?> t = (ForkJoinTask<?>)
-                    U.getObjectVolatile(a, offset);
-                if (t != null && b++ == base &&
-                    U.compareAndSwapObject(a, offset, t, null)) {
-                    base = b;
+                    VarHandle.releaseFence();
                     return t;
                 }
             }
@@ -959,12 +847,11 @@
                 if ((a = array) != null && (d = b - s) < 0 &&
                     (al = a.length) > 0) {
                     int index = (al - 1) & b;
-                    long offset = ((long)index << ASHIFT) + ABASE;
                     ForkJoinTask<?> t = (ForkJoinTask<?>)
-                        U.getObjectVolatile(a, offset);
+                        QA.getAcquire(a, index);
                     if (b++ == base) {
                         if (t != null) {
-                            if (U.compareAndSwapObject(a, offset, t, null)) {
+                            if (QA.compareAndSet(a, index, t, null)) {
                                 base = b;
                                 return t;
                             }
@@ -983,7 +870,7 @@
          * Takes next task, if one exists, in order specified by mode.
          */
         final ForkJoinTask<?> nextLocalTask() {
-            return (config < 0) ? poll() : pop();
+            return ((id & FIFO) != 0) ? poll() : pop();
         }
 
         /**
@@ -992,7 +879,8 @@
         final ForkJoinTask<?> peek() {
             int al; ForkJoinTask<?>[] a;
             return ((a = array) != null && (al = a.length) > 0) ?
-                a[(al - 1) & (config < 0 ? base : top - 1)] : null;
+                a[(al - 1) &
+                  ((id & FIFO) != 0 ? base : top - 1)] : null;
         }
 
         /**
@@ -1002,9 +890,9 @@
             int b = base, s = top, al; ForkJoinTask<?>[] a;
             if ((a = array) != null && b != s && (al = a.length) > 0) {
                 int index = (al - 1) & --s;
-                long offset = ((long)index << ASHIFT) + ABASE;
-                if (U.compareAndSwapObject(a, offset, task, null)) {
+                if (QA.compareAndSet(a, index, task, null)) {
                     top = s;
+                    VarHandle.releaseFence();
                     return true;
                 }
             }
@@ -1012,105 +900,32 @@
         }
 
         /**
-         * Shared version of push. Fails if already locked.
-         *
-         * @return status: > 0 locked, 0 possibly was empty, < 0 was nonempty
-         */
-        final int sharedPush(ForkJoinTask<?> task) {
-            int stat;
-            if (U.compareAndSwapInt(this, QLOCK, 0, 1)) {
-                int b = base, s = top, al, d; ForkJoinTask<?>[] a;
-                if ((a = array) != null && (al = a.length) > 0 &&
-                    al - 1 + (d = b - s) > 0) {
-                    a[(al - 1) & s] = task;
-                    top = s + 1;                 // relaxed writes OK here
-                    qlock = 0;
-                    stat = (d < 0 && b == base) ? d : 0;
-                }
-                else {
-                    growAndSharedPush(task);
-                    stat = 0;
-                }
-            }
-            else
-                stat = 1;
-            return stat;
-        }
-
-        /**
-         * Helper for sharedPush; called only when locked and resize
-         * needed.
-         */
-        private void growAndSharedPush(ForkJoinTask<?> task) {
-            try {
-                growArray();
-                int s = top, al; ForkJoinTask<?>[] a;
-                if ((a = array) != null && (al = a.length) > 0) {
-                    a[(al - 1) & s] = task;
-                    top = s + 1;
-                }
-            } finally {
-                qlock = 0;
-            }
-        }
-
-        /**
-         * Shared version of tryUnpush.
-         */
-        final boolean trySharedUnpush(ForkJoinTask<?> task) {
-            boolean popped = false;
-            int s = top - 1, al; ForkJoinTask<?>[] a;
-            if ((a = array) != null && (al = a.length) > 0) {
-                int index = (al - 1) & s;
-                long offset = ((long)index << ASHIFT) + ABASE;
-                ForkJoinTask<?> t = (ForkJoinTask<?>) U.getObject(a, offset);
-                if (t == task &&
-                    U.compareAndSwapInt(this, QLOCK, 0, 1)) {
-                    if (top == s + 1 && array == a &&
-                        U.compareAndSwapObject(a, offset, task, null)) {
-                        popped = true;
-                        top = s;
-                    }
-                    U.putIntRelease(this, QLOCK, 0);
-                }
-            }
-            return popped;
-        }
-
-        /**
          * Removes and cancels all known tasks, ignoring any exceptions.
          */
         final void cancelAll() {
-            ForkJoinTask<?> t;
-            if ((t = currentJoin) != null) {
-                currentJoin = null;
-                ForkJoinTask.cancelIgnoringExceptions(t);
-            }
-            if ((t = currentSteal) != null) {
-                currentSteal = null;
-                ForkJoinTask.cancelIgnoringExceptions(t);
-            }
-            while ((t = poll()) != null)
+            for (ForkJoinTask<?> t; (t = poll()) != null; )
                 ForkJoinTask.cancelIgnoringExceptions(t);
         }
 
         // Specialized execution methods
 
         /**
-         * Pops and executes up to POLL_LIMIT tasks or until empty.
+         * Pops and executes up to limit consecutive tasks or until empty.
+         *
+         * @param limit max runs, or zero for no limit
          */
-        final void localPopAndExec() {
-            for (int nexec = 0;;) {
+        final void localPopAndExec(int limit) {
+            for (;;) {
                 int b = base, s = top, al; ForkJoinTask<?>[] a;
                 if ((a = array) != null && b != s && (al = a.length) > 0) {
                     int index = (al - 1) & --s;
-                    long offset = ((long)index << ASHIFT) + ABASE;
                     ForkJoinTask<?> t = (ForkJoinTask<?>)
-                        U.getAndSetObject(a, offset, null);
+                        QA.getAndSet(a, index, null);
                     if (t != null) {
                         top = s;
-                        (currentSteal = t).doExec();
-                        if (++nexec > POLL_LIMIT)
+                        VarHandle.releaseFence();
+                        t.doExec();
+                        if (limit != 0 && --limit == 0)
                             break;
                     }
                     else
@@ -1122,22 +937,28 @@
         }
 
         /**
-         * Polls and executes up to POLL_LIMIT tasks or until empty.
+         * Polls and executes up to limit consecutive tasks or until empty.
+         *
+         * @param limit max runs, or zero for no limit
          */
-        final void localPollAndExec() {
-            for (int nexec = 0;;) {
-                int b = base, s = top, al; ForkJoinTask<?>[] a;
-                if ((a = array) != null && b != s && (al = a.length) > 0) {
+        final void localPollAndExec(int limit) {
+            for (int polls = 0;;) {
+                int b = base, s = top, d, al; ForkJoinTask<?>[] a;
+                if ((a = array) != null && (d = b - s) < 0 &&
+                    (al = a.length) > 0) {
                     int index = (al - 1) & b++;
-                    long offset = ((long)index << ASHIFT) + ABASE;
                     ForkJoinTask<?> t = (ForkJoinTask<?>)
-                        U.getAndSetObject(a, offset, null);
+                        QA.getAndSet(a, index, null);
                     if (t != null) {
                         base = b;
                         t.doExec();
-                        if (++nexec > POLL_LIMIT)
+                        if (limit != 0 && ++polls == limit)
                             break;
                     }
+                    else if (d == -1)
+                        break;     // now empty
+                    else
+                        polls = 0; // stolen; reset
                 }
                 else
                     break;
@@ -1145,188 +966,156 @@
         }
 
         /**
-         * Executes the given task and (some) remaining local tasks.
+         * If present, removes task from queue and executes it.
          */
-        final void runTask(ForkJoinTask<?> task) {
-            if (task != null) {
-                task.doExec();
-                if (config < 0)
-                    localPollAndExec();
-                else
-                    localPopAndExec();
-                int ns = ++nsteals;
-                ForkJoinWorkerThread thread = owner;
-                currentSteal = null;
-                if (ns < 0)           // collect on overflow
-                    transferStealCount(pool);
-                if (thread != null)
-                    thread.afterTopLevelExec();
-            }
-        }
-
-        /**
-         * Adds steal count to pool steal count if it exists, and resets.
-         */
-        final void transferStealCount(ForkJoinPool p) {
-            AuxState aux;
-            if (p != null && (aux = p.auxState) != null) {
-                long s = nsteals;
-                nsteals = 0;            // if negative, correct for overflow
-                if (s < 0) s = Integer.MAX_VALUE;
-                aux.lock();
-                try {
-                    aux.stealCount += s;
-                } finally {
-                    aux.unlock();
+        final void tryRemoveAndExec(ForkJoinTask<?> task) {
+            ForkJoinTask<?>[] wa; int s, wal;
+            if (base - (s = top) < 0 && // traverse from top
+                (wa = array) != null && (wal = wa.length) > 0) {
+                for (int m = wal - 1, ns = s - 1, i = ns; ; --i) {
+                    int index = i & m;
+                    ForkJoinTask<?> t = (ForkJoinTask<?>)
+                        QA.get(wa, index);
+                    if (t == null)
+                        break;
+                    else if (t == task) {
+                        if (QA.compareAndSet(wa, index, t, null)) {
+                            top = ns;   // safely shift down
+                            for (int j = i; j != ns; ++j) {
+                                ForkJoinTask<?> f;
+                                int pindex = (j + 1) & m;
+                                f = (ForkJoinTask<?>)QA.get(wa, pindex);
+                                QA.setVolatile(wa, pindex, null);
+                                int jindex = j & m;
+                                QA.setRelease(wa, jindex, f);
+                            }
+                            VarHandle.releaseFence();
+                            t.doExec();
+                        }
+                        break;
+                    }
                 }
             }
         }
 
         /**
-         * If present, removes from queue and executes the given task,
-         * or any other cancelled task. Used only by awaitJoin.
+         * Tries to steal and run tasks within the target's
+         * computation until done, not found, or limit exceeded.
          *
-         * @return true if queue empty and task not known to be done
+         * @param task root of CountedCompleter computation
+         * @param limit max runs, or zero for no limit
+         * @return task status on exit
          */
-        final boolean tryRemoveAndExec(ForkJoinTask<?> task) {
-            if (task != null && task.status >= 0) {
-                int b, s, d, al; ForkJoinTask<?>[] a;
-                while ((d = (b = base) - (s = top)) < 0 &&
-                       (a = array) != null && (al = a.length) > 0) {
-                    for (;;) {      // traverse from s to b
-                        int index = --s & (al - 1);
-                        long offset = (index << ASHIFT) + ABASE;
-                        ForkJoinTask<?> t = (ForkJoinTask<?>)
-                            U.getObjectVolatile(a, offset);
-                        if (t == null)
-                            break;                   // restart
-                        else if (t == task) {
-                            boolean removed = false;
-                            if (s + 1 == top) {      // pop
-                                if (U.compareAndSwapObject(a, offset, t, null)) {
-                                    top = s;
-                                    removed = true;
+        final int localHelpCC(CountedCompleter<?> task, int limit) {
+            int status = 0;
+            if (task != null && (status = task.status) >= 0) {
+                for (;;) {
+                    boolean help = false;
+                    int b = base, s = top, al; ForkJoinTask<?>[] a;
+                    if ((a = array) != null && b != s && (al = a.length) > 0) {
+                        int index = (al - 1) & (s - 1);
+                        ForkJoinTask<?> o = (ForkJoinTask<?>)
+                            QA.get(a, index);
+                        if (o instanceof CountedCompleter) {
+                            CountedCompleter<?> t = (CountedCompleter<?>)o;
+                            for (CountedCompleter<?> f = t;;) {
+                                if (f != task) {
+                                    if ((f = f.completer) == null) // try parent
+                                        break;
+                                }
+                                else {
+                                    if (QA.compareAndSet(a, index, t, null)) {
+                                        top = s - 1;
+                                        VarHandle.releaseFence();
+                                        t.doExec();
+                                        help = true;
+                                    }
+                                    break;
                                 }
                             }
-                            else if (base == b)      // replace with proxy
-                                removed = U.compareAndSwapObject(a, offset, t,
-                                                                 new EmptyTask());
-                            if (removed) {
-                                ForkJoinTask<?> ps = currentSteal;
-                                (currentSteal = task).doExec();
-                                currentSteal = ps;
-                            }
-                            break;
-                        }
-                        else if (t.status < 0 && s + 1 == top) {
-                            if (U.compareAndSwapObject(a, offset, t, null)) {
-                                top = s;
-                            }
-                            break;                  // was cancelled
-                        }
-                        else if (++d == 0) {
-                            if (base != b)          // rescan
-                                break;
-                            return false;
                         }
                     }
-                    if (task.status < 0)
-                        return false;
+                    if ((status = task.status) < 0 || !help ||
+                        (limit != 0 && --limit == 0))
+                        break;
                 }
             }
-            return true;
+            return status;
+        }
+
+        // Operations on shared queues
+
+        /**
+         * Tries to lock shared queue by CASing phase field.
+         */
+        final boolean tryLockSharedQueue() {
+            return PHASE.compareAndSet(this, 0, QLOCK);
         }
 
         /**
-         * Pops task if in the same CC computation as the given task,
-         * in either shared or owned mode. Used only by helpComplete.
+         * Shared version of tryUnpush.
          */
-        final CountedCompleter<?> popCC(CountedCompleter<?> task, int mode) {
-            int b = base, s = top, al; ForkJoinTask<?>[] a;
-            if ((a = array) != null && b != s && (al = a.length) > 0) {
-                int index = (al - 1) & (s - 1);
-                long offset = ((long)index << ASHIFT) + ABASE;
-                ForkJoinTask<?> o = (ForkJoinTask<?>)
-                    U.getObjectVolatile(a, offset);
-                if (o instanceof CountedCompleter) {
-                    CountedCompleter<?> t = (CountedCompleter<?>)o;
-                    for (CountedCompleter<?> r = t;;) {
-                        if (r == task) {
-                            if ((mode & IS_OWNED) == 0) {
-                                boolean popped = false;
-                                if (U.compareAndSwapInt(this, QLOCK, 0, 1)) {
-                                    if (top == s && array == a &&
-                                        U.compareAndSwapObject(a, offset,
-                                                               t, null)) {
-                                        popped = true;
-                                        top = s - 1;
-                                    }
-                                    U.putIntRelease(this, QLOCK, 0);
-                                    if (popped)
-                                        return t;
-                                }
-                            }
-                            else if (U.compareAndSwapObject(a, offset,
-                                                            t, null)) {
-                                top = s - 1;
-                                return t;
-                            }
-                            break;
-                        }
-                        else if ((r = r.completer) == null) // try parent
-                            break;
+        final boolean trySharedUnpush(ForkJoinTask<?> task) {
+            boolean popped = false;
+            int s = top - 1, al; ForkJoinTask<?>[] a;
+            if ((a = array) != null && (al = a.length) > 0) {
+                int index = (al - 1) & s;
+                ForkJoinTask<?> t = (ForkJoinTask<?>) QA.get(a, index);
+                if (t == task &&
+                    PHASE.compareAndSet(this, 0, QLOCK)) {
+                    if (top == s + 1 && array == a &&
+                        QA.compareAndSet(a, index, task, null)) {
+                        popped = true;
+                        top = s;
                     }
+                    PHASE.setRelease(this, 0);
                 }
             }
-            return null;
+            return popped;
         }
 
         /**
-         * Steals and runs a task in the same CC computation as the
-         * given task if one exists and can be taken without
-         * contention. Otherwise returns a checksum/control value for
-         * use by method helpComplete.
-         *
-         * @return 1 if successful, 2 if retryable (lost to another
-         * stealer), -1 if non-empty but no matching task found, else
-         * the base index, forced negative.
+         * Shared version of localHelpCC.
          */
-        final int pollAndExecCC(CountedCompleter<?> task) {
-            ForkJoinTask<?>[] a;
-            int b = base, s = top, al, h;
-            if ((a = array) != null && b != s && (al = a.length) > 0) {
-                int index = (al - 1) & b;
-                long offset = ((long)index << ASHIFT) + ABASE;
-                ForkJoinTask<?> o = (ForkJoinTask<?>)
-                    U.getObjectVolatile(a, offset);
-                if (o == null)
-                    h = 2;                      // retryable
-                else if (!(o instanceof CountedCompleter))
-                    h = -1;                     // unmatchable
-                else {
-                    CountedCompleter<?> t = (CountedCompleter<?>)o;
-                    for (CountedCompleter<?> r = t;;) {
-                        if (r == task) {
-                            if (b++ == base &&
-                                U.compareAndSwapObject(a, offset, t, null)) {
-                                base = b;
-                                t.doExec();
-                                h = 1;          // success
+        final int sharedHelpCC(CountedCompleter<?> task, int limit) {
+            int status = 0;
+            if (task != null && (status = task.status) >= 0) {
+                for (;;) {
+                    boolean help = false;
+                    int b = base, s = top, al; ForkJoinTask<?>[] a;
+                    if ((a = array) != null && b != s && (al = a.length) > 0) {
+                        int index = (al - 1) & (s - 1);
+                        ForkJoinTask<?> o = (ForkJoinTask<?>)
+                            QA.get(a, index);
+                        if (o instanceof CountedCompleter) {
+                            CountedCompleter<?> t = (CountedCompleter<?>)o;
+                            for (CountedCompleter<?> f = t;;) {
+                                if (f != task) {
+                                    if ((f = f.completer) == null)
+                                        break;
+                                }
+                                else {
+                                    if (PHASE.compareAndSet(this, 0, QLOCK)) {
+                                        if (top == s && array == a &&
+                                            QA.compareAndSet(a, index, t, null)) {
+                                            help = true;
+                                            top = s - 1;
+                                        }
+                                        PHASE.setRelease(this, 0);
+                                        if (help)
+                                            t.doExec();
+                                    }
+                                    break;
+                                }
                             }
-                            else
-                                h = 2;          // lost CAS
-                            break;
-                        }
-                        else if ((r = r.completer) == null) {
-                            h = -1;             // unmatched
-                            break;
                         }
                     }
+                    if ((status = task.status) < 0 || !help ||
+                        (limit != 0 && --limit == 0))
+                        break;
                 }
             }
-            else
-                h = b | Integer.MIN_VALUE;      // to sense movement on re-poll
-            return h;
+            return status;
         }
 
         /**
@@ -1334,27 +1123,18 @@
          */
         final boolean isApparentlyUnblocked() {
             Thread wt; Thread.State s;
-            return (scanState >= 0 &&
-                    (wt = owner) != null &&
+            return ((wt = owner) != null &&
                     (s = wt.getState()) != Thread.State.BLOCKED &&
                     s != Thread.State.WAITING &&
                     s != Thread.State.TIMED_WAITING);
         }
 
-        // Unsafe mechanics. Note that some are (and must be) the same as in FJP
-        private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-        private static final long QLOCK;
-        private static final int ABASE;
-        private static final int ASHIFT;
+        // VarHandle mechanics.
+        private static final VarHandle PHASE;
         static {
             try {
-                QLOCK = U.objectFieldOffset
-                    (WorkQueue.class.getDeclaredField("qlock"));
-                ABASE = U.arrayBaseOffset(ForkJoinTask[].class);
-                int scale = U.arrayIndexScale(ForkJoinTask[].class);
-                if ((scale & (scale - 1)) != 0)
-                    throw new Error("array index scale not a power of two");
-                ASHIFT = 31 - Integer.numberOfLeadingZeros(scale);
+                MethodHandles.Lookup l = MethodHandles.lookup();
+                PHASE = l.findVarHandle(WorkQueue.class, "phase", int.class);
             } catch (ReflectiveOperationException e) {
                 throw new Error(e);
             }
@@ -1372,7 +1152,7 @@
 
     /**
      * Permission required for callers of methods that may start or
-     * kill threads.  Also used as a static lock in tryInitialize.
+     * kill threads.
      */
     static final RuntimePermission modifyThreadPermission;
 
@@ -1413,18 +1193,15 @@
     // static configuration constants
 
     /**
-     * Initial timeout value (in milliseconds) for the thread
-     * triggering quiescence to park waiting for new work. On timeout,
-     * the thread will instead try to shrink the number of workers.
-     * The value should be large enough to avoid overly aggressive
-     * shrinkage during most transient stalls (long GCs etc).
+     * Default idle timeout value (in milliseconds) for the thread
+     * triggering quiescence to park waiting for new work.
      */
-    private static final long IDLE_TIMEOUT_MS = 2000L; // 2sec
+    private static final long DEFAULT_KEEPALIVE = 60000L;
 
     /**
-     * Tolerance for idle timeouts, to cope with timer undershoots.
+     * Undershoot tolerance (in milliseconds) for idle timeouts.
      */
-    private static final long TIMEOUT_SLOP_MS =   20L; // 20ms
+    private static final long TIMEOUT_SLOP = 20L;
 
     /**
      * The default value for COMMON_MAX_SPARES.  Overridable using the
@@ -1444,7 +1221,7 @@
 
     /*
      * Bits and masks for field ctl, packed with 4 16 bit subfields:
-     * AC: Number of active running workers minus target parallelism
+     * RC: Number of released (unqueued) workers minus target parallelism
      * TC: Number of total workers minus target parallelism
      * SS: version count and status of top waiting thread
      * ID: poolIndex of top of Treiber stack of waiters
@@ -1453,26 +1230,30 @@
      * (including version bits) as sp=(int)ctl.  The offsets of counts
      * by the target parallelism and the positionings of fields makes
      * it possible to perform the most common checks via sign tests of
-     * fields: When ac is negative, there are not enough active
+     * fields: When rc is negative, there are not enough unqueued
      * workers, when tc is negative, there are not enough total
      * workers.  When sp is non-zero, there are waiting workers.  To
      * deal with possibly negative fields, we use casts in and out of
      * "short" and/or signed shifts to maintain signedness.
      *
-     * Because it occupies uppermost bits, we can add one active count
-     * using getAndAddLong of AC_UNIT, rather than CAS, when returning
+     * Because it occupies uppermost bits, we can add one release count
+     * using getAndAddLong of RC_UNIT, rather than CAS, when returning
      * from a blocked join.  Other updates entail multiple subfields
      * and masking, requiring CAS.
+     *
+     * The limits packed in field "bounds" are also offset by the
+     * parallelism level to make them comparable to the ctl rc and tc
+     * fields.
      */
 
     // Lower and upper word masks
     private static final long SP_MASK    = 0xffffffffL;
     private static final long UC_MASK    = ~SP_MASK;
 
-    // Active counts
-    private static final int  AC_SHIFT   = 48;
-    private static final long AC_UNIT    = 0x0001L << AC_SHIFT;
-    private static final long AC_MASK    = 0xffffL << AC_SHIFT;
+    // Release counts
+    private static final int  RC_SHIFT   = 48;
+    private static final long RC_UNIT    = 0x0001L << RC_SHIFT;
+    private static final long RC_MASK    = 0xffffL << RC_SHIFT;
 
     // Total counts
     private static final int  TC_SHIFT   = 32;
@@ -1480,52 +1261,21 @@
     private static final long TC_MASK    = 0xffffL << TC_SHIFT;
     private static final long ADD_WORKER = 0x0001L << (TC_SHIFT + 15); // sign
 
-    // runState bits: SHUTDOWN must be negative, others arbitrary powers of two
-    private static final int  STARTED    = 1;
-    private static final int  STOP       = 1 << 1;
-    private static final int  TERMINATED = 1 << 2;
-    private static final int  SHUTDOWN   = 1 << 31;
+    // Instance fields
 
-    // Instance fields
-    volatile long ctl;                   // main pool control
-    volatile int runState;
-    final int config;                    // parallelism, mode
-    AuxState auxState;                   // lock, steal counts
-    volatile WorkQueue[] workQueues;     // main registry
-    final String workerNamePrefix;       // to create worker name string
+    volatile long stealCount;            // collects worker nsteals
+    final long keepAlive;                // milliseconds before dropping if idle
+    int indexSeed;                       // next worker index
+    final int bounds;                    // min, max threads packed as shorts
+    volatile int mode;                   // parallelism, runstate, queue mode
+    WorkQueue[] workQueues;              // main registry
+    final String workerNamePrefix;       // for worker thread string; sync lock
     final ForkJoinWorkerThreadFactory factory;
     final UncaughtExceptionHandler ueh;  // per-worker UEH
+    final Predicate<? super ForkJoinPool> saturate;
 
-    /**
-     * Instantiates fields upon first submission, or upon shutdown if
-     * no submissions. If checkTermination true, also responds to
-     * termination by external calls submitting tasks.
-     */
-    private void tryInitialize(boolean checkTermination) {
-        if (runState == 0) { // bootstrap by locking static field
-            int p = config & SMASK;
-            int n = (p > 1) ? p - 1 : 1; // ensure at least 2 slots
-            n |= n >>> 1;    // create workQueues array with size a power of two
-            n |= n >>> 2;
-            n |= n >>> 4;
-            n |= n >>> 8;
-            n |= n >>> 16;
-            n = ((n + 1) << 1) & SMASK;
-            AuxState aux = new AuxState();
-            WorkQueue[] ws = new WorkQueue[n];
-            synchronized (modifyThreadPermission) { // double-check
-                if (runState == 0) {
-                    workQueues = ws;
-                    auxState = aux;
-                    runState = STARTED;
-                }
-            }
-        }
-        if (checkTermination && runState < 0) {
-            tryTerminate(false, false); // help terminate
-            throw new RejectedExecutionException();
-        }
-    }
+    @jdk.internal.vm.annotation.Contended("fjpctl") // segregate
+    volatile long ctl;                   // main pool control
 
     // Creating, registering and deregistering workers
 
@@ -1534,18 +1284,14 @@
      * count has already been incremented as a reservation.  Invokes
      * deregisterWorker on any failure.
      *
-     * @param isSpare true if this is a spare thread
      * @return true if successful
      */
-    private boolean createWorker(boolean isSpare) {
+    private boolean createWorker() {
         ForkJoinWorkerThreadFactory fac = factory;
         Throwable ex = null;
         ForkJoinWorkerThread wt = null;
-        WorkQueue q;
         try {
             if (fac != null && (wt = fac.newThread(this)) != null) {
-                if (isSpare && (q = wt.workQueue) != null)
-                    q.config |= SPARE_WORKER;
                 wt.start();
                 return true;
             }
@@ -1566,10 +1312,10 @@
      */
     private void tryAddWorker(long c) {
         do {
-            long nc = ((AC_MASK & (c + AC_UNIT)) |
+            long nc = ((RC_MASK & (c + RC_UNIT)) |
                        (TC_MASK & (c + TC_UNIT)));
-            if (ctl == c && U.compareAndSwapLong(this, CTL, c, nc)) {
-                createWorker(false);
+            if (ctl == c && CTL.compareAndSet(this, c, nc)) {
+                createWorker();
                 break;
             }
         } while (((c = ctl) & ADD_WORKER) != 0L && (int)c == 0);
@@ -1584,41 +1330,57 @@
      */
     final WorkQueue registerWorker(ForkJoinWorkerThread wt) {
         UncaughtExceptionHandler handler;
-        AuxState aux;
-        wt.setDaemon(true);                           // configure thread
+        wt.setDaemon(true);                             // configure thread
         if ((handler = ueh) != null)
             wt.setUncaughtExceptionHandler(handler);
         WorkQueue w = new WorkQueue(this, wt);
-        int i = 0;                                    // assign a pool index
-        int mode = config & MODE_MASK;
-        if ((aux = auxState) != null) {
-            aux.lock();
-            try {
-                int s = (int)(aux.indexSeed += SEED_INCREMENT), n, m;
-                WorkQueue[] ws = workQueues;
-                if (ws != null && (n = ws.length) > 0) {
-                    i = (m = n - 1) & ((s << 1) | 1); // odd-numbered indices
-                    if (ws[i] != null) {              // collision
-                        int probes = 0;               // step by approx half n
-                        int step = (n <= 4) ? 2 : ((n >>> 1) & EVENMASK) + 2;
-                        while (ws[i = (i + step) & m] != null) {
-                            if (++probes >= n) {
-                                workQueues = ws = Arrays.copyOf(ws, n <<= 1);
-                                m = n - 1;
-                                probes = 0;
-                            }
+        int tid = 0;                                    // for thread name
+        int fifo = mode & FIFO;
+        String prefix = workerNamePrefix;
+        if (prefix != null) {
+            synchronized (prefix) {
+                WorkQueue[] ws = workQueues; int n;
+                int s = indexSeed += SEED_INCREMENT;
+                if (ws != null && (n = ws.length) > 1) {
+                    int m = n - 1;
+                    tid = s & m;
+                    int i = m & ((s << 1) | 1);         // odd-numbered indices
+                    for (int probes = n >>> 1;;) {      // find empty slot
+                        WorkQueue q;
+                        if ((q = ws[i]) == null || q.phase == QUIET)
+                            break;
+                        else if (--probes == 0) {
+                            i = n | 1;                  // resize below
+                            break;
                         }
+                        else
+                            i = (i + 2) & m;
                     }
-                    w.hint = s;                       // use as random seed
-                    w.config = i | mode;
-                    w.scanState = i | (s & 0x7fff0000); // random seq bits
-                    ws[i] = w;
+
+                    int id = i | fifo | (s & ~(SMASK | FIFO | DORMANT));
+                    w.phase = w.id = id;                // now publishable
+
+                    if (i < n)
+                        ws[i] = w;
+                    else {                              // expand array
+                        int an = n << 1;
+                        WorkQueue[] as = new WorkQueue[an];
+                        as[i] = w;
+                        int am = an - 1;
+                        for (int j = 0; j < n; ++j) {
+                            WorkQueue v;                // copy external queue
+                            if ((v = ws[j]) != null)    // position may change
+                                as[v.id & am & SQMASK] = v;
+                            if (++j >= n)
+                                break;
+                            as[j] = ws[j];              // copy worker
+                        }
+                        workQueues = as;
+                    }
                 }
-            } finally {
-                aux.unlock();
             }
+            wt.setName(prefix.concat(Integer.toString(tid)));
         }
-        wt.setName(workerNamePrefix.concat(Integer.toString(i >>> 1)));
         return w;
     }
 
@@ -1633,64 +1395,48 @@
      */
     final void deregisterWorker(ForkJoinWorkerThread wt, Throwable ex) {
         WorkQueue w = null;
+        int phase = 0;
         if (wt != null && (w = wt.workQueue) != null) {
-            AuxState aux; WorkQueue[] ws;          // remove index from array
-            int idx = w.config & SMASK;
-            int ns = w.nsteals;
-            if ((aux = auxState) != null) {
-                aux.lock();
-                try {
+            Object lock = workerNamePrefix;
+            long ns = (long)w.nsteals & 0xffffffffL;
+            int idx = w.id & SMASK;
+            if (lock != null) {
+                WorkQueue[] ws;                       // remove index from array
+                synchronized (lock) {
                     if ((ws = workQueues) != null && ws.length > idx &&
                         ws[idx] == w)
                         ws[idx] = null;
-                    aux.stealCount += ns;
-                } finally {
-                    aux.unlock();
+                    stealCount += ns;
                 }
             }
+            phase = w.phase;
         }
-        if (w == null || (w.config & UNREGISTERED) == 0) { // else pre-adjusted
+        if (phase != QUIET) {                         // else pre-adjusted
             long c;                                   // decrement counts
-            do {} while (!U.compareAndSwapLong
-                         (this, CTL, c = ctl, ((AC_MASK & (c - AC_UNIT)) |
-                                               (TC_MASK & (c - TC_UNIT)) |
-                                               (SP_MASK & c))));
+            do {} while (!CTL.weakCompareAndSetVolatile
+                         (this, c = ctl, ((RC_MASK & (c - RC_UNIT)) |
+                                          (TC_MASK & (c - TC_UNIT)) |
+                                          (SP_MASK & c))));
         }
-        if (w != null) {
-            w.currentSteal = null;
-            w.qlock = -1;                             // ensure set
+        if (w != null)
             w.cancelAll();                            // cancel remaining tasks
-        }
-        while (tryTerminate(false, false) >= 0) {     // possibly replace
-            WorkQueue[] ws; int wl, sp; long c;
-            if (w == null || w.array == null ||
-                (ws = workQueues) == null || (wl = ws.length) <= 0)
-                break;
-            else if ((sp = (int)(c = ctl)) != 0) {    // wake up replacement
-                if (tryRelease(c, ws[(wl - 1) & sp], AC_UNIT))
-                    break;
-            }
-            else if (ex != null && (c & ADD_WORKER) != 0L) {
-                tryAddWorker(c);                      // create replacement
-                break;
-            }
-            else                                      // don't need replacement
-                break;
-        }
+
+        if (!tryTerminate(false, false) &&            // possibly replace worker
+            w != null && w.array != null)             // avoid repeated failures
+            signalWork();
+
         if (ex == null)                               // help clean on way out
             ForkJoinTask.helpExpungeStaleExceptions();
         else                                          // rethrow
             ForkJoinTask.rethrow(ex);
     }
 
-    // Signalling
-
     /**
-     * Tries to create or activate a worker if too few are active.
+     * Tries to create or release a worker if too few are running.
      */
     final void signalWork() {
         for (;;) {
-            long c; int sp, i; WorkQueue v; WorkQueue[] ws;
+            long c; int sp; WorkQueue[] ws; int i; WorkQueue v;
             if ((c = ctl) >= 0L)                      // enough workers
                 break;
             else if ((sp = (int)c) == 0) {            // no idle workers
@@ -1705,12 +1451,14 @@
             else if ((v = ws[i]) == null)
                 break;                                // terminating
             else {
-                int ns = sp & ~UNSIGNALLED;
-                int vs = v.scanState;
-                long nc = (v.stackPred & SP_MASK) | (UC_MASK & (c + AC_UNIT));
-                if (sp == vs && U.compareAndSwapLong(this, CTL, c, nc)) {
-                    v.scanState = ns;
-                    LockSupport.unpark(v.parker);
+                int np = sp & ~UNSIGNALLED;
+                int vp = v.phase;
+                long nc = (v.stackPred & SP_MASK) | (UC_MASK & (c + RC_UNIT));
+                Thread vt = v.owner;
+                if (sp == vp && CTL.compareAndSet(this, c, nc)) {
+                    v.phase = np;
+                    if (v.source < 0)
+                        LockSupport.unpark(vt);
                     break;
                 }
             }
@@ -1718,442 +1466,183 @@
     }
 
     /**
-     * Signals and releases worker v if it is top of idle worker
-     * stack.  This performs a one-shot version of signalWork only if
-     * there is (apparently) at least one idle worker.
+     * Tries to decrement counts (sometimes implicitly) and possibly
+     * arrange for a compensating worker in preparation for blocking:
+     * If not all core workers yet exist, creates one, else if any are
+     * unreleased (possibly including caller) releases one, else if
+     * fewer than the minimum allowed number of workers running,
+     * checks to see that they are all active, and if so creates an
+     * extra worker unless over maximum limit and policy is to
+     * saturate.  Most of these steps can fail due to interference, in
+     * which case 0 is returned so caller will retry. A negative
+     * return value indicates that the caller doesn't need to
+     * re-adjust counts when later unblocked.
      *
-     * @param c incoming ctl value
-     * @param v if non-null, a worker
-     * @param inc the increment to active count (zero when compensating)
-     * @return true if successful
+     * @return 1: block then adjust, -1: block without adjust, 0: retry
      */
-    private boolean tryRelease(long c, WorkQueue v, long inc) {
-        int sp = (int)c, ns = sp & ~UNSIGNALLED;
-        if (v != null) {
-            int vs = v.scanState;
-            long nc = (v.stackPred & SP_MASK) | (UC_MASK & (c + inc));
-            if (sp == vs && U.compareAndSwapLong(this, CTL, c, nc)) {
-                v.scanState = ns;
-                LockSupport.unpark(v.parker);
-                return true;
+    private int tryCompensate(WorkQueue w) {
+        int t, n, sp;
+        long c = ctl;
+        WorkQueue[] ws = workQueues;
+        if ((t = (short)(c >>> TC_SHIFT)) >= 0) {
+            if (ws == null || (n = ws.length) <= 0 || w == null)
+                return 0;                        // disabled
+            else if ((sp = (int)c) != 0) {       // replace or release
+                WorkQueue v = ws[sp & (n - 1)];
+                int wp = w.phase;
+                long uc = UC_MASK & ((wp < 0) ? c + RC_UNIT : c);
+                int np = sp & ~UNSIGNALLED;
+                if (v != null) {
+                    int vp = v.phase;
+                    Thread vt = v.owner;
+                    long nc = ((long)v.stackPred & SP_MASK) | uc;
+                    if (vp == sp && CTL.compareAndSet(this, c, nc)) {
+                        v.phase = np;
+                        if (v.source < 0)
+                            LockSupport.unpark(vt);
+                        return (wp < 0) ? -1 : 1;
+                    }
+                }
+                return 0;
             }
-        }
-        return false;
-    }
-
-    /**
-     * With approx probability of a missed signal, tries (once) to
-     * reactivate worker w (or some other worker), failing if stale or
-     * known to be already active.
-     *
-     * @param w the worker
-     * @param ws the workQueue array to use
-     * @param r random seed
-     */
-    private void tryReactivate(WorkQueue w, WorkQueue[] ws, int r) {
-        long c; int sp, wl; WorkQueue v;
-        if ((sp = (int)(c = ctl)) != 0 && w != null &&
-            ws != null && (wl = ws.length) > 0 &&
-            ((sp ^ r) & SS_SEQ) == 0 &&
-            (v = ws[(wl - 1) & sp]) != null) {
-            long nc = (v.stackPred & SP_MASK) | (UC_MASK & (c + AC_UNIT));
-            int ns = sp & ~UNSIGNALLED;
-            if (w.scanState < 0 &&
-                v.scanState == sp &&
-                U.compareAndSwapLong(this, CTL, c, nc)) {
-                v.scanState = ns;
-                LockSupport.unpark(v.parker);
+            else if ((int)(c >> RC_SHIFT) -      // reduce parallelism
+                     (short)(bounds & SMASK) > 0) {
+                long nc = ((RC_MASK & (c - RC_UNIT)) | (~RC_MASK & c));
+                return CTL.compareAndSet(this, c, nc) ? 1 : 0;
             }
-        }
-    }
-
-    /**
-     * If worker w exists and is active, enqueues and sets status to inactive.
-     *
-     * @param w the worker
-     * @param ss current (non-negative) scanState
-     */
-    private void inactivate(WorkQueue w, int ss) {
-        int ns = (ss + SS_SEQ) | UNSIGNALLED;
-        long lc = ns & SP_MASK, nc, c;
-        if (w != null) {
-            w.scanState = ns;
-            do {
-                nc = lc | (UC_MASK & ((c = ctl) - AC_UNIT));
-                w.stackPred = (int)c;
-            } while (!U.compareAndSwapLong(this, CTL, c, nc));
-        }
-    }
-
-    /**
-     * Possibly blocks worker w waiting for signal, or returns
-     * negative status if the worker should terminate. May return
-     * without status change if multiple stale unparks and/or
-     * interrupts occur.
-     *
-     * @param w the calling worker
-     * @return negative if w should terminate
-     */
-    private int awaitWork(WorkQueue w) {
-        int stat = 0;
-        if (w != null && w.scanState < 0) {
-            long c = ctl;
-            if ((int)(c >> AC_SHIFT) + (config & SMASK) <= 0)
-                stat = timedAwaitWork(w, c);     // possibly quiescent
-            else if ((runState & STOP) != 0)
-                stat = w.qlock = -1;             // pool terminating
-            else if (w.scanState < 0) {
-                w.parker = Thread.currentThread();
-                if (w.scanState < 0)             // recheck after write
-                    LockSupport.park(this);
-                w.parker = null;
-                if ((runState & STOP) != 0)
-                    stat = w.qlock = -1;         // recheck
-                else if (w.scanState < 0)
-                    Thread.interrupted();        // clear status
-            }
-        }
-        return stat;
-    }
-
-    /**
-     * Possibly triggers shutdown and tries (once) to block worker
-     * when pool is (or may be) quiescent. Waits up to a duration
-     * determined by number of workers.  On timeout, if ctl has not
-     * changed, terminates the worker, which will in turn wake up
-     * another worker to possibly repeat this process.
-     *
-     * @param w the calling worker
-     * @return negative if w should terminate
-     */
-    private int timedAwaitWork(WorkQueue w, long c) {
-        int stat = 0;
-        int scale = 1 - (short)(c >>> TC_SHIFT);
-        long deadline = (((scale <= 0) ? 1 : scale) * IDLE_TIMEOUT_MS +
-                         System.currentTimeMillis());
-        if ((runState >= 0 || (stat = tryTerminate(false, false)) > 0) &&
-            w != null && w.scanState < 0) {
-            int ss; AuxState aux;
-            w.parker = Thread.currentThread();
-            if (w.scanState < 0)
-                LockSupport.parkUntil(this, deadline);
-            w.parker = null;
-            if ((runState & STOP) != 0)
-                stat = w.qlock = -1;         // pool terminating
-            else if ((ss = w.scanState) < 0 && !Thread.interrupted() &&
-                     (int)c == ss && (aux = auxState) != null && ctl == c &&
-                     deadline - System.currentTimeMillis() <= TIMEOUT_SLOP_MS) {
-                aux.lock();
-                try {                        // pre-deregister
-                    WorkQueue[] ws;
-                    int cfg = w.config, idx = cfg & SMASK;
-                    long nc = ((UC_MASK & (c - TC_UNIT)) |
-                               (SP_MASK & w.stackPred));
-                    if ((runState & STOP) == 0 &&
-                        (ws = workQueues) != null &&
-                        idx < ws.length && idx >= 0 && ws[idx] == w &&
-                        U.compareAndSwapLong(this, CTL, c, nc)) {
-                        ws[idx] = null;
-                        w.config = cfg | UNREGISTERED;
-                        stat = w.qlock = -1;
+            else {                               // validate
+                int md = mode, pc = md & SMASK, tc = pc + t, bc = 0;
+                boolean unstable = false;
+                for (int i = 1; i < n; i += 2) {
+                    WorkQueue q; Thread wt; Thread.State ts;
+                    if ((q = ws[i]) != null) {
+                        if (q.source == 0) {
+                            unstable = true;
+                            break;
+                        }
+                        else {
+                            --tc;
+                            if ((wt = q.owner) != null &&
+                                ((ts = wt.getState()) == Thread.State.BLOCKED ||
+                                 ts == Thread.State.WAITING))
+                                ++bc;            // worker is blocking
+                        }
                     }
-                } finally {
-                    aux.unlock();
+                }
+                if (unstable || tc != 0 || ctl != c)
+                    return 0;                    // inconsistent
+                else if (t + pc >= MAX_CAP || t >= (bounds >>> SWIDTH)) {
+                    Predicate<? super ForkJoinPool> sat;
+                    if ((sat = saturate) != null && sat.test(this))
+                        return -1;
+                    else if (bc < pc) {          // lagging
+                        Thread.yield();          // for retry spins
+                        return 0;
+                    }
+                    else
+                        throw new RejectedExecutionException(
+                            "Thread limit exceeded replacing blocked worker");
                 }
             }
         }
-        return stat;
-    }
 
-    /**
-     * If the given worker is a spare with no queued tasks, and there
-     * are enough existing workers, drops it from ctl counts and sets
-     * its state to terminated.
-     *
-     * @param w the calling worker -- must be a spare
-     * @return true if dropped (in which case it must not process more tasks)
-     */
-    private boolean tryDropSpare(WorkQueue w) {
-        if (w != null && w.isEmpty()) {           // no local tasks
-            long c; int sp, wl; WorkQueue[] ws; WorkQueue v;
-            while ((short)((c = ctl) >> TC_SHIFT) > 0 &&
-                   ((sp = (int)c) != 0 || (int)(c >> AC_SHIFT) > 0) &&
-                   (ws = workQueues) != null && (wl = ws.length) > 0) {
-                boolean dropped, canDrop;
-                if (sp == 0) {                    // no queued workers
-                    long nc = ((AC_MASK & (c - AC_UNIT)) |
-                               (TC_MASK & (c - TC_UNIT)) | (SP_MASK & c));
-                    dropped = U.compareAndSwapLong(this, CTL, c, nc);
-                }
-                else if (
-                    (v = ws[(wl - 1) & sp]) == null || v.scanState != sp)
-                    dropped = false;              // stale; retry
-                else {
-                    long nc = v.stackPred & SP_MASK;
-                    if (w == v || w.scanState >= 0) {
-                        canDrop = true;           // w unqueued or topmost
-                        nc |= ((AC_MASK & c) |    // ensure replacement
-                               (TC_MASK & (c - TC_UNIT)));
-                    }
-                    else {                        // w may be queued
-                        canDrop = false;          // help uncover
-                        nc |= ((AC_MASK & (c + AC_UNIT)) |
-                               (TC_MASK & c));
-                    }
-                    if (U.compareAndSwapLong(this, CTL, c, nc)) {
-                        v.scanState = sp & ~UNSIGNALLED;
-                        LockSupport.unpark(v.parker);
-                        dropped = canDrop;
-                    }
-                    else
-                        dropped = false;
-                }
-                if (dropped) {                    // pre-deregister
-                    int cfg = w.config, idx = cfg & SMASK;
-                    if (idx >= 0 && idx < ws.length && ws[idx] == w)
-                        ws[idx] = null;
-                    w.config = cfg | UNREGISTERED;
-                    w.qlock = -1;
-                    return true;
-                }
-            }
-        }
-        return false;
+        long nc = ((c + TC_UNIT) & TC_MASK) | (c & ~TC_MASK); // expand pool
+        return CTL.compareAndSet(this, c, nc) && createWorker() ? 1 : 0;
     }
 
     /**
      * Top-level runloop for workers, called by ForkJoinWorkerThread.run.
+     * See above for explanation.
      */
     final void runWorker(WorkQueue w) {
+        WorkQueue[] ws;
         w.growArray();                                  // allocate queue
-        int bound = (w.config & SPARE_WORKER) != 0 ? 0 : POLL_LIMIT;
-        long seed = w.hint * 0xdaba0b6eb09322e3L;       // initial random seed
-        if ((runState & STOP) == 0) {
-            for (long r = (seed == 0L) ? 1L : seed;;) { // ensure nonzero
-                if (bound == 0 && tryDropSpare(w))
+        int r = w.id ^ ThreadLocalRandom.nextSecondarySeed();
+        if (r == 0)                                     // initial nonzero seed
+            r = 1;
+        int lastSignalId = 0;                           // avoid unneeded signals
+        while ((ws = workQueues) != null) {
+            boolean nonempty = false;                   // scan
+            for (int n = ws.length, j = n, m = n - 1; j > 0; --j) {
+                WorkQueue q; int i, b, al; ForkJoinTask<?>[] a;
+                if ((i = r & m) >= 0 && i < n &&        // always true
+                    (q = ws[i]) != null && (b = q.base) - q.top < 0 &&
+                    (a = q.array) != null && (al = a.length) > 0) {
+                    int qid = q.id;                     // (never zero)
+                    int index = (al - 1) & b;
+                    ForkJoinTask<?> t = (ForkJoinTask<?>)
+                        QA.getAcquire(a, index);
+                    if (t != null && b++ == q.base &&
+                        QA.compareAndSet(a, index, t, null)) {
+                        if ((q.base = b) - q.top < 0 && qid != lastSignalId)
+                            signalWork();               // propagate signal
+                        w.source = lastSignalId = qid;
+                        t.doExec();
+                        if ((w.id & FIFO) != 0)         // run remaining locals
+                            w.localPollAndExec(POLL_LIMIT);
+                        else
+                            w.localPopAndExec(POLL_LIMIT);
+                        ForkJoinWorkerThread thread = w.owner;
+                        ++w.nsteals;
+                        w.source = 0;                   // now idle
+                        if (thread != null)
+                            thread.afterTopLevelExec();
+                    }
+                    nonempty = true;
+                }
+                else if (nonempty)
                     break;
-                // high bits of prev seed for step; current low bits for idx
-                int step = (int)(r >>> 48) | 1;
-                r ^= r >>> 12; r ^= r << 25; r ^= r >>> 27; // xorshift
-                if (scan(w, bound, step, (int)r) < 0 && awaitWork(w) < 0)
-                    break;
+                else
+                    ++r;
             }
-        }
-    }
 
-    // Scanning for tasks
-
-    /**
-     * Repeatedly scans for and tries to steal and execute (via
-     * workQueue.runTask) a queued task. Each scan traverses queues in
-     * pseudorandom permutation. Upon finding a non-empty queue, makes
-     * at most the given bound attempts to re-poll (fewer if
-     * contended) on the same queue before returning (impossible
-     * scanState value) 0 to restart scan. Else returns after at least
-     * 1 and at most 32 full scans.
-     *
-     * @param w the worker (via its WorkQueue)
-     * @param bound repoll bound as bitmask (0 if spare)
-     * @param step (circular) index increment per iteration (must be odd)
-     * @param r a random seed for origin index
-     * @return negative if should await signal
-     */
-    private int scan(WorkQueue w, int bound, int step, int r) {
-        int stat = 0, wl; WorkQueue[] ws;
-        if ((ws = workQueues) != null && w != null && (wl = ws.length) > 0) {
-            for (int m = wl - 1,
-                     origin = m & r, idx = origin,
-                     npolls = 0,
-                     ss = w.scanState;;) {         // negative if inactive
-                WorkQueue q; ForkJoinTask<?>[] a; int b, al;
-                if ((q = ws[idx]) != null && (b = q.base) - q.top < 0 &&
-                    (a = q.array) != null && (al = a.length) > 0) {
-                    int index = (al - 1) & b;
-                    long offset = ((long)index << ASHIFT) + ABASE;
-                    ForkJoinTask<?> t = (ForkJoinTask<?>)
-                        U.getObjectVolatile(a, offset);
-                    if (t == null)
-                        break;                     // empty or busy
-                    else if (b++ != q.base)
-                        break;                     // busy
-                    else if (ss < 0) {
-                        tryReactivate(w, ws, r);
-                        break;                     // retry upon rescan
-                    }
-                    else if (!U.compareAndSwapObject(a, offset, t, null))
-                        break;                     // contended
-                    else {
-                        q.base = b;
-                        w.currentSteal = t;
-                        if (b != q.top)            // propagate signal
-                            signalWork();
-                        w.runTask(t);
-                        if (++npolls > bound)
+            if (nonempty) {                             // move (xorshift)
+                r ^= r << 13; r ^= r >>> 17; r ^= r << 5;
+            }
+            else {
+                int phase;
+                lastSignalId = 0;                       // clear for next scan
+                if ((phase = w.phase) >= 0) {           // enqueue
+                    int np = w.phase = (phase + SS_SEQ) | UNSIGNALLED;
+                    long c, nc;
+                    do {
+                        w.stackPred = (int)(c = ctl);
+                        nc = ((c - RC_UNIT) & UC_MASK) | (SP_MASK & np);
+                    } while (!CTL.weakCompareAndSetVolatile(this, c, nc));
+                }
+                else {                                  // already queued
+                    int pred = w.stackPred;
+                    w.source = DORMANT;                 // enable signal
+                    for (int steps = 0;;) {
+                        int md, rc; long c;
+                        if (w.phase >= 0) {
+                            w.source = 0;
                             break;
-                    }
-                }
-                else if (npolls != 0)              // rescan
-                    break;
-                else if ((idx = (idx + step) & m) == origin) {
-                    if (ss < 0) {                  // await signal
-                        stat = ss;
-                        break;
-                    }
-                    else if (r >= 0) {
-                        inactivate(w, ss);
-                        break;
-                    }
-                    else
-                        r <<= 1;                   // at most 31 rescans
-                }
-            }
-        }
-        return stat;
-    }
-
-    // Joining tasks
-
-    /**
-     * Tries to steal and run tasks within the target's computation.
-     * Uses a variant of the top-level algorithm, restricted to tasks
-     * with the given task as ancestor: It prefers taking and running
-     * eligible tasks popped from the worker's own queue (via
-     * popCC). Otherwise it scans others, randomly moving on
-     * contention or execution, deciding to give up based on a
-     * checksum (via return codes from pollAndExecCC). The maxTasks
-     * argument supports external usages; internal calls use zero,
-     * allowing unbounded steps (external calls trap non-positive
-     * values).
-     *
-     * @param w caller
-     * @param maxTasks if non-zero, the maximum number of other tasks to run
-     * @return task status on exit
-     */
-    final int helpComplete(WorkQueue w, CountedCompleter<?> task,
-                           int maxTasks) {
-        WorkQueue[] ws; int s = 0, wl;
-        if ((ws = workQueues) != null && (wl = ws.length) > 1 &&
-            task != null && w != null) {
-            for (int m = wl - 1,
-                     mode = w.config,
-                     r = ~mode,                  // scanning seed
-                     origin = r & m, k = origin, // first queue to scan
-                     step = 3,                   // first scan step
-                     h = 1,                      // 1:ran, >1:contended, <0:hash
-                     oldSum = 0, checkSum = 0;;) {
-                CountedCompleter<?> p; WorkQueue q; int i;
-                if ((s = task.status) < 0)
-                    break;
-                if (h == 1 && (p = w.popCC(task, mode)) != null) {
-                    p.doExec();                  // run local task
-                    if (maxTasks != 0 && --maxTasks == 0)
-                        break;
-                    origin = k;                  // reset
-                    oldSum = checkSum = 0;
-                }
-                else {                           // poll other worker queues
-                    if ((i = k | 1) < 0 || i > m || (q = ws[i]) == null)
-                        h = 0;
-                    else if ((h = q.pollAndExecCC(task)) < 0)
-                        checkSum += h;
-                    if (h > 0) {
-                        if (h == 1 && maxTasks != 0 && --maxTasks == 0)
-                            break;
-                        step = (r >>> 16) | 3;
-                        r ^= r << 13; r ^= r >>> 17; r ^= r << 5; // xorshift
-                        k = origin = r & m;      // move and restart
-                        oldSum = checkSum = 0;
-                    }
-                    else if ((k = (k + step) & m) == origin) {
-                        if (oldSum == (oldSum = checkSum))
-                            break;
-                        checkSum = 0;
-                    }
-                }
-            }
-        }
-        return s;
-    }
-
-    /**
-     * Tries to locate and execute tasks for a stealer of the given
-     * task, or in turn one of its stealers. Traces currentSteal ->
-     * currentJoin links looking for a thread working on a descendant
-     * of the given task and with a non-empty queue to steal back and
-     * execute tasks from. The first call to this method upon a
-     * waiting join will often entail scanning/search, (which is OK
-     * because the joiner has nothing better to do), but this method
-     * leaves hints in workers to speed up subsequent calls.
-     *
-     * @param w caller
-     * @param task the task to join
-     */
-    private void helpStealer(WorkQueue w, ForkJoinTask<?> task) {
-        if (task != null && w != null) {
-            ForkJoinTask<?> ps = w.currentSteal;
-            WorkQueue[] ws; int wl, oldSum = 0;
-            outer: while (w.tryRemoveAndExec(task) && task.status >= 0 &&
-                          (ws = workQueues) != null && (wl = ws.length) > 0) {
-                ForkJoinTask<?> subtask;
-                int m = wl - 1, checkSum = 0;          // for stability check
-                WorkQueue j = w, v;                    // v is subtask stealer
-                descent: for (subtask = task; subtask.status >= 0; ) {
-                    for (int h = j.hint | 1, k = 0, i;;) {
-                        if ((v = ws[i = (h + (k << 1)) & m]) != null) {
-                            if (v.currentSteal == subtask) {
-                                j.hint = i;
-                                break;
-                            }
-                            checkSum += v.base;
                         }
-                        if (++k > m)                   // can't find stealer
-                            break outer;
-                    }
-
-                    for (;;) {                         // help v or descend
-                        ForkJoinTask<?>[] a; int b, al;
-                        if (subtask.status < 0)        // too late to help
-                            break descent;
-                        checkSum += (b = v.base);
-                        ForkJoinTask<?> next = v.currentJoin;
-                        ForkJoinTask<?> t = null;
-                        if ((a = v.array) != null && (al = a.length) > 0) {
-                            int index = (al - 1) & b;
-                            long offset = ((long)index << ASHIFT) + ABASE;
-                            t = (ForkJoinTask<?>)
-                                U.getObjectVolatile(a, offset);
-                            if (t != null && b++ == v.base) {
-                                if (j.currentJoin != subtask ||
-                                    v.currentSteal != subtask ||
-                                    subtask.status < 0)
-                                    break descent;     // stale
-                                if (U.compareAndSwapObject(a, offset, t, null)) {
-                                    v.base = b;
-                                    w.currentSteal = t;
-                                    for (int top = w.top;;) {
-                                        t.doExec();    // help
-                                        w.currentSteal = ps;
-                                        if (task.status < 0)
-                                            break outer;
-                                        if (w.top == top)
-                                            break;     // run local tasks
-                                        if ((t = w.pop()) == null)
-                                            break descent;
-                                        w.currentSteal = t;
-                                    }
+                        else if ((md = mode) < 0)       // shutting down
+                            return;
+                        else if ((rc = ((md & SMASK) +  // possibly quiescent
+                                        (int)((c = ctl) >> RC_SHIFT))) <= 0 &&
+                                 (md & SHUTDOWN) != 0 &&
+                                 tryTerminate(false, false))
+                            return;                     // help terminate
+                        else if ((++steps & 1) == 0)
+                            Thread.interrupted();       // clear between parks
+                        else if (rc <= 0 && pred != 0 && phase == (int)c) {
+                            long d = keepAlive + System.currentTimeMillis();
+                            LockSupport.parkUntil(this, d);
+                            if (ctl == c &&
+                                d - System.currentTimeMillis() <= TIMEOUT_SLOP) {
+                                long nc = ((UC_MASK & (c - TC_UNIT)) |
+                                           (SP_MASK & pred));
+                                if (CTL.compareAndSet(this, c, nc)) {
+                                    w.phase = QUIET;
+                                    return;             // drop on timeout
                                 }
                             }
                         }
-                        if (t == null && b == v.base && b - v.top >= 0) {
-                            if ((subtask = next) == null) {  // try to descend
-                                if (next == v.currentJoin &&
-                                    oldSum == (oldSum = checkSum))
-                                    break outer;
-                                break descent;
-                            }
-                            j = v;
-                            break;
-                        }
+                        else
+                            LockSupport.park(this);
                     }
                 }
             }
@@ -2161,59 +1650,10 @@
     }
 
     /**
-     * Tries to decrement active count (sometimes implicitly) and
-     * possibly release or create a compensating worker in preparation
-     * for blocking. Returns false (retryable by caller), on
-     * contention, detected staleness, instability, or termination.
-     *
-     * @param w caller
-     */
-    private boolean tryCompensate(WorkQueue w) {
-        boolean canBlock; int wl;
-        long c = ctl;
-        WorkQueue[] ws = workQueues;
-        int pc = config & SMASK;
-        int ac = pc + (int)(c >> AC_SHIFT);
-        int tc = pc + (short)(c >> TC_SHIFT);
-        if (w == null || w.qlock < 0 || pc == 0 ||  // terminating or disabled
-            ws == null || (wl = ws.length) <= 0)
-            canBlock = false;
-        else {
-            int m = wl - 1, sp;
-            boolean busy = true;                    // validate ac
-            for (int i = 0; i <= m; ++i) {
-                int k; WorkQueue v;
-                if ((k = (i << 1) | 1) <= m && k >= 0 && (v = ws[k]) != null &&
-                    v.scanState >= 0 && v.currentSteal == null) {
-                    busy = false;
-                    break;
-                }
-            }
-            if (!busy || ctl != c)
-                canBlock = false;                   // unstable or stale
-            else if ((sp = (int)c) != 0)            // release idle worker
-                canBlock = tryRelease(c, ws[m & sp], 0L);
-            else if (tc >= pc && ac > 1 && w.isEmpty()) {
-                long nc = ((AC_MASK & (c - AC_UNIT)) |
-                           (~AC_MASK & c));         // uncompensated
-                canBlock = U.compareAndSwapLong(this, CTL, c, nc);
-            }
-            else if (tc >= MAX_CAP ||
-                     (this == common && tc >= pc + COMMON_MAX_SPARES))
-                throw new RejectedExecutionException(
-                    "Thread limit exceeded replacing blocked worker");
-            else {                                  // similar to tryAddWorker
-                boolean isSpare = (tc >= pc);
-                long nc = (AC_MASK & c) | (TC_MASK & (c + TC_UNIT));
-                canBlock = (U.compareAndSwapLong(this, CTL, c, nc) &&
-                            createWorker(isSpare)); // throws on exception
-            }
-        }
-        return canBlock;
-    }
-
-    /**
      * Helps and/or blocks until the given task is done or timeout.
+     * First tries locally helping, then scans other queues for a task
+     * produced by one of w's stealers; compensating and blocking if
+     * none are found (rescanning if tryCompensate fails).
      *
      * @param w caller
      * @param task the task
@@ -2222,61 +1662,166 @@
      */
     final int awaitJoin(WorkQueue w, ForkJoinTask<?> task, long deadline) {
         int s = 0;
-        if (w != null) {
-            ForkJoinTask<?> prevJoin = w.currentJoin;
-            if (task != null && (s = task.status) >= 0) {
-                w.currentJoin = task;
-                CountedCompleter<?> cc = (task instanceof CountedCompleter) ?
-                    (CountedCompleter<?>)task : null;
-                for (;;) {
-                    if (cc != null)
-                        helpComplete(w, cc, 0);
-                    else
-                        helpStealer(w, task);
-                    if ((s = task.status) < 0)
-                        break;
-                    long ms, ns;
+        if (w != null && task != null &&               // otherwise returns 0
+            (!(task instanceof CountedCompleter) ||    // completers get local help first
+             (s = w.localHelpCC((CountedCompleter<?>)task, 0)) >= 0)) { // negative: nothing left to await
+            w.tryRemoveAndExec(task);
+            int src = w.source, id = w.id;             // src is restored after each helped task
+            s = task.status;
+            while (s >= 0) {                           // until status goes negative or deadline passes
+                WorkQueue[] ws;
+                boolean nonempty = false;              // set when a candidate queue is seen
+                int r = ThreadLocalRandom.nextSecondarySeed() | 1; // odd indices
+                if ((ws = workQueues) != null) {       // scan for matching id
+                    for (int n = ws.length, m = n - 1, j = -n; j < n; j += 2) { // n probes, stride 2
+                        WorkQueue q; int i, b, al; ForkJoinTask<?>[] a;
+                        if ((i = (r + j) & m) >= 0 && i < n &&
+                            (q = ws[i]) != null && q.source == id && // q's source names w: a stealer of w
+                            (b = q.base) - q.top < 0 &&              // q appears nonempty
+                            (a = q.array) != null && (al = a.length) > 0) {
+                            int qid = q.id;
+                            int index = (al - 1) & b;  // slot of base; assumes power-of-two length -- TODO confirm
+                            ForkJoinTask<?> t = (ForkJoinTask<?>)
+                                QA.getAcquire(a, index);
+                            if (t != null && b++ == q.base && id == q.source && // revalidate before claiming
+                                QA.compareAndSet(a, index, t, null)) {          // claim the slot
+                                q.base = b;
+                                w.source = qid;        // record victim while running t
+                                t.doExec();
+                                w.source = src;
+                            }
+                            nonempty = true;           // candidate seen (even if claim failed): rescan
+                            break;
+                        }
+                    }
+                }
+                if ((s = task.status) < 0)
+                    break;
+                else if (!nonempty) {                  // nothing to help with: consider blocking
+                    long ms, ns; int block;
                     if (deadline == 0L)
-                        ms = 0L;
+                        ms = 0L;                       // untimed
                     else if ((ns = deadline - System.nanoTime()) <= 0L)
-                        break;
+                        break;                         // timeout
                     else if ((ms = TimeUnit.NANOSECONDS.toMillis(ns)) <= 0L)
-                        ms = 1L;
-                    if (tryCompensate(w)) {
+                        ms = 1L;                       // avoid 0 for timed wait
+                    if ((block = tryCompensate(w)) != 0) { // 0: do not block; loop rescans
                         task.internalWait(ms);
-                        U.getAndAddLong(this, CTL, AC_UNIT);
+                        CTL.getAndAdd(this, (block > 0) ? RC_UNIT : 0L); // re-add RC_UNIT only when block > 0
                     }
-                    if ((s = task.status) < 0)
-                        break;
+                    s = task.status;                   // loop condition rechecks completion
                 }
-                w.currentJoin = prevJoin;
             }
         }
         return s;
     }
 
-    // Specialized scanning
+    /**
+     * Runs tasks until {@code isQuiescent()}. Rather than blocking when no
+     * tasks are found, marks w QUIET and rescans until every scanned queue
+     * is both empty and marked QUIET; w.source is restored before returning.
+     */
+    final void helpQuiescePool(WorkQueue w) {
+        int prevSrc = w.source, fifo = w.id & FIFO;   // prevSrc is written back on exit
+        for (int source = prevSrc, released = -1;;) { // -1 until known
+            WorkQueue[] ws;
+            if (fifo != 0)                            // FIFO bit of id selects poll vs pop
+                w.localPollAndExec(0);
+            else
+                w.localPopAndExec(0);
+            if (released == -1 && w.phase >= 0)       // stays -1 (no ctl updates) while phase < 0
+                released = 1;                         // 1: RC_UNIT not subtracted; 0: subtracted below
+            boolean quiet = true, empty = true;       // both may be cleared by the scan
+            int r = ThreadLocalRandom.nextSecondarySeed();
+            if ((ws = workQueues) != null) {
+                for (int n = ws.length, j = n, m = n - 1; j > 0; --j) { // n probes from a random origin
+                    WorkQueue q; int i, b, al; ForkJoinTask<?>[] a;
+                    if ((i = (r - j) & m) >= 0 && i < n && (q = ws[i]) != null) {
+                        if ((b = q.base) - q.top < 0 &&           // q appears nonempty
+                            (a = q.array) != null && (al = a.length) > 0) {
+                            int qid = q.id;         // NOTE(review): never read; q.id is re-read below
+                            if (released == 0) {    // increment: undo the earlier decrement
+                                released = 1;
+                                CTL.getAndAdd(this, RC_UNIT);
+                            }
+                            int index = (al - 1) & b; // slot of base; assumes power-of-two length -- TODO confirm
+                            ForkJoinTask<?> t = (ForkJoinTask<?>)
+                                QA.getAcquire(a, index);
+                            if (t != null && b++ == q.base &&          // base unchanged since read
+                                QA.compareAndSet(a, index, t, null)) { // claim the slot
+                                q.base = b;
+                                w.source = source = q.id;     // record victim while running t
+                                t.doExec();
+                                w.source = source = prevSrc;
+                            }
+                            quiet = empty = false;  // rescan even if the claim failed
+                            break;
+                        }
+                        else if ((q.source & QUIET) == 0)
+                            quiet = false;          // q has no pollable task but lacks QUIET
+                    }
+                }
+            }
+            if (quiet) {                             // no scanned queue had tasks or lacked QUIET
+                if (released == 0)
+                    CTL.getAndAdd(this, RC_UNIT);    // undo the earlier decrement before leaving
+                w.source = prevSrc;
+                break;
+            }
+            else if (empty) {                        // found nothing, but others are not yet quiet
+                if (source != QUIET)
+                    w.source = source = QUIET;       // mark w so other scanners can see it
+                if (released == 1) {                 // decrement (once, until re-incremented)
+                    released = 0;
+                    CTL.getAndAdd(this, RC_MASK & -RC_UNIT); // presumably -1 in the RC field -- TODO confirm
+                }
+            }
+        }
+    }
 
     /**
-     * Returns a (probably) non-empty steal queue, if one is found
-     * during a scan, else null.  This method must be retried by
-     * caller if, by the time it tries to use the queue, it is empty.
+     * Scans for and returns a polled task, if available.
+     * Used only for untracked polls.
+     *
+     * @param submissionsOnly if true, only scan submission queues
      */
-    private WorkQueue findNonEmptyStealQueue() {
-        WorkQueue[] ws; int wl;  // one-shot version of scan loop
-        int r = ThreadLocalRandom.nextSecondarySeed();
-        if ((ws = workQueues) != null && (wl = ws.length) > 0) {
-            int m = wl - 1, origin = r & m;
+    private ForkJoinTask<?> pollScan(boolean submissionsOnly) {
+        WorkQueue[] ws; int n;
+        rescan: while ((mode & STOP) == 0 && (ws = workQueues) != null &&
+                      (n = ws.length) > 0) {
+            int m = n - 1;
+            int r = ThreadLocalRandom.nextSecondarySeed();
+            int h = r >>> 16;
+            int origin, step;
+            if (submissionsOnly) {
+                origin = (r & ~1) & m;         // even indices and steps
+                step = (h & ~1) | 2;
+            }
+            else {
+                origin = r & m;
+                step = h | 1;
+            }
             for (int k = origin, oldSum = 0, checkSum = 0;;) {
-                WorkQueue q; int b;
+                WorkQueue q; int b, al; ForkJoinTask<?>[] a;
                 if ((q = ws[k]) != null) {
-                    if ((b = q.base) - q.top < 0)
-                        return q;
-                    checkSum += b;
+                    checkSum += b = q.base;
+                    if (b - q.top < 0 &&
+                        (a = q.array) != null && (al = a.length) > 0) {
+                        int index = (al - 1) & b;
+                        ForkJoinTask<?> t = (ForkJoinTask<?>)
+                            QA.getAcquire(a, index);
+                        if (t != null && b++ == q.base &&
+                            QA.compareAndSet(a, index, t, null)) {
+                            q.base = b;
+                            return t;
+                        }
+                        else
+                            break; // restart
+                    }
                 }
-                if ((k = (k + 1) & m) == origin) {
+                if ((k = (k + step) & m) == origin) {
                     if (oldSum == (oldSum = checkSum))
-                        break;
+                        break rescan;
                     checkSum = 0;
                 }
             }
@@ -2285,58 +1830,160 @@
     }
 
     /**
-     * Runs tasks until {@code isQuiescent()}. We piggyback on
-     * active count ctl maintenance, but rather than blocking
-     * when tasks cannot be found, we rescan until all others cannot
-     * find tasks either.
-     */
-    final void helpQuiescePool(WorkQueue w) {
-        ForkJoinTask<?> ps = w.currentSteal; // save context
-        int wc = w.config;
-        for (boolean active = true;;) {
-            long c; WorkQueue q; ForkJoinTask<?> t;
-            if (wc >= 0 && (t = w.pop()) != null) { // run locals if LIFO
-                (w.currentSteal = t).doExec();
-                w.currentSteal = ps;
-            }
-            else if ((q = findNonEmptyStealQueue()) != null) {
-                if (!active) {      // re-establish active count
-                    active = true;
-                    U.getAndAddLong(this, CTL, AC_UNIT);
-                }
-                if ((t = q.pollAt(q.base)) != null) {
-                    (w.currentSteal = t).doExec();
-                    w.currentSteal = ps;
-                    if (++w.nsteals < 0)
-                        w.transferStealCount(this);
-                }
-            }
-            else if (active) {      // decrement active count without queuing
-                long nc = (AC_MASK & ((c = ctl) - AC_UNIT)) | (~AC_MASK & c);
-                if (U.compareAndSwapLong(this, CTL, c, nc))
-                    active = false;
-            }
-            else if ((int)((c = ctl) >> AC_SHIFT) + (config & SMASK) <= 0 &&
-                     U.compareAndSwapLong(this, CTL, c, c + AC_UNIT))
-                break;
-        }
-    }
-
-    /**
      * Gets and removes a local or stolen task for the given worker.
      *
      * @return a task, if available
      */
     final ForkJoinTask<?> nextTaskFor(WorkQueue w) {
-        for (ForkJoinTask<?> t;;) {
-            WorkQueue q;
-            if ((t = w.nextLocalTask()) != null)
-                return t;
-            if ((q = findNonEmptyStealQueue()) == null)
-                return null;
-            if ((t = q.pollAt(q.base)) != null)
-                return t;
+        ForkJoinTask<?> t;
+        if (w != null &&
+            (t = (w.id & FIFO) != 0 ? w.poll() : w.pop()) != null)
+            return t;
+        else
+            return pollScan(false);
+    }
+
+    // External operations
+
+    /**
+     * Adds the given task to the submission queue at the submitter's
+     * current probe index, creating one if absent or moving on if contended.
+     *
+     * @param task the task. Caller must ensure non-null.
+     */
+    final void externalPush(ForkJoinTask<?> task) {
+        int r;                                // initialize caller's probe
+        if ((r = ThreadLocalRandom.getProbe()) == 0) {
+            ThreadLocalRandom.localInit();
+            r = ThreadLocalRandom.getProbe();
         }
+        for (;;) {
+            int md = mode, n;
+            WorkQueue[] ws = workQueues;
+            if ((md & SHUTDOWN) != 0 || ws == null || (n = ws.length) <= 0)
+                throw new RejectedExecutionException();
+            else {
+                WorkQueue q;
+                boolean push = false, grow = false;
+                if ((q = ws[(n - 1) & r & SQMASK]) == null) {
+                    Object lock = workerNamePrefix;
+                    int qid = (r | QUIET) & ~(FIFO | OWNED);
+                    q = new WorkQueue(this, null);
+                    q.id = qid;
+                    q.source = QUIET;
+                    q.phase = QLOCK;          // lock queue
+                    if (lock != null) {
+                        synchronized (lock) { // lock pool to install
+                            int i;
+                            if ((ws = workQueues) != null &&
+                                (n = ws.length) > 0 &&
+                                ws[i = qid & (n - 1) & SQMASK] == null) {
+                                ws[i] = q;
+                                push = grow = true;
+                            }
+                        }
+                    }
+                }
+                else if (q.tryLockSharedQueue()) {
+                    int b = q.base, s = q.top, al, d; ForkJoinTask<?>[] a;
+                    if ((a = q.array) != null && (al = a.length) > 0 &&
+                        al - 1 + (d = b - s) > 0) {
+                        a[(al - 1) & s] = task;
+                        q.top = s + 1;        // relaxed writes OK here
+                        q.phase = 0;
+                        if (d < 0 && q.base - s < -1)
+                            break;            // no signal needed
+                    }
+                    else
+                        grow = true;
+                    push = true;
+                }
+                if (push) {
+                    if (grow) {
+                        try {
+                            q.growArray();
+                            int s = q.top, al; ForkJoinTask<?>[] a;
+                            if ((a = q.array) != null && (al = a.length) > 0) {
+                                a[(al - 1) & s] = task;
+                                q.top = s + 1;
+                            }
+                        } finally {
+                            q.phase = 0;
+                        }
+                    }
+                    signalWork();
+                    break;
+                }
+                else                          // move if busy
+                    r = ThreadLocalRandom.advanceProbe(r);
+            }
+        }
+    }
+
+    /**
+     * Pushes a possibly-external submission.
+     */
+    private <T> ForkJoinTask<T> externalSubmit(ForkJoinTask<T> task) {
+        Thread t; ForkJoinWorkerThread w; WorkQueue q;
+        if (task == null)
+            throw new NullPointerException();
+        if (((t = Thread.currentThread()) instanceof ForkJoinWorkerThread) &&
+            (w = (ForkJoinWorkerThread)t).pool == this &&
+            (q = w.workQueue) != null)
+            q.push(task);
+        else
+            externalPush(task);
+        return task;
+    }
+
+    /**
+     * Returns common pool queue for an external thread.
+     */
+    static WorkQueue commonSubmitterQueue() {
+        ForkJoinPool p = common;
+        int r = ThreadLocalRandom.getProbe();
+        WorkQueue[] ws; int n;
+        return (p != null && (ws = p.workQueues) != null &&
+                (n = ws.length) > 0) ?
+            ws[(n - 1) & r & SQMASK] : null;
+    }
+
+    /**
+     * Performs tryUnpush for an external submitter.
+     */
+    final boolean tryExternalUnpush(ForkJoinTask<?> task) {
+        int r = ThreadLocalRandom.getProbe();
+        WorkQueue[] ws; WorkQueue w; int n;
+        return ((ws = workQueues) != null &&
+                (n = ws.length) > 0 &&
+                (w = ws[(n - 1) & r & SQMASK]) != null &&
+                w.trySharedUnpush(task));
+    }
+
+    /**
+     * Performs helpComplete for an external submitter.
+     */
+    final int externalHelpComplete(CountedCompleter<?> task, int maxTasks) {
+        int r = ThreadLocalRandom.getProbe();
+        WorkQueue[] ws; WorkQueue w; int n;
+        return ((ws = workQueues) != null && (n = ws.length) > 0 &&
+                (w = ws[(n - 1) & r & SQMASK]) != null) ?
+            w.sharedHelpCC(task, maxTasks) : 0;
+    }
+
+    /**
+     * Tries to steal and run tasks within the target's computation.
+     * The maxTasks argument supports external usages; internal calls
+     * use zero, allowing unbounded steps (external calls trap
+     * non-positive values).
+     *
+     * @param w caller
+     * @param maxTasks if non-zero, the maximum number of other tasks to run
+     * @return task status on exit
+     */
+    final int helpComplete(WorkQueue w, CountedCompleter<?> task,
+                           int maxTasks) {
+        return (w == null) ? 0 : w.localHelpCC(task, maxTasks);
     }
 
     /**
@@ -2383,10 +2030,12 @@
      */
     static int getSurplusQueuedTaskCount() {
         Thread t; ForkJoinWorkerThread wt; ForkJoinPool pool; WorkQueue q;
-        if ((t = Thread.currentThread()) instanceof ForkJoinWorkerThread) {
-            int p = (pool = (wt = (ForkJoinWorkerThread)t).pool).config & SMASK;
-            int n = (q = wt.workQueue).top - q.base;
-            int a = (int)(pool.ctl >> AC_SHIFT) + p;
+        if (((t = Thread.currentThread()) instanceof ForkJoinWorkerThread) &&
+            (pool = (wt = (ForkJoinWorkerThread)t).pool) != null &&
+            (q = wt.workQueue) != null) {
+            int p = pool.mode & SMASK;
+            int a = p + (int)(pool.ctl >> RC_SHIFT);
+            int n = q.top - q.base;
             return n - (a > (p >>>= 1) ? 0 :
                         a > (p >>>= 1) ? 1 :
                         a > (p >>>= 1) ? 2 :
@@ -2396,7 +2045,7 @@
         return 0;
     }
 
-    //  Termination
+    // Termination
 
     /**
      * Possibly initiates and/or completes termination.
@@ -2404,198 +2053,86 @@
      * @param now if true, unconditionally terminate, else only
      * if no work and no active workers
      * @param enable if true, terminate when next possible
-     * @return -1: terminating/terminated, 0: retry if internal caller, else 1
+     * @return true if terminating or terminated
      */
-    private int tryTerminate(boolean now, boolean enable) {
-        int rs; // 3 phases: try to set SHUTDOWN, then STOP, then TERMINATED
+    private boolean tryTerminate(boolean now, boolean enable) {
+        int md; // 3 phases: try to set SHUTDOWN, then STOP, then TERMINATED
 
-        while ((rs = runState) >= 0) {
+        while (((md = mode) & SHUTDOWN) == 0) {
             if (!enable || this == common)        // cannot shutdown
-                return 1;
-            else if (rs == 0)
-                tryInitialize(false);             // ensure initialized
+                return false;
             else
-                U.compareAndSwapInt(this, RUNSTATE, rs, rs | SHUTDOWN);
+                MODE.compareAndSet(this, md, md | SHUTDOWN);
         }
 
-        if ((rs & STOP) == 0) {                   // try to initiate termination
-            if (!now) {                           // check quiescence
+        while (((md = mode) & STOP) == 0) {       // try to initiate termination
+            if (!now) {                           // check if quiescent & empty
                 for (long oldSum = 0L;;) {        // repeat until stable
-                    WorkQueue[] ws; WorkQueue w; int b;
+                    boolean running = false;
                     long checkSum = ctl;
-                    if ((int)(checkSum >> AC_SHIFT) + (config & SMASK) > 0)
-                        return 0;                 // still active workers
-                    if ((ws = workQueues) != null) {
+                    WorkQueue[] ws = workQueues;
+                    if ((md & SMASK) + (int)(checkSum >> RC_SHIFT) > 0)
+                        running = true;
+                    else if (ws != null) {
+                        WorkQueue w; int b;
                         for (int i = 0; i < ws.length; ++i) {
                             if ((w = ws[i]) != null) {
-                                checkSum += (b = w.base);
-                                if (w.currentSteal != null || b != w.top)
-                                    return 0;     // retry if internal caller
+                                checkSum += (b = w.base) + w.id;
+                                if (b != w.top ||
+                                    ((i & 1) == 1 && w.source >= 0)) {
+                                    running = true;
+                                    break;
+                                }
                             }
                         }
                     }
-                    if (oldSum == (oldSum = checkSum))
+                    if (((md = mode) & STOP) != 0)
+                        break;                 // already triggered
+                    else if (running)
+                        return false;
+                    else if (workQueues == ws && oldSum == (oldSum = checkSum))
                         break;
                 }
             }
-            do {} while (!U.compareAndSwapInt(this, RUNSTATE,
-                                              rs = runState, rs | STOP));
+            if ((md & STOP) == 0)
+                MODE.compareAndSet(this, md, md | STOP);
         }
 
-        for (long oldSum = 0L;;) {                // repeat until stable
-            WorkQueue[] ws; WorkQueue w; ForkJoinWorkerThread wt;
-            long checkSum = ctl;
-            if ((ws = workQueues) != null) {      // help terminate others
-                for (int i = 0; i < ws.length; ++i) {
-                    if ((w = ws[i]) != null) {
-                        w.cancelAll();            // clear queues
-                        checkSum += w.base;
-                        if (w.qlock >= 0) {
-                            w.qlock = -1;         // racy set OK
-                            if ((wt = w.owner) != null) {
+        while (((md = mode) & TERMINATED) == 0) { // help terminate others
+            for (long oldSum = 0L;;) {            // repeat until stable
+                WorkQueue[] ws; WorkQueue w;
+                long checkSum = ctl;
+                if ((ws = workQueues) != null) {
+                    for (int i = 0; i < ws.length; ++i) {
+                        if ((w = ws[i]) != null) {
+                            ForkJoinWorkerThread wt = w.owner;
+                            w.cancelAll();        // clear queues
+                            if (wt != null) {
                                 try {             // unblock join or park
                                     wt.interrupt();
                                 } catch (Throwable ignore) {
                                 }
                             }
+                            checkSum += w.base + w.id;
                         }
                     }
                 }
+                if (((md = mode) & TERMINATED) != 0 ||
+                    (workQueues == ws && oldSum == (oldSum = checkSum)))
+                    break;
             }
-            if (oldSum == (oldSum = checkSum))
+            if ((md & TERMINATED) != 0)
                 break;
-        }
-
-        if ((short)(ctl >>> TC_SHIFT) + (config & SMASK) <= 0) {
-            runState = (STARTED | SHUTDOWN | STOP | TERMINATED); // final write
-            synchronized (this) {
-                notifyAll();                      // for awaitTermination
+            else if ((md & SMASK) + (short)(ctl >>> TC_SHIFT) > 0)
+                break;
+            else if (MODE.compareAndSet(this, md, md | TERMINATED)) {
+                synchronized (this) {
+                    notifyAll();                  // for awaitTermination
+                }
+                break;
             }
         }
-
-        return -1;
-    }
-
-    // External operations
-
-    /**
-     * Constructs and tries to install a new external queue,
-     * failing if the workQueues array already has a queue at
-     * the given index.
-     *
-     * @param index the index of the new queue
-     */
-    private void tryCreateExternalQueue(int index) {
-        AuxState aux;
-        if ((aux = auxState) != null && index >= 0) {
-            WorkQueue q = new WorkQueue(this, null);
-            q.config = index;
-            q.scanState = ~UNSIGNALLED;
-            q.qlock = 1;                   // lock queue
-            boolean installed = false;
-            aux.lock();
-            try {                          // lock pool to install
-                WorkQueue[] ws;
-                if ((ws = workQueues) != null && index < ws.length &&
-                    ws[index] == null) {
-                    ws[index] = q;         // else throw away
-                    installed = true;
-                }
-            } finally {
-                aux.unlock();
-            }
-            if (installed) {
-                try {
-                    q.growArray();
-                } finally {
-                    q.qlock = 0;
-                }
-            }
-        }
-    }
-
-    /**
-     * Adds the given task to a submission queue at submitter's
-     * current queue. Also performs secondary initialization upon the
-     * first submission of the first task to the pool, and detects
-     * first submission by an external thread and creates a new shared
-     * queue if the one at index if empty or contended.
-     *
-     * @param task the task. Caller must ensure non-null.
-     */
-    final void externalPush(ForkJoinTask<?> task) {
-        int r;                            // initialize caller's probe
-        if ((r = ThreadLocalRandom.getProbe()) == 0) {
-            ThreadLocalRandom.localInit();
-            r = ThreadLocalRandom.getProbe();
-        }
-        for (;;) {
-            WorkQueue q; int wl, k, stat;
-            int rs = runState;
-            WorkQueue[] ws = workQueues;
-            if (rs <= 0 || ws == null || (wl = ws.length) <= 0)
-                tryInitialize(true);
-            else if ((q = ws[k = (wl - 1) & r & SQMASK]) == null)
-                tryCreateExternalQueue(k);
-            else if ((stat = q.sharedPush(task)) < 0)
-                break;
-            else if (stat == 0) {
-                signalWork();
-                break;
-            }
-            else                          // move if busy
-                r = ThreadLocalRandom.advanceProbe(r);
-        }
-    }
-
-    /**
-     * Pushes a possibly-external submission.
-     */
-    private <T> ForkJoinTask<T> externalSubmit(ForkJoinTask<T> task) {
-        Thread t; ForkJoinWorkerThread w; WorkQueue q;
-        if (task == null)
-            throw new NullPointerException();
-        if (((t = Thread.currentThread()) instanceof ForkJoinWorkerThread) &&
-            (w = (ForkJoinWorkerThread)t).pool == this &&
-            (q = w.workQueue) != null)
-            q.push(task);
-        else
-            externalPush(task);
-        return task;
-    }
-
-    /**
-     * Returns common pool queue for an external thread.
-     */
-    static WorkQueue commonSubmitterQueue() {
-        ForkJoinPool p = common;
-        int r = ThreadLocalRandom.getProbe();
-        WorkQueue[] ws; int wl;
-        return (p != null && (ws = p.workQueues) != null &&
-                (wl = ws.length) > 0) ?
-            ws[(wl - 1) & r & SQMASK] : null;
-    }
-
-    /**
-     * Performs tryUnpush for an external submitter.
-     */
-    final boolean tryExternalUnpush(ForkJoinTask<?> task) {
-        int r = ThreadLocalRandom.getProbe();
-        WorkQueue[] ws; WorkQueue w; int wl;
-        return ((ws = workQueues) != null &&
-                (wl = ws.length) > 0 &&
-                (w = ws[(wl - 1) & r & SQMASK]) != null &&
-                w.trySharedUnpush(task));
-    }
-
-    /**
-     * Performs helpComplete for an external submitter.
-     */
-    final int externalHelpComplete(CountedCompleter<?> task, int maxTasks) {
-        WorkQueue[] ws; int wl;
-        int r = ThreadLocalRandom.getProbe();
-        return ((ws = workQueues) != null && (wl = ws.length) > 0) ?
-            helpComplete(ws[(wl - 1) & r & SQMASK], task, maxTasks) : 0;
+        return true;
     }
 
     // Exported methods
@@ -2604,9 +2141,10 @@
 
     /**
      * Creates a {@code ForkJoinPool} with parallelism equal to {@link
-     * java.lang.Runtime#availableProcessors}, using the {@linkplain
-     * #defaultForkJoinWorkerThreadFactory default thread factory},
-     * no UncaughtExceptionHandler, and non-async LIFO processing mode.
+     * java.lang.Runtime#availableProcessors}, using defaults for all
+     * other parameters (see {@link #ForkJoinPool(int,
+     * ForkJoinWorkerThreadFactory, UncaughtExceptionHandler, boolean,
+     * int, int, int, Predicate, long, TimeUnit)}).
      *
      * @throws SecurityException if a security manager exists and
      *         the caller is not permitted to modify threads
@@ -2615,14 +2153,16 @@
      */
     public ForkJoinPool() {
         this(Math.min(MAX_CAP, Runtime.getRuntime().availableProcessors()),
-             defaultForkJoinWorkerThreadFactory, null, false);
+             defaultForkJoinWorkerThreadFactory, null, false,
+             0, MAX_CAP, 1, null, DEFAULT_KEEPALIVE, TimeUnit.MILLISECONDS);
     }
 
     /**
      * Creates a {@code ForkJoinPool} with the indicated parallelism
-     * level, the {@linkplain
-     * #defaultForkJoinWorkerThreadFactory default thread factory},
-     * no UncaughtExceptionHandler, and non-async LIFO processing mode.
+     * level, using defaults for all other parameters (see {@link
+     * #ForkJoinPool(int, ForkJoinWorkerThreadFactory,
+     * UncaughtExceptionHandler, boolean, int, int, int, Predicate,
+     * long, TimeUnit)}).
      *
      * @param parallelism the parallelism level
      * @throws IllegalArgumentException if parallelism less than or
@@ -2633,11 +2173,15 @@
      *         java.lang.RuntimePermission}{@code ("modifyThread")}
      */
     public ForkJoinPool(int parallelism) {
-        this(parallelism, defaultForkJoinWorkerThreadFactory, null, false);
+        this(parallelism, defaultForkJoinWorkerThreadFactory, null, false,
+             0, MAX_CAP, 1, null, DEFAULT_KEEPALIVE, TimeUnit.MILLISECONDS);
     }
 
     /**
-     * Creates a {@code ForkJoinPool} with the given parameters.
+     * Creates a {@code ForkJoinPool} with the given parameters (using
+     * defaults for others -- see {@link #ForkJoinPool(int,
+     * ForkJoinWorkerThreadFactory, UncaughtExceptionHandler, boolean,
+     * int, int, int, Predicate, long, TimeUnit)}).
      *
      * @param parallelism the parallelism level. For default value,
      * use {@link java.lang.Runtime#availableProcessors}.
@@ -2664,43 +2208,185 @@
                         ForkJoinWorkerThreadFactory factory,
                         UncaughtExceptionHandler handler,
                         boolean asyncMode) {
-        this(checkParallelism(parallelism),
-             checkFactory(factory),
-             handler,
-             asyncMode ? FIFO_QUEUE : LIFO_QUEUE,
-             "ForkJoinPool-" + nextPoolId() + "-worker-");
+        this(parallelism, factory, handler, asyncMode,
+             0, MAX_CAP, 1, null, DEFAULT_KEEPALIVE, TimeUnit.MILLISECONDS);
+    }
+
+    /**
+     * Creates a {@code ForkJoinPool} with the given parameters.
+     *
+     * @param parallelism the parallelism level. For default value,
+     * use {@link java.lang.Runtime#availableProcessors}.
+     *
+     * @param factory the factory for creating new threads. For
+     * default value, use {@link #defaultForkJoinWorkerThreadFactory}.
+     *
+     * @param handler the handler for internal worker threads that
+     * terminate due to unrecoverable errors encountered while
+     * executing tasks. For default value, use {@code null}.
+     *
+     * @param asyncMode if true, establishes local first-in-first-out
+     * scheduling mode for forked tasks that are never joined. This
+     * mode may be more appropriate than default locally stack-based
+     * mode in applications in which worker threads only process
+     * event-style asynchronous tasks.  For default value, use {@code
+     * false}.
+     *
+     * @param corePoolSize the number of threads to keep in the pool
+     * (unless timed out after an elapsed keep-alive). Normally (and
+     * by default) this is the same value as the parallelism level,
+     * but may be set to a larger value to reduce dynamic overhead if
+     * tasks regularly block. Using a smaller value (for example
+     * {@code 0}) has the same effect as the default.
+     *
+     * @param maximumPoolSize the maximum number of threads allowed.
+     * When the maximum is reached, attempts to replace blocked
+     * threads fail.  (However, because creation and termination of
+     * different threads may overlap, and may be managed by the given
+     * thread factory, this value may be transiently exceeded.)  To
+     * arrange the same value as is used by default for the common
+     * pool, use {@code 256} plus the {@code parallelism} level. (By
+     * default, the common pool allows a maximum of 256 spare
+     * threads.)  Using a value (for example {@code
+     * Integer.MAX_VALUE}) larger than the implementation's total
+     * thread limit has the same effect as using this limit (which is
+     * the default).
+     *
+     * @param minimumRunnable the minimum allowed number of core
+     * threads not blocked by a join or {@link ManagedBlocker}.  To
+     * ensure progress, when too few unblocked threads exist and
+     * unexecuted tasks may exist, new threads are constructed, up to
+     * the given maximumPoolSize.  For the default value, use {@code
+     * 1}, which ensures liveness.  A larger value might improve
+     * throughput in the presence of blocked activities, but might
+     * not, due to increased overhead.  A value of zero may be
+     * acceptable when submitted tasks cannot have dependencies
+     * requiring additional threads.
+     *
+     * @param saturate if non-null, a predicate invoked upon attempts
+     * to create more than the maximum total allowed threads.  By
+     * default, when a thread is about to block on a join or {@link
+     * ManagedBlocker}, but cannot be replaced because the
+     * maximumPoolSize would be exceeded, a {@link
+     * RejectedExecutionException} is thrown.  But if this predicate
+     * returns {@code true}, then no exception is thrown, so the pool
+     * continues to operate with fewer than the target number of
+     * runnable threads, which might not ensure progress.
+     *
+     * @param keepAliveTime the elapsed time since last use before
+     * a thread is terminated (and then later replaced if needed).
+     * For the default value, use {@code 60, TimeUnit.SECONDS}.
+     *
+     * @param unit the time unit for the {@code keepAliveTime} argument
+     *
+     * @throws IllegalArgumentException if parallelism is less than or
+     *         equal to zero, or is greater than implementation limit,
+     *         or if maximumPoolSize is less than parallelism,
+     *         or if the keepAliveTime is less than or equal to zero.
+     * @throws NullPointerException if the factory is null
+     * @throws SecurityException if a security manager exists and
+     *         the caller is not permitted to modify threads
+     *         because it does not hold {@link
+     *         java.lang.RuntimePermission}{@code ("modifyThread")}
+     * @since 9
+     */
+    public ForkJoinPool(int parallelism,
+                        ForkJoinWorkerThreadFactory factory,
+                        UncaughtExceptionHandler handler,
+                        boolean asyncMode,
+                        int corePoolSize,
+                        int maximumPoolSize,
+                        int minimumRunnable,
+                        Predicate<? super ForkJoinPool> saturate,
+                        long keepAliveTime,
+                        TimeUnit unit) {
+        // check, encode, pack parameters
+        if (parallelism <= 0 || parallelism > MAX_CAP ||
+            maximumPoolSize < parallelism || keepAliveTime <= 0L)
+            throw new IllegalArgumentException();
+        if (factory == null)
+            throw new NullPointerException();
+        long ms = Math.max(unit.toMillis(keepAliveTime), TIMEOUT_SLOP);
+
+        String prefix = "ForkJoinPool-" + nextPoolId() + "-worker-";
+        int corep = Math.min(Math.max(corePoolSize, parallelism), MAX_CAP);
+        long c = ((((long)(-corep)       << TC_SHIFT) & TC_MASK) |
+                  (((long)(-parallelism) << RC_SHIFT) & RC_MASK));
+        int m = parallelism | (asyncMode ? FIFO : 0);
+        int maxSpares = Math.min(maximumPoolSize, MAX_CAP) - parallelism;
+        int minAvail = Math.min(Math.max(minimumRunnable, 0), MAX_CAP);
+        int b = ((minAvail - parallelism) & SMASK) | (maxSpares << SWIDTH);
+        int n = (parallelism > 1) ? parallelism - 1 : 1; // at least 2 slots
+        n |= n >>> 1; n |= n >>> 2; n |= n >>> 4; n |= n >>> 8; n |= n >>> 16;
+        n = (n + 1) << 1; // power of two, including space for submission queues
+
+        this.workQueues = new WorkQueue[n];
+        this.workerNamePrefix = prefix;
+        this.factory = factory;
+        this.ueh = handler;
+        this.saturate = saturate;
+        this.keepAlive = ms;
+        this.bounds = b;
+        this.mode = m;
+        this.ctl = c;
         checkPermission();
     }
 
-    private static int checkParallelism(int parallelism) {
-        if (parallelism <= 0 || parallelism > MAX_CAP)
-            throw new IllegalArgumentException();
-        return parallelism;
-    }
+    /**
+     * Constructor for common pool using parameters possibly
+     * overridden by system properties.
+     */
+    @SuppressWarnings("deprecation") // Class.newInstance
+    private ForkJoinPool(byte forCommonPoolOnly) {
+        int parallelism = -1;
+        ForkJoinWorkerThreadFactory fac = null;
+        UncaughtExceptionHandler handler = null;
+        try {  // ignore exceptions in accessing/parsing properties
+            String pp = System.getProperty
+                ("java.util.concurrent.ForkJoinPool.common.parallelism");
+            String fp = System.getProperty
+                ("java.util.concurrent.ForkJoinPool.common.threadFactory");
+            String hp = System.getProperty
+                ("java.util.concurrent.ForkJoinPool.common.exceptionHandler");
+            if (pp != null)
+                parallelism = Integer.parseInt(pp);
+            if (fp != null)
+                fac = ((ForkJoinWorkerThreadFactory)ClassLoader.
+                           getSystemClassLoader().loadClass(fp).newInstance());
+            if (hp != null)
+                handler = ((UncaughtExceptionHandler)ClassLoader.
+                           getSystemClassLoader().loadClass(hp).newInstance());
+        } catch (Exception ignore) {
+        }
 
-    private static ForkJoinWorkerThreadFactory checkFactory
-        (ForkJoinWorkerThreadFactory factory) {
-        if (factory == null)
-            throw new NullPointerException();
-        return factory;
-    }
+        if (fac == null) {
+            if (System.getSecurityManager() == null)
+                fac = defaultForkJoinWorkerThreadFactory;
+            else // use security-managed default
+                fac = new InnocuousForkJoinWorkerThreadFactory();
+        }
+        if (parallelism < 0 && // default 1 less than #cores
+            (parallelism = Runtime.getRuntime().availableProcessors() - 1) <= 0)
+            parallelism = 1;
+        if (parallelism > MAX_CAP)
+            parallelism = MAX_CAP;
 
-    /**
-     * Creates a {@code ForkJoinPool} with the given parameters, without
-     * any security checks or parameter validation.  Invoked directly by
-     * makeCommonPool.
-     */
-    private ForkJoinPool(int parallelism,
-                         ForkJoinWorkerThreadFactory factory,
-                         UncaughtExceptionHandler handler,
-                         int mode,
-                         String workerNamePrefix) {
-        this.workerNamePrefix = workerNamePrefix;
-        this.factory = factory;
+        long c = ((((long)(-parallelism) << TC_SHIFT) & TC_MASK) |
+                  (((long)(-parallelism) << RC_SHIFT) & RC_MASK));
+        int b = ((1 - parallelism) & SMASK) | (COMMON_MAX_SPARES << SWIDTH);
+        int n = (parallelism > 1) ? parallelism - 1 : 1;
+        n |= n >>> 1; n |= n >>> 2; n |= n >>> 4; n |= n >>> 8; n |= n >>> 16;
+        n = (n + 1) << 1;
+
+        this.workQueues = new WorkQueue[n];
+        this.workerNamePrefix = "ForkJoinPool.commonPool-worker-";
+        this.factory = fac;
         this.ueh = handler;
-        this.config = (parallelism & SMASK) | mode;
-        long np = (long)(-parallelism); // offset ctl counts
-        this.ctl = ((np << AC_SHIFT) & AC_MASK) | ((np << TC_SHIFT) & TC_MASK);
+        this.saturate = null;
+        this.keepAlive = DEFAULT_KEEPALIVE;
+        this.bounds = b;
+        this.mode = parallelism;
+        this.ctl = c;
     }
 
     /**
@@ -2876,8 +2562,8 @@
      * @return the targeted parallelism level of this pool
      */
     public int getParallelism() {
-        int par;
-        return ((par = config & SMASK) > 0) ? par : 1;
+        int par = mode & SMASK;
+        return (par > 0) ? par : 1;
     }
 
     /**
@@ -2899,7 +2585,7 @@
      * @return the number of worker threads
      */
     public int getPoolSize() {
-        return (config & SMASK) + (short)(ctl >>> TC_SHIFT);
+        return ((mode & SMASK) + (short)(ctl >>> TC_SHIFT));
     }
 
     /**
@@ -2909,7 +2595,7 @@
      * @return {@code true} if this pool uses async mode
      */
     public boolean getAsyncMode() {
-        return (config & FIFO_QUEUE) != 0;
+        return (mode & FIFO) != 0;
     }
 
     /**
@@ -2940,7 +2626,7 @@
      * @return the number of active threads
      */
     public int getActiveThreadCount() {
-        int r = (config & SMASK) + (int)(ctl >> AC_SHIFT);
+        int r = (mode & SMASK) + (int)(ctl >> RC_SHIFT);
         return (r <= 0) ? 0 : r; // suppress momentarily negative values
     }
 
@@ -2956,7 +2642,30 @@
      * @return {@code true} if all threads are currently idle
      */
     public boolean isQuiescent() {
-        return (config & SMASK) + (int)(ctl >> AC_SHIFT) <= 0;
+        for (;;) {
+            long c = ctl;
+            int md = mode, pc = md & SMASK;
+            int tc = pc + (short)(c >>> TC_SHIFT);
+            int rc = pc + (int)(c >> RC_SHIFT);
+            if ((md & (STOP | TERMINATED)) != 0)
+                return true;
+            else if (rc > 0)
+                return false;
+            else {
+                WorkQueue[] ws; WorkQueue v;
+                if ((ws = workQueues) != null) {
+                    for (int i = 1; i < ws.length; i += 2) {
+                        if ((v = ws[i]) != null) {
+                            if ((v.source & QUIET) == 0)
+                                return false;
+                            --tc;
+                        }
+                    }
+                }
+                if (tc == 0 && ctl == c)
+                    return true;
+            }
+        }
     }
 
     /**
@@ -2971,13 +2680,12 @@
      * @return the number of steals
      */
     public long getStealCount() {
-        AuxState sc = auxState;
-        long count = (sc == null) ? 0L : sc.stealCount;
+        long count = stealCount;
         WorkQueue[] ws; WorkQueue w;
         if ((ws = workQueues) != null) {
             for (int i = 1; i < ws.length; i += 2) {
                 if ((w = ws[i]) != null)
-                    count += w.nsteals;
+                    count += (long)w.nsteals & 0xffffffffL;
             }
         }
         return count;
@@ -3049,15 +2757,7 @@
      * @return the next submission, or {@code null} if none
      */
     protected ForkJoinTask<?> pollSubmission() {
-        WorkQueue[] ws; int wl; WorkQueue w; ForkJoinTask<?> t;
-        int r = ThreadLocalRandom.nextSecondarySeed();
-        if ((ws = workQueues) != null && (wl = ws.length) > 0) {
-            for (int m = wl - 1, i = 0; i < wl; ++i) {
-                if ((w = ws[(i << 1) & m]) != null && (t = w.poll()) != null)
-                    return t;
-            }
-        }
-        return null;
+        return pollScan(true);
     }
 
     /**
@@ -3103,9 +2803,7 @@
     public String toString() {
         // Use a single pass through workQueues to collect counts
         long qt = 0L, qs = 0L; int rc = 0;
-        AuxState sc = auxState;
-        long st = (sc == null) ? 0L : sc.stealCount;
-        long c = ctl;
+        long st = stealCount;
         WorkQueue[] ws; WorkQueue w;
         if ((ws = workQueues) != null) {
             for (int i = 0; i < ws.length; ++i) {
@@ -3115,22 +2813,24 @@
                         qs += size;
                     else {
                         qt += size;
-                        st += w.nsteals;
+                        st += (long)w.nsteals & 0xffffffffL;
                         if (w.isApparentlyUnblocked())
                             ++rc;
                     }
                 }
             }
         }
-        int pc = (config & SMASK);
+
+        int md = mode;
+        int pc = (md & SMASK);
+        long c = ctl;
         int tc = pc + (short)(c >>> TC_SHIFT);
-        int ac = pc + (int)(c >> AC_SHIFT);
+        int ac = pc + (int)(c >> RC_SHIFT);
         if (ac < 0) // ignore transient negative
             ac = 0;
-        int rs = runState;
-        String level = ((rs & TERMINATED) != 0 ? "Terminated" :
-                        (rs & STOP)       != 0 ? "Terminating" :
-                        (rs & SHUTDOWN)   != 0 ? "Shutting down" :
+        String level = ((md & TERMINATED) != 0 ? "Terminated" :
+                        (md & STOP)       != 0 ? "Terminating" :
+                        (md & SHUTDOWN)   != 0 ? "Shutting down" :
                         "Running");
         return super.toString() +
             "[" + level +
@@ -3193,7 +2893,7 @@
      * @return {@code true} if all tasks have completed following shut down
      */
     public boolean isTerminated() {
-        return (runState & TERMINATED) != 0;
+        return (mode & TERMINATED) != 0;
     }
 
     /**
@@ -3210,8 +2910,8 @@
      * @return {@code true} if terminating but not yet terminated
      */
     public boolean isTerminating() {
-        int rs = runState;
-        return (rs & STOP) != 0 && (rs & TERMINATED) == 0;
+        int md = mode;
+        return (md & STOP) != 0 && (md & TERMINATED) == 0;
     }
 
     /**
@@ -3220,7 +2920,7 @@
      * @return {@code true} if this pool has been shut down
      */
     public boolean isShutdown() {
-        return (runState & SHUTDOWN) != 0;
+        return (mode & SHUTDOWN) != 0;
     }
 
     /**
@@ -3284,30 +2984,19 @@
             helpQuiescePool(wt.workQueue);
             return true;
         }
-        long startTime = System.nanoTime();
-        WorkQueue[] ws;
-        int r = 0, wl;
-        boolean found = true;
-        while (!isQuiescent() && (ws = workQueues) != null &&
-               (wl = ws.length) > 0) {
-            if (!found) {
-                if ((System.nanoTime() - startTime) > nanos)
+        else {
+            for (long startTime = System.nanoTime();;) {
+                ForkJoinTask<?> t;
+                if ((t = pollScan(false)) != null)
+                    t.doExec();
+                else if (isQuiescent())
+                    return true;
+                else if ((System.nanoTime() - startTime) > nanos)
                     return false;
-                Thread.yield(); // cannot block
-            }
-            found = false;
-            for (int m = wl - 1, j = (m + 1) << 2; j >= 0; --j) {
-                ForkJoinTask<?> t; WorkQueue q; int b, k;
-                if ((k = r++ & m) <= m && k >= 0 && (q = ws[k]) != null &&
-                    (b = q.base) - q.top < 0) {
-                    found = true;
-                    if ((t = q.pollAt(b)) != null)
-                        t.doExec();
-                    break;
-                }
+                else
+                    Thread.yield(); // cannot block
             }
         }
-        return true;
     }
 
     /**
@@ -3422,17 +3111,19 @@
         throws InterruptedException {
         ForkJoinPool p;
         ForkJoinWorkerThread wt;
+        WorkQueue w;
         Thread t = Thread.currentThread();
         if ((t instanceof ForkJoinWorkerThread) &&
-            (p = (wt = (ForkJoinWorkerThread)t).pool) != null) {
-            WorkQueue w = wt.workQueue;
+            (p = (wt = (ForkJoinWorkerThread)t).pool) != null &&
+            (w = wt.workQueue) != null) {
+            int block;
             while (!blocker.isReleasable()) {
-                if (p.tryCompensate(w)) {
+                if ((block = p.tryCompensate(w)) != 0) {
                     try {
                         do {} while (!blocker.isReleasable() &&
                                      !blocker.block());
                     } finally {
-                        U.getAndAddLong(p, CTL, AC_UNIT);
+                        CTL.getAndAdd(p, (block > 0) ? RC_UNIT : 0L);
                     }
                     break;
                 }
@@ -3444,6 +3135,55 @@
         }
     }
 
+    /**
+     * If the given executor is a ForkJoinPool, poll and execute
+     * AsynchronousCompletionTasks from worker's queue until none are
+     * available or blocker is released.
+     */
+    static void helpAsyncBlocker(Executor e, ManagedBlocker blocker) {
+        if (blocker != null && (e instanceof ForkJoinPool)) {
+            WorkQueue w; ForkJoinWorkerThread wt; WorkQueue[] ws; int r, n;
+            ForkJoinPool p = (ForkJoinPool)e;
+            Thread thread = Thread.currentThread();
+            if (thread instanceof ForkJoinWorkerThread &&
+                (wt = (ForkJoinWorkerThread)thread).pool == p)
+                w = wt.workQueue;
+            else if ((r = ThreadLocalRandom.getProbe()) != 0 &&
+                     (ws = p.workQueues) != null && (n = ws.length) > 0)
+                w = ws[(n - 1) & r & SQMASK];
+            else
+                w = null;
+            if (w != null) {
+                for (;;) {
+                    int b = w.base, s = w.top, d, al; ForkJoinTask<?>[] a;
+                    if ((a = w.array) != null && (d = b - s) < 0 &&
+                        (al = a.length) > 0) {
+                        int index = (al - 1) & b;
+                        ForkJoinTask<?> t = (ForkJoinTask<?>)
+                            QA.getAcquire(a, index);
+                        if (blocker.isReleasable())
+                            break;
+                        else if (b++ == w.base) {
+                            if (t == null) {
+                                if (d == -1)
+                                    break;
+                            }
+                            else if (!(t instanceof CompletableFuture.
+                                  AsynchronousCompletionTask))
+                                break;
+                            else if (QA.compareAndSet(a, index, t, null)) {
+                                w.base = b;
+                                t.doExec();
+                            }
+                        }
+                    }
+                    else
+                        break;
+                }
+            }
+        }
+    }
+
     // AbstractExecutorService overrides.  These rely on undocumented
     // fact that ForkJoinTask.adapt returns ForkJoinTasks that also
     // implement RunnableFuture.
@@ -3456,24 +3196,17 @@
         return new ForkJoinTask.AdaptedCallable<T>(callable);
     }
 
-    // Unsafe mechanics
-    private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-    private static final long CTL;
-    private static final long RUNSTATE;
-    private static final int ABASE;
-    private static final int ASHIFT;
+    // VarHandle mechanics
+    private static final VarHandle CTL;
+    private static final VarHandle MODE;
+    private static final VarHandle QA;
 
     static {
         try {
-            CTL = U.objectFieldOffset
-                (ForkJoinPool.class.getDeclaredField("ctl"));
-            RUNSTATE = U.objectFieldOffset
-                (ForkJoinPool.class.getDeclaredField("runState"));
-            ABASE = U.arrayBaseOffset(ForkJoinTask[].class);
-            int scale = U.arrayIndexScale(ForkJoinTask[].class);
-            if ((scale & (scale - 1)) != 0)
-                throw new Error("array index scale not a power of two");
-            ASHIFT = 31 - Integer.numberOfLeadingZeros(scale);
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            CTL = l.findVarHandle(ForkJoinPool.class, "ctl", long.class);
+            MODE = l.findVarHandle(ForkJoinPool.class, "mode", int.class);
+            QA = MethodHandles.arrayElementVarHandle(ForkJoinTask[].class);
         } catch (ReflectiveOperationException e) {
             throw new Error(e);
         }
@@ -3497,51 +3230,10 @@
 
         common = java.security.AccessController.doPrivileged
             (new java.security.PrivilegedAction<ForkJoinPool>() {
-                public ForkJoinPool run() { return makeCommonPool(); }});
+                    public ForkJoinPool run() {
+                        return new ForkJoinPool((byte)0); }});
 
-        // report 1 even if threads disabled
-        COMMON_PARALLELISM = Math.max(common.config & SMASK, 1);
-    }
-
-    /**
-     * Creates and returns the common pool, respecting user settings
-     * specified via system properties.
-     */
-    @SuppressWarnings("deprecation") // Class.newInstance
-    static ForkJoinPool makeCommonPool() {
-        int parallelism = -1;
-        ForkJoinWorkerThreadFactory factory = null;
-        UncaughtExceptionHandler handler = null;
-        try {  // ignore exceptions in accessing/parsing properties
-            String pp = System.getProperty
-                ("java.util.concurrent.ForkJoinPool.common.parallelism");
-            String fp = System.getProperty
-                ("java.util.concurrent.ForkJoinPool.common.threadFactory");
-            String hp = System.getProperty
-                ("java.util.concurrent.ForkJoinPool.common.exceptionHandler");
-            if (pp != null)
-                parallelism = Integer.parseInt(pp);
-            if (fp != null)
-                factory = ((ForkJoinWorkerThreadFactory)ClassLoader.
-                           getSystemClassLoader().loadClass(fp).newInstance());
-            if (hp != null)
-                handler = ((UncaughtExceptionHandler)ClassLoader.
-                           getSystemClassLoader().loadClass(hp).newInstance());
-        } catch (Exception ignore) {
-        }
-        if (factory == null) {
-            if (System.getSecurityManager() == null)
-                factory = defaultForkJoinWorkerThreadFactory;
-            else // use security-managed default
-                factory = new InnocuousForkJoinWorkerThreadFactory();
-        }
-        if (parallelism < 0 && // default 1 less than #cores
-            (parallelism = Runtime.getRuntime().availableProcessors() - 1) <= 0)
-            parallelism = 1;
-        if (parallelism > MAX_CAP)
-            parallelism = MAX_CAP;
-        return new ForkJoinPool(parallelism, factory, handler, LIFO_QUEUE,
-                                "ForkJoinPool.commonPool-worker-");
+        COMMON_PARALLELISM = Math.max(common.mode & SMASK, 1);
     }
 
     /**
--- a/src/java.base/share/classes/java/util/concurrent/ForkJoinTask.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/util/concurrent/ForkJoinTask.java	Thu Jul 21 20:09:20 2016 -0700
@@ -36,6 +36,8 @@
 package java.util.concurrent;
 
 import java.io.Serializable;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.lang.ref.ReferenceQueue;
 import java.lang.ref.WeakReference;
 import java.lang.reflect.Constructor;
@@ -92,7 +94,7 @@
  * encountering the exception; minimally only the latter.
  *
  * <p>It is possible to define and use ForkJoinTasks that may block,
- * but doing do requires three further considerations: (1) Completion
+ * but doing so requires three further considerations: (1) Completion
  * of few if any <em>other</em> tasks should be dependent on a task
  * that blocks on external synchronization or I/O. Event-style async
  * tasks that are never joined (for example, those subclassing {@link
@@ -259,7 +261,7 @@
         for (int s;;) {
             if ((s = status) < 0)
                 return s;
-            if (U.compareAndSwapInt(this, STATUS, s, s | completion)) {
+            if (STATUS.compareAndSet(this, s, s | completion)) {
                 if ((s >>> 16) != 0)
                     synchronized (this) { notifyAll(); }
                 return completion;
@@ -297,7 +299,7 @@
     final void internalWait(long timeout) {
         int s;
         if ((s = status) >= 0 && // force completer to issue notify
-            U.compareAndSwapInt(this, STATUS, s, s | SIGNAL)) {
+            STATUS.compareAndSet(this, s, s | SIGNAL)) {
             synchronized (this) {
                 if (status >= 0)
                     try { wait(timeout); } catch (InterruptedException ie) { }
@@ -319,7 +321,7 @@
         if (s >= 0 && (s = status) >= 0) {
             boolean interrupted = false;
             do {
-                if (U.compareAndSwapInt(this, STATUS, s, s | SIGNAL)) {
+                if (STATUS.compareAndSet(this, s, s | SIGNAL)) {
                     synchronized (this) {
                         if (status >= 0) {
                             try {
@@ -353,7 +355,7 @@
                   ForkJoinPool.common.tryExternalUnpush(this) ? doExec() :
                   0)) >= 0) {
             while ((s = status) >= 0) {
-                if (U.compareAndSwapInt(this, STATUS, s, s | SIGNAL)) {
+                if (STATUS.compareAndSet(this, s, s | SIGNAL)) {
                     synchronized (this) {
                         if (status >= 0)
                             wait(0L);
@@ -400,22 +402,24 @@
     // Exception table support
 
     /**
-     * Table of exceptions thrown by tasks, to enable reporting by
-     * callers. Because exceptions are rare, we don't directly keep
+     * Hash table of exceptions thrown by tasks, to enable reporting
+     * by callers. Because exceptions are rare, we don't directly keep
      * them with task objects, but instead use a weak ref table.  Note
      * that cancellation exceptions don't appear in the table, but are
      * instead recorded as status values.
      *
-     * Note: These statics are initialized below in static block.
+     * The exception table has a fixed capacity.
      */
-    private static final ExceptionNode[] exceptionTable;
-    private static final ReentrantLock exceptionTableLock;
-    private static final ReferenceQueue<Object> exceptionTableRefQueue;
+    private static final ExceptionNode[] exceptionTable
+        = new ExceptionNode[32];
 
-    /**
-     * Fixed capacity for exceptionTable.
-     */
-    private static final int EXCEPTION_MAP_CAPACITY = 32;
+    /** Lock protecting access to exceptionTable. */
+    private static final ReentrantLock exceptionTableLock
+        = new ReentrantLock();
+
+    /** Reference queue of stale exceptionally completed tasks. */
+    private static final ReferenceQueue<ForkJoinTask<?>> exceptionTableRefQueue
+        = new ReferenceQueue<ForkJoinTask<?>>();
 
     /**
      * Key-value nodes for exception table.  The chained hash table
@@ -435,7 +439,7 @@
         final long thrower;  // use id not ref to avoid weak cycles
         final int hashCode;  // store task hashCode before weak ref disappears
         ExceptionNode(ForkJoinTask<?> task, Throwable ex, ExceptionNode next,
-                      ReferenceQueue<Object> exceptionTableRefQueue) {
+                      ReferenceQueue<ForkJoinTask<?>> exceptionTableRefQueue) {
             super(task, exceptionTableRefQueue);
             this.ex = ex;
             this.next = next;
@@ -599,9 +603,8 @@
     private static void expungeStaleExceptions() {
         for (Object x; (x = exceptionTableRefQueue.poll()) != null;) {
             if (x instanceof ExceptionNode) {
-                int hashCode = ((ExceptionNode)x).hashCode;
                 ExceptionNode[] t = exceptionTable;
-                int i = hashCode & (t.length - 1);
+                int i = ((ExceptionNode)x).hashCode & (t.length - 1);
                 ExceptionNode e = t[i];
                 ExceptionNode pred = null;
                 while (e != null) {
@@ -1031,7 +1034,7 @@
                 while ((s = status) >= 0 &&
                        (ns = deadline - System.nanoTime()) > 0L) {
                     if ((ms = TimeUnit.NANOSECONDS.toMillis(ns)) > 0L &&
-                        U.compareAndSwapInt(this, STATUS, s, s | SIGNAL)) {
+                        STATUS.compareAndSet(this, s, s | SIGNAL)) {
                         synchronized (this) {
                             if (status >= 0)
                                 wait(ms); // OK to throw InterruptedException
@@ -1324,8 +1327,8 @@
      */
     public final short setForkJoinTaskTag(short newValue) {
         for (int s;;) {
-            if (U.compareAndSwapInt(this, STATUS, s = status,
-                                    (s & ~SMASK) | (newValue & SMASK)))
+            if (STATUS.compareAndSet(this, s = status,
+                                     (s & ~SMASK) | (newValue & SMASK)))
                 return (short)s;
         }
     }
@@ -1348,8 +1351,8 @@
         for (int s;;) {
             if ((short)(s = status) != expect)
                 return false;
-            if (U.compareAndSwapInt(this, STATUS, s,
-                                    (s & ~SMASK) | (update & SMASK)))
+            if (STATUS.compareAndSet(this, s,
+                                     (s & ~SMASK) | (update & SMASK)))
                 return true;
         }
     }
@@ -1510,17 +1513,12 @@
             setExceptionalCompletion((Throwable)ex);
     }
 
-    // Unsafe mechanics
-    private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-    private static final long STATUS;
-
+    // VarHandle mechanics
+    private static final VarHandle STATUS;
     static {
-        exceptionTableLock = new ReentrantLock();
-        exceptionTableRefQueue = new ReferenceQueue<Object>();
-        exceptionTable = new ExceptionNode[EXCEPTION_MAP_CAPACITY];
         try {
-            STATUS = U.objectFieldOffset
-                (ForkJoinTask.class.getDeclaredField("status"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            STATUS = l.findVarHandle(ForkJoinTask.class, "status", int.class);
         } catch (ReflectiveOperationException e) {
             throw new Error(e);
         }
--- a/src/java.base/share/classes/java/util/concurrent/ForkJoinWorkerThread.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/util/concurrent/ForkJoinWorkerThread.java	Thu Jul 21 20:09:20 2016 -0700
@@ -66,8 +66,9 @@
      * owning thread.
      *
      * Support for (non-public) subclass InnocuousForkJoinWorkerThread
-     * requires that we break quite a lot of encapsulation (via Unsafe)
-     * both here and in the subclass to access and set Thread fields.
+     * requires that we break quite a lot of encapsulation (via helper
+     * methods in ThreadLocalRandom) both here and in the subclass to
+     * access and set Thread fields.
      */
 
     final ForkJoinPool pool;                // the pool this thread works in
@@ -92,8 +93,8 @@
     ForkJoinWorkerThread(ForkJoinPool pool, ThreadGroup threadGroup,
                          AccessControlContext acc) {
         super(threadGroup, null, "aForkJoinWorkerThread");
-        U.putObjectRelease(this, INHERITEDACCESSCONTROLCONTEXT, acc);
-        eraseThreadLocals(); // clear before registering
+        ThreadLocalRandom.setInheritedAccessControlContext(this, acc);
+        ThreadLocalRandom.eraseThreadLocals(this); // clear before registering
         this.pool = pool;
         this.workQueue = pool.registerWorker(this);
     }
@@ -171,37 +172,11 @@
     }
 
     /**
-     * Erases ThreadLocals by nulling out Thread maps.
-     */
-    final void eraseThreadLocals() {
-        U.putObject(this, THREADLOCALS, null);
-        U.putObject(this, INHERITABLETHREADLOCALS, null);
-    }
-
-    /**
      * Non-public hook method for InnocuousForkJoinWorkerThread.
      */
     void afterTopLevelExec() {
     }
 
-    // Set up to allow setting thread fields in constructor
-    private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-    private static final long THREADLOCALS;
-    private static final long INHERITABLETHREADLOCALS;
-    private static final long INHERITEDACCESSCONTROLCONTEXT;
-    static {
-        try {
-            THREADLOCALS = U.objectFieldOffset
-                (Thread.class.getDeclaredField("threadLocals"));
-            INHERITABLETHREADLOCALS = U.objectFieldOffset
-                (Thread.class.getDeclaredField("inheritableThreadLocals"));
-            INHERITEDACCESSCONTROLCONTEXT = U.objectFieldOffset
-                (Thread.class.getDeclaredField("inheritedAccessControlContext"));
-        } catch (ReflectiveOperationException e) {
-            throw new Error(e);
-        }
-    }
-
     /**
      * A worker thread that has no permissions, is not a member of any
      * user-defined ThreadGroup, and erases all ThreadLocals after
@@ -210,7 +185,7 @@
     static final class InnocuousForkJoinWorkerThread extends ForkJoinWorkerThread {
         /** The ThreadGroup for all InnocuousForkJoinWorkerThreads */
         private static final ThreadGroup innocuousThreadGroup =
-            createThreadGroup();
+            ThreadLocalRandom.createThreadGroup("InnocuousForkJoinWorkerThreadGroup");
 
         /** An AccessControlContext supporting no privileges */
         private static final AccessControlContext INNOCUOUS_ACC =
@@ -225,7 +200,7 @@
 
         @Override // to erase ThreadLocals
         void afterTopLevelExec() {
-            eraseThreadLocals();
+            ThreadLocalRandom.eraseThreadLocals(this);
         }
 
         @Override // to always report system loader
@@ -241,33 +216,5 @@
             throw new SecurityException("setContextClassLoader");
         }
 
-        /**
-         * Returns a new group with the system ThreadGroup (the
-         * topmost, parent-less group) as parent.  Uses Unsafe to
-         * traverse Thread.group and ThreadGroup.parent fields.
-         */
-        private static ThreadGroup createThreadGroup() {
-            try {
-                jdk.internal.misc.Unsafe u = jdk.internal.misc.Unsafe.getUnsafe();
-                long tg = u.objectFieldOffset
-                    (Thread.class.getDeclaredField("group"));
-                long gp = u.objectFieldOffset
-                    (ThreadGroup.class.getDeclaredField("parent"));
-                ThreadGroup group = (ThreadGroup)
-                    u.getObject(Thread.currentThread(), tg);
-                while (group != null) {
-                    ThreadGroup parent = (ThreadGroup)u.getObject(group, gp);
-                    if (parent == null)
-                        return new ThreadGroup(group,
-                                               "InnocuousForkJoinWorkerThreadGroup");
-                    group = parent;
-                }
-            } catch (ReflectiveOperationException e) {
-                throw new Error(e);
-            }
-            // fall through if null as cannot-happen safeguard
-            throw new Error("Cannot create ThreadGroup");
-        }
     }
-
 }
--- a/src/java.base/share/classes/java/util/concurrent/FutureTask.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/util/concurrent/FutureTask.java	Thu Jul 21 20:09:20 2016 -0700
@@ -35,6 +35,8 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.util.concurrent.locks.LockSupport;
 
 /**
@@ -69,9 +71,6 @@
      * cancellation races. Sync control in the current design relies
      * on a "state" field updated via CAS to track completion, along
      * with a simple Treiber stack to hold waiting threads.
-     *
-     * Style note: As usual, we bypass overhead of using
-     * AtomicXFieldUpdaters and instead directly use Unsafe intrinsics.
      */
 
     /**
@@ -163,9 +162,8 @@
     }
 
     public boolean cancel(boolean mayInterruptIfRunning) {
-        if (!(state == NEW &&
-              U.compareAndSwapInt(this, STATE, NEW,
-                  mayInterruptIfRunning ? INTERRUPTING : CANCELLED)))
+        if (!(state == NEW && STATE.compareAndSet
+              (this, NEW, mayInterruptIfRunning ? INTERRUPTING : CANCELLED)))
             return false;
         try {    // in case call to interrupt throws exception
             if (mayInterruptIfRunning) {
@@ -174,7 +172,7 @@
                     if (t != null)
                         t.interrupt();
                 } finally { // final state
-                    U.putIntRelease(this, STATE, INTERRUPTED);
+                    STATE.setRelease(this, INTERRUPTED);
                 }
             }
         } finally {
@@ -228,9 +226,9 @@
      * @param v the value
      */
     protected void set(V v) {
-        if (U.compareAndSwapInt(this, STATE, NEW, COMPLETING)) {
+        if (STATE.compareAndSet(this, NEW, COMPLETING)) {
             outcome = v;
-            U.putIntRelease(this, STATE, NORMAL); // final state
+            STATE.setRelease(this, NORMAL); // final state
             finishCompletion();
         }
     }
@@ -246,16 +244,16 @@
      * @param t the cause of failure
      */
     protected void setException(Throwable t) {
-        if (U.compareAndSwapInt(this, STATE, NEW, COMPLETING)) {
+        if (STATE.compareAndSet(this, NEW, COMPLETING)) {
             outcome = t;
-            U.putIntRelease(this, STATE, EXCEPTIONAL); // final state
+            STATE.setRelease(this, EXCEPTIONAL); // final state
             finishCompletion();
         }
     }
 
     public void run() {
         if (state != NEW ||
-            !U.compareAndSwapObject(this, RUNNER, null, Thread.currentThread()))
+            !RUNNER.compareAndSet(this, null, Thread.currentThread()))
             return;
         try {
             Callable<V> c = callable;
@@ -296,7 +294,7 @@
      */
     protected boolean runAndReset() {
         if (state != NEW ||
-            !U.compareAndSwapObject(this, RUNNER, null, Thread.currentThread()))
+            !RUNNER.compareAndSet(this, null, Thread.currentThread()))
             return false;
         boolean ran = false;
         int s = state;
@@ -363,7 +361,7 @@
     private void finishCompletion() {
         // assert state > COMPLETING;
         for (WaitNode q; (q = waiters) != null;) {
-            if (U.compareAndSwapObject(this, WAITERS, q, null)) {
+            if (WAITERS.weakCompareAndSetVolatile(this, q, null)) {
                 for (;;) {
                     Thread t = q.thread;
                     if (t != null) {
@@ -425,8 +423,7 @@
                 q = new WaitNode();
             }
             else if (!queued)
-                queued = U.compareAndSwapObject(this, WAITERS,
-                                                q.next = waiters, q);
+                queued = WAITERS.weakCompareAndSetVolatile(this, q.next = waiters, q);
             else if (timed) {
                 final long parkNanos;
                 if (startTime == 0L) { // first time
@@ -475,7 +472,7 @@
                         if (pred.thread == null) // check for race
                             continue retry;
                     }
-                    else if (!U.compareAndSwapObject(this, WAITERS, q, s))
+                    else if (!WAITERS.compareAndSet(this, q, s))
                         continue retry;
                 }
                 break;
@@ -483,19 +480,16 @@
         }
     }
 
-    // Unsafe mechanics
-    private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-    private static final long STATE;
-    private static final long RUNNER;
-    private static final long WAITERS;
+    // VarHandle mechanics
+    private static final VarHandle STATE;
+    private static final VarHandle RUNNER;
+    private static final VarHandle WAITERS;
     static {
         try {
-            STATE = U.objectFieldOffset
-                (FutureTask.class.getDeclaredField("state"));
-            RUNNER = U.objectFieldOffset
-                (FutureTask.class.getDeclaredField("runner"));
-            WAITERS = U.objectFieldOffset
-                (FutureTask.class.getDeclaredField("waiters"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            STATE = l.findVarHandle(FutureTask.class, "state", int.class);
+            RUNNER = l.findVarHandle(FutureTask.class, "runner", Thread.class);
+            WAITERS = l.findVarHandle(FutureTask.class, "waiters", WaitNode.class);
         } catch (ReflectiveOperationException e) {
             throw new Error(e);
         }
--- a/src/java.base/share/classes/java/util/concurrent/LinkedTransferQueue.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/util/concurrent/LinkedTransferQueue.java	Thu Jul 21 20:09:20 2016 -0700
@@ -35,6 +35,8 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.util.AbstractQueue;
 import java.util.Arrays;
 import java.util.Collection;
@@ -444,7 +446,7 @@
 
     /**
      * Queue nodes. Uses Object, not E, for items to allow forgetting
-     * them after use.  Relies heavily on Unsafe mechanics to minimize
+     * them after use.  Relies heavily on VarHandles to minimize
      * unnecessary ordering constraints: Writes that are intrinsically
      * ordered wrt other accesses or CASes use simple relaxed forms.
      */
@@ -456,12 +458,12 @@
 
         // CAS methods for fields
         final boolean casNext(Node cmp, Node val) {
-            return U.compareAndSwapObject(this, NEXT, cmp, val);
+            return NEXT.compareAndSet(this, cmp, val);
         }
 
         final boolean casItem(Object cmp, Object val) {
             // assert cmp == null || cmp.getClass() != Node.class;
-            return U.compareAndSwapObject(this, ITEM, cmp, val);
+            return ITEM.compareAndSet(this, cmp, val);
         }
 
         /**
@@ -469,7 +471,7 @@
          * only be seen after publication via casNext.
          */
         Node(Object item, boolean isData) {
-            U.putObject(this, ITEM, item); // relaxed write
+            ITEM.set(this, item); // relaxed write
             this.isData = isData;
         }
 
@@ -478,7 +480,7 @@
          * only after CASing head field, so uses relaxed write.
          */
         final void forgetNext() {
-            U.putObject(this, NEXT, this);
+            NEXT.set(this, this);
         }
 
         /**
@@ -491,8 +493,8 @@
          * else we don't care).
          */
         final void forgetContents() {
-            U.putObject(this, ITEM, this);
-            U.putObject(this, WAITER, null);
+            ITEM.set(this, this);
+            WAITER.set(this, null);
         }
 
         /**
@@ -537,19 +539,16 @@
 
         private static final long serialVersionUID = -3375979862319811754L;
 
-        // Unsafe mechanics
-        private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-        private static final long ITEM;
-        private static final long NEXT;
-        private static final long WAITER;
+        // VarHandle mechanics
+        private static final VarHandle ITEM;
+        private static final VarHandle NEXT;
+        private static final VarHandle WAITER;
         static {
             try {
-                ITEM = U.objectFieldOffset
-                    (Node.class.getDeclaredField("item"));
-                NEXT = U.objectFieldOffset
-                    (Node.class.getDeclaredField("next"));
-                WAITER = U.objectFieldOffset
-                    (Node.class.getDeclaredField("waiter"));
+                MethodHandles.Lookup l = MethodHandles.lookup();
+                ITEM = l.findVarHandle(Node.class, "item", Object.class);
+                NEXT = l.findVarHandle(Node.class, "next", Node.class);
+                WAITER = l.findVarHandle(Node.class, "waiter", Thread.class);
             } catch (ReflectiveOperationException e) {
                 throw new Error(e);
             }
@@ -567,15 +566,15 @@
 
     // CAS methods for fields
     private boolean casTail(Node cmp, Node val) {
-        return U.compareAndSwapObject(this, TAIL, cmp, val);
+        return TAIL.compareAndSet(this, cmp, val);
     }
 
     private boolean casHead(Node cmp, Node val) {
-        return U.compareAndSwapObject(this, HEAD, cmp, val);
+        return HEAD.compareAndSet(this, cmp, val);
     }
 
     private boolean casSweepVotes(int cmp, int val) {
-        return U.compareAndSwapInt(this, SWEEPVOTES, cmp, val);
+        return SWEEPVOTES.compareAndSet(this, cmp, val);
     }
 
     /*
@@ -1562,20 +1561,19 @@
         }
     }
 
-    // Unsafe mechanics
-
-    private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-    private static final long HEAD;
-    private static final long TAIL;
-    private static final long SWEEPVOTES;
+    // VarHandle mechanics
+    private static final VarHandle HEAD;
+    private static final VarHandle TAIL;
+    private static final VarHandle SWEEPVOTES;
     static {
         try {
-            HEAD = U.objectFieldOffset
-                (LinkedTransferQueue.class.getDeclaredField("head"));
-            TAIL = U.objectFieldOffset
-                (LinkedTransferQueue.class.getDeclaredField("tail"));
-            SWEEPVOTES = U.objectFieldOffset
-                (LinkedTransferQueue.class.getDeclaredField("sweepVotes"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            HEAD = l.findVarHandle(LinkedTransferQueue.class, "head",
+                                   Node.class);
+            TAIL = l.findVarHandle(LinkedTransferQueue.class, "tail",
+                                   Node.class);
+            SWEEPVOTES = l.findVarHandle(LinkedTransferQueue.class, "sweepVotes",
+                                         int.class);
         } catch (ReflectiveOperationException e) {
             throw new Error(e);
         }
--- a/src/java.base/share/classes/java/util/concurrent/Phaser.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/util/concurrent/Phaser.java	Thu Jul 21 20:09:20 2016 -0700
@@ -35,6 +35,8 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.concurrent.locks.LockSupport;
 
@@ -221,7 +223,6 @@
  *   phaser.arriveAndDeregister();
  * }}</pre>
  *
- *
  * <p>To create a set of {@code n} tasks using a tree of phasers, you
  * could use code of the following form, assuming a Task class with a
  * constructor accepting a {@code Phaser} that it registers with upon
@@ -384,7 +385,7 @@
             int unarrived = (counts == EMPTY) ? 0 : (counts & UNARRIVED_MASK);
             if (unarrived <= 0)
                 throw new IllegalStateException(badArrive(s));
-            if (U.compareAndSwapLong(this, STATE, s, s-=adjust)) {
+            if (STATE.compareAndSet(this, s, s-=adjust)) {
                 if (unarrived == 1) {
                     long n = s & PARTIES_MASK;  // base of next state
                     int nextUnarrived = (int)n >>> PARTIES_SHIFT;
@@ -397,12 +398,12 @@
                             n |= nextUnarrived;
                         int nextPhase = (phase + 1) & MAX_PHASE;
                         n |= (long)nextPhase << PHASE_SHIFT;
-                        U.compareAndSwapLong(this, STATE, s, n);
+                        STATE.compareAndSet(this, s, n);
                         releaseWaiters(phase);
                     }
                     else if (nextUnarrived == 0) { // propagate deregistration
                         phase = parent.doArrive(ONE_DEREGISTER);
-                        U.compareAndSwapLong(this, STATE, s, s | EMPTY);
+                        STATE.compareAndSet(this, s, s | EMPTY);
                     }
                     else
                         phase = parent.doArrive(ONE_ARRIVAL);
@@ -437,13 +438,13 @@
                 if (parent == null || reconcileState() == s) {
                     if (unarrived == 0)             // wait out advance
                         root.internalAwaitAdvance(phase, null);
-                    else if (U.compareAndSwapLong(this, STATE, s, s + adjust))
+                    else if (STATE.compareAndSet(this, s, s + adjust))
                         break;
                 }
             }
             else if (parent == null) {              // 1st root registration
                 long next = ((long)phase << PHASE_SHIFT) | adjust;
-                if (U.compareAndSwapLong(this, STATE, s, next))
+                if (STATE.compareAndSet(this, s, next))
                     break;
             }
             else {
@@ -455,8 +456,8 @@
                         // finish registration whenever parent registration
                         // succeeded, even when racing with termination,
                         // since these are part of the same "transaction".
-                        while (!U.compareAndSwapLong
-                               (this, STATE, s,
+                        while (!STATE.weakCompareAndSetVolatile
+                               (this, s,
                                 ((long)phase << PHASE_SHIFT) | adjust)) {
                             s = state;
                             phase = (int)(root.state >>> PHASE_SHIFT);
@@ -487,8 +488,8 @@
             // CAS to root phase with current parties, tripping unarrived
             while ((phase = (int)(root.state >>> PHASE_SHIFT)) !=
                    (int)(s >>> PHASE_SHIFT) &&
-                   !U.compareAndSwapLong
-                   (this, STATE, s,
+                   !STATE.weakCompareAndSetVolatile
+                   (this, s,
                     s = (((long)phase << PHASE_SHIFT) |
                          ((phase < 0) ? (s & COUNTS_MASK) :
                           (((p = (int)s >>> PARTIES_SHIFT) == 0) ? EMPTY :
@@ -677,7 +678,7 @@
             int unarrived = (counts == EMPTY) ? 0 : (counts & UNARRIVED_MASK);
             if (unarrived <= 0)
                 throw new IllegalStateException(badArrive(s));
-            if (U.compareAndSwapLong(this, STATE, s, s -= ONE_ARRIVAL)) {
+            if (STATE.compareAndSet(this, s, s -= ONE_ARRIVAL)) {
                 if (unarrived > 1)
                     return root.internalAwaitAdvance(phase, null);
                 if (root != this)
@@ -692,7 +693,7 @@
                     n |= nextUnarrived;
                 int nextPhase = (phase + 1) & MAX_PHASE;
                 n |= (long)nextPhase << PHASE_SHIFT;
-                if (!U.compareAndSwapLong(this, STATE, s, n))
+                if (!STATE.compareAndSet(this, s, n))
                     return (int)(state >>> PHASE_SHIFT); // terminated
                 releaseWaiters(phase);
                 return nextPhase;
@@ -808,7 +809,7 @@
         final Phaser root = this.root;
         long s;
         while ((s = root.state) >= 0) {
-            if (U.compareAndSwapLong(root, STATE, s, s | TERMINATION_BIT)) {
+            if (STATE.compareAndSet(root, s, s | TERMINATION_BIT)) {
                 // signal all threads
                 releaseWaiters(0); // Waiters on evenQ
                 releaseWaiters(1); // Waiters on oddQ
@@ -1043,6 +1044,8 @@
                     node = new QNode(this, phase, false, false, 0L);
                     node.wasInterrupted = interrupted;
                 }
+                else
+                    Thread.onSpinWait();
             }
             else if (node.isReleasable()) // done or aborted
                 break;
@@ -1131,14 +1134,12 @@
         }
     }
 
-    // Unsafe mechanics
-
-    private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-    private static final long STATE;
+    // VarHandle mechanics
+    private static final VarHandle STATE;
     static {
         try {
-            STATE = U.objectFieldOffset
-                (Phaser.class.getDeclaredField("state"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            STATE = l.findVarHandle(Phaser.class, "state", long.class);
         } catch (ReflectiveOperationException e) {
             throw new Error(e);
         }
--- a/src/java.base/share/classes/java/util/concurrent/PriorityBlockingQueue.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/util/concurrent/PriorityBlockingQueue.java	Thu Jul 21 20:09:20 2016 -0700
@@ -35,6 +35,8 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.util.AbstractQueue;
 import java.util.Arrays;
 import java.util.Collection;
@@ -289,7 +291,7 @@
         lock.unlock(); // must release and then re-acquire main lock
         Object[] newArray = null;
         if (allocationSpinLock == 0 &&
-            U.compareAndSwapInt(this, ALLOCATIONSPINLOCK, 0, 1)) {
+            ALLOCATIONSPINLOCK.compareAndSet(this, 0, 1)) {
             try {
                 int newCap = oldCap + ((oldCap < 64) ?
                                        (oldCap + 2) : // grow faster if small
@@ -1009,13 +1011,14 @@
         return new PBQSpliterator<E>(this, null, 0, -1);
     }
 
-    // Unsafe mechanics
-    private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-    private static final long ALLOCATIONSPINLOCK;
+    // VarHandle mechanics
+    private static final VarHandle ALLOCATIONSPINLOCK;
     static {
         try {
-            ALLOCATIONSPINLOCK = U.objectFieldOffset
-                (PriorityBlockingQueue.class.getDeclaredField("allocationSpinLock"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            ALLOCATIONSPINLOCK = l.findVarHandle(PriorityBlockingQueue.class,
+                                                 "allocationSpinLock",
+                                                 int.class);
         } catch (ReflectiveOperationException e) {
             throw new Error(e);
         }
--- a/src/java.base/share/classes/java/util/concurrent/SubmissionPublisher.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/util/concurrent/SubmissionPublisher.java	Thu Jul 21 20:09:20 2016 -0700
@@ -35,6 +35,8 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.locks.LockSupport;
@@ -866,7 +868,7 @@
 
     /** Subscriber for method consume */
     private static final class ConsumerSubscriber<T>
-            implements Flow.Subscriber<T> {
+        implements Flow.Subscriber<T> {
         final CompletableFuture<Void> status;
         final Consumer<? super T> consumer;
         Flow.Subscription subscription;
@@ -906,7 +908,7 @@
      */
     @SuppressWarnings("serial")
     static final class ConsumerTask<T> extends ForkJoinTask<Void>
-        implements Runnable {
+        implements Runnable, CompletableFuture.AsynchronousCompletionTask {
         final BufferedSubscription<T> consumer;
         ConsumerTask(BufferedSubscription<T> consumer) {
             this.consumer = consumer;
@@ -959,11 +961,9 @@
      * Blocking control relies on the "waiter" field. Producers set
      * the field before trying to block, but must then recheck (via
      * offer) before parking. Signalling then just unparks and clears
-     * waiter field. If the producer and consumer are both in the same
-     * ForkJoinPool, or consumers are running in commonPool, the
-     * producer attempts to help run consumer tasks that it forked
-     * before blocking.  To avoid potential cycles, only one level of
-     * helping is currently supported.
+     * waiter field. If the producer and/or consumer are using a
+     * ForkJoinPool, the producer attempts to help run consumer tasks
+     * via ForkJoinPool.helpAsyncBlocker before blocking.
      *
      * This class uses @Contended and heuristic field declaration
      * ordering to reduce false-sharing-based memory contention among
@@ -983,7 +983,6 @@
         volatile long demand;              // # unfilled requests
         int maxCapacity;                   // reduced on OOME
         int putStat;                       // offer result for ManagedBlocker
-        int helpDepth;                     // nested helping depth (at most 1)
         volatile int ctl;                  // atomic run state flags
         volatile int head;                 // next position to take
         int tail;                          // next position to put
@@ -1077,7 +1076,7 @@
                 alloc = true;
             }
             else {
-                U.fullFence();                   // recheck
+                VarHandle.fullFence();           // recheck
                 int h = head, t = tail, size = t + 1 - h;
                 if (cap >= size) {
                     a[(cap - 1) & t] = item;
@@ -1116,10 +1115,10 @@
                         if (a != null && cap > 0) {
                             int mask = cap - 1;
                             for (int j = head; j != t; ++j) {
-                                long k = ((long)(j & mask) << ASHIFT) + ABASE;
-                                Object x = U.getObjectVolatile(a, k);
+                                int k = j & mask;
+                                Object x = QA.getAcquire(a, k);
                                 if (x != null && // races with consumer
-                                    U.compareAndSwapObject(a, k, x, null))
+                                    QA.compareAndSet(a, k, x, null))
                                     newArray[j & newMask] = x;
                             }
                         }
@@ -1136,28 +1135,20 @@
          * initial offer return 0.
          */
         final int submit(T item) {
-            int stat; Executor e; ForkJoinWorkerThread w;
-            if ((stat = offer(item)) == 0 && helpDepth == 0 &&
-                ((e = executor) instanceof ForkJoinPool)) {
-                helpDepth = 1;
-                Thread thread = Thread.currentThread();
-                if ((thread instanceof ForkJoinWorkerThread) &&
-                    ((w = (ForkJoinWorkerThread)thread)).getPool() == e)
-                    stat = internalHelpConsume(w.workQueue, item);
-                else if (e == ForkJoinPool.commonPool())
-                    stat = externalHelpConsume
-                        (ForkJoinPool.commonSubmitterQueue(), item);
-                helpDepth = 0;
-            }
-            if (stat == 0 && (stat = offer(item)) == 0) {
+            int stat;
+            if ((stat = offer(item)) == 0) {
                 putItem = item;
                 timeout = 0L;
-                try {
-                    ForkJoinPool.managedBlock(this);
-                } catch (InterruptedException ie) {
-                    timeout = INTERRUPTED;
+                putStat = 0;
+                ForkJoinPool.helpAsyncBlocker(executor, this);
+                if ((stat = putStat) == 0) {
+                    try {
+                        ForkJoinPool.managedBlock(this);
+                    } catch (InterruptedException ie) {
+                        timeout = INTERRUPTED;
+                    }
+                    stat = putStat;
                 }
-                stat = putStat;
                 if (timeout < 0L)
                     Thread.currentThread().interrupt();
             }
@@ -1165,71 +1156,22 @@
         }
 
         /**
-         * Tries helping for FJ submitter.
-         */
-        private int internalHelpConsume(ForkJoinPool.WorkQueue w, T item) {
-            int stat = 0;
-            if (w != null) {
-                ForkJoinTask<?> t;
-                while ((t = w.peek()) != null && (t instanceof ConsumerTask)) {
-                    if ((stat = offer(item)) != 0 || !w.tryUnpush(t))
-                        break;
-                    ((ConsumerTask<?>)t).consumer.consume();
-                }
-            }
-            return stat;
-        }
-
-        /**
-         * Tries helping for non-FJ submitter.
-         */
-        private int externalHelpConsume(ForkJoinPool.WorkQueue w, T item) {
-            int stat = 0;
-            if (w != null) {
-                ForkJoinTask<?> t;
-                while ((t = w.peek()) != null && (t instanceof ConsumerTask)) {
-                    if ((stat = offer(item)) != 0 || !w.trySharedUnpush(t))
-                        break;
-                    ((ConsumerTask<?>)t).consumer.consume();
-                }
-            }
-            return stat;
-        }
-
-        /**
          * Timeout version; similar to submit.
          */
         final int timedOffer(T item, long nanos) {
-            int stat; Executor e;
-            if ((stat = offer(item)) == 0 && helpDepth == 0 &&
-                ((e = executor) instanceof ForkJoinPool)) {
-                Thread thread = Thread.currentThread();
-                if (((thread instanceof ForkJoinWorkerThread) &&
-                     ((ForkJoinWorkerThread)thread).getPool() == e) ||
-                    e == ForkJoinPool.commonPool()) {
-                    helpDepth = 1;
-                    ForkJoinTask<?> t;
-                    long deadline = System.nanoTime() + nanos;
-                    while ((t = ForkJoinTask.peekNextLocalTask()) != null &&
-                           (t instanceof ConsumerTask)) {
-                        if ((stat = offer(item)) != 0 ||
-                            (nanos = deadline - System.nanoTime()) <= 0L ||
-                            !t.tryUnfork())
-                            break;
-                        ((ConsumerTask<?>)t).consumer.consume();
+            int stat;
+            if ((stat = offer(item)) == 0 && (timeout = nanos) > 0L) {
+                putItem = item;
+                putStat = 0;
+                ForkJoinPool.helpAsyncBlocker(executor, this);
+                if ((stat = putStat) == 0) {
+                    try {
+                        ForkJoinPool.managedBlock(this);
+                    } catch (InterruptedException ie) {
+                        timeout = INTERRUPTED;
                     }
-                    helpDepth = 0;
+                    stat = putStat;
                 }
-            }
-            if (stat == 0 && (stat = offer(item)) == 0 &&
-                (timeout = nanos) > 0L) {
-                putItem = item;
-                try {
-                    ForkJoinPool.managedBlock(this);
-                } catch (InterruptedException ie) {
-                    timeout = INTERRUPTED;
-                }
-                stat = putStat;
                 if (timeout < 0L)
                     Thread.currentThread().interrupt();
             }
@@ -1249,22 +1191,20 @@
                 }
                 else if ((c & ACTIVE) != 0) { // ensure keep-alive
                     if ((c & CONSUME) != 0 ||
-                        U.compareAndSwapInt(this, CTL, c,
-                                            c | CONSUME))
+                        CTL.compareAndSet(this, c, c | CONSUME))
                         break;
                 }
                 else if (demand == 0L || tail == head)
                     break;
-                else if (U.compareAndSwapInt(this, CTL, c,
-                                             c | (ACTIVE | CONSUME))) {
+                else if (CTL.compareAndSet(this, c, c | (ACTIVE | CONSUME))) {
                     try {
                         e.execute(new ConsumerTask<T>(this));
                         break;
                     } catch (RuntimeException | Error ex) { // back out
                         do {} while (((c = ctl) & DISABLED) == 0 &&
                                      (c & ACTIVE) != 0 &&
-                                     !U.compareAndSwapInt(this, CTL, c,
-                                                          c & ~ACTIVE));
+                                     !CTL.weakCompareAndSetVolatile
+                                     (this, c, c & ~ACTIVE));
                         throw ex;
                     }
                 }
@@ -1300,10 +1240,10 @@
                     break;
                 else if ((c & ACTIVE) != 0) {
                     pendingError = ex;
-                    if (U.compareAndSwapInt(this, CTL, c, c | ERROR))
+                    if (CTL.compareAndSet(this, c, c | ERROR))
                         break; // cause consumer task to exit
                 }
-                else if (U.compareAndSwapInt(this, CTL, c, DISABLED)) {
+                else if (CTL.compareAndSet(this, c, DISABLED)) {
                     Flow.Subscriber<? super T> s = subscriber;
                     if (s != null && ex != null) {
                         try {
@@ -1330,7 +1270,7 @@
                     for (int c;;) {
                         if ((c = ctl) == DISABLED || (c & ACTIVE) == 0)
                             break;
-                        if (U.compareAndSwapInt(this, CTL, c, c & ~ACTIVE)) {
+                        if (CTL.compareAndSet(this, c, c & ~ACTIVE)) {
                             onError(ex);
                             break;
                         }
@@ -1343,8 +1283,8 @@
             for (int c;;) {
                 if ((c = ctl) == DISABLED)
                     break;
-                if (U.compareAndSwapInt(this, CTL, c,
-                                        c | (ACTIVE | CONSUME | COMPLETE))) {
+                if (CTL.compareAndSet(this, c,
+                                      c | (ACTIVE | CONSUME | COMPLETE))) {
                     if ((c & ACTIVE) == 0)
                         startOrDisable();
                     break;
@@ -1356,8 +1296,8 @@
             for (int c;;) {
                 if ((c = ctl) == DISABLED)
                     break;
-                if (U.compareAndSwapInt(this, CTL, c,
-                                        c | (ACTIVE | CONSUME | SUBSCRIBE))) {
+                if (CTL.compareAndSet(this, c,
+                                      c | (ACTIVE | CONSUME | SUBSCRIBE))) {
                     if ((c & ACTIVE) == 0)
                         startOrDisable();
                     break;
@@ -1375,11 +1315,11 @@
                 if ((c = ctl) == DISABLED)
                     break;
                 else if ((c & ACTIVE) != 0) {
-                    if (U.compareAndSwapInt(this, CTL, c,
-                                            c | (CONSUME | ERROR)))
+                    if (CTL.compareAndSet(this, c,
+                                          c | (CONSUME | ERROR)))
                         break;
                 }
-                else if (U.compareAndSwapInt(this, CTL, c, DISABLED)) {
+                else if (CTL.compareAndSet(this, c, DISABLED)) {
                     detach();
                     break;
                 }
@@ -1395,19 +1335,18 @@
                     long prev = demand, d;
                     if ((d = prev + n) < prev) // saturate
                         d = Long.MAX_VALUE;
-                    if (U.compareAndSwapLong(this, DEMAND, prev, d)) {
+                    if (DEMAND.compareAndSet(this, prev, d)) {
                         for (int c, h;;) {
                             if ((c = ctl) == DISABLED)
                                 break;
                             else if ((c & ACTIVE) != 0) {
                                 if ((c & CONSUME) != 0 ||
-                                    U.compareAndSwapInt(this, CTL, c,
-                                                        c | CONSUME))
+                                    CTL.compareAndSet(this, c, c | CONSUME))
                                     break;
                             }
                             else if ((h = head) != tail) {
-                                if (U.compareAndSwapInt(this, CTL, c,
-                                                        c | (ACTIVE|CONSUME))) {
+                                if (CTL.compareAndSet(this, c,
+                                                      c | (ACTIVE|CONSUME))) {
                                     startOrDisable();
                                     break;
                                 }
@@ -1476,16 +1415,14 @@
             if ((s = subscriber) != null) {           // else disabled
                 for (;;) {
                     long d = demand;
-                    int c; Object[] a; int n; long i; Object x; Thread w;
+                    int c; Object[] a; int n, i; Object x; Thread w;
                     if (((c = ctl) & (ERROR | SUBSCRIBE | DISABLED)) != 0) {
                         if (!checkControl(s, c))
                             break;
                     }
                     else if ((a = array) == null || h == tail ||
                              (n = a.length) == 0 ||
-                             (x = U.getObjectVolatile
-                              (a, (i = ((long)((n - 1) & h) << ASHIFT) + ABASE)))
-                             == null) {
+                             (x = QA.getAcquire(a, i = (n - 1) & h)) == null) {
                         if (!checkEmpty(s, c))
                             break;
                     }
@@ -1494,10 +1431,10 @@
                             break;
                     }
                     else if (((c & CONSUME) != 0 ||
-                              U.compareAndSwapInt(this, CTL, c, c | CONSUME)) &&
-                             U.compareAndSwapObject(a, i, x, null)) {
-                        U.putIntRelease(this, HEAD, ++h);
-                        U.getAndAddLong(this, DEMAND, -1L);
+                              CTL.compareAndSet(this, c, c | CONSUME)) &&
+                             QA.compareAndSet(a, i, x, null)) {
+                        HEAD.setRelease(this, ++h);
+                        DEMAND.getAndAdd(this, -1L);
                         if ((w = waiter) != null)
                             signalWaiter(w);
                         try {
@@ -1528,7 +1465,7 @@
                 }
             }
             else if ((c & SUBSCRIBE) != 0) {
-                if (U.compareAndSwapInt(this, CTL, c, c & ~SUBSCRIBE)) {
+                if (CTL.compareAndSet(this, c, c & ~SUBSCRIBE)) {
                     try {
                         if (s != null)
                             s.onSubscribe(this);
@@ -1551,9 +1488,9 @@
             boolean stat = true;
             if (head == tail) {
                 if ((c & CONSUME) != 0)
-                    U.compareAndSwapInt(this, CTL, c, c & ~CONSUME);
+                    CTL.compareAndSet(this, c, c & ~CONSUME);
                 else if ((c & COMPLETE) != 0) {
-                    if (U.compareAndSwapInt(this, CTL, c, DISABLED)) {
+                    if (CTL.compareAndSet(this, c, DISABLED)) {
                         try {
                             if (s != null)
                                 s.onComplete();
@@ -1561,7 +1498,7 @@
                         }
                     }
                 }
-                else if (U.compareAndSwapInt(this, CTL, c, c & ~ACTIVE))
+                else if (CTL.compareAndSet(this, c, c & ~ACTIVE))
                     stat = false;
             }
             return stat;
@@ -1574,8 +1511,8 @@
             boolean stat = true;
             if (demand == 0L) {
                 if ((c & CONSUME) != 0)
-                    U.compareAndSwapInt(this, CTL, c, c & ~CONSUME);
-                else if (U.compareAndSwapInt(this, CTL, c, c & ~ACTIVE))
+                    CTL.compareAndSet(this, c, c & ~CONSUME);
+                else if (CTL.compareAndSet(this, c, c & ~ACTIVE))
                     stat = false;
             }
             return stat;
@@ -1595,31 +1532,25 @@
             onError(ex);
         }
 
-        // Unsafe mechanics
-        private static final jdk.internal.misc.Unsafe U = jdk.internal.misc.Unsafe.getUnsafe();
-        private static final long CTL;
-        private static final long TAIL;
-        private static final long HEAD;
-        private static final long DEMAND;
-        private static final int ABASE;
-        private static final int ASHIFT;
+        // VarHandle mechanics
+        private static final VarHandle CTL;
+        private static final VarHandle TAIL;
+        private static final VarHandle HEAD;
+        private static final VarHandle DEMAND;
+        private static final VarHandle QA;
 
         static {
             try {
-                CTL = U.objectFieldOffset
-                    (BufferedSubscription.class.getDeclaredField("ctl"));
-                TAIL = U.objectFieldOffset
-                    (BufferedSubscription.class.getDeclaredField("tail"));
-                HEAD = U.objectFieldOffset
-                    (BufferedSubscription.class.getDeclaredField("head"));
-                DEMAND = U.objectFieldOffset
-                    (BufferedSubscription.class.getDeclaredField("demand"));
-
-                ABASE = U.arrayBaseOffset(Object[].class);
-                int scale = U.arrayIndexScale(Object[].class);
-                if ((scale & (scale - 1)) != 0)
-                    throw new Error("data type scale not a power of two");
-                ASHIFT = 31 - Integer.numberOfLeadingZeros(scale);
+                MethodHandles.Lookup l = MethodHandles.lookup();
+                CTL = l.findVarHandle(BufferedSubscription.class, "ctl",
+                                      int.class);
+                TAIL = l.findVarHandle(BufferedSubscription.class, "tail",
+                                       int.class);
+                HEAD = l.findVarHandle(BufferedSubscription.class, "head",
+                                       int.class);
+                DEMAND = l.findVarHandle(BufferedSubscription.class, "demand",
+                                         long.class);
+                QA = MethodHandles.arrayElementVarHandle(Object[].class);
             } catch (ReflectiveOperationException e) {
                 throw new Error(e);
             }
--- a/src/java.base/share/classes/java/util/concurrent/SynchronousQueue.java	Thu Jul 21 16:29:17 2016 +0200
+++ b/src/java.base/share/classes/java/util/concurrent/SynchronousQueue.java	Thu Jul 21 20:09:20 2016 -0700
@@ -36,6 +36,8 @@